From ae524736fe2c2786394c8e63b0f81f8edc5e4395 Mon Sep 17 00:00:00 2001 From: chuanqiw Date: Tue, 21 Apr 2020 16:49:12 +0800 Subject: [PATCH] model zoo v1.6.0 --- benchmarks/README.md | 18 +- benchmarks/common/base_benchmark_util.py | 47 +- benchmarks/common/base_model_init.py | 44 +- benchmarks/common/platform_util.py | 141 +- .../common/tensorflow/run_tf_benchmark.py | 0 benchmarks/common/tensorflow/start.sh | 662 +-- benchmarks/common/utils/multi_instance.py | 301 + .../tensorflow/draw/README.md | 110 - .../draw/inference/fp32/__init__.py | 19 - .../draw/inference/fp32/config.json | 8 - .../draw/inference/fp32/model_init.py | 59 - .../face_detection_and_alignment/__init__.py | 19 - .../tensorflow/mtcc/README.md | 88 - .../tensorflow/mtcc/inference/__init__.py | 19 - .../mtcc/inference/fp32/__init__.py | 19 - .../mtcc/inference/fp32/model_init.py | 56 - .../tensorflow/densenet169/README.md | 52 +- .../tensorflow/inception_resnet_v2/README.md | 80 +- .../tensorflow/inceptionv3/README.md | 117 +- .../tensorflow/inceptionv4/README.md | 76 +- .../tensorflow/mobilenet_v1/README.md | 103 +- .../mobilenet_v1/inference/fp32/model_init.py | 58 +- .../mobilenet_v1/inference/int8/model_init.py | 31 +- .../tensorflow/resnet101/README.md | 92 +- .../resnet101/inference/int8/model_init.py | 2 +- .../tensorflow/resnet50/README.md | 68 +- .../tensorflow/resnet50v1_5/README.md | 130 +- .../resnet50v1_5/training}/__init__.py | 2 - .../training/common_resnet50}/__init__.py | 0 .../training/common_resnet50}/config.json | 0 .../common_resnet50/resnet50_model_init.py | 107 + .../resnet50v1_5/training/fp32/__init__.py | 0 .../resnet50v1_5/training/fp32}/model_init.py | 11 +- benchmarks/image_segmentation/__init__.py | 19 - .../image_segmentation/tensorflow/__init__.py | 19 - .../tensorflow/maskrcnn/README.md | 105 - .../tensorflow/maskrcnn/__init__.py | 19 - .../tensorflow/maskrcnn/inference/__init__.py | 19 - .../maskrcnn/inference/fp32/__init__.py | 19 - .../maskrcnn/inference/fp32/config.json | 8 - .../maskrcnn/inference/fp32/model_init.py | 59 - benchmarks/language_modeling/__init__.py | 19 - .../language_modeling/tensorflow/__init__.py | 19 - .../tensorflow/lm-1b/README.md | 108 - .../tensorflow/lm-1b/__init__.py | 19 - .../tensorflow/lm-1b/inference/__init__.py | 19 - .../lm-1b/inference/fp32/__init__.py | 19 - .../lm-1b/inference/fp32/model_init.py | 77 - .../tensorflow/gnmt/README.md | 280 - .../tensorflow/gnmt/__init__.py | 19 - .../tensorflow/gnmt/inference/__init__.py | 19 - .../gnmt/inference/fp32/__init__.py | 19 - .../gnmt/inference/fp32/config.json | 7 - .../gnmt/inference/fp32/model_init.py | 128 - .../tensorflow/gnmt/training/__init__.py | 19 - .../tensorflow/gnmt/training/fp32/__init__.py | 19 - .../tensorflow/gnmt/training/fp32/config.json | 7 - .../gnmt/training/fp32/model_init.py | 169 - .../tensorflow/mlperf_gnmt/README.md | 86 + .../tensorflow/mlperf_gnmt/__init__.py | 0 .../mlperf_gnmt/inference/__init__.py | 0 .../mlperf_gnmt/inference/fp32/__init__.py | 0 .../mlperf_gnmt/inference/fp32/config.json | 7 + .../mlperf_gnmt/inference/fp32/model_init.py | 73 + .../transformer_lt_official/README.md | 90 + .../transformer_lt_official}/__init__.py | 0 .../inference}/__init__.py | 0 .../inference/fp32}/__init__.py | 0 .../inference/fp32/config.json | 0 .../inference/fp32/model_init.py | 113 + .../transformer_lt_official/requirements.txt | 2 + benchmarks/launch_benchmark.py | 70 +- benchmarks/object_detection/__init__.py | 2 +- .../tensorflow/faster_rcnn/README.md | 332 -- 
.../faster_rcnn/inference/fp32/__init__.py | 19 - .../faster_rcnn/inference/fp32/config.json | 7 - .../faster_rcnn/inference/fp32/model_init.py | 95 - .../faster_rcnn/inference/int8/config.json | 7 - .../faster_rcnn/inference/int8/model_init.py | 114 - .../tensorflow/faster_rcnn/requirements.txt | 8 - .../tensorflow/rfcn/README.md | 262 +- .../rfcn/inference/fp32/config.json | 1 + .../rfcn/inference/fp32/model_init.py | 127 +- .../rfcn/inference/int8/__init__.py | 2 + .../rfcn/inference/int8/model_init.py | 50 +- .../tensorflow/rfcn/requirements.txt | 2 +- .../tensorflow/ssd-mobilenet/README.md | 188 +- .../inference/fp32/infer_detections.py | 137 - .../inference/fp32/model_init.py | 77 +- .../inference/int8/model_init.py | 57 +- .../inference/ssdmobilenet_accuracy.sh | 47 - .../tensorflow/ssd-mobilenet/requirements.txt | 2 +- .../tensorflow/ssd-resnet34/README.md | 325 +- .../ssd-resnet34/inference/fp32/model_init.py | 7 +- .../ssd-resnet34/inference/int8/model_init.py | 7 +- .../tensorflow/ssd-resnet34/requirements.txt | 9 +- .../ssd-resnet34/training/__init__.py | 19 - .../ssd-resnet34/training/fp32/__init__.py | 19 - .../ssd-resnet34/training/fp32/config.json | 7 - .../ssd-resnet34/training/fp32/model_init.py | 90 - .../tensorflow/ssd_vgg16/README.md | 306 - .../tensorflow/ssd_vgg16/__init__.py | 19 - .../ssd_vgg16/inference/__init__.py | 19 - .../ssd_vgg16/inference/config.json | 6 - .../ssd_vgg16/inference/fp32/__init__.py | 19 - .../ssd_vgg16/inference/fp32/model_init.py | 28 - .../ssd_vgg16/inference/int8/__init__.py | 19 - .../inference/ssd_vgg16_model_init.py | 107 - .../recommendation/tensorflow/ncf/README.md | 137 - .../recommendation/tensorflow/ncf/__init__.py | 19 - .../tensorflow/ncf/inference/__init__.py | 19 - .../tensorflow/ncf/inference/fp32/__init__.py | 19 - .../tensorflow/ncf/inference/fp32/config.json | 7 - .../ncf/inference/fp32/model_init.py | 78 - .../tensorflow/wide_deep/README.md | 84 +- .../wide_deep/inference/fp32/data_download.py | 9 +- .../tensorflow/wide_deep_large_ds/README.md | 235 +- .../inference/fp32/model_init.py | 40 +- .../inference/int8/model_init.py | 38 +- .../training/fp32/model_init.py | 1 - .../int8 => reinforcement}/__init__.py | 2 +- .../reinforcement/tensorflow/__init__.py | 17 + .../reinforcement/tensorflow/minigo/README.md | 265 + .../tensorflow/minigo}/__init__.py | 2 - .../tensorflow/minigo/requirements.txt | 18 + .../tensorflow/minigo/training/__init__.py | 17 + .../minigo/training/fp32}/__init__.py | 2 +- .../minigo/training/fp32/model_init.py | 78 + docs/README.md | 2 +- docs/general/tensorflow/LaunchBenchmark.md | 14 +- .../quantization/Tutorial.md | 464 +- .../quantization/resnet50_min_max_log.txt | 727 --- docs/image_recognition/tensorflow/Tutorial.md | 42 +- .../tensorflow/Tutorial.md | 246 + docs/object_detection/tensorflow/Tutorial.md | 290 - docs/recommendation/quantization/Tutorial.md | 334 -- docs/recommendation/tensorflow/Tutorial.md | 27 +- models/__init__.py | 1 + .../tensorflow/draw/inference/fp32/LICENSE | 201 - .../draw/inference/fp32/draw_inf.py | 302 - .../face_detection_and_alignment/__init__.py | 19 - .../tensorflow/__init__.py | 19 - .../tensorflow/mtcc/__init__.py | 19 - .../tensorflow/mtcc/inference/__init__.py | 19 - .../mtcc/inference/fp32/MtcnnDetector.py | 467 -- .../mtcc/inference/fp32/__init__.py | 19 - .../mtcc/inference/fp32/detector.py | 96 - .../mtcc/inference/fp32/fcn_detector.py | 69 - .../mtcc/inference/fp32/one_image_test.py | 143 - models/image_recognition/__init__.py | 1 + 
.../image_recognition/tensorflow/__init__.py | 1 + .../densenet169/inference/fp32/accuracy.py | 190 +- .../densenet169/inference/fp32/benchmark.py | 225 +- .../densenet169/inference/fp32/cnn_util.py | 11 +- .../densenet169/inference/fp32/dataset.py | 80 +- .../inference/fp32/densenet_preprocessing.py | 524 +- .../inference/fp32/image_preprocessing.py | 721 ++- .../inception_resnet_v2/cnn_util.py | 11 +- .../inception_resnet_v2/dataset_factory.py | 40 +- .../inception_resnet_v2/dataset_utils.py | 178 +- .../inception_resnet_v2/datasets.py | 136 +- .../eval_image_classifier_accuracy.py | 212 +- .../eval_image_classifier_benchmark.py | 153 +- .../inception_resnet_v2/imagenet.py | 264 +- .../inception_preprocessing.py | 522 +- .../inception_resnet_v2.py | 690 ++- .../inception_resnet_v2/nets_factory.py | 80 +- .../inception_resnet_v2/preprocessing.py | 1050 ++-- .../preprocessing_factory.py | 42 +- .../tensorflow/inceptionv3/fp32/datasets.py | 88 +- .../fp32/eval_image_classifier_inference.py | 382 +- .../inceptionv3/fp32/preprocessing.py | 236 +- .../tensorflow/inceptionv3/int8/accuracy.py | 201 +- .../tensorflow/inceptionv3/int8/benchmark.py | 293 +- .../inceptionv3/int8/calibration.py | 202 +- .../tensorflow/inceptionv3/int8/cnn_util.py | 10 +- .../tensorflow/inceptionv3/int8/datasets.py | 107 +- .../inceptionv3/int8/preprocessing.py | 93 +- .../int8/preprocessing_benchmark.py | 232 +- .../inceptionv4/inference/accuracy.py | 45 +- .../inceptionv4/inference/benchmark.py | 220 +- .../inceptionv4/inference/cnn_util.py | 10 +- .../inceptionv4/inference/datasets.py | 226 +- .../inceptionv4/inference/preprocessing.py | 102 +- .../mobilenet_v1/inference/fp32/accuracy.py | 30 +- .../inference/fp32/accuracy_datasets.py | 226 +- .../inference/fp32/accuracy_preprocessing.py | 121 +- .../mobilenet_v1/inference/fp32/benchmark.py | 144 + .../mobilenet_v1/inference/fp32/cnn_util.py | 10 +- .../inference/fp32/eval_image_classifier.py | 299 +- .../mobilenet_v1/inference/int8/__init__.py | 1 + .../mobilenet_v1/inference/int8/accuracy.py | 194 +- .../mobilenet_v1/inference/int8/benchmark.py | 184 +- .../inference/int8/calibration.py | 135 + .../mobilenet_v1/inference/int8/cnn_util.py | 11 +- .../mobilenet_v1/inference/int8/datasets.py | 229 +- .../inference/int8/preprocessing.py | 1092 ++-- .../resnet101/inference/__init__.py | 1 + .../resnet101/inference/datasets.py | 63 +- .../eval_image_classifier_inference.py | 398 +- .../resnet101/inference/preprocessing.py | 249 +- .../resnet101/inference/vgg_preprocessing.py | 532 +- .../tensorflow/resnet101/int8/calibration.py | 232 +- .../tensorflow/resnet101/int8/cnn_util.py | 11 +- .../tensorflow/resnet101/int8/datasets.py | 80 +- .../resnet101/int8/preprocessing.py | 721 ++- .../resnet101/int8/vgg_preprocessing.py | 532 +- .../tensorflow/resnet50/__init__.py | 1 + .../tensorflow/resnet50/inference/__init__.py | 1 + .../tensorflow/resnet50/inference/datasets.py | 67 +- .../eval_image_classifier_inference.py | 467 +- .../resnet50/inference/preprocessing.py | 246 +- .../tensorflow/resnet50/int8/__init__.py | 1 + .../tensorflow/resnet50/int8/benchmark.py | 328 +- .../tensorflow/resnet50/int8/cnn_util.py | 11 +- .../tensorflow/resnet50/int8/datasets.py | 108 +- .../int8/generate_calibration_data.py | 246 +- .../tensorflow/resnet50/int8/preprocessing.py | 717 ++- .../resnet50/int8/preprocessing_benchmark.py | 234 +- .../tensorflow/resnet50v1_5/__init__.py | 1 + .../resnet50v1_5/inference/__init__.py | 1 + .../resnet50v1_5/inference/datasets.py | 67 +- 
.../eval_image_classifier_inference.py | 471 +- .../resnet50v1_5/inference/preprocessing.py | 250 +- .../tensorflow/resnet50v1_5/int8/__init__.py | 1 + .../tensorflow/resnet50v1_5/int8/benchmark.py | 328 +- .../tensorflow/resnet50v1_5/int8/cnn_util.py | 11 +- .../tensorflow/resnet50v1_5/int8/datasets.py | 108 +- .../int8/generate_calibration_data.py | 246 +- .../resnet50v1_5/int8/preprocessing.py | 717 ++- .../int8/preprocessing_benchmark.py | 236 +- .../resnet50v1_5/training/__init__.py | 0 .../training/mlperf_compliance/__init__.py | 1 + .../training/mlperf_compliance/_gnmt_tags.py | 51 + .../mlperf_compliance/_maskrcnn_tags.py | 53 + .../training/mlperf_compliance/_ncf_tags.py | 60 + .../mlperf_compliance/_resnet_tags.py | 47 + .../training/mlperf_compliance/_ssd_tags.py | 42 + .../mlperf_compliance/_transformer_tags.py | 35 + .../training/mlperf_compliance/mlperf_log.py | 202 + .../mlperf_compliance/resnet_log_helper.py | 84 + .../training/mlperf_compliance/tags.py | 620 ++ .../mlperf_compliance/test_tag_set.py | 69 + .../mlperf_compliance/tf_mlperf_log.py | 95 + .../training/mlperf_resnet/__init__.py | 0 .../training/mlperf_resnet/imagenet_main.py | 351 ++ .../mlperf_resnet/imagenet_preprocessing.py | 290 + .../training/mlperf_resnet/resnet_model.py | 467 ++ .../training/mlperf_resnet/resnet_run_loop.py | 692 +++ .../training/mlperf_utils/__init__.py | 0 .../mlperf_utils/arg_parsers/__init__.py | 0 .../mlperf_utils/arg_parsers/parsers.py | 413 ++ .../training/mlperf_utils/export/__init__.py | 0 .../training/mlperf_utils/export/export.py | 49 + .../training/mlperf_utils/logs/__init__.py | 0 .../mlperf_utils/logs/benchmark_uploader.py | 129 + .../training/mlperf_utils/logs/hooks.py | 117 + .../mlperf_utils/logs/hooks_helper.py | 159 + .../training/mlperf_utils/logs/logger.py | 196 + .../training/mlperf_utils/logs/metric_hook.py | 106 + .../training/mlperf_utils/misc/__init__.py | 0 .../mlperf_utils/misc/model_helpers.py | 55 + models/image_segmentation/__init__.py | 19 - .../image_segmentation/tensorflow/__init__.py | 19 - .../tensorflow/maskrcnn/__init__.py | 19 - .../tensorflow/maskrcnn/coco.py | 588 -- .../tensorflow/maskrcnn/config.py | 188 - .../tensorflow/maskrcnn/model.py | 2612 -------- .../tensorflow/maskrcnn/utils.py | 749 --- .../tensorflow/gnmt/__init__.py | 19 - .../tensorflow/gnmt/inference/__init__.py | 19 - .../tensorflow/gnmt/inference/fp32/.gitignore | 4 - .../gnmt/inference/fp32/__init__.py | 19 - .../gnmt/inference/fp32/attention_model.py | 194 - .../gnmt/inference/fp32/gnmt_model.py | 334 -- .../gnmt/inference/fp32/inference.py | 277 - .../tensorflow/gnmt/inference/fp32/model.py | 899 --- .../gnmt/inference/fp32/model_helper.py | 664 --- .../tensorflow/gnmt/inference/fp32/nmt.py | 704 --- .../gnmt/inference/fp32/scripts/bleu.py | 112 - .../fp32/scripts/download_iwslt15.sh | 30 - .../gnmt/inference/fp32/scripts/rouge.py | 352 -- .../inference/fp32/scripts/wmt16_en_de.sh | 158 - .../fp32/standard_hparams/iwslt15.json | 35 - .../standard_hparams/iwslt15_internal.json | 33 - .../fp32/standard_hparams/wmt16.json | 35 - .../standard_hparams/wmt16_gnmt_4_layer.json | 36 - .../wmt16_gnmt_4_layer_internal.json | 34 - .../standard_hparams/wmt16_gnmt_8_layer.json | 36 - .../wmt16_gnmt_8_layer_internal.json | 35 - .../fp32/standard_hparams/wmt16_internal.json | 33 - .../tensorflow/gnmt/inference/fp32/train.py | 750 --- .../gnmt/inference/fp32/utils/__init__.py | 19 - .../inference/fp32/utils/evaluation_utils.py | 183 - .../inference/fp32/utils/iterator_utils.py | 248 - 
.../gnmt/inference/fp32/utils/misc_utils.py | 183 - .../gnmt/inference/fp32/utils/nmt_utils.py | 144 - .../fp32/utils/standard_hparams_utils.py | 132 - .../gnmt/inference/fp32/utils/vocab_utils.py | 197 - .../tensorflow/gnmt/training/__init__.py | 19 - .../gnmt/training/fp32/multi_instances.patch | 909 --- .../tensorflow/gnmt/training/requirements.txt | 1 - .../tensorflow/mlperf_gnmt/__init__.py | 0 .../tensorflow/mlperf_gnmt/fp32/__init__.py | 0 .../tensorflow/mlperf_gnmt/fp32/bleu.py | 172 + .../mlperf_gnmt/fp32/evaluation_utils.py | 183 + .../tensorflow/mlperf_gnmt/fp32/hparam.py | 536 ++ .../tensorflow/mlperf_gnmt/fp32/misc_utils.py | 182 + .../tensorflow/mlperf_gnmt/fp32/nmt_utils.py | 111 + .../tensorflow/mlperf_gnmt/fp32/rouge.py | 352 ++ .../mlperf_gnmt/fp32/run_inference.py | 120 + .../inference/fp32/compute_bleu.py | 141 + .../inference/fp32/infer_ab.py | 232 + .../inference/fp32/official/utils/__init__.py | 0 .../fp32/official/utils/flags/README.md | 97 + .../fp32/official/utils/flags/__init__.py | 0 .../fp32/official/utils/flags/_base.py | 163 + .../fp32/official/utils/flags/_benchmark.py | 105 + .../fp32/official/utils/flags/_conventions.py | 54 + .../fp32/official/utils/flags/_device.py | 85 + .../official/utils/flags/_distribution.py | 54 + .../fp32/official/utils/flags/_misc.py | 50 + .../fp32/official/utils/flags/_performance.py | 299 + .../fp32/official/utils/flags/core.py | 132 + .../fp32/official/utils/flags/flags_test.py | 162 + .../fp32/official/utils/flags/guidelines.md | 65 + .../fp32/official/utils/hyperparams_flags.py | 119 + .../fp32/official/utils/logs/__init__.py | 0 .../fp32/official/utils/logs/cloud_lib.py | 34 + .../official/utils/logs/cloud_lib_test.py | 48 + .../fp32/official/utils/logs/guidelines.md | 58 + .../fp32/official/utils/logs/hooks.py | 130 + .../fp32/official/utils/logs/hooks_helper.py | 172 + .../official/utils/logs/hooks_helper_test.py | 73 + .../fp32/official/utils/logs/hooks_test.py | 158 + .../fp32/official/utils/logs/logger.py | 423 ++ .../fp32/official/utils/logs/logger_test.py | 365 ++ .../fp32/official/utils/logs/metric_hook.py | 97 + .../official/utils/logs/metric_hook_test.py | 217 + .../fp32/official/utils/logs/mlperf_helper.py | 193 + .../inference/fp32/utils/__init__.py | 0 .../inference/fp32/utils/metrics.py | 490 ++ .../inference/fp32/utils/tokenizer.py | 620 ++ .../inference/fp32/utils/tokenizer_test.py | 182 + models/object_detection/__init__.py | 1 + .../object_detection/tensorflow/__init__.py | 1 + .../tensorflow/faster_rcnn/__init__.py | 19 - .../faster_rcnn/inference/__init__.py | 19 - .../faster_rcnn/inference/fp32/__init__.py | 19 - .../inference/fp32/coco_accuracy.sh | 59 - .../inference/fp32/dataset_util.py | 160 - .../faster_rcnn/inference/fp32/eval.py | 165 - .../faster_rcnn/inference/fp32/eval_util.py | 669 --- .../faster_rcnn/inference/fp32/evaluator.py | 297 - .../inference/fp32/run_frozen_graph_rcnn.py | 222 - .../faster_rcnn/inference/int8/coco_int8.sh | 50 - .../inference/int8/run_frozen_graph_rcnn.py | 222 - .../faster_rcnn/inference/tf_models.patch | 68 - .../tensorflow/rfcn/__init__.py | 1 + .../tensorflow/rfcn/inference/__init__.py | 1 + .../rfcn/inference/fp32/__init__.py | 1 + .../rfcn/inference/fp32/coco_mAP.sh | 18 +- .../rfcn/inference/fp32/dataset_util.py | 176 +- .../tensorflow/rfcn/inference/fp32/eval.py | 122 +- .../rfcn/inference/fp32/eval_util.py | 1088 ++-- .../rfcn/inference/fp32/evaluator.py | 455 +- .../rfcn/inference/fp32/run_rfcn_inference.py | 84 +- .../rfcn/inference/int8/__init__.py | 2 + 
.../rfcn/inference/int8/coco_mAP.sh | 23 +- .../rfcn/inference/int8/run_rfcn_inference.py | 66 +- .../tensorflow/rfcn/inference/tf-2.0.patch | 617 ++ .../tensorflow/rfcn/inference/tf_models.patch | 68 - .../inference/coco_detection_evaluator.py | 105 + .../ssd-mobilenet/inference/coco_label_map.py | 103 + .../ssd-mobilenet/inference/coco_tools.py | 530 ++ .../inference/detection_inference.patch | 33 - .../ssd-mobilenet/inference/fp32}/__init__.py | 0 .../inference/fp32/infer_detections.py | 269 + .../ssd-mobilenet/inference/int8/coco_int8.sh | 49 - .../inference/int8/infer_detections.py | 282 + .../inference/int8/run_frozen_graph_ssdmob.py | 224 - .../inference/ssdmobilenet_preprocess.pb | Bin 0 -> 9162 bytes .../tensorflow/ssd-resnet34/__init__.py | 1 + .../ssd-resnet34/inference/__init__.py | 1 + .../ssd-resnet34/inference/fp32/__init__.py | 1 + .../inference/fp32/infer_detections.py | 337 +- .../ssd-resnet34/inference/int8/__init__.py | 1 + .../inference/int8/infer_detections.py | 337 +- .../tensorflow_benchmarks_tf2.0.patch | 655 ++ .../inference/tensorflow_models_tf2.0.patch | 22 + .../ssd-resnet34/training/__init__.py | 19 - .../ssd-resnet34/training/fp32/__init__.py | 19 - .../training/fp32/benchmark_v1.13.diff | 26 - .../tensorflow/ssd_vgg16/__init__.py | 19 - .../ssd_vgg16/inference/__init__.py | 19 - .../ssd_vgg16/inference/anchor_manipulator.py | 365 -- .../ssd_vgg16/inference/eval_ssd.py | 319 - .../inference/generate_coco_records.py | 212 - .../ssd_vgg16/inference/validate_ssd_vgg16.py | 113 - models/recommendation/__init__.py | 1 + models/recommendation/tensorflow/__init__.py | 1 + .../recommendation/tensorflow/ncf/__init__.py | 19 - .../tensorflow/ncf/inference/__init__.py | 19 - .../tensorflow/ncf/inference/fp32/__init__.py | 19 - .../tensorflow/ncf/inference/fp32/ncf_main.py | 594 -- .../inference/fp32/wide_deep_inference.py | 19 +- .../tensorflow/wide_deep_large_ds/__init__.py | 1 + .../featurecolumn_graph_optimization.py | 57 +- .../dataset/preprocess_csv_tfrecords.py | 68 +- .../wide_deep_large_ds/inference/__init__.py | 1 + .../wide_deep_large_ds/inference/inference.py | 99 +- .../inference/parallel_inference.py | 235 + .../wide_deep_large_ds/training/train.py | 48 +- .../reinforcement}/__init__.py | 3 +- .../reinforcement/tensorflow}/__init__.py | 3 +- .../tensorflow/minigo/__init__.py | 17 + .../tensorflow/minigo/training/__init__.py | 17 + .../fp32/avoid-repeated-clone-multinode.patch | 50 + .../avoid-repeated-clone-singlenode.patch | 49 + .../fp32/bazel-clean-large-scale.patch | 16 + .../fp32/bazel-clean-single-node.patch | 16 + .../minigo/training/fp32/get-data.patch | 40 + .../training/fp32/large-scale-no-bg.patch | 16 + .../minigo/training/fp32/minigo_mlperf.patch | 2445 ++++++++ .../fp32/minigo_mlperf_large_scale.patch | 5265 +++++++++++++++++ .../minigo/training/fp32/mlperf_split.patch | 510 ++ .../minigo/training/requirements.txt | 18 + 427 files changed, 36254 insertions(+), 33543 deletions(-) mode change 100644 => 100755 benchmarks/common/tensorflow/run_tf_benchmark.py create mode 100644 benchmarks/common/utils/multi_instance.py delete mode 100644 benchmarks/content_creation/tensorflow/draw/README.md delete mode 100644 benchmarks/content_creation/tensorflow/draw/inference/fp32/__init__.py delete mode 100644 benchmarks/content_creation/tensorflow/draw/inference/fp32/config.json delete mode 100644 benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py delete mode 100644 benchmarks/face_detection_and_alignment/__init__.py delete mode 100644 
benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md delete mode 100644 benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/__init__.py delete mode 100644 benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/__init__.py delete mode 100644 benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py rename benchmarks/{object_detection/tensorflow/faster_rcnn => image_recognition/tensorflow/resnet50v1_5/training}/__init__.py (99%) rename {models/language_translation/tensorflow/gnmt/inference/fp32/scripts => benchmarks/image_recognition/tensorflow/resnet50v1_5/training/common_resnet50}/__init__.py (100%) rename benchmarks/{face_detection_and_alignment/tensorflow/mtcc/inference/fp32 => image_recognition/tensorflow/resnet50v1_5/training/common_resnet50}/config.json (100%) create mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/training/common_resnet50/resnet50_model_init.py create mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/training/fp32/__init__.py rename benchmarks/{object_detection/tensorflow/ssd_vgg16/inference/int8 => image_recognition/tensorflow/resnet50v1_5/training/fp32}/model_init.py (69%) delete mode 100644 benchmarks/image_segmentation/__init__.py delete mode 100644 benchmarks/image_segmentation/tensorflow/__init__.py delete mode 100644 benchmarks/image_segmentation/tensorflow/maskrcnn/README.md delete mode 100644 benchmarks/image_segmentation/tensorflow/maskrcnn/__init__.py delete mode 100644 benchmarks/image_segmentation/tensorflow/maskrcnn/inference/__init__.py delete mode 100644 benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/__init__.py delete mode 100644 benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/config.json delete mode 100644 benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py delete mode 100644 benchmarks/language_modeling/__init__.py delete mode 100644 benchmarks/language_modeling/tensorflow/__init__.py delete mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/README.md delete mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/__init__.py delete mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/inference/__init__.py delete mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/__init__.py delete mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py delete mode 100644 benchmarks/language_translation/tensorflow/gnmt/README.md delete mode 100644 benchmarks/language_translation/tensorflow/gnmt/__init__.py delete mode 100644 benchmarks/language_translation/tensorflow/gnmt/inference/__init__.py delete mode 100644 benchmarks/language_translation/tensorflow/gnmt/inference/fp32/__init__.py delete mode 100644 benchmarks/language_translation/tensorflow/gnmt/inference/fp32/config.json delete mode 100644 benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py delete mode 100644 benchmarks/language_translation/tensorflow/gnmt/training/__init__.py delete mode 100644 benchmarks/language_translation/tensorflow/gnmt/training/fp32/__init__.py delete mode 100644 benchmarks/language_translation/tensorflow/gnmt/training/fp32/config.json delete mode 100644 benchmarks/language_translation/tensorflow/gnmt/training/fp32/model_init.py create mode 100644 benchmarks/language_translation/tensorflow/mlperf_gnmt/README.md create mode 100644 benchmarks/language_translation/tensorflow/mlperf_gnmt/__init__.py create mode 100644 
benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/__init__.py create mode 100644 benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/__init__.py create mode 100644 benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/config.json create mode 100644 benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/model_init.py create mode 100644 benchmarks/language_translation/tensorflow/transformer_lt_official/README.md rename benchmarks/{content_creation => language_translation/tensorflow/transformer_lt_official}/__init__.py (100%) rename benchmarks/{content_creation/tensorflow => language_translation/tensorflow/transformer_lt_official/inference}/__init__.py (100%) rename benchmarks/{content_creation/tensorflow/draw => language_translation/tensorflow/transformer_lt_official/inference/fp32}/__init__.py (100%) rename benchmarks/{language_modeling/tensorflow/lm-1b => language_translation/tensorflow/transformer_lt_official}/inference/fp32/config.json (100%) create mode 100644 benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py create mode 100644 benchmarks/language_translation/tensorflow/transformer_lt_official/requirements.txt mode change 100644 => 100755 benchmarks/launch_benchmark.py delete mode 100644 benchmarks/object_detection/tensorflow/faster_rcnn/README.md delete mode 100644 benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/__init__.py delete mode 100644 benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/config.json delete mode 100644 benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py delete mode 100644 benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/config.json delete mode 100644 benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py delete mode 100644 benchmarks/object_detection/tensorflow/faster_rcnn/requirements.txt delete mode 100644 benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py delete mode 100644 benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_accuracy.sh delete mode 100644 benchmarks/object_detection/tensorflow/ssd-resnet34/training/__init__.py delete mode 100644 benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/__init__.py delete mode 100644 benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/config.json delete mode 100644 benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/model_init.py delete mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/README.md delete mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/__init__.py delete mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/__init__.py delete mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/config.json delete mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/__init__.py delete mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/model_init.py delete mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/__init__.py delete mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/ssd_vgg16_model_init.py delete mode 100644 benchmarks/recommendation/tensorflow/ncf/README.md delete mode 100644 benchmarks/recommendation/tensorflow/ncf/__init__.py delete mode 100644 benchmarks/recommendation/tensorflow/ncf/inference/__init__.py delete mode 100644 
benchmarks/recommendation/tensorflow/ncf/inference/fp32/__init__.py delete mode 100644 benchmarks/recommendation/tensorflow/ncf/inference/fp32/config.json delete mode 100644 benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py rename benchmarks/{object_detection/tensorflow/faster_rcnn/inference/int8 => reinforcement}/__init__.py (93%) create mode 100644 benchmarks/reinforcement/tensorflow/__init__.py create mode 100644 benchmarks/reinforcement/tensorflow/minigo/README.md rename benchmarks/{object_detection/tensorflow/faster_rcnn/inference => reinforcement/tensorflow/minigo}/__init__.py (99%) create mode 100644 benchmarks/reinforcement/tensorflow/minigo/requirements.txt create mode 100644 benchmarks/reinforcement/tensorflow/minigo/training/__init__.py rename {models/object_detection/tensorflow/faster_rcnn/inference/int8 => benchmarks/reinforcement/tensorflow/minigo/training/fp32}/__init__.py (93%) create mode 100644 benchmarks/reinforcement/tensorflow/minigo/training/fp32/model_init.py delete mode 100644 docs/image_recognition/quantization/resnet50_min_max_log.txt create mode 100644 docs/language_translation/tensorflow/Tutorial.md delete mode 100755 docs/object_detection/tensorflow/Tutorial.md delete mode 100644 docs/recommendation/quantization/Tutorial.md delete mode 100644 models/content_creation/tensorflow/draw/inference/fp32/LICENSE delete mode 100644 models/content_creation/tensorflow/draw/inference/fp32/draw_inf.py delete mode 100644 models/face_detection_and_alignment/__init__.py delete mode 100644 models/face_detection_and_alignment/tensorflow/__init__.py delete mode 100644 models/face_detection_and_alignment/tensorflow/mtcc/__init__.py delete mode 100644 models/face_detection_and_alignment/tensorflow/mtcc/inference/__init__.py delete mode 100755 models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/MtcnnDetector.py delete mode 100644 models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/__init__.py delete mode 100755 models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/detector.py delete mode 100755 models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/fcn_detector.py delete mode 100644 models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/one_image_test.py create mode 100644 models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/benchmark.py create mode 100644 models/image_recognition/tensorflow/mobilenet_v1/inference/int8/calibration.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_gnmt_tags.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_maskrcnn_tags.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_ncf_tags.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_resnet_tags.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_ssd_tags.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_transformer_tags.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/mlperf_log.py create mode 100644 
models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/resnet_log_helper.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/tags.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/test_tag_set.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/tf_mlperf_log.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/imagenet_main.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/imagenet_preprocessing.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/resnet_model.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/resnet_run_loop.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/arg_parsers/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/arg_parsers/parsers.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/export/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/export/export.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/benchmark_uploader.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/hooks.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/hooks_helper.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/logger.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/metric_hook.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/misc/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/misc/model_helpers.py delete mode 100644 models/image_segmentation/__init__.py delete mode 100644 models/image_segmentation/tensorflow/__init__.py delete mode 100644 models/image_segmentation/tensorflow/maskrcnn/__init__.py delete mode 100644 models/image_segmentation/tensorflow/maskrcnn/coco.py delete mode 100644 models/image_segmentation/tensorflow/maskrcnn/config.py delete mode 100644 models/image_segmentation/tensorflow/maskrcnn/model.py delete mode 100644 models/image_segmentation/tensorflow/maskrcnn/utils.py delete mode 100644 models/language_translation/tensorflow/gnmt/__init__.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/__init__.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/.gitignore delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/__init__.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/attention_model.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/gnmt_model.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/inference.py delete mode 100644 
models/language_translation/tensorflow/gnmt/inference/fp32/model.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/model_helper.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/nmt.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/scripts/bleu.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/scripts/download_iwslt15.sh delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/scripts/rouge.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/scripts/wmt16_en_de.sh delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/iwslt15.json delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/iwslt15_internal.json delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16.json delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_4_layer.json delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_8_layer.json delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_8_layer_internal.json delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_internal.json delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/train.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/utils/__init__.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/utils/evaluation_utils.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/utils/iterator_utils.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/utils/misc_utils.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/utils/nmt_utils.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/utils/standard_hparams_utils.py delete mode 100644 models/language_translation/tensorflow/gnmt/inference/fp32/utils/vocab_utils.py delete mode 100644 models/language_translation/tensorflow/gnmt/training/__init__.py delete mode 100644 models/language_translation/tensorflow/gnmt/training/fp32/multi_instances.patch delete mode 100644 models/language_translation/tensorflow/gnmt/training/requirements.txt create mode 100644 models/language_translation/tensorflow/mlperf_gnmt/__init__.py create mode 100644 models/language_translation/tensorflow/mlperf_gnmt/fp32/__init__.py create mode 100644 models/language_translation/tensorflow/mlperf_gnmt/fp32/bleu.py create mode 100644 models/language_translation/tensorflow/mlperf_gnmt/fp32/evaluation_utils.py create mode 100644 models/language_translation/tensorflow/mlperf_gnmt/fp32/hparam.py create mode 100644 models/language_translation/tensorflow/mlperf_gnmt/fp32/misc_utils.py create mode 100644 models/language_translation/tensorflow/mlperf_gnmt/fp32/nmt_utils.py create mode 100644 models/language_translation/tensorflow/mlperf_gnmt/fp32/rouge.py create mode 100644 models/language_translation/tensorflow/mlperf_gnmt/fp32/run_inference.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/compute_bleu.py create 
mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/infer_ab.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/__init__.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/README.md create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/__init__.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_base.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_benchmark.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_conventions.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_device.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_distribution.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_misc.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_performance.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/core.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/flags_test.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/guidelines.md create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/hyperparams_flags.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/__init__.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/cloud_lib.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/cloud_lib_test.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/guidelines.md create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_helper.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_helper_test.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_test.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/logger.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/logger_test.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/metric_hook.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/metric_hook_test.py create mode 100644 
models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/mlperf_helper.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/__init__.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/metrics.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/tokenizer.py create mode 100644 models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/tokenizer_test.py delete mode 100644 models/object_detection/tensorflow/faster_rcnn/__init__.py delete mode 100644 models/object_detection/tensorflow/faster_rcnn/inference/__init__.py delete mode 100644 models/object_detection/tensorflow/faster_rcnn/inference/fp32/__init__.py delete mode 100644 models/object_detection/tensorflow/faster_rcnn/inference/fp32/coco_accuracy.sh delete mode 100644 models/object_detection/tensorflow/faster_rcnn/inference/fp32/dataset_util.py delete mode 100644 models/object_detection/tensorflow/faster_rcnn/inference/fp32/eval.py delete mode 100644 models/object_detection/tensorflow/faster_rcnn/inference/fp32/eval_util.py delete mode 100644 models/object_detection/tensorflow/faster_rcnn/inference/fp32/evaluator.py delete mode 100644 models/object_detection/tensorflow/faster_rcnn/inference/fp32/run_frozen_graph_rcnn.py delete mode 100755 models/object_detection/tensorflow/faster_rcnn/inference/int8/coco_int8.sh delete mode 100644 models/object_detection/tensorflow/faster_rcnn/inference/int8/run_frozen_graph_rcnn.py delete mode 100644 models/object_detection/tensorflow/faster_rcnn/inference/tf_models.patch mode change 100755 => 100644 models/object_detection/tensorflow/rfcn/inference/fp32/run_rfcn_inference.py mode change 100755 => 100644 models/object_detection/tensorflow/rfcn/inference/int8/run_rfcn_inference.py create mode 100644 models/object_detection/tensorflow/rfcn/inference/tf-2.0.patch delete mode 100644 models/object_detection/tensorflow/rfcn/inference/tf_models.patch create mode 100644 models/object_detection/tensorflow/ssd-mobilenet/inference/coco_detection_evaluator.py create mode 100644 models/object_detection/tensorflow/ssd-mobilenet/inference/coco_label_map.py create mode 100644 models/object_detection/tensorflow/ssd-mobilenet/inference/coco_tools.py delete mode 100644 models/object_detection/tensorflow/ssd-mobilenet/inference/detection_inference.patch rename {benchmarks/content_creation/tensorflow/draw/inference => models/object_detection/tensorflow/ssd-mobilenet/inference/fp32}/__init__.py (100%) create mode 100644 models/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py delete mode 100644 models/object_detection/tensorflow/ssd-mobilenet/inference/int8/coco_int8.sh create mode 100644 models/object_detection/tensorflow/ssd-mobilenet/inference/int8/infer_detections.py delete mode 100644 models/object_detection/tensorflow/ssd-mobilenet/inference/int8/run_frozen_graph_ssdmob.py create mode 100644 models/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_preprocess.pb create mode 100644 models/object_detection/tensorflow/ssd-resnet34/inference/tensorflow_benchmarks_tf2.0.patch create mode 100644 models/object_detection/tensorflow/ssd-resnet34/inference/tensorflow_models_tf2.0.patch delete mode 100644 models/object_detection/tensorflow/ssd-resnet34/training/__init__.py delete mode 100644 models/object_detection/tensorflow/ssd-resnet34/training/fp32/__init__.py 
delete mode 100644 models/object_detection/tensorflow/ssd-resnet34/training/fp32/benchmark_v1.13.diff delete mode 100644 models/object_detection/tensorflow/ssd_vgg16/__init__.py delete mode 100644 models/object_detection/tensorflow/ssd_vgg16/inference/__init__.py delete mode 100644 models/object_detection/tensorflow/ssd_vgg16/inference/anchor_manipulator.py delete mode 100644 models/object_detection/tensorflow/ssd_vgg16/inference/eval_ssd.py delete mode 100755 models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py delete mode 100644 models/object_detection/tensorflow/ssd_vgg16/inference/validate_ssd_vgg16.py delete mode 100644 models/recommendation/tensorflow/ncf/__init__.py delete mode 100644 models/recommendation/tensorflow/ncf/inference/__init__.py delete mode 100644 models/recommendation/tensorflow/ncf/inference/fp32/__init__.py delete mode 100644 models/recommendation/tensorflow/ncf/inference/fp32/ncf_main.py create mode 100755 models/recommendation/tensorflow/wide_deep_large_ds/inference/parallel_inference.py rename {benchmarks/face_detection_and_alignment/tensorflow => models/reinforcement}/__init__.py (93%) rename {benchmarks/face_detection_and_alignment/tensorflow/mtcc => models/reinforcement/tensorflow}/__init__.py (93%) create mode 100644 models/reinforcement/tensorflow/minigo/__init__.py create mode 100644 models/reinforcement/tensorflow/minigo/training/__init__.py create mode 100644 models/reinforcement/tensorflow/minigo/training/fp32/avoid-repeated-clone-multinode.patch create mode 100644 models/reinforcement/tensorflow/minigo/training/fp32/avoid-repeated-clone-singlenode.patch create mode 100644 models/reinforcement/tensorflow/minigo/training/fp32/bazel-clean-large-scale.patch create mode 100644 models/reinforcement/tensorflow/minigo/training/fp32/bazel-clean-single-node.patch create mode 100644 models/reinforcement/tensorflow/minigo/training/fp32/get-data.patch create mode 100644 models/reinforcement/tensorflow/minigo/training/fp32/large-scale-no-bg.patch create mode 100644 models/reinforcement/tensorflow/minigo/training/fp32/minigo_mlperf.patch create mode 100644 models/reinforcement/tensorflow/minigo/training/fp32/minigo_mlperf_large_scale.patch create mode 100644 models/reinforcement/tensorflow/minigo/training/fp32/mlperf_split.patch create mode 100644 models/reinforcement/tensorflow/minigo/training/requirements.txt diff --git a/benchmarks/README.md b/benchmarks/README.md index 132205a65..e23914311 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -6,7 +6,7 @@ Training and inference scripts with Intel-optimized MKL The model scripts can be run on Linux and require the following dependencies to be installed: -* [Docker](https://docs.docker.com/install/) also support bare metal run +* [Docker](https://docs.docker.com/install/) * [Python](https://www.python.org/downloads/) 3.5 or later * [git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) * `wget` for downloading pre-trained models @@ -15,29 +15,21 @@ dependencies to be installed: | Use Case | Framework | Model | Mode | Instructions | | -----------------------| --------------| ------------------- | --------- |------------------------------| -| Content Creation | TensorFlow | [DRAW](https://arxiv.org/pdf/1502.04623.pdf) | Inference | [FP32](content_creation/tensorflow/draw/README.md#fp32-inference-instructions) | -| Face Detection and Alignment | TensorFlow | [MTCC](https://arxiv.org/pdf/1604.02878.pdf) | Inference | 
[FP32](face_detection_and_alignment/tensorflow/mtcc/README.md#fp32-inference-instructions) |
 | Image Recognition | TensorFlow | [DenseNet169](https://arxiv.org/pdf/1608.06993.pdf) | Inference | [FP32](image_recognition/tensorflow/densenet169/README.md#fp32-inference-instructions) |
-| Image Recognition | TensorFlow | [Inception ResNet V2](https://arxiv.org/pdf/1602.07261.pdf) | Inference | [Int8](image_recognition/tensorflow/inception_resnet_v2/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/inception_resnet_v2/README.md#fp32-inference-instructions) |
 | Image Recognition | TensorFlow | [Inception V3](https://arxiv.org/pdf/1512.00567.pdf) | Inference | [Int8](image_recognition/tensorflow/inceptionv3/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/inceptionv3/README.md#fp32-inference-instructions) |
 | Image Recognition | TensorFlow | [Inception V4](https://arxiv.org/pdf/1602.07261.pdf) | Inference | [Int8](image_recognition/tensorflow/inceptionv4/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/inceptionv4/README.md#fp32-inference-instructions) |
 | Image Recognition | TensorFlow | [MobileNet V1*](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](image_recognition/tensorflow/mobilenet_v1/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/mobilenet_v1/README.md#fp32-inference-instructions) |
 | Image Recognition | TensorFlow | [ResNet 101](https://arxiv.org/pdf/1512.03385.pdf) | Inference | [Int8](image_recognition/tensorflow/resnet101/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet101/README.md#fp32-inference-instructions) |
 | Image Recognition | TensorFlow | [ResNet 50](https://arxiv.org/pdf/1512.03385.pdf) | Inference | [Int8](image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50/README.md#fp32-inference-instructions) |
 | Image Recognition | TensorFlow | [ResNet 50v1.5*](https://github.com/tensorflow/models/tree/master/official/resnet) | Inference | [Int8](image_recognition/tensorflow/resnet50v1_5/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50v1_5/README.md#fp32-inference-instructions) |
-| Image Segmentation | TensorFlow | [Mask R-CNN](https://arxiv.org/pdf/1703.06870.pdf) | Inference | [FP32](image_segmentation/tensorflow/maskrcnn/README.md#fp32-inference-instructions) |
-| Language Modeling | TensorFlow | [LM-1B](https://arxiv.org/pdf/1602.02410.pdf) | Inference | [FP32](language_modeling/tensorflow/lm-1b/README.md#fp32-inference-instructions) |
-| Language Translation | TensorFlow | [GNMT](https://arxiv.org/pdf/1609.08144.pdf) | Inference | [FP32](language_translation/tensorflow/gnmt/README.md#fp32-inference-instructions) |
-| Language Translation | TensorFlow | [GNMT](https://arxiv.org/pdf/1609.08144.pdf) | Training | [FP32](language_translation/tensorflow/gnmt/README.md#fp32-training-instructions) |
+| Image Recognition | TensorFlow | [ResNet 50v1.5*](https://github.com/tensorflow/models/tree/master/official/resnet) | Training | [FP32](image_recognition/tensorflow/resnet50v1_5/README.md#fp32-training-instructions) |
+| Reinforcement | TensorFlow | [MiniGo](https://arxiv.org/abs/1712.01815.pdf) | Training | [FP32](reinforcement/tensorflow/minigo/README.md#fp32-training-instructions)|
+| Language Translation | TensorFlow | [GNMT*](https://arxiv.org/pdf/1609.08144.pdf) | Inference | [FP32](language_translation/tensorflow/mlperf_gnmt/README.md#fp32-inference-instructions) |
+| Language Translation | TensorFlow | [Transformer_LT_Official ](https://arxiv.org/pdf/1706.03762.pdf)| Inference | [FP32](language_translation/tensorflow/transformer_lt_official/README.md#fp32-inference-instructions) |
 | Object Detection | TensorFlow | [R-FCN](https://arxiv.org/pdf/1605.06409.pdf) | Inference | [Int8](object_detection/tensorflow/rfcn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) |
-| Object Detection | TensorFlow | [Faster R-CNN](https://arxiv.org/pdf/1506.01497.pdf) | Inference | [Int8](object_detection/tensorflow/faster_rcnn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/faster_rcnn/README.md#fp32-inference-instructions) |
 | Object Detection | TensorFlow | [SSD-MobileNet*](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](object_detection/tensorflow/ssd-mobilenet/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) |
 | Object Detection | TensorFlow | [SSD-ResNet34*](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [Int8](object_detection/tensorflow/ssd-resnet34/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-inference-instructions) |
-| Object Detection | TensorFlow | [SSD-ResNet34*](https://arxiv.org/pdf/1512.02325.pdf) | Training | [FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-training-instructions) |
-| Object Detection | TensorFlow | [SSD-VGG16](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [Int8](object_detection/tensorflow/ssd_vgg16/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd_vgg16/README.md#fp32-inference-instructions) |
-| Recommendation | TensorFlow | [NCF](https://arxiv.org/pdf/1708.05031.pdf) | Inference | [FP32](recommendation/tensorflow/ncf/README.md#fp32-inference-instructions) |
 | Recommendation | TensorFlow | [Wide & Deep Large Dataset](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [Int8](recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) [FP32](recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-inference-instructions) |
-| Recommendation | TensorFlow | [Wide & Deep Large Dataset](https://arxiv.org/pdf/1606.07792.pdf) | Training | [FP32](recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-training-instructions) |
 | Recommendation | TensorFlow | [Wide & Deep](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [FP32](recommendation/tensorflow/wide_deep/README.md#fp32-inference-instructions) |
 
 *Means the model belongs to the [MLPerf](https://mlperf.org/) suite of models and will be supported long term.
diff --git a/benchmarks/common/base_benchmark_util.py b/benchmarks/common/base_benchmark_util.py
index 9f3f15771..6fc1c47bb 100644
--- a/benchmarks/common/base_benchmark_util.py
+++ b/benchmarks/common/base_benchmark_util.py
@@ -1,7 +1,7 @@
 #
 # -*- coding: utf-8 -*-
 #
-# Copyright (c) 2018 Intel Corporation
+# Copyright (c) 2018-2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -67,8 +67,8 @@ def _define_args(self): self._common_arg_parser.add_argument( "-p", "--precision", - help="Specify the model precision to use: fp32, int8, or bfloat16", - required=required_arg, choices=["fp32", "int8", "bfloat16"], + help="Specify the model precision to use: fp32, int8", + required=required_arg, choices=["fp32", "int8"], dest="precision") self._common_arg_parser.add_argument( @@ -87,6 +87,16 @@ def _define_args(self): dest="batch_size", default=-1, type=check_positive_number_or_equal_to_negative_one) + self._common_arg_parser.add_argument( + "--mpi_num_processes", type=check_positive_number, + help="The number of MPI processes", + dest="mpi", default=None) + + self._common_arg_parser.add_argument( + "--mpi_num_processes_per_socket", type=check_positive_number, + help="Specify how many MPI processes to launch per socket", + dest="num_mpi", default=1) + self._common_arg_parser.add_argument( "-d", "--data-location", help="Specify the location of the data. If this parameter is not " @@ -107,6 +117,11 @@ def _define_args(self): " specified or is -1, all cores will be used.", dest="num_cores", type=int, default=-1) + self._common_arg_parser.add_argument( + "--num-instances", type=check_positive_number, + help="Specify the number of instances to run.", + dest="num_instances", default=1) + self._common_arg_parser.add_argument( "-a", "--num-intra-threads", type=check_positive_number, help="Specify the number of threads within the layer", @@ -117,23 +132,6 @@ def _define_args(self): help="Specify the number threads between layers", dest="num_inter_threads", default=None) - self._common_arg_parser.add_argument( - "-np", "--num-processes", type=check_positive_number, - help="Specify the number of processes to run on as mpirun '-np' " - "input for multi-instance execution. ", - dest="num_processes", default=1) - - self._common_arg_parser.add_argument( - "-ppn", "--num-processes-per-node", type=check_positive_number, - help="Specify the number of processes per node as mpirun '-ppn' " - "input for multi-instance execution. ", - dest="num_processes_per_node", default=1) - - self._common_arg_parser.add_argument( - "-ts", "--num-train-steps", type=check_positive_number, - help="Specify the number of training steps ", - dest="num_train_steps", default=1) - self._common_arg_parser.add_argument( "--data-num-intra-threads", type=check_positive_number, help="The number intra op threads for the data layer config", @@ -158,6 +156,7 @@ def _define_args(self): "-g", "--in-graph", help="Full path to the input graph ", dest="input_graph", default=None, type=check_valid_filename) + self._common_arg_parser.add_argument( "-k", "--benchmark-only", help="For benchmark measurement only. 
If neither --benchmark-only " @@ -221,11 +220,11 @@ def _define_args(self): def _validate_args(self): """validate the args and initializes platform_util""" # check if socket id is in socket number range - num_numas = self._platform_util.num_numa_nodes + num_sockets = self._platform_util.num_cpu_sockets args = self.args - if not -1 <= args.socket_id < num_numas: - raise ValueError("Socket id must be within NUMA number range: " - "[0, {}].".format(num_numas - 1)) + if not -1 <= args.socket_id < num_sockets: + raise ValueError("Socket id must be within socket number range: " + "[0, {}].".format(num_sockets - 1)) # check number of cores num_logical_cores_per_socket = \ diff --git a/benchmarks/common/base_model_init.py b/benchmarks/common/base_model_init.py index 00b2f3a53..446a88311 100644 --- a/benchmarks/common/base_model_init.py +++ b/benchmarks/common/base_model_init.py @@ -57,6 +57,18 @@ def __init__(self, args, custom_args=[], platform_util=None): if not platform_util: raise ValueError("Did not find any platform info.") + # Invoke mpirun if mpi_num_processes env is not None + if os.environ["MPI_NUM_PROCESSES"] != "None": + if os.environ["MPI_NUM_PROCESSES_PER_SOCKET"] == "1": + # Map by socket using OpenMPI by default (PPS=1). + self.python_exe = "mpirun --allow-run-as-root -n " + os.environ["MPI_NUM_PROCESSES"] + " --map-by socket " + self.python_exe + else: + # number of processes per socket (pps) + pps = int(os.environ["MPI_NUM_PROCESSES_PER_SOCKET"]) + split_a_socket = str(platform_util.num_cores_per_socket // pps) + # Launch pps MPI processes over one socket + self.python_exe = "mpirun --allow-run-as-root -n " + os.environ["MPI_NUM_PROCESSES"] + " --map-by ppr:" + str(pps) + ":socket:pe=" + split_a_socket + " --cpus-per-proc " + split_a_socket + " " + self.python_exe + def run_command(self, cmd): """ Prints debug messages when verbose is enabled, and then runs the @@ -75,6 +87,8 @@ def get_command_prefix(self, socket_id, numactl=True): Returns the command prefix with: - LD_PRELOAD for int8 models (if tcmalloc is not disabled) - The numactl command with --cpunodebind and --membind set to the specified socket_id (if numactl=True) + + Should be used only for single instance. """ command = "" @@ -97,36 +111,6 @@ def get_command_prefix(self, socket_id, numactl=True): return command - def get_multi_instance_train_prefix(self, option_list=None): - """ - Returns the multi-instance train command prefix with: - - Define the number of processes. - - Define the processes per each node. - - Set the other parameters with the format of parameter list "option:value", such as environment variable - "-genv:I_MPI_ASYNC_PROGRESS=1". - """ - command = "mpirun " - if self.args.num_processes: - if self.args.num_processes > 0: - command += "-n {} ".format(self.args.num_processes) - else: - print("Warning: {} is not a valid value.".format(self.args.num_processes)) - - if self.args.num_processes_per_node: - if self.args.num_processes_per_node > 0: - command += "-ppn {} ".format(self.args.num_processes_per_node) - else: - print("Warning: {} is not a valid value.".format(self.args.num_processes_per_node)) - - if option_list: - for item in option_list: - if item.count(':') != 1: - print("Warning: {} does not follow the option_list definition.".format(item)) - else: - option, value = item.split(':') - command += "{} {} ".format(option, value) - return command - def add_args_to_command(self, command, arg_list): """ Add args that are specified in the arg list to the command. 
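The mpirun prefix built in `BaseModelInitializer.__init__()` above is easier to follow in isolation. The sketch below reproduces just that string construction; the 28-cores-per-socket figure and the example environment values are assumptions for illustration, not values taken from this patch.
```
# Standalone sketch of the mpirun prefix construction added above.
# Assumed (not from the patch): a 2-socket box with 28 physical cores per
# socket and the example MPI_* environment values set below.
import os

python_exe = "python"
cores_per_socket = 28  # hypothetical; platform_util.num_cores_per_socket in the real code
os.environ.setdefault("MPI_NUM_PROCESSES", "4")
os.environ.setdefault("MPI_NUM_PROCESSES_PER_SOCKET", "2")

if os.environ["MPI_NUM_PROCESSES"] != "None":
    if os.environ["MPI_NUM_PROCESSES_PER_SOCKET"] == "1":
        # One process per socket: let OpenMPI map processes by socket.
        python_exe = ("mpirun --allow-run-as-root -n " + os.environ["MPI_NUM_PROCESSES"]
                      + " --map-by socket " + python_exe)
    else:
        # pps processes per socket, each pinned to an equal share of the socket's cores.
        pps = int(os.environ["MPI_NUM_PROCESSES_PER_SOCKET"])
        cores_per_proc = str(cores_per_socket // pps)
        python_exe = ("mpirun --allow-run-as-root -n " + os.environ["MPI_NUM_PROCESSES"]
                      + " --map-by ppr:" + str(pps) + ":socket:pe=" + cores_per_proc
                      + " --cpus-per-proc " + cores_per_proc + " " + python_exe)

print(python_exe)
# -> mpirun --allow-run-as-root -n 4 --map-by ppr:2:socket:pe=14 --cpus-per-proc 14 python
```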
batch_size diff --git a/benchmarks/common/platform_util.py b/benchmarks/common/platform_util.py index e1019b8df..c8bdae245 100644 --- a/benchmarks/common/platform_util.py +++ b/benchmarks/common/platform_util.py @@ -23,9 +23,9 @@ from __future__ import print_function import os +import re import platform as system_platform import subprocess -import shlex import sys NUMA_NODES_STR_ = "NUMA node(s)" @@ -35,6 +35,145 @@ LOGICAL_CPUS_STR_ = "CPU(s)" +class CPUInfo(): + """CPU information class.""" + + def __init__(self): + """Initialize CPU information class.""" + self._binding_data = CPUInfo._sort_membind_info(self._get_core_membind_info()) + + @staticmethod + def _get_core_membind_info(): + """ + Return sorted information about cores and memory binding. + E.g. + CPU ID, Socket ID, Node ID, HT CPU ID, + 0 , 0 , 0 , 0 + 1 , 0 , 0 , 1 + :return: list with cpu, sockets, ht core and memory binding information + :rtype: List[List[str, Any]] + """ + args = ["lscpu", "--parse=CPU,Core,Socket,Node"] + process_lscpu = subprocess.check_output(args, universal_newlines=True).split("\n") + + # Get information about core, node, socket and cpu + bind_info = [] + for line in process_lscpu: + pattern = r"^([\d]+,[\d]+,[\d]+,[\d]+)" + regex_out = re.search(pattern, line) + if regex_out: + bind_info.append(regex_out.group(1).strip().split(",")) + + return bind_info + + @staticmethod + def _sort_membind_info(membind_bind_info): + """ + Sore membind info data. + :param membind_bind_info: raw membind info data + :type membind_bind_info: List[List[str]] + :return: sorted membind info + :rtype: List[List[Dict[str, int]]] + """ + membind_cpu_list = [] + nodes_count = int(max(element[2] for element in membind_bind_info)) + 1 + # Sort list by Node id + for node_number in range(nodes_count): + node_core_list = [] + core_info = {} + for entry in membind_bind_info: + cpu_id = int(entry[0]) + core_id = int(entry[1]) + node_id = int(entry[2]) + socket_id = int(entry[3]) + + # Skip nodes other than current node number + if node_number != node_id: + continue + + # Add core info + if cpu_id == core_id: + core_info.update({ + core_id: { + "cpu_id": cpu_id, + "node_id": node_id, + "socket_id": socket_id, + }, + }) + else: + # Add information about Hyper Threading + core_info[core_id]["ht_cpu_id"] = cpu_id + + # Change dict of dicts to list of dicts + for iterator in range(len(core_info)): + curr_core_id = len(core_info) * node_number + iterator + single_core_info = core_info.get(curr_core_id) + if single_core_info: + node_core_list.append(single_core_info) + + membind_cpu_list.append(node_core_list) + + return membind_cpu_list + + @property + def sockets(self): + """ + Return count of sockets available on server. + :return: available cores + :rtype: int + """ + available_sockets = len(self._binding_data) + return int(available_sockets) + + @property + def cores(self): + """ + Return amount of cores available on server. + :return: amount of cores + :rtype: int + """ + available_cores = self.cores_per_socket * self.sockets + return int(available_cores) # type: ignore + + @property + def cores_per_socket(self): + """ + Return amount of available cores per socket. + :return: amount of cores + :rtype: int + """ + available_cores_per_socket = len(self._binding_data[0]) + return available_cores_per_socket + + @property + def binding_information(self): + """ + Return information about cores and memory binding. 
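`CPUInfo` derives the core/socket topology by parsing `lscpu --parse=CPU,Core,Socket,Node`. The sketch below runs the same regex over a made-up four-CPU sample so the intermediate `bind_info` layout is visible; the sample output and counts are assumptions, not captured from a real machine.
```
# Parse a hypothetical `lscpu --parse=CPU,Core,Socket,Node` output the same way
# CPUInfo._get_core_membind_info() does; the sample text is illustrative only.
import re

sample_lscpu = """# CPU,Core,Socket,Node
0,0,0,0
1,1,0,0
2,0,0,0
3,1,0,0"""

bind_info = []
for line in sample_lscpu.split("\n"):
    regex_out = re.search(r"^([\d]+,[\d]+,[\d]+,[\d]+)", line)
    if regex_out:
        bind_info.append(regex_out.group(1).strip().split(","))

print(bind_info)
# -> [['0', '0', '0', '0'], ['1', '1', '0', '0'], ['2', '0', '0', '0'], ['3', '1', '0', '0']]
# CPUs 2 and 3 reuse cores 0 and 1, so _sort_membind_info() records them as ht_cpu_id.
```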
+ Format: + [ + [ # socket 0 + { # Core 0 + "cpu_id": 0, + "node_id": 0, + "socket_id": 0, + "ht_cpu_id": 56 + } + ], + [ # socket 1 + { # Core 0 + "cpu_id": 28, + "node_id": 1, + "socket_id": 1, + "ht_cpu_id": 84 + } + ] + ] + :return: dict with cpu, sockets, ht core and memory binding information + :rtype: List[List[Dict[str, int]]] + """ + return self._binding_data + + class PlatformUtil: ''' This module implements a platform utility that exposes functions that diff --git a/benchmarks/common/tensorflow/run_tf_benchmark.py b/benchmarks/common/tensorflow/run_tf_benchmark.py old mode 100644 new mode 100755 diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index b180665eb..5abcaf181 100644 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -49,33 +49,41 @@ echo " DISABLE_TCMALLOC: ${DISABLE_TCMALLOC}" echo " TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD: ${TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD}" echo " NOINSTALL: ${NOINSTALL}" echo " OUTPUT_DIR: ${OUTPUT_DIR}" +echo " MPI_NUM_PROCESSES: ${MPI_NUM_PROCESSES}" +echo " MPI_NUM_PEOCESSES_PER_SOCKET: ${MPI_NUM_PROCESSES_PER_SOCKET}" -# Only inference and training are supported right now +# Only inference is supported right now if [ ${MODE} != "inference" ] && [ ${MODE} != "training" ]; then echo "${MODE} mode is not supported" exit 1 fi -if [[ ${NOINSTALL} != "True" && ${DOCKER} == "True" ]]; then +if [[ ${NOINSTALL} != "True" ]]; then ## install common dependencies apt update apt full-upgrade -y # Set env var before installs so that user interaction is not required export DEBIAN_FRONTEND=noninteractive - apt-get install python-tk numactl -y + apt-get install gcc-8 g++-8 cmake python-tk numactl -y + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 700 --slave /usr/bin/g++ g++ /usr/bin/g++-7 + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 apt install -y libsm6 libxext6 pip install --upgrade pip pip install requests - # install libgoogle-perftools-dev for tcmalloc + # install google-perftools for tcmalloc if [[ ${DISABLE_TCMALLOC} != "True" ]]; then - apt-get install --no-install-recommends --fix-missing google-perftools -y - if [ ! -f /usr/lib/libtcmalloc.so ]; then - apt-get install --no-install-recommends --fix-missing libgoogle-perftools-dev -y - if [ ! -f /usr/lib/libtcmalloc.so ]; then - ln -sf /usr/lib/x86_64-linux-gnu/libtcmalloc.so /usr/lib/libtcmalloc.so - fi - fi + apt-get install google-perftools -y + fi + + if [[ ${MPI_NUM_PROCESSES} != "None" ]]; then + ## Installing OpenMPI + apt-get install openmpi-bin openmpi-common openssh-client openssh-server libopenmpi-dev -y + # Horovod Installation + export HOROVOD_WITHOUT_PYTORCH=1 + export HOROVOD_WITHOUT_MXNET=1 + # TODO: lock a horovod commit + pip install --no-cache-dir horovod fi fi @@ -148,14 +156,12 @@ CMD="${PYTHON_EXE} ${RUN_SCRIPT_PATH} \ --batch-size=${BATCH_SIZE} \ --socket-id=${SOCKET_ID} \ --output-dir=${OUTPUT_DIR} \ ---num-processes=${NUM_PROCESSES} \ ---num-processes-per-node=${NUM_PROCESSES_PER_NODE} \ ---num-train-steps=${NUM_TRAIN_STEPS} \ ${accuracy_only_arg} \ ${benchmark_only_arg} \ ${output_results_arg} \ ${verbose_arg}" + if [ ${MOUNT_EXTERNAL_MODELS_SOURCE} != "None" ]; then CMD="${CMD} --model-source-dir=${MOUNT_EXTERNAL_MODELS_SOURCE}" fi @@ -199,7 +205,7 @@ function install_protoc() { if [ ! 
-f "bin/protoc" ]; then install_location=$1 echo "protoc not found, installing protoc from ${install_location}" - apt-get -y install wget unzip + apt-get -y install wget wget -O protobuf.zip ${install_location} unzip -o protobuf.zip rm protobuf.zip @@ -251,6 +257,8 @@ function add_arg() { function add_steps_args() { # returns string with --steps and --warmup_steps, if there are values specified local steps_arg="" + local trainepochs_arg="" + local epochsbtweval_arg="" local warmup_steps_arg="" local kmp_blocktime_arg="" @@ -258,6 +266,14 @@ function add_steps_args() { steps_arg="--steps=${steps}" fi + if [ -n "${train_epochs}" ]; then + trainepochs_arg="--train_epochs=${train_epochs}" + fi + + if [ -n "${epochs_between_evals}" ]; then + epochsbtweval_arg="--epochs_between_evals=${epochs_between_evals}" + fi + if [ -n "${warmup_steps}" ]; then warmup_steps_arg="--warmup-steps=${warmup_steps}" fi @@ -266,7 +282,7 @@ function add_steps_args() { kmp_blocktime_arg="--kmp-blocktime=${kmp_blocktime}" fi - echo "${steps_arg} ${warmup_steps_arg} ${kmp_blocktime_arg}" + echo "${steps_arg} ${trainepochs_arg} ${epochsbtweval_arg} ${warmup_steps_arg} ${kmp_blocktime_arg}" } function add_calibration_arg() { @@ -300,106 +316,6 @@ function densenet169() { fi } -# DRAW model -function draw() { - if [ ${PRECISION} == "fp32" ]; then - PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model - else - echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" - exit 1 - fi -} - -# Faster R-CNN (ResNet50) model -function faster_rcnn() { - export PYTHONPATH=$PYTHONPATH:${MOUNT_EXTERNAL_MODELS_SOURCE}/research:${MOUNT_EXTERNAL_MODELS_SOURCE}/research/slim - original_dir=$(pwd) - - # install dependencies - pip install -r "${MOUNT_BENCHMARK}/object_detection/tensorflow/faster_rcnn/requirements.txt" - - cd "${MOUNT_EXTERNAL_MODELS_SOURCE}/research" - # install protoc v3.3.0, if necessary, then compile protoc files - install_protoc "https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip" - - # apply patch for tensorflow models - cd ${MOUNT_EXTERNAL_MODELS_SOURCE} - git apply ${MOUNT_INTELAI_MODELS_SOURCE}/${MODE}/tf_models.patch - - if [ ${PRECISION} == "fp32" ]; then - if [ -n "${steps}" ] && [ ${BENCHMARK_ONLY} == "True" ]; then - CMD="${CMD} --steps=${steps}" - fi - elif [ ${PRECISION} == "int8" ]; then - if [ -n "${steps}" ] && [ ${BENCHMARK_ONLY} == "True" ]; then - CMD="${CMD} --steps=${steps}" - fi - else - echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" - exit 1 - fi - cd $original_dir - PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model -} - -# GNMT model -function gnmt() { - export PYTHONPATH=${PYTHONPATH}:$(pwd):${MOUNT_BENCHMARK}:${MOUNT_EXTERNAL_MODELS_SOURCE} - if [ ${MODE} == "training" ]; then - if [ ${PRECISION} == "fp32" ]; then - # build the model source - original_dir=$(pwd) - model_source_dir="${INTELAI_MODELS}/${MODE}/${PRECISION}" - - if [ ${NOINSTALL} != "True" ]; then - model_source_dir="${MOUNT_INTELAI_MODELS_SOURCE}/${MODE}/${PRECISION}" - # install dependencies - apt-get update - apt-get install cpio - # Enter the docker mount directory /l_mpi and install the intel mpi with silent mode - cd /l_mpi - sh install.sh --silent silent.cfg - source /opt/intel/compilers_and_libraries/linux/bin/compilervars.sh intel64 - pip install -r "${MOUNT_INTELAI_MODELS_SOURCE}/${MODE}/requirements.txt" - fi - # Prepare the model source - cd ${model_source_dir} - export PYTHONPATH=${PYTHONPATH}:${model_source_dir}/nmt/nmt - rm nmt -rf - git clone 
https://github.com/tensorflow/nmt.git - cd nmt - git checkout b278487980832417ad8ac701c672b5c3dc7fa553 - git apply ../multi_instances.patch - cd $original_dir - CMD="${CMD} $(add_arg "--src" ${src}) $(add_arg "--tgt" ${tgt}) \ - $(add_arg "--vocab_prefix" ${vocab_prefix}) \ - $(add_arg "--train_prefix" ${train_prefix}) \ - $(add_arg "--dev_prefix" ${dev_prefix}) $(add_arg "--test_prefix" ${test_prefix}) \ - $(add_arg "--num_units" ${num_units}) \ - $(add_arg "--dropout" ${dropout}) \ - $(add_arg "--hparams_path" ${hparams_path})" - PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model - else - echo "PRECISION=${PRECISION} not supported for ${MODEL_NAME}" - exit 1 - fi - fi - - if [ ${MODE} == "inference" ]; then - if [ ${PRECISION} == "fp32" ]; then - export PYTHONPATH=${PYTHONPATH}:${MOUNT_INTELAI_MODELS_SOURCE}/${MODE} - CMD="${CMD} $(add_arg "--src" ${src}) $(add_arg "--tgt" ${tgt}) $(add_arg "--hparams_path" ${hparams_path}) \ - $(add_arg "--vocab_prefix" ${vocab_prefix}) $(add_arg "--inference_input_file" ${inference_input_file}) \ - $(add_arg "--inference_output_file" ${inference_output_file}) $(add_arg "--inference_ref_file" ${inference_ref_file}) \ - $(add_arg "--infer_mode" ${infer_mode})" - PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model - else - echo "PRECISION=${PRECISION} not supported for ${MODEL_NAME}" - exit 1 - fi - fi -} - # inceptionv4 model function inceptionv4() { # For accuracy, dataset location is required @@ -422,50 +338,118 @@ function inceptionv4() { fi } -# inception_resnet_v2 model -function inception_resnet_v2() { - # For accuracy, dataset location is required, see README for more information. - if [ "${DATASET_LOCATION_VOL}" == None ] && [ ${ACCURACY_ONLY} == "True" ]; then - echo "No Data directory specified, accuracy will not be calculated." - exit 1 - fi - - if [ ${PRECISION} == "int8" ] || [ ${PRECISION} == "fp32" ]; then - PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model - else - echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" - exit 1 - fi -} - -# language modeling lm-1b -function lm-1b() { - if [ ${PRECISION} == "fp32" ]; then - CMD="${CMD} $(add_steps_args)" - - PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model - else - echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" - exit 1 - fi -} +# MiniGo model +function minigo() { + if [ ${MODE} == "training" ] && [ ${PRECISION} == "fp32" ]; then + original_dir=$(pwd) + local MODEL_DIR=${EXTERNAL_MODELS_SOURCE_DIRECTORY} + local INTELAI_MODEL_DIR=${INTELAI_MODELS} + local BENCHMARK_DIR=${BENCHMARK_SCRIPTS} + + if [ ${DOCKER} == "True" ]; then + MODEL_DIR=${MOUNT_EXTERNAL_MODELS_SOURCE} + INTELAI_MODEL_DIR=${MOUNT_INTELAI_MODELS_SOURCE} + BENCHMARK_DIR=${MOUNT_BENCHMARK} + # install dependencies + apt-get update && apt-get install -y cpio + # pip3 install -r ${MODEL_DIR}/requirements.txt + pip install -r ${BENCHMARK_DIR}/reinforcement/tensorflow/minigo/requirements.txt + if [ ! 
-f "bazel-0.22.0-installer-linux-x86_64.sh" ];then + wget https://github.com/bazelbuild/bazel/releases/download/0.22.0/bazel-0.22.0-installer-linux-x86_64.sh + chmod 755 bazel-0.22.0-installer-linux-x86_64.sh + fi + ./bazel-0.22.0-installer-linux-x86_64.sh --prefix=/tmp/bazel + rm /root/.bazelrc + export PATH=/tmp/bazel/bin:$PATH + cd /l_mpi + sh install.sh --silent silent.cfg + source /opt/intel/compilers_and_libraries/linux/bin/compilervars.sh intel64 + pip install mpi4py + fi -# Mask R-CNN model -function maskrcnn() { - if [ ${PRECISION} == "fp32" ]; then - original_dir=$(pwd) + pip install -r ${BENCHMARK_DIR}/reinforcement/tensorflow/minigo/requirements.txt + if [ "${EXTERNAL_MODELS_SOURCE_DIRECTORY}" == "None" ]; then + echo "You are supposed to provide model dir." + exit 1 + fi + + # MODEL_DIR is the official mlperf minigo repo + cd ${MODEL_DIR} + git checkout 60ecb12f29582227a473fdc7cd09c2605f42bcd6 + + # delete the previous patch influence + git reset --hard + git clean -fd + rm -rf ./ml_perf/flags/9.mn/ + + # remove the quantization tools downloaded before + rm -rf ${MODEL_DIR}/ml_perf/tools/ + rm -rf ${MODEL_DIR}/cc/ml_perf/tools/ + + if [ "${large_scale}" == "True" ]; then + # multi-node mode + git apply ${INTELAI_MODEL_DIR}/training/fp32/minigo_mlperf_large_scale.patch + git apply ${INTELAI_MODEL_DIR}/training/fp32/avoid-repeated-clone-multinode.patch + git apply ${INTELAI_MODEL_DIR}/training/fp32/bazel-clean-large-scale.patch + # git apply ${INTELAI_MODEL_DIR}/training/fp32/large-scale-no-bg.patch + else + # single-node mode + git apply ${INTELAI_MODEL_DIR}/training/fp32/minigo_mlperf.patch + git apply ${INTELAI_MODEL_DIR}/training/fp32/mlperf_split.patch + git apply ${INTELAI_MODEL_DIR}/training/fp32/avoid-repeated-clone-singlenode.patch + git apply ${INTELAI_MODEL_DIR}/training/fp32/bazel-clean-single-node.patch + fi - if [ ${NOINSTALL} != "True" ]; then - # install dependencies - pip install matplotlib==3.0.3 pycocotools cython scikit-image==0.15.0 keras scipy==1.2.1 numpy==1.17.4 - python_path=`which python` - pycocotools_path=`dirname $python_path`/../lib/python*/*/pycocotools - sed -i "s;unicode;str;g" $pycocotools_path/coco.py - fi - cd ${original_dir} - PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + # generate the flags with specified iterations + if [ -z "$steps" ];then + steps=30 + fi + mv ml_perf/flags/9/rl_loop.flags ml_perf/flags/9/rl_loop.flags-org + sed "s/iterations=30/iterations=${steps}/g" ml_perf/flags/9/rl_loop.flags-org &> ml_perf/flags/9/rl_loop.flags + mv ml_perf/flags/9/train.flags ml_perf/flags/9/train.flags-org + sed "s/train_batch_size=8192/train_batch_size=4096/g" ml_perf/flags/9/train.flags-org &> ml_perf/flags/9/train.flags + + # MiniGo need specified tensorflow version and to build selfplay part with tensorflow c lib. 
+ rm -rf cc/minigo_tf/tensorflow-*.data + rm -rf cc/minigo_tf/tensorflow-*.dist-info + chmod +777 ./cc/configure_tensorflow.sh + chmod +777 ./build.sh + ./cc/configure_tensorflow.sh + pip uninstall -y ./cc/tensorflow_pkg/tensorflow-*.whl + pip uninstall -y tensorflow + pip uninstall -y intel-tensorflow + pip install ./cc/tensorflow_pkg/tensorflow-*.whl + ./build.sh + + # ensure horovod installed + pip install horovod==0.15.1 + + + # set the python path for quantization tools + export PYTHONPATH=${PYTHONPATH}:${MODEL_DIR}/cc/ml_perf/tools/api/intel_quantization:${MODEL_DIR}/ml_perf/tools/api/intel_quantization + + # freeze the tfrecord and target to the checkpoint for training + git apply ${INTELAI_MODEL_DIR}/training/fp32/get-data.patch + BOARD_SIZE=9 python ml_perf/get_data.py + + # $HOSTLIST.txt contains all the ip address + + if [ ! $multi_node ];then + unset -v HOSTLIST + else + export HOSTLIST=${BENCHMARK_DIR}/node_list + fi + + cd ${original_dir} + CMD="${CMD} \ + $(add_arg "--large-scale" ${large_scale}) \ + $(add_arg "--num-train-nodes" ${num_train_nodes}) \ + $(add_arg "--num-eval-nodes" ${num_eval_nodes}) \ + $(add_arg "--quantization" ${quantization}) \ + $(add_arg "--multi-node" ${multi_node})" + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model else - echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" + echo "MODE=${MODE} PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" exit 1 fi } @@ -473,32 +457,16 @@ function maskrcnn() { # mobilenet_v1 model function mobilenet_v1() { if [ ${PRECISION} == "fp32" ]; then - export PYTHONPATH=${PYTHONPATH}:${MOUNT_EXTERNAL_MODELS_SOURCE}:${MOUNT_EXTERNAL_MODELS_SOURCE}/research:${MOUNT_EXTERNAL_MODELS_SOURCE}/research/slim + CMD="${CMD} $(add_arg "--input_height" ${input_height}) $(add_arg "--input_width" ${input_width}) \ + $(add_arg "--warmup_steps" ${warmup_steps}) $(add_arg "--steps" ${steps}) \ + $(add_arg "--input_layer" ${input_layer}) $(add_arg "--output_layer" ${output_layer})" + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model elif [ ${PRECISION} == "int8" ]; then - CMD="${CMD} $(add_arg "--input_height" ${input_height}) $(add_arg "--input_width" ${input_width}) \ - $(add_arg "--warmup_steps" ${warmup_steps}) $(add_arg "--steps" ${steps}) $(add_arg "--input_layer" ${input_layer}) \ - $(add_arg "--output_layer" ${output_layer})" - PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model - else - echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" - exit 1 - fi -} - -# MTCC model -function mtcc() { - if [ ${PRECISION} == "fp32" ]; then - if [ ! -d "${DATASET_LOCATION}" ]; then - echo "No Data location specified, please follow MTCC README instaructions to download the dataset." 
- exit 1 - fi - if [ ${NOINSTALL} != "True" ]; then - # install dependencies - pip install opencv-python - pip install easydict - fi - export PYTHONPATH=${PYTHONPATH}:${MOUNT_EXTERNAL_MODELS_SOURCE}:${MOUNT_EXTERNAL_MODELS_SOURCE}/Detection:${MOUNT_INTELAI_MODELS_SOURCE}/inference/fp32:${MOUNT_INTELAI_MODELS_SOURCE}/inference/fp32/Detection + CMD="${CMD} $(add_arg "--input_height" ${input_height}) $(add_arg "--input_width" ${input_width}) \ + $(add_arg "--warmup_steps" ${warmup_steps}) $(add_arg "--steps" ${steps}) \ + $(add_arg "--input_layer" ${input_layer}) $(add_arg "--output_layer" ${output_layer}) \ + $(add_calibration_arg)" PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model else @@ -507,29 +475,30 @@ function mtcc() { fi } -# NCF model -function ncf() { - if [ ${PRECISION} == "fp32" ]; then - # For nfc, if dataset location is empty, script downloads dataset at given location. - if [ ! -d "${DATASET_LOCATION}" ]; then - mkdir -p /dataset - fi - - export PYTHONPATH=${PYTHONPATH}:${MOUNT_EXTERNAL_MODELS_SOURCE} +# ResNet101, InceptionV3 model +function resnet101_inceptionv3() { + export PYTHONPATH=${PYTHONPATH}:$(pwd):${MOUNT_BENCHMARK} - if [ ${NOINSTALL} != "True" ]; then - pip install -r ${MOUNT_EXTERNAL_MODELS_SOURCE}/official/requirements.txt + # For accuracy, dataset location is required. + if [ "${DATASET_LOCATION_VOL}" == "None" ] && [ ${ACCURACY_ONLY} == "True" ]; then + echo "No Data directory specified, accuracy will not be calculated." + exit 1 fi - PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model - else - echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" - exit 1 - fi + if [ ${PRECISION} == "int8" ]; then + CMD="${CMD} $(add_steps_args) $(add_calibration_arg)" + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + elif [ ${PRECISION} == "fp32" ]; then + CMD="${CMD} $(add_steps_args)" + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + else + echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" + exit 1 + fi } -# ResNet50, ResNet101, InceptionV3 model -function resnet50_101_inceptionv3() { +# ResNet50 model +function resnet50() { export PYTHONPATH=${PYTHONPATH}:$(pwd):${MOUNT_BENCHMARK} # For accuracy, dataset location is required. @@ -550,44 +519,59 @@ function resnet50_101_inceptionv3() { fi } +# MLPerf GNMT model +function mlperf_gnmt() { + export PYTHONPATH=${PYTHONPATH}:$(pwd):${MOUNT_BENCHMARK} + + # For accuracy, dataset location is required. + if [ "${DATASET_LOCATION_VOL}" == "None" ] && [ ${ACCURACY_ONLY} == "True" ]; then + echo "No Data directory specified, accuracy will not be calculated." 
+ exit 1 + fi + + if [ ${PRECISION} == "int8" ]; then + CMD="${CMD} $(add_steps_args) $(add_calibration_arg)" + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + elif [ ${PRECISION} == "fp32" ]; then + CMD="${CMD} $(add_steps_args)" + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + else + echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" + exit 1 + fi +} # R-FCN (ResNet101) model function rfcn() { - export PYTHONPATH=$PYTHONPATH:${MOUNT_EXTERNAL_MODELS_SOURCE}/research:${MOUNT_EXTERNAL_MODELS_SOURCE} + export PYTHONPATH=$PYTHONPATH:${MOUNT_EXTERNAL_MODELS_SOURCE}/research:${MOUNT_EXTERNAL_MODELS_SOURCE}/research/slim:${MOUNT_EXTERNAL_MODELS_SOURCE} - # install dependencies - pip install -r "${MOUNT_BENCHMARK}/object_detection/tensorflow/rfcn/requirements.txt" - - original_dir=$(pwd) + if [ ${NOINSTALL} != "True" ]; then + # install dependencies + for line in $(cat ${MOUNT_BENCHMARK}/object_detection/tensorflow/rfcn/requirements.txt) + do + pip install $line + done + original_dir=$(pwd) - cd "${MOUNT_EXTERNAL_MODELS_SOURCE}/research" - # install protoc v3.3.0, if necessary, then compile protoc files - install_protoc "https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip" + cd ${MOUNT_EXTERNAL_MODELS_SOURCE} + git apply --ignore-space-change --ignore-whitespace ${MOUNT_INTELAI_MODELS_SOURCE}/${MODE}/tf-2.0.patch - # apply patch for tensorflow models - cd ${MOUNT_EXTERNAL_MODELS_SOURCE} - git apply ${MOUNT_INTELAI_MODELS_SOURCE}/${MODE}/tf_models.patch + cd "${MOUNT_EXTERNAL_MODELS_SOURCE}/research" + # install protoc v3.3.0, if necessary, then compile protoc files + install_protoc "https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip" + fi + split_arg="" if [ -n "${split}" ] && [ ${ACCURACY_ONLY} == "True" ]; then - CMD="${CMD} --split=${split}" + split_arg="--split=${split}" fi - if [ ${PRECISION} == "int8" ]; then - - if [ -n "${steps}" ] && [ ${BENCHMARK_ONLY} == "True" ]; then - CMD="${CMD} --steps=${steps}" - fi - - - elif [ ${PRECISION} == "fp32" ]; then - - if [ -n "${steps}" ] && [ ${BENCHMARK_ONLY} == "True" ]; then - CMD="${CMD} --steps=${steps}" - fi - - else - echo "MODE:${MODE} and PRECISION=${PRECISION} not supported" + number_of_steps_arg="" + if [ -n "${number_of_steps}" ] && [ ${BENCHMARK_ONLY} == "True" ]; then + number_of_steps_arg="--number_of_steps=${number_of_steps}" fi + CMD="${CMD} ${number_of_steps_arg} ${split_arg}" + cd $original_dir PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model } @@ -603,27 +587,16 @@ function ssd_mobilenet() { exit 1 fi - export PYTHONPATH=$PYTHONPATH:${MOUNT_EXTERNAL_MODELS_SOURCE}/research:${MOUNT_EXTERNAL_MODELS_SOURCE}/research/slim:${MOUNT_EXTERNAL_MODELS_SOURCE}/research/object_detection - - sed -i 's/return "".join/return b"".join/g' ${MOUNT_EXTERNAL_MODELS_SOURCE}/research/object_detection/metrics/tf_example_parser.py + export PYTHONPATH=${PYTHONPATH}:${MOUNT_BENCHMARK} - OLD_PWD=${PWD} - cd ${MOUNT_EXTERNAL_MODELS_SOURCE} - git apply ${MOUNT_INTELAI_MODELS_SOURCE}/${MODE}/detection_inference.patch - cd ${OLD_PWD} - - chmod -R 777 ${MOUNT_EXTERNAL_MODELS_SOURCE}/research/object_detection/inference/detection_inference.py - sed -i.bak "s/'r'/'rb'/g" ${MOUNT_EXTERNAL_MODELS_SOURCE}/research/object_detection/inference/detection_inference.py - - # install dependencies for both fp32 and int8 - pip install -r "${MOUNT_BENCHMARK}/object_detection/tensorflow/ssd-mobilenet/requirements.txt" - - if [ ${PRECISION} == "int8" ]; then - # install protoc v3.3.0, if 
necessary, then compile protoc files - install_protoc "https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip" - elif [ ${PRECISION} == "fp32" ]; then - # install protoc v3.0.0, if necessary, then compile protoc files - install_protoc "https://github.com/google/protobuf/releases/download/v3.0.0/protoc-3.0.0-linux-x86_64.zip" + if [ ${NOINSTALL} != "True" ]; then + # install dependencies for both fp32 and int8 + apt-get update && apt-get install -y git + # install one by one to solve dependency problems + for line in $(cat ${MOUNT_BENCHMARK}/object_detection/tensorflow/ssd-mobilenet/requirements.txt) + do + pip install $line + done fi PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model @@ -631,91 +604,75 @@ function ssd_mobilenet() { # SSD-ResNet34 model function ssd-resnet34() { - if [ ${MODE} == "inference" ]; then - if [ ${PRECISION} == "fp32" ] || [ ${PRECISION} == "int8" ]; then - if [ ${NOINSTALL} != "True" ]; then - for line in $(cat ${MOUNT_BENCHMARK}/object_detection/tensorflow/ssd-resnet34/requirements.txt) - do - pip install $line - done - fi - - old_dir=${PWD} - cd /tmp - git clone --single-branch https://github.com/tensorflow/benchmarks.git - cd benchmarks - git checkout 1e7d788042dfc6d5e5cd87410c57d5eccee5c664 - cd ${old_dir} - - CMD=${CMD} run_model - else - echo "PRECISION=${PRECISION} not supported for ${MODEL_NAME}" - exit 1 - fi - elif [ ${MODE} == "training" ]; then - if [ ${PRECISION} == "fp32" ]; then - if [ ${NOINSTALL} != "True" ]; then - if [ ${DOCKER} == "True" ]; then - apt-get update && apt-get install -y cpio - - # Enter the docker mount directory /l_mpi and install the intel mpi with silent mode - cd /l_mpi - sh install.sh --silent silent.cfg - source /opt/intel/compilers_and_libraries/linux/bin/compilervars.sh intel64 - fi - for line in $(cat ${MOUNT_BENCHMARK}/object_detection/tensorflow/ssd-resnet34/requirements.txt) - do - pip install $line - done - pip install horovod - fi - - old_dir=${PWD} - cd /tmp - rm -rf benchmark_ssd-resnet34 - git clone -b cnn_tf_v1.13_compatible https://github.com/tensorflow/benchmarks.git benchmark_ssd-resnet34 - cd benchmark_ssd-resnet34 - git apply ${MOUNT_INTELAI_MODELS_SOURCE}/${MODE}/${PRECISION}/benchmark_v1.13.diff - cd ${old_dir} - - CMD="${CMD} \ - $(add_arg "--weight_decay" ${weight_decay}) \ - $(add_arg "--num_warmup_batches" ${num_warmup_batches})" - local old_pythonpath=${PYTHONPATH} - export PYTHONPATH=${PYTHONPATH}:${MOUNT_EXTERNAL_MODELS_SOURCE}:${MOUNT_EXTERNAL_MODELS_SOURCE}/research - CMD=${CMD} run_model - PYTHONPATH=${old_pythonpath} - else - echo "PRECISION=${PRECISION} not supported for ${MODEL_NAME}" - exit 1 - fi - fi -} + if [ ${PRECISION} == "fp32" ] || [ ${PRECISION} == "int8" ]; then -# SSD-VGG16 model -function ssd_vgg16() { + if [ ${NOINSTALL} != "True" ]; then + for line in $(cat ${MOUNT_BENCHMARK}/object_detection/tensorflow/ssd-resnet34/requirements.txt) + do + pip install $line + done - if [ ${NOINSTALL} != "True" ]; then - pip install opencv-python Cython + old_dir=${PWD} - if [ ${ACCURACY_ONLY} == "True" ]; then - # get the python cocoapi - get_cocoapi ${MOUNT_EXTERNAL_MODELS_SOURCE}/coco ${MOUNT_INTELAI_MODELS_SOURCE}/inference - fi - fi + infer_dir=${MOUNT_INTELAI_MODELS_SOURCE}/inference + benchmarks_patch_path=${infer_dir}/tensorflow_benchmarks_tf2.0.patch + cd /tmp + git clone --single-branch https://github.com/tensorflow/benchmarks.git + cd benchmarks + git checkout 509b9d288937216ca7069f31cfb22aaa7db6a4a7 + git apply ${benchmarks_patch_path} - cp 
${MOUNT_INTELAI_MODELS_SOURCE}/__init__.py ${MOUNT_EXTERNAL_MODELS_SOURCE}/dataset - cp ${MOUNT_INTELAI_MODELS_SOURCE}/__init__.py ${MOUNT_EXTERNAL_MODELS_SOURCE}/preprocessing - cp ${MOUNT_INTELAI_MODELS_SOURCE}/__init__.py ${MOUNT_EXTERNAL_MODELS_SOURCE}/utility - export PYTHONPATH=${PYTHONPATH}:${MOUNT_EXTERNAL_MODELS_SOURCE} + model_patch_path=${infer_dir}/tensorflow_models_tf2.0.patch + cd ${MOUNT_EXTERNAL_MODELS_SOURCE} + git apply ${model_patch_path} + + cd ${old_dir} + fi + CMD=${CMD} run_model - if [ ${PRECISION} == "int8" ] || [ ${PRECISION} == "fp32" ]; then - CMD="${CMD} $(add_steps_args)" - PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model else - echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" + echo "PRECISION=${PRECISION} not supported for ${MODEL_NAME}" + exit 1 + fi +} + +# transformer language model from official tensorflow models +function transformer_lt_official() { + if [ ${PRECISION} == "fp32" ]; then + + if [[ -z "${file}" ]]; then + echo "transformer-language requires -- file arg to be defined" + exit 1 + fi + if [[ -z "${file_out}" ]]; then + echo "transformer-language requires -- file_out arg to be defined" + exit 1 + fi + if [[ -z "${reference}" ]]; then + echo "transformer-language requires -- reference arg to be defined" exit 1 fi + if [[ -z "${vocab_file}" ]]; then + echo "transformer-language requires -- vocab_file arg to be defined" + exit 1 + fi + + if [ ${NOINSTALL} != "True" ]; then + pip install -r "${MOUNT_BENCHMARK}/language_translation/tensorflow/transformer_lt_official/requirements.txt" + fi + + CMD="${CMD} + --in_graph=${IN_GRAPH} \ + --vocab_file=${DATASET_LOCATION}/${vocab_file} \ + --file=${DATASET_LOCATION}/${file} \ + --file_out=${OUTPUT_DIR}/${file_out} \ + --reference=${DATASET_LOCATION}/${reference}" + PYTHONPATH=${PYTHONPATH}:${MOUNT_BENCHMARK}:${MOUNT_INTELAI_MODELS_SOURCE}/${MODE}/${PRECISION} + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + else + echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" + exit 1 + fi } # Wide & Deep model @@ -743,13 +700,7 @@ function wide_deep_large_ds() { if [[ -z "${LIBTCMALLOC}" ]]; then echo "libtcmalloc.so.4 not found, trying to install" apt-get update - apt-get install --no-install-recommends --fix-missing google-perftools -y - if [ ! -f /usr/lib/libtcmalloc.so ]; then - apt-get install --no-install-recommends --fix-missing libgoogle-perftools-dev -y - if [ ! 
-f /usr/lib/libtcmalloc.so ]; then - ln -sf /usr/lib/x86_64-linux-gnu/libtcmalloc.so /usr/lib/libtcmalloc.so - fi - fi + apt-get install google-perftools --fix-missing -y fi LIBTCMALLOC="$(ldconfig -p | grep $TCMALLOC_LIB | tr ' ' '\n' | grep /)" @@ -780,6 +731,23 @@ function wide_deep_large_ds() { if [ "${num_omp_threads}" != None ]; then CMD="${CMD} --num_omp_threads=${num_omp_threads}" fi + if [ "${use_parallel_batches}" == "True" ]; then + CMD="${CMD} --use_parallel_batches=${use_parallel_batches}" + else + CMD="${CMD} --use_parallel_batches=False" + fi + if [ "${num_parallel_batches}" != None ] && [ "${use_parallel_batches}" == "True" ]; then + CMD="${CMD} --num_parallel_batches=${num_parallel_batches}" + fi + if [ "${kmp_block_time}" != None ] ; then + CMD="${CMD} --kmp_block_time=${kmp_block_time}" + fi + if [ "${kmp_affinity}" != None ]; then + CMD="${CMD} --kmp_affinity=${kmp_affinity}" + fi + if [ "${kmp_settings}" != None ]; then + CMD="${CMD} --kmp_settings=${kmp_settings}" + fi if [ ${PRECISION} == "int8" ] || [ ${PRECISION} == "fp32" ]; then CMD="${CMD}" PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model @@ -787,7 +755,7 @@ function wide_deep_large_ds() { echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" exit 1 fi - fi + fi } LOGFILE=${OUTPUT_DIR}/${LOG_FILENAME} @@ -796,42 +764,30 @@ echo "Log output location: ${LOGFILE}" MODEL_NAME=$(echo ${MODEL_NAME} | tr 'A-Z' 'a-z') if [ ${MODEL_NAME} == "densenet169" ]; then densenet169 -elif [ ${MODEL_NAME} == "draw" ]; then - draw -elif [ ${MODEL_NAME} == "faster_rcnn" ]; then - faster_rcnn -elif [ ${MODEL_NAME} == "gnmt" ]; then - gnmt +elif [ ${MODEL_NAME} == "mlperf_gnmt" ]; then + mlperf_gnmt elif [ ${MODEL_NAME} == "inceptionv3" ]; then - resnet50_101_inceptionv3 + resnet101_inceptionv3 elif [ ${MODEL_NAME} == "inceptionv4" ]; then inceptionv4 -elif [ ${MODEL_NAME} == "inception_resnet_v2" ]; then - inception_resnet_v2 -elif [ ${MODEL_NAME} == "lm-1b" ]; then - lm-1b -elif [ ${MODEL_NAME} == "maskrcnn" ]; then - maskrcnn +elif [ ${MODEL_NAME} == "minigo" ]; then + minigo elif [ ${MODEL_NAME} == "mobilenet_v1" ]; then mobilenet_v1 -elif [ ${MODEL_NAME} == "mtcc" ]; then - mtcc -elif [ ${MODEL_NAME} == "ncf" ]; then - ncf elif [ ${MODEL_NAME} == "resnet101" ]; then - resnet50_101_inceptionv3 + resnet101_inceptionv3 elif [ ${MODEL_NAME} == "resnet50" ]; then - resnet50_101_inceptionv3 + resnet50 elif [ ${MODEL_NAME} == "resnet50v1_5" ]; then - resnet50_101_inceptionv3 + resnet50 elif [ ${MODEL_NAME} == "rfcn" ]; then rfcn elif [ ${MODEL_NAME} == "ssd-mobilenet" ]; then ssd_mobilenet elif [ ${MODEL_NAME} == "ssd-resnet34" ]; then ssd-resnet34 -elif [ ${MODEL_NAME} == "ssd_vgg16" ]; then - ssd_vgg16 +elif [ ${MODEL_NAME} == "transformer_lt_official" ]; then + transformer_lt_official elif [ ${MODEL_NAME} == "wide_deep" ]; then wide_deep elif [ ${MODEL_NAME} == "wide_deep_large_ds" ]; then diff --git a/benchmarks/common/utils/multi_instance.py b/benchmarks/common/utils/multi_instance.py new file mode 100644 index 000000000..9a8eefd8c --- /dev/null +++ b/benchmarks/common/utils/multi_instance.py @@ -0,0 +1,301 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +"""Multi instance utils module.""" + +from common.platform_util import CPUInfo + + +def buckets(array, bucket_size): + """ + Split array into multiple arrays with specified size. + :param array: array that will be splited + :type array: List[Any] + :param bucket_size_size: target arrays size + :type bucket_size_size: int + :return: list with parameters + :rtype: List[List[Any]] + """ + bucket_size_list = [] + for i in range(0, len(array), bucket_size): + bucket_size_list.append(array[i:i + bucket_size]) + + return bucket_size_list + + +class InferencePrefix: + """Multi instance class.""" + + def __init__(self, sockets=0, instances=0, cores_per_instance=0): + """ + Initialize multi instance class. + :param sockets: sockets used for execution, defaults to 0 + :type sockets: int, optional + :param instances: number of instances, defaults to 0 + :type instances: int, optional + :param cores_per_instance: number of cores that will be used by one instance, defaults to 0 + :type cores_per_instance: int, optional + """ + self._cpu_information = CPUInfo() + self._sockets = sockets + self._instances = instances + self._cores_per_instance = cores_per_instance + + @property + def is_basic_configuration(self): + """ + Check if workload is multi instance or should use core/memory binding. + :return: True if basic configuration else False + :rtype: bool + """ + # Expected single instance parameters + single_instance_params = self._platform_single_instance_args() + + # Current workload parameters + default_cores_per_instance = self._cpu_information.cores_per_socket * self.sockets + workload_params = { + "cores_per_instance": self._cores_per_instance or default_cores_per_instance, + "instance": self._instances if self._instances != 0 else 1, + "sockets": self.sockets, + } + + return single_instance_params == workload_params + + @property + def sockets(self): + """ + Return amount of sockets used for execution. + :return: amount of sockets + :rtype: int + """ + if self._sockets == 0: + sockets = self._cpu_information.sockets + else: + sockets = self._sockets + if sockets > self._cpu_information.sockets: + raise Exception("The specified number of sockets is greater " + "than the number of server available sockets.") + + return sockets + + @property + def cores_per_socket(self): + """ + Return amount of cores per socket used for execution. + :raises Exception: Cores assigned to one instance > cores available on one socket + :return: amount of cores + :rtype: int + """ + if self._cores_per_instance > 0: + if self._cores_per_instance > self._cpu_information.cores_per_socket: + raise Exception("Cores assigned to one instance is greater than amount of cores on one socket.") + + cores_per_socket = self._cpu_information.cores_per_socket - \ + self._cpu_information.cores_per_socket % self._cores_per_instance + else: + cores_per_socket = self._cpu_information.cores_per_socket + + return cores_per_socket + + @property + def cores(self): + """ + Return amount of cores used for execution. 
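A quick usage sketch for the `buckets()` helper defined at the top of this new module; the import path assumes the `benchmarks` directory is on `PYTHONPATH`, and the 12-core list is only an example.
```
# Split a flat list of cores into per-instance groups of four, using the
# buckets() helper from this module (import path assumes benchmarks/ on PYTHONPATH).
from common.utils.multi_instance import buckets

cores = list(range(12))
print(buckets(cores, 4))
# -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]
```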
+ :return: amount of cores used for execution + :rtype: int + """ + cores = self.cores_per_socket * self.sockets + return cores + + @property + def instances_per_socket(self): + """ + Return number of instances. + :return: number of instances + :rtype: int + """ + if self._instances > 0: + if self._instances % self.sockets != 0: + raise Exception("Instances could not be distributed equally between sockets. " + "Amount of instances should be divisible by socket amount. " + "{} % {} != 0".format(self._instances, self.sockets)) + + instances = int(self._instances / self.sockets) + elif self._cores_per_instance > 0: + instances = int(self.cores_per_socket / self._cores_per_instance) + + else: + instances = 0 + + return instances + + @property + def instances(self): + """ + Return total number of instances. + :return: total number of instances + :rtype: int + """ + # Set number of instances to 1 if instances_per_socket == 0 + if self.is_basic_configuration: + return 1 + else: + return (self.instances_per_socket * self.sockets) or 1 + + @property + def cores_per_instance(self): + """ + Return cores per instance. + :return: amount of cores per instance + :rtype: int + """ + if not self.is_basic_configuration: + if self._cores_per_instance > 0: + if self._cores_per_instance * self.instances_per_socket > self.cores_per_socket: + raise Exception("Total cores used on one socket > cores available on one socket. " + "{} * {} > {}".format( + self._cores_per_instance, + self.instances_per_socket, + self.cores_per_socket, + )) + + cores_per_instance = self._cores_per_instance + else: + instances_per_socket = self.instances_per_socket + if self.cores_per_socket % instances_per_socket != 0: + raise Exception("Amount of cores per socket should be divisible by amount of instances per socket.") + + cores_per_instance = self.cores_per_socket // instances_per_socket + + else: + cores_per_instance = self._cpu_information.cores + + return int(cores_per_instance) # type: ignore + + @property + def sockets_per_instance(self): + """ + Return amount of sockets per instance. + :return: amount of sockets per instance + :rtype: int + """ + if self.is_basic_configuration: + sockets = self._cpu_information.sockets + else: + sockets = 1 + + return sockets + + @staticmethod + def get_cores_range(cores, ht_cores, use_ht): + """ + Return the range of cores. + :param cores: number of cores + :param ht_cores: number of cores with hyperthreading + :param use_ht: defines if hyperthreading should be used + :return: range of cores + """ + if use_ht and ht_cores: + cores_range = "{},{}".format(cores, ht_cores) + else: + cores_range = cores + + return cores_range + + def split_cores(self): + """ + Return cores in instance buckets. + :raises Exception: 1 instance on sockets > 1 not implemented + :return: instance buckets + :rtype: Dict[str, List[List[Dict[str, Any]]]] + """ + membind_info = self._cpu_information.binding_information + cores_per_instance = self.cores_per_instance + if cores_per_instance == 0: + raise Exception("1 instance on sockets > 1 not implemented.") + + bucketed_cores = {} + for node_id in range(self.sockets): + socket_cores = membind_info[node_id][:self.cores_per_socket] + instance_buckets = buckets(socket_cores, cores_per_instance) + bucketed_cores.update({str(node_id): instance_buckets[0:self.instances_per_socket]}) + + return bucketed_cores + + def generate_multi_instance_ranges(self, use_ht=False): + """ + Create config for multi-instance execution. 
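Putting the pieces together, the hypothetical session below shows how `InferencePrefix` splits a machine into per-instance core ranges and, via `generate_multi_instance_prefix()` (shown just below), turns them into `numactl` prefixes. The 2-socket, 28-cores-per-socket topology and the resulting numbers in the comments are assumptions for illustration, not measured output.
```
# Hypothetical end-to-end use of InferencePrefix on an assumed 2-socket machine
# with 28 physical cores per socket (import path assumes benchmarks/ on PYTHONPATH).
from common.utils.multi_instance import InferencePrefix

prefix = InferencePrefix(sockets=2, instances=4, cores_per_instance=0)
print(prefix.is_basic_configuration)  # False: 4 instances is not the single-instance default
print(prefix.instances_per_socket)    # 2 on the assumed topology
print(prefix.cores_per_instance)      # 14 on the assumed topology

# generate_multi_instance_prefix() prepends a numactl binding per instance.
for cmd in prefix.generate_multi_instance_prefix(["python", "run_tf_benchmark.py"]):
    print(" ".join(cmd))
# -> numactl --membind=0 --physcpubind=0-13 python run_tf_benchmark.py
#    numactl --membind=0 --physcpubind=14-27 python run_tf_benchmark.py
#    (and similarly for socket 1 on the assumed CPU numbering)
```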
+ :param use_ht: defines if hyperthreading should be used + :return: information about splitted cores + """ + instance_binding = [] + split_cores = self.split_cores() + for instance_buckets in split_cores.values(): + for instance_config in instance_buckets: + if len(instance_config) == 1: + cores = instance_config[0].get("cpu_id") + ht_cores = instance_config[0].get("ht_cpu_id", None) + else: + cores = "{first}-{last}".format(first=instance_config[0].get("cpu_id"), + last=instance_config[-1].get("cpu_id")) + + first_ht = instance_config[0].get("ht_cpu_id", None) + last_ht = instance_config[-1].get("ht_cpu_id", None) + if first_ht is None or last_ht is None: + ht_cores = None + else: + ht_cores = "{first}-{last}".format(first=first_ht, last=last_ht) + + cores_range = self.get_cores_range(cores, ht_cores, use_ht) + instance_binding.append({"cores_range": cores_range, + "socket_id": instance_config[0].get("socket_id")}) + + return instance_binding + + def generate_multi_instance_prefix(self, command, use_ht=False): + """ + Add 'numactl' prefix for multi-instance execution. + :param command: command that will be run using numactl + :param use_ht: defines if hyperthreading should be used + :return: array of commands if multi-instance else command + """ + if self.is_basic_configuration: + return [command] + + commands_array = [] + for instance in self.generate_multi_instance_ranges(use_ht): + numa_cmd = ["numactl", + "--membind={}".format(instance.get("socket_id")), + "--physcpubind={}".format(instance.get("cores_range"))] + + commands_array.append(numa_cmd + command) + + return commands_array + + def _platform_single_instance_args(self): + """ + Return single instance parameters for current platform. + :return: single instance parameters for current platform + :rtype: Dict[str, int] + """ + return { + "cores_per_instance": self._cpu_information.cores, + "instance": 1, + "sockets": self._cpu_information.sockets, + } diff --git a/benchmarks/content_creation/tensorflow/draw/README.md b/benchmarks/content_creation/tensorflow/draw/README.md deleted file mode 100644 index 767532d74..000000000 --- a/benchmarks/content_creation/tensorflow/draw/README.md +++ /dev/null @@ -1,110 +0,0 @@ -# DRAW - -This document has instructions for how to run DRAW for the following -modes/precisions: -* [FP32 inference](#fp32-inference-instructions) - -## FP32 Inference Instructions - -1. Save path to current directory and download the [MNIST dataset](http://yann.lecun.com/exdb/mnist/): - - ``` - $ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} - $ pushd $MODEL_WORK_DIR - - $ mkdir mnist - $ cd mnist - $ wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz - $ wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz - $ wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz - $ wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz - ``` - - The mnist directory will be passed as the dataset location when we - run the model script in step 4. - -2. Download and extract the pretrained model: - ``` - cd $MODEL_WORK_DIR - - $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/draw_fp32_pretrained_model.tar.gz - $ tar -xvf draw_fp32_pretrained_model.tar.gz - ``` - -3. Clone this [intelai/models](https://github.com/IntelAI/models) repo, - which contains the DRAW model scripts. - After the clone has completed, navigate to the `benchmarks` - directory in the repository. - - ``` - $ git clone https://github.com/IntelAI/models.git - $ cd models/benchmarks - ``` - -4. 
Run the model for either batch or online inference using the commands - below. Replace in the path to the `--data-location` with your `mnist` - dataset directory from step 1 and the `--checkpoint` files that you - downloaded and extracted in step 2. - - * Run DRAW for online inference (with `--batch-size 1`): - ``` - $ python launch_benchmark.py \ - --precision fp32 \ - --model-name draw \ - --mode inference \ - --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --checkpoint $MODEL_WORK_DIR/draw_fp32_pretrained_model \ - --data-location $MODEL_WORK_DIR/mnist \ - --batch-size 1 \ - --socket-id 0 - ``` - * Run DRAW for batch inference (with `--batch-size 100`): - ``` - $ python launch_benchmark.py \ - --precision fp32 \ - --model-name draw \ - --mode inference \ - --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --checkpoint $MODEL_WORK_DIR/draw_fp32_pretrained_model \ - --data-location $MODEL_WORK_DIR/mnist \ - --batch-size 100 \ - --socket-id 0 - ``` - Note that the `--verbose` or `--output-dir` flag can be added to any of the above - commands to get additional debug output or change the default output location. - -5. The log files for each run are saved at the value of `--output-dir`. - - * Below is a sample log file tail when testing online inference: - ``` - ... - Elapsed Time 0.006622 - Elapsed Time 0.006636 - Elapsed Time 0.006602 - Batchsize: 1 - Time spent per BATCH: 6.6667 ms - Total samples/sec: 149.9996 samples/s - Outputs saved in file: /home/user/mnist/draw_data.npy - Ran inference with batch size 1 - Log location outside container: {--output-dir value}/benchmark_draw_inference_fp32_20190123_012947.log - ``` - - * Below is a sample log file tail when testing batch inference: - ``` - Elapsed Time 0.028355 - Elapsed Time 0.028221 - Elapsed Time 0.028183 - Batchsize: 100 - Time spent per BATCH: 28.1952 ms - Total samples/sec: 3546.7006 samples/s - Outputs saved in file: /home/user/mnist/draw_data.npy - Ran inference with batch size 100 - Log location outside container: {--output-dir value}/benchmark_draw_inference_fp32_20190123_013432.log - ``` - -6. To return to where you started from: -``` -$ popd -``` \ No newline at end of file diff --git a/benchmarks/content_creation/tensorflow/draw/inference/fp32/__init__.py b/benchmarks/content_creation/tensorflow/draw/inference/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/content_creation/tensorflow/draw/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# diff --git a/benchmarks/content_creation/tensorflow/draw/inference/fp32/config.json b/benchmarks/content_creation/tensorflow/draw/inference/fp32/config.json deleted file mode 100644 index dfac18793..000000000 --- a/benchmarks/content_creation/tensorflow/draw/inference/fp32/config.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "optimization_parameters": { - "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", - "KMP_BLOCKTIME": 1, - "KMP_SETTINGS": 1, - "KMP_HW_SUBSET": "1T" - } -} diff --git a/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py b/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py deleted file mode 100644 index 6542ae1a7..000000000 --- a/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py +++ /dev/null @@ -1,59 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - - -import os -import sys -from common.base_model_init import BaseModelInitializer - - -class ModelInitializer(BaseModelInitializer): - """ Model initializer for the DRAW model """ - - def __init__(self, args, custom_args=[], platform_util=None): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - if self.args.accuracy_only: - print("Accuracy testing for DRAW inference is not supported yet.") - sys.exit(1) - - # Set the num_inter_threads and num_intra_threads - self.set_num_inter_intra_threads() - - # Create the command prefix with numactl and executing the script - script_path = os.path.join(self.args.intelai_models, self.args.mode, - self.args.precision, "draw_inf.py") - self.command_prefix = self.get_command_prefix(args.socket_id) + \ - " {} {} ".format(self.python_exe, script_path) - - # Add additional args to the command - self.command_prefix += "--cp {} --num_inter_threads {} " \ - "--num_intra_threads {} --bs {} --dl {} " \ - "--nw 100 --nb 200".\ - format(self.args.checkpoint, self.args.num_inter_threads, - self.args.num_intra_threads, self.args.batch_size, - self.args.data_location) - - def run(self): - self.run_command(self.command_prefix) diff --git a/benchmarks/face_detection_and_alignment/__init__.py b/benchmarks/face_detection_and_alignment/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/face_detection_and_alignment/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md deleted file mode 100644 index 76b6faaef..000000000 --- a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md +++ /dev/null @@ -1,88 +0,0 @@ -# MTCC - -This document has instructions for how to run MTCC for the -following modes/precisions: -* [FP32 inference](#fp32-inference-instructions) - -Instructions for MTCC model training and inference for other precisions are coming later. - -## FP32 Inference Instructions - -1. Store path to current directory and clone the MTCC model repository [AITTSMD/MTCNN-Tensorflow](https://github.com/AITTSMD/MTCNN-Tensorflow): -``` - $ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} - $ pushd $MODEL_WORK_DIR - - $ git clone https://github.com/AITTSMD/MTCNN-Tensorflow.git - $ git checkout 3b3934d38f8d34287cc933a581537a1acfd0bb60 -``` - -2. Download and extract the [dataset](http://mmlab.ie.cuhk.edu.hk/archive/CNN/data/train.zip), `lfw_5590` will be used. -``` - $ wget http://mmlab.ie.cuhk.edu.hk/archive/CNN/data/train.zip - $ unzip train.zip - $ ls -l - drwxr-xr-x 5592 178944 Apr 15 2013 lfw_5590 - drwxr-xr-x 7878 252096 Apr 15 2013 net_7876 - -rw-r--r-- 1 519406 Apr 15 2013 testImageList.txt - -rw-r--r--@ 1 136492573 Mar 22 11:54 train.zip - -rw-r--r-- 1 1498353 Apr 15 2013 trainImageList.txt -``` - -3. Download the pre-trained model. -``` - $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/mtcc_fp32_pretrained_model.tar.gz - $ tar -xzvf mtcc_fp32_pretrained_model.tar.gz -``` - -4. Clone the [intelai/models](https://github.com/intelai/models) repo. -This repo has the launch script for running models. - -``` - $ git clone https://github.com/IntelAI/models.git - Cloning into 'models'... -``` - -5. Run the `launch_benchmark.py` script from the intelai/models repo with the appropriate parameters including: the `--model-source-dir` from step 1, `--data-location` from step 2, -and the `--checkpoint` from step 3. - -Run: -``` - $ cd models/benchmarks - - $ python launch_benchmark.py \ - --data-location $MODEL_WORK_DIR/lfw_5590 \ - --model-source-dir $MODEL_WORK_DIR/MTCNN-Tensorflow \ - --model-name mtcc \ - --framework tensorflow \ - --precision fp32 \ - --mode inference \ - --socket-id 0 \ - --checkpoint $MODEL_WORK_DIR/MTCNN_model \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 -``` - -6. The log file is saved to the value of `--output-dir`. - -Below is a sample log file tail when running for batch inference, online inference, and accuracy: - -``` -time cost 0.459 pnet 0.166 rnet 0.144 onet 0.149 -time cost 0.508 pnet 0.034 rnet 0.010 onet 0.005 -time cost 0.548 pnet 0.028 rnet 0.008 onet 0.005 -time cost 0.585 pnet 0.025 rnet 0.007 onet 0.005 -time cost 0.627 pnet 0.028 rnet 0.009 onet 0.005 -... 
-time cost 220.844 pnet 0.027 rnet 0.008 onet 0.005 -time cost 220.884 pnet 0.027 rnet 0.008 onet 0.005 -Accuracy: 1.12 -Total images: 5590 -Latency is: 40.36, Throughput is: 24.78 -Ran inference with batch size -1 -Log location outside container: /home/user/models/benchmarks/common/tensorflow/logs/benchmark_mtcc_inference_fp32_20190322_221543.log -``` - -7. To return to where you started from: -``` -$ popd -``` \ No newline at end of file diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/__init__.py b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/__init__.py b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py deleted file mode 100644 index 11d00ffda..000000000 --- a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py +++ /dev/null @@ -1,56 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# - -import os - -from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var - - -class ModelInitializer (BaseModelInitializer): - - def __init__(self, args, custom_args, platform_util=None): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) - - # set num_inter_threads and num_intra_threads - self.set_num_inter_intra_threads() - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) - - benchmark_script = os.path.join( - self.args.intelai_models, self.args.mode, self.args.precision, - "one_image_test.py") - self.command_prefix = \ - self.get_command_prefix(self.args.socket_id) + \ - "{} ".format(self.python_exe) + benchmark_script - - self.run_cmd = \ - self.command_prefix + \ - " --num_inter_threads " + str(self.args.num_inter_threads) + \ - " --num_intra_threads " + str(self.args.num_intra_threads) + \ - " -ckpt " + self.args.checkpoint + \ - " -dl " + self.args.data_location - - def run(self): - self.run_command(self.run_cmd) diff --git a/benchmarks/image_recognition/tensorflow/densenet169/README.md b/benchmarks/image_recognition/tensorflow/densenet169/README.md index 1f98a3503..b5f8bd8f1 100644 --- a/benchmarks/image_recognition/tensorflow/densenet169/README.md +++ b/benchmarks/image_recognition/tensorflow/densenet169/README.md @@ -31,12 +31,9 @@ following modes/precisions: -rw-r--r--. 1 user 55292089 Jun 20 15:09 validation-00127-of-00128 ``` -2. Download the pretrained model and store the path to the current directory: +2. Download the pretrained model: ``` - $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/densenet169_fp32_pretrained_model.pb - - $ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} - $ pushd $MODEL_WORK_DIR + $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/densenet169_fp32_pretrained_model.pb ``` 3. 
Clone the [intelai/models](https://github.com/intelai/models) repo @@ -47,28 +44,28 @@ following modes/precisions: ``` $ git clone https://github.com/IntelAI/models.git - $ cd models/benchmarks + $ cd benchmarks ``` For throughput (using `--benchmark-only`, `--socket-id 0` and `--batch-size 100`): ``` - $ python launch_benchmark.py \ - --model-name densenet169 \ - --precision fp32 \ - --mode inference \ - --framework tensorflow \ - --benchmark-only \ - --batch-size 100 \ - --socket-id 0 \ - --in-graph $MODEL_WORK_DIR/densenet169_fp32_pretrained_model.pb \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ - input_layer="input" output_layer="densenet169/predictions/Reshape_1" + python launch_benchmark.py \ + --model-name densenet169 \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --benchmark-only \ + --batch-size 100 \ + --socket-id 0 \ + --in-graph /home//densenet169_fp32_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ + input_layer="input" output_layer="densenet169/predictions/Reshape_1" ``` For latency (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`) ``` - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name densenet169 \ --precision fp32 \ --mode inference \ @@ -76,8 +73,8 @@ following modes/precisions: --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --in-graph $MODEL_WORK_DIR/densenet169_fp32_pretrained_model.pb \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --in-graph /home//densenet169_fp32_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ input_layer="input" output_layer="densenet169/predictions/Reshape_1" ``` @@ -85,7 +82,7 @@ following modes/precisions: For accuracy (using your `--data-location`, `--socket-id 0`, `--accuracy-only` and `--batch-size 100`): ``` - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name densenet169 \ --precision fp32 \ --mode inference \ @@ -93,9 +90,9 @@ following modes/precisions: --accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --in-graph $MODEL_WORK_DIR/densenet169_fp32_pretrained_model.pb \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --data-location $MODEL_WORK_DIR/imagenet_validation_dataset \ + --in-graph /home//densenet169_fp32_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --data-location /home//imagenet_validation_dataset \ -- input_height=224 input_width=224 \ input_layer="input" output_layer="densenet169/predictions/Reshape_1" ``` @@ -144,8 +141,3 @@ following modes/precisions: Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_densenet169_inference_fp32_20190412_021545.log ``` - -5. To return to where you started from: -``` -$ popd -``` diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md index 2474ed25b..359b6fd12 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md @@ -12,13 +12,10 @@ better performance results for Int8 precision models with smaller batch sizes. 
If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` when calling `launch_benchmark.py` and the script will run without TCMalloc. -1. Store the path to the current directory and then clone this [intelai/models](https://github.com/IntelAI/models) +1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: ``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - $ git clone https://github.com/IntelAI/models.git ``` @@ -26,7 +23,7 @@ This repository includes launch scripts for running an optimized version of the 2. Download the pretrained model: ``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/inception_resnet_v2_int8_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inception_resnet_v2_int8_pretrained_model.pb ``` 3. If you would like to run Inception ResNet V2 inference and test for @@ -45,7 +42,7 @@ After the script has completed, you should have a directory with the sharded dataset something like: ``` -$ ll $MODEL_WORK_DIR/datasets/ImageNet_TFRecords +$ ll /home//datasets/ImageNet_TFRecords -rw-r--r--. 1 user 143009929 Jun 20 14:53 train-00000-of-01024 -rw-r--r--. 1 user 144699468 Jun 20 14:53 train-00001-of-01024 -rw-r--r--. 1 user 138428833 Jun 20 14:53 train-00002-of-01024 @@ -80,26 +77,22 @@ For accuracy (using your `--data-location`, `--accuracy-only` and `--batch-size 100`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inception_resnet_v2 \ --precision int8 \ --mode inference \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inception_resnet_v2_int8_pretrained_model.pb \ - --data-location $MODEL_WORK_DIR/datasets/ImageNet_TFRecords + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb \ + --data-location /home//datasets/ImageNet_TFRecords ``` For online inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inception_resnet_v2 \ --precision int8 \ --mode inference \ @@ -107,16 +100,14 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inception_resnet_v2_int8_pretrained_model.pb + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb ``` For batch inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inception_resnet_v2 \ --precision int8 \ --mode inference \ @@ -124,8 +115,8 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inception_resnet_v2_int8_pretrained_model.pb + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb ``` Note that the `--verbose` flag can be added to any of the above commands @@ -176,21 +167,13 @@ Ran inference with batch size 128 Log location outside container: /benchmark_inception_resnet_v2_inference_int8_20190415_225215.log ``` -6. 
To return to where you started from: -``` -$ popd -``` - ## FP32 Inference Instructions -1. Store the path to the current directory and then clone this [intelai/models](https://github.com/IntelAI/models) +1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: ``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - $ git clone git@github.com:IntelAI/models.git ``` @@ -199,7 +182,7 @@ This repository includes launch scripts for running an optimized version of the 2. Download the pre-trained Inception ResNet V2 model: ``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/inception_resnet_v2_fp32_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inception_resnet_v2_fp32_pretrained_model.pb ``` 3. If you would like to run Inception ResNet V2 inference and test for @@ -219,7 +202,7 @@ After the script has completed, you should have a directory with the sharded dataset something like: ``` -$ ll $MODEL_WORK_DIR/datasets/ImageNet_TFRecords +$ ll /home//datasets/ImageNet_TFRecords -rw-r--r--. 1 user 143009929 Jun 20 14:53 train-00000-of-01024 -rw-r--r--. 1 user 144699468 Jun 20 14:53 train-00001-of-01024 -rw-r--r--. 1 user 138428833 Jun 20 14:53 train-00002-of-01024 @@ -252,26 +235,22 @@ For accuracy (using your `--data-location`, `--accuracy-only` and `--batch-size 100`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inception_resnet_v2 \ --precision fp32 \ --mode inference \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inception_resnet_v2_fp32_pretrained_model.pb \ - --data-location $MODEL_WORK_DIR/datasets/ImageNet_TFRecords + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ + --data-location /home//datasets/ImageNet_TFRecords ``` For online inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inception_resnet_v2 \ --precision fp32 \ --mode inference \ @@ -279,16 +258,14 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --in-graph $MODEL_WORK_DIR/inception_resnet_v2_fp32_pretrained_model.pb \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` For batch inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inception_resnet_v2 \ --precision fp32 \ --mode inference \ @@ -296,8 +273,8 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --in-graph $MODEL_WORK_DIR/inception_resnet_v2_fp32_pretrained_model.pb \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands @@ -342,8 +319,3 @@ Throughput: 70.402 images/sec Ran inference with batch size 128 Log location outside container: {--output-dir 
value}/benchmark_inception_resnet_v2_inference_fp32_20190410_205628.log ``` - -7. To return to where you started from: -``` -$ popd -``` \ No newline at end of file diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md index dd42c6360..6c25af169 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md @@ -14,28 +14,31 @@ better performance results for Int8 precision models with smaller batch sizes. If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` when calling `launch_benchmark.py` and the script will run without TCMalloc. -1. Store the path to the current directory and clone this [intelai/models](https://github.com/IntelAI/models) +1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: ``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - $ git clone https://github.com/IntelAI/models.git ``` This repository includes launch scripts for running an optimized version of the Inception V3 model code. +2. Clone the [tensorflow/models](https://github.com/tensorflow/models) +repository as `tensorflow-models`. This is to avoid conflict with Intel's `models` repo: + +``` +git clone https://github.com/tensorflow/models.git tensorflow-models +``` This repository is used for dependencies that the Inception V3 model requires. -2. Download the pre-trained model. +3. Download the pre-trained model. ``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/inceptionv3_int8_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv3_int8_pretrained_model.pb ``` -3. If you would like to run Inception V3 inference with real data or test for +4. If you would like to run Inception V3 inference with real data or test for accuracy, you will need the ImageNet dataset. Register and download the @@ -51,7 +54,7 @@ After the script has completed, you should have a directory with the sharded dataset something like: ``` -$ ll $MODEL_WORK_DIR/datasets/ImageNet_TFRecords +$ ll /home//datasets/ImageNet_TFRecords -rw-r--r--. 1 user 143009929 Jun 20 14:53 train-00000-of-01024 -rw-r--r--. 1 user 144699468 Jun 20 14:53 train-00001-of-01024 -rw-r--r--. 1 user 138428833 Jun 20 14:53 train-00002-of-01024 @@ -66,7 +69,7 @@ $ ll $MODEL_WORK_DIR/datasets/ImageNet_TFRecords -rw-r--r--. 1 user 55292089 Jun 20 15:09 validation-00127-of-00128 ``` -4. Next, navigate to the `benchmarks` directory in your local clone of +5. Next, navigate to the `benchmarks` directory in your local clone of the [intelai/models](https://github.com/IntelAI/models) repo from step 1. The `launch_benchmark.py` script in the `benchmarks` directory is used for starting a model run in a optimized TensorFlow docker @@ -75,7 +78,10 @@ precision, and docker image to use, along with your path to the ImageNet TF Records that you generated in step 4. Substitute in your own `--data-location` (from step 4, for accuracy -only), `--in-graph` pretrained model file path (from step 3). +only), `--in-graph` pretrained model file path (from step 3) and +`--model-source-dir` for the location where you cloned the +[tensorflow/models](https://github.com/tensorflow/models) repo +(from step 2). Inception V3 can be run for accuracy, online inference, or batch inference. Use one of the following examples below, depending on your use case. 
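(Optional, and not part of the model zoo launch scripts: before running one of the examples below, you can sanity-check the frozen graph downloaded in step 3 with a few lines of TensorFlow. This is a minimal sketch that assumes a TensorFlow 1.15 or 2.x Python environment with the `tf.compat.v1` API available; the graph path is a placeholder to adjust.)

```
import tensorflow as tf

# Placeholder path to the frozen graph downloaded in step 3 -- adjust to your location.
graph_path = "/home/<user>/inceptionv3_int8_pretrained_model.pb"

# Read the serialized GraphDef from the .pb file.
graph_def = tf.compat.v1.GraphDef()
with tf.io.gfile.GFile(graph_path, "rb") as f:
    graph_def.ParseFromString(f.read())

# Import into a fresh graph and list operation names; the first and last
# entries usually correspond to the input and output layers that the
# benchmarking scripts expect.
graph = tf.Graph()
with graph.as_default():
    tf.compat.v1.import_graph_def(graph_def, name="")

ops = [op.name for op in graph.get_operations()]
print("total ops:", len(ops))
print("first ops:", ops[:5])
print("last ops:", ops[-5:])
```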
@@ -84,18 +90,16 @@ For accuracy (using your `--data-location`, `--accuracy-only` and `--batch-size 100`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inceptionv3 \ --precision int8 \ --mode inference \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv3_int8_pretrained_model.pb \ - --data-location $MODEL_WORK_DIR/datasets/ImageNet_TFRecords + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv3_int8_pretrained_model.pb \ + --data-location /home//datasets/ImageNet_TFRecords ``` When testing performance, it is optional to specify the @@ -106,9 +110,7 @@ default to use `warmup_steps=10` and `steps=50`. For online inference with ImageNet data (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inceptionv3 \ --precision int8 \ --mode inference \ @@ -116,18 +118,16 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv3_int8_pretrained_model.pb \ - --data-location $MODEL_WORK_DIR/datasets/ImageNet_TFRecords \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv3_int8_pretrained_model.pb \ + --data-location /home//datasets/ImageNet_TFRecords \ -- warmup_steps=50 steps=500 ``` For online inference with dummy data (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`), remove `--data-location` argument: ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inceptionv3 \ --precision int8 \ --mode inference \ @@ -135,17 +135,15 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv3_int8_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv3_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` For batch inference with ImageNet data (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inceptionv3 \ --precision int8 \ --mode inference \ @@ -153,18 +151,16 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv3_int8_pretrained_model.pb \ - --data-location $MODEL_WORK_DIR/datasets/ImageNet_TFRecords \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv3_int8_pretrained_model.pb \ + --data-location /home//datasets/ImageNet_TFRecords \ -- warmup_steps=50 steps=500 ``` For batch inference with dummy data (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`), remove `--data-location` argument:: ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inceptionv3 \ --precision int8 \ --mode inference \ @@ -172,15 +168,15 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph 
$MODEL_WORK_DIR/inceptionv3_int8_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv3_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location.. -5. The log file is saved to the value +7. The log file is saved to the value of `--output-dir`. Below are examples of what the tail of your log file should look like for the different configs. @@ -219,26 +215,18 @@ Ran inference with batch size 128 Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190416_162155.log ``` -6. To return to where you started from: -``` -$ popd -``` - ## FP32 Inference Instructions -1. Store the path to the current directory and clone this [intelai/models](https://github.com/IntelAI/models) +1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: ``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - $ git clone https://github.com/IntelAI/models.git ``` 2. Download the pre-trained model. ``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/inceptionv3_fp32_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv3_fp32_pretrained_model.pb ``` 3. If you would like to run Inception V3 FP32 inference and test for @@ -263,17 +251,15 @@ depending on your use case. * For online inference with dummy data (using `--batch-size 1`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inceptionv3 \ --precision fp32 \ --mode inference \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv3_fp32_pretrained_model.pb + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when running for online inference: ``` @@ -296,17 +282,15 @@ Log location outside container: {--output-dir value}/benchmark_inceptionv3_infer * For batch inference with dummy data (using `--batch-size 128`): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inceptionv3 \ --precision fp32 \ --mode inference \ --framework tensorflow \ --batch-size 128 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv3_fp32_pretrained_model.pb + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when running for batch inference: ``` @@ -329,18 +313,16 @@ Log location outside container: {--output-dir value}/benchmark_inceptionv3_infer `--data-location` with the path to the ImageNet dataset from step 3): ``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name inceptionv3 \ --precision fp32 \ --mode inference \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --data-location $MODEL_WORK_DIR/dataset/Imagenet_Validation \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv3_fp32_pretrained_model.pb + --data-location /dataset/Imagenet_Validation \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph 
/home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when running for accuracy: ``` @@ -356,8 +338,3 @@ Log location outside container: {--output-dir value}/benchmark_inceptionv3_infer Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location.. - -5. To return to where you started from: -``` -$ popd -``` \ No newline at end of file diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md index 92474ba41..b2c45c2d4 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md @@ -15,19 +15,16 @@ better performance results for Int8 precision models with smaller batch sizes. If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` when calling `launch_benchmark.py` and the script will run without TCMalloc. -1. Store the path to the current directory and clone this [intelai/models](https://github.com/IntelAI/models) +1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: ``` - $ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} - $ pushd $MODEL_WORK_DIR - $ git clone https://github.com/IntelAI/models.git ``` This repository includes launch scripts for running the model. 2. Download the pretrained model: ``` - $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/inceptionv4_int8_pretrained_model.pb + $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv4_int8_pretrained_model.pb ``` 3. If you would like to run Inception V4 inference and test for @@ -51,9 +48,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. For accuracy (using your `--data-location`, `--accuracy-only` and `--batch-size 100`): ``` - $ cd models/benchmarks - - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name inceptionv4 \ --precision int8 \ --mode inference \ @@ -61,16 +56,14 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv4_int8_pretrained_model.pb \ - --data-location $MODEL_WORK_DIR/ImageNet_TFRecords + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv4_int8_pretrained_model.pb \ + --data-location /home//ImageNet_TFRecords ``` For batch inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 240`): ``` - $ cd models/benchmarks - - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name inceptionv4 \ --precision int8 \ --mode inference \ @@ -78,15 +71,13 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--benchmark-only \ --batch-size 240 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv4_int8_pretrained_model.pb + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv4_int8_pretrained_model.pb ``` For online inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`): ``` - $ cd models/benchmarks - - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name inceptionv4 \ --precision int8 \ --mode inference \ @@ -94,8 +85,8 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv4_int8_pretrained_model.pb + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv4_int8_pretrained_model.pb ``` Note that the `--verbose` flag can be added to any of the above commands @@ -150,26 +141,18 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. Log location outside container: /benchmark_inceptionv4_inference_int8_20190415_232441.log ``` -6. To return to where you started from: -``` -$ popd -``` - ## FP32 Inference Instructions -1. Store the path to the curernt directory and clone this [intelai/models](https://github.com/IntelAI/models) +1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: ``` - $ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} - $ pushd $MODEL_WORK_DIR - $ git clone https://github.com/IntelAI/models.git ``` This repository includes launch scripts for running the model. 2. Download the pretrained model: ``` - $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/inceptionv4_fp32_pretrained_model.pb + $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv4_fp32_pretrained_model.pb ``` 3. 
If you would like to run Inception V4 inference and test for @@ -194,9 +177,7 @@ $ popd For accuracy (using your `--data-location`, `--accuracy-only` and `--batch-size 100`): ``` - $ cd models/benchmarks - - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name inceptionv4 \ --precision fp32 \ --mode inference \ @@ -204,16 +185,14 @@ $ popd --accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv4_fp32_pretrained_model.pb \ - --data-location $MODEL_WORK_DIR/ImageNet_TFRecords + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv4_fp32_pretrained_model.pb \ + --data-location /home//ImageNet_TFRecords ``` For batch inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 240`): ``` - $ cd models/benchmarks - - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name inceptionv4 \ --precision fp32 \ --mode inference \ @@ -221,15 +200,13 @@ $ popd --benchmark-only \ --batch-size 240 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv4_fp32_pretrained_model.pb + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv4_fp32_pretrained_model.pb ``` For online inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`): ``` - $ cd models/benchmarks - - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name inceptionv4 \ --precision fp32 \ --mode inference \ @@ -237,8 +214,8 @@ $ popd --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/inceptionv4_fp32_pretrained_model.pb + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//inceptionv4_fp32_pretrained_model.pb ``` Note that the `--verbose` flag can be added to any of the above commands @@ -294,8 +271,3 @@ $ popd Ran inference with batch size 1 Log location outside container: /benchmark_inceptionv4_inference_fp32_20190307_221954.log ``` - -6. To return to where you started from: -``` -$ popd -``` diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index 249e5d672..b332cec5a 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -16,11 +16,8 @@ better performance results for Int8 precision models with smaller batch sizes. If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` when calling `launch_benchmark.py` and the script will run without TCMalloc. -1. Store path to current directory and download ImageNet dataset. - ``` - $ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} - $ pushd $MODEL_WORK_DIR - ``` +1. Download ImageNet dataset. + This step is required only for running accuracy, for running benchmark we do not need to provide dataset. Register and download the ImageNet dataset. Once you have the raw ImageNet dataset downloaded, we need to convert @@ -29,7 +26,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. to download, process and convert the ImageNet dataset to the TF records format. 
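Once the conversion script finishes, a few lines of TensorFlow can confirm that the validation shards are readable before launching a run. This is only an illustrative sketch, not part of the conversion or benchmarking scripts; it assumes TensorFlow 2.x eager execution, the standard `image/encoded` / `image/class/label` feature keys written by the slim conversion script, and a placeholder dataset path.

```
import os
import glob
import tensorflow as tf

# Placeholder path -- point this at the directory produced by the conversion script.
record_dir = "/home/<user>/datasets/ImageNet_TFRecords"

files = sorted(glob.glob(os.path.join(record_dir, "validation-*")))
print("validation shards found:", len(files))

# Count the records and decode one example to confirm the expected features exist.
dataset = tf.data.TFRecordDataset(files)
print("validation records:", sum(1 for _ in dataset))  # the full ImageNet validation set has 50000

feature_spec = {
    "image/encoded": tf.io.FixedLenFeature([], tf.string),
    "image/class/label": tf.io.FixedLenFeature([], tf.int64),
}
for raw in dataset.take(1):
    example = tf.io.parse_single_example(raw, feature_spec)
    image = tf.io.decode_jpeg(example["image/encoded"])
    print("first image shape:", image.shape, "label:", int(example["image/class/label"]))
```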
After converting data, you should have a directory with the sharded dataset something like below, we only need `validation-*` files, discard `train-*` files: ``` - $ ll datasets/ImageNet_TFRecords + $ ll /home/myuser/datasets/ImageNet_TFRecords -rw-r--r--. 1 user 143009929 Jun 20 14:53 train-00000-of-01024 -rw-r--r--. 1 user 144699468 Jun 20 14:53 train-00001-of-01024 -rw-r--r--. 1 user 138428833 Jun 20 14:53 train-00002-of-01024 @@ -45,23 +42,23 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. ``` 2. Download the pre-trained model. ``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/mobilenetv1_int8_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenetv1_int8_pretrained_model.pb ``` 3. Clone the [intelai/models](https://github.com/intelai/models) repo and then run the model scripts for either online or batch inference or accuracy. For --dataset-location in accuracy run, please use the ImageNet validation data path from step 1. Each model run has user configurable arguments separated from regular arguments by '--' at the end of the command. Unless configured, these arguments will run with default values. Below are the example codes for each use case: - + ``` $ git clone https://github.com/IntelAI/models.git - $ cd models/benchmarks + $ cd benchmarks ``` For batch inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 240`): ``` - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name mobilenet_v1 \ --precision int8 \ --mode inference \ @@ -69,15 +66,14 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/mob --benchmark-only \ --batch-size 240 \ --socket-id 0 \ - --in-graph $MODEL_WORK_DIR/mobilenetv1_int8_pretrained_model.pb \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` For online inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`) ``` - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name mobilenet_v1 \ --precision int8 \ --mode inference \ @@ -85,8 +81,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/mob --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --in-graph $MODEL_WORK_DIR/mobilenetv1_int8_pretrained_model.pb \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` @@ -94,7 +89,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/mob For accuracy (using your `--data-location`, `--accuracy-only` and `--batch-size 100`): ``` - $ python launch_benchmark.py \ + python launch_benchmark.py \ --model-name mobilenet_v1 \ --precision int8 \ --mode inference \ @@ -102,15 +97,16 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/mob --accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --in-graph $MODEL_WORK_DIR/mobilenetv1_int8_pretrained_model.pb \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --data-location $MODEL_WORK_DIR/dataset/Imagenet_Validation \ + --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ + --data-location 
/home//imagenet_validation_dataset \ -- input_height=224 input_width=224 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. + + At present, no released docker image supports the latest MobileNet Int8 inference and accuracy. 4. The log file is saved to the `models/benchmarks/common/tensorflow/logs` directory, or the directory specified by the `--output-dir` arg. Below are examples of @@ -156,11 +152,6 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/mob Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190523_164955.log ``` -5. To return to where you started from: -``` -$ popd -``` - ## FP32 Inference Instructions 1. The ImageNet dataset is required for testing accuracy and can also be @@ -171,30 +162,16 @@ $ popd using the instructions [here](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data). -2. Store the path to the current directory and then download and extract the checkpoint files for the pretrained MobileNetV1 FP32 model: - - ``` - $ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} - $ pushd $MODEL_WORK_DIR - - $ wget http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz - - $ tar -xvf mobilenet_v1_1.0_224.tgz - x ./ - x ./mobilenet_v1_1.0_224.tflite - x ./mobilenet_v1_1.0_224.ckpt.meta - x ./mobilenet_v1_1.0_224.ckpt.index - x ./mobilenet_v1_1.0_224.ckpt.data-00000-of-00001 - x ./mobilenet_v1_1.0_224_info.txt - x ./mobilenet_v1_1.0_224_frozen.pb - x ./mobilenet_v1_1.0_224_eval.pbtxt - ``` +2. Download the pre-trained model. +``` +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb +``` 3. Clone the [tensorflow/models](https://github.com/tensorflow/models) repository.
``` - $ git clone https://github.com/tensorflow/models tf_models + $ git clone https://github.com/tensorflow/models ``` The [tensorflow/models](https://github.com/tensorflow/models) files @@ -205,7 +182,6 @@ $ popd ``` $ git clone https://github.com/IntelAI/models.git - $ cd models/benchmarks ``` @@ -222,17 +198,19 @@ $ popd is optional): ``` - $ python launch_benchmark.py \ + python launch_benchmark.py \ --precision fp32 \ --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --model-source-dir /home//tensorflow/models \ --batch-size 1 \ --socket-id 0 \ - --data-location $MODEL_WORK_DIR/dataset/Imagenet_Validation \ - --checkpoint $MODEL_WORK_DIR + --data-location /dataset/Imagenet_Validation \ + --in-graph /home//mobilenet_v1_1.0_224_frozen.pb + -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ + input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` * Run for batch inference (with `--batch-size 100`, @@ -240,31 +218,33 @@ $ popd the `--data-location` is optional): ``` - $ python launch_benchmark.py \ + python launch_benchmark.py \ --precision fp32 \ --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --model-source-dir /home//tensorflow/models \ --batch-size 100 \ --socket-id 0 \ - --data-location $MODEL_WORK_DIR/dataset/Imagenet_Validation \ - --checkpoint $MODEL_WORK_DIR + --data-location /dataset/Imagenet_Validation \ + --in-graph /home//mobilenet_v1_1.0_224_frozen.pb + -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ + input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` * Run for accuracy (with `--batch-size 100`, `--accuracy-only` and `--in-graph` with a path to the frozen graph .pb file): ``` - $ python launch_benchmark.py \ + python launch_benchmark.py \ --precision fp32 \ --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --model-source-dir /home//tensorflow/models \ --batch-size 100 \ --accuracy-only \ - --data-location $MODEL_WORK_DIR/dataset/Imagenet_Validation \ - --in-graph $MODEL_WORK_DIR/mobilenet_v1_1.0_224_frozen.pb + --data-location /dataset/Imagenet_Validation \ + --in-graph /home//mobilenet_v1_1.0_224_frozen.pb ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. @@ -312,8 +292,3 @@ $ popd Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_fp32_20190110_211648.log ``` - -6. 
To return to where you started from: -``` -$ popd -``` diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py index adbae022d..9fd30f920 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py @@ -18,6 +18,7 @@ # +import argparse import os from common.base_model_init import BaseModelInitializer from common.base_model_init import set_env_var @@ -41,7 +42,7 @@ def __init__(self, args, custom_args=[], platform_util=None): self.set_num_inter_intra_threads(num_inter_threads=2) script_name = "accuracy.py" if self.args.accuracy_only \ - else "eval_image_classifier.py" + else "benchmark.py" script_path = os.path.join( self.args.intelai_models, self.args.mode, self.args.precision, script_name) @@ -53,22 +54,16 @@ def __init__(self, args, custom_args=[], platform_util=None): set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) + self.parse_args() + if not self.args.accuracy_only: - self.command_prefix = ("{prefix} " - "--dataset_name imagenet " - "--checkpoint_path {checkpoint} " - "--dataset_split_name=validation " - "--clone_on_cpu=True " - "--model_name {model} " - "--inter_op_parallelism_threads {inter} " - "--intra_op_parallelism_threads {intra} " - "--batch_size {bz}").format( - prefix=self.command_prefix, checkpoint=self.args.checkpoint, - model=self.args.model_name, inter=self.args.num_inter_threads, - intra=self.args.num_intra_threads, bz=self.args.batch_size) - - if self.args.data_location: - self.command_prefix += " --dataset_dir {}".format(self.args.data_location) + # add args for the benchmark script + script_args_list = [ + "input_graph", "input_height", "input_width", "batch_size", + "input_layer", "output_layer", "num_inter_threads", + "num_intra_threads", "warmup_steps", "steps"] + self.command_prefix = self.add_args_to_command( + self.command_prefix, script_args_list) else: # add args for the accuracy script script_args_list = [ @@ -78,5 +73,36 @@ def __init__(self, args, custom_args=[], platform_util=None): self.command_prefix = self.add_args_to_command( self.command_prefix, script_args_list) + def parse_args(self): + if self.custom_args == None: + return + + parser = argparse.ArgumentParser() + parser.add_argument( + "--input_height", default=224, + dest='input_height', type=int, help="input height") + parser.add_argument( + "--input_width", default=224, + dest='input_width', type=int, help="input width") + parser.add_argument( + "--warmup_steps", dest="warmup_steps", + help="number of warmup steps", + type=int, default=10) + parser.add_argument( + "--steps", dest="steps", + help="number of steps", + type=int, default=50) + parser.add_argument( + "--input_layer", dest="input_layer", + help="name of input layer", + type=str, default="input") + parser.add_argument( + "--output_layer", dest="output_layer", + help="name of output layer", + type=str, default="MobilenetV1/Predictions/Reshape_1") + + self.args = parser.parse_args(self.custom_args, + namespace=self.args) + def run(self): self.run_command(self.command_prefix) diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py index ea974b587..3706c411b 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py +++ 
b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py @@ -55,7 +55,16 @@ def __init__(self, args, custom_args=[], platform_util=None): "input_graph", "input_height", "input_width", "batch_size", "input_layer", "output_layer", "num_inter_threads", "num_intra_threads", "warmup_steps", "steps"] - if self.args.accuracy_only: + + if self.args.calibration_only: + run_script = os.path.join( + self.args.intelai_models, self.args.mode, + self.args.precision, "calibration.py") + script_args_list = [ + "input_graph", "data_location", "input_height", "input_width", + "batch_size", "input_layer", "output_layer", + "num_inter_threads", "num_intra_threads"] + elif self.args.accuracy_only: run_script = os.path.join( self.args.intelai_models, self.args.mode, self.args.precision, "accuracy.py") @@ -76,21 +85,25 @@ def parse_args(self): "--input_width", default=224, dest='input_width', type=int, help="input width") parser.add_argument( - '--warmup_steps', dest='warmup_steps', - help='number of warmup steps', + "--warmup_steps", dest="warmup_steps", + help="number of warmup steps", type=int, default=10) parser.add_argument( - '--steps', dest='steps', - help='number of steps', + "--steps", dest="steps", + help="number of steps", type=int, default=50) parser.add_argument( - '--input_layer', dest='input_layer', - help='name of input layer', + "--input_layer", dest="input_layer", + help="name of input layer", type=str, default="input") parser.add_argument( - '--output_layer', dest='output_layer', - help='name of output layer', + "--output_layer", dest="output_layer", + help="name of output layer", type=str, default="MobilenetV1/Predictions/Reshape_1") + parser.add_argument( + "--calibration-only", dest="calibration_only", + help="calibrate the accuracy", + action="store_true") self.args = parser.parse_args(self.custom_args, namespace=self.args) diff --git a/benchmarks/image_recognition/tensorflow/resnet101/README.md b/benchmarks/image_recognition/tensorflow/resnet101/README.md index 71d2d582d..e1c082b07 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet101/README.md @@ -12,13 +12,10 @@ better performance results for Int8 precision models with smaller batch sizes. If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` when calling `launch_benchmark.py` and the script will run without TCMalloc. -1. Store the path to the current directory and clone this [intelai/models](https://github.com/IntelAI/models) +1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: ``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - $ git clone https://github.com/IntelAI/models.git ``` @@ -27,7 +24,7 @@ an optimized version of the ResNet101 model code. 2. Download the pre-trained model. ``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/resnet101_int8_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet101_int8_pretrained_model.pb ``` 3. If you would like to run ResNet101 inference with real data or test for @@ -46,7 +43,7 @@ After the script has completed, you should have a directory with the sharded dataset something like: ``` -$ ll $MODEL_WORK_DIR/datasets/ImageNet_TFRecords +$ ll /home//datasets/ImageNet_TFRecords -rw-r--r--. 1 user 143009929 Jun 20 14:53 train-00000-of-01024 -rw-r--r--. 1 user 144699468 Jun 20 14:53 train-00001-of-01024 -rw-r--r--. 
1 user 138428833 Jun 20 14:53 train-00002-of-01024 @@ -79,7 +76,7 @@ For accuracy (using your `--data-location`,`--in-graph`, `--accuracy-only` and `--batch-size 100`): ``` -$ cd $MODEL_WORK_DIR/models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ --model-name resnet101 \ @@ -88,9 +85,9 @@ $ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --data-location $MODEL_WORK_DIR/dataset/FullImageNetData_directory \ - --in-graph=$MODEL_WORK_DIR/resnet101_int8_pretrained_model.pb + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --data-location /home//dataset/FullImageNetData_directory \ + --in-graph=/home//resnet101_int8_pretrained_model.pb ``` When running for performance, it is optional to specify the @@ -101,9 +98,7 @@ default to use `warmup_steps=40` and `steps=100`. For online inference with dummy data (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`): ``` -$ cd $MODEL_WORK_DIR/models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name resnet101 \ --precision int8 \ --mode inference \ @@ -111,17 +106,15 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph=$MODEL_WORK_DIR/resnet101_int8_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` For online inference with ImageNet data (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`): ``` -$ cd $MODEL_WORK_DIR/models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name resnet101 \ --precision int8 \ --mode inference \ @@ -129,18 +122,16 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --data-location $MODEL_WORK_DIR/dataset/FullImageNetData_directory \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph=$MODEL_WORK_DIR/resnet101_int8_pretrained_model.pb \ + --data-location /home//dataset/FullImageNetData_directory \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` For batch inference with dummy data (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`): ``` -$ cd $MODEL_WORK_DIR/models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name resnet101 \ --precision int8 \ --mode inference \ @@ -148,27 +139,25 @@ $ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph=$MODEL_WORK_DIR/resnet101_int8_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` For batch inference with ImageNet data (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`): ``` -$ cd $MODEL_WORK_DIR/models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name resnet101 \ --precision int8 \ --mode inference \ --framework tensorflow \ --benchmark-only \ --batch-size 128 \ - --data-location $MODEL_WORK_DIR/dataset/FullImageNetData_directory \ + --data-location /home//dataset/FullImageNetData_directory \ --socket-id 0 \ - --docker-image 
gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph=$MODEL_WORK_DIR/resnet101_int8_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -211,27 +200,19 @@ Ran inference with batch size 128 Log location outside container: {--output-dir value}/benchmark_resnet101_inference_int8_20190223_192438.log ``` -7. To return to where you started from: -``` -$ popd -``` - ## FP32 Inference Instructions -1. Store the path to the current directory and clone the +1. Clone the [intelai/models](https://github.com/intelai/models) repository ``` - $ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} - $ pushd $MODEL_WORK_DIR - $ git clone https://github.com/IntelAI/models.git ``` 2. Download the pre-trained model. - ``` - $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/resnet101_fp32_pretrained_model.pb - ``` +``` +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet101_fp32_pretrained_model.pb +``` 3. Download ImageNet dataset. @@ -243,7 +224,7 @@ repository to download, process and convert the ImageNet dataset to the TF records format. After converting data, you should have a directory with the sharded dataset something like below, we only need `validation-*` files, discard `train-*` files: ``` - $ ll $MODEL_WORK_DIR/datasets/ImageNet_TFRecords + $ ll /home//datasets/ImageNet_TFRecords -rw-r--r--. 1 user 143009929 Jun 20 14:53 train-00000-of-01024 -rw-r--r--. 1 user 144699468 Jun 20 14:53 train-00001-of-01024 -rw-r--r--. 1 user 138428833 Jun 20 14:53 train-00002-of-01024 @@ -262,16 +243,15 @@ repository For online inference measurements with dummy data set `--batch-size 1` and for batch inference set `--batch-size 128` ``` - $ cd $MODEL_WORK_DIR/models/benchmarks - + $ cd /home//models/benchmarks $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ --model-name resnet101 \ --batch-size 128 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/trained_models/resnet101_fp32_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//trained_models/resnet101_fp32_pretrained_model.pb \ --socket-id 0 ``` @@ -290,17 +270,16 @@ repository 5. Run for accuracy ``` - $ cd $MODEL_WORK_DIR/models/benchmarks - + $ cd /home//models/benchmarks $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ --model-name resnet101 \ --batch-size 100 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --in-graph $MODEL_WORK_DIR/trained_models/resnet101_fp32_pretrained_model.pb \ - --data-location $MODEL_WORK_DIR/imagenet_validation_dataset \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home//trained_models/resnet101_fp32_pretrained_model.pb \ + --data-location /home//imagenet_validation_dataset \ --accuracy-only \ --socket-id 0 ``` @@ -321,8 +300,3 @@ repository Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. - -6. 
To return to where you started from: - ``` - $ popd - ``` diff --git a/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py index c60693ba9..738f5bc4b 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py @@ -101,7 +101,7 @@ def run_benchmark_or_accuracy(self): self.run_command(cmd) def run_calibration(self): - calibration_script = os.path.join(self.args.intelai_models, + calibration_script = os.path.join(self.args.intelai_models, self.args.mode, self.args.precision, "calibration.py") script_args_list = [ "input_graph", "data_location", diff --git a/benchmarks/image_recognition/tensorflow/resnet50/README.md b/benchmarks/image_recognition/tensorflow/resnet50/README.md index 6ba94b4b3..06b771335 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50/README.md @@ -17,12 +17,9 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 1. Download the full ImageNet dataset and convert to the TF records format. -* Store the path to the current directory and clone the tensorflow/models repository: +* Clone the tensorflow/models repository as tensorflow-models. This is to avoid conflict with Intel's `models` repo: ``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - -$ git clone https://github.com/tensorflow/models.git tf_models +$ git clone https://github.com/tensorflow/models.git tensorflow-models ``` The TensorFlow models repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) @@ -32,7 +29,7 @@ to download, process and convert the ImageNet dataset to the TF records format. 2. Download the pre-trained model. ``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/resnet50_int8_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_int8_pretrained_model.pb ``` 3. Clone the @@ -50,18 +47,18 @@ located at `models/models/image_recognition/tensorflow/resnet50/`. the pre-trained `final_int8_resnet50.pb` input graph file (from step 2), and the `--accuracy-only` flag. ``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --data-location $MODEL_WORK_DIR/dataset/FullImageNetData_directory - --in-graph $MODEL_WORK_DIR/resnet50_int8_pretrained_model.pb \ + --data-location /home//dataset/FullImageNetData_directory + --in-graph /home//resnet50_int8_pretrained_model.pb \ --model-name resnet50 \ --framework tensorflow \ --precision int8 \ --mode inference \ --batch-size=100 \ --accuracy-only \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The log file is saved to the value of `--output-dir`. @@ -92,17 +89,17 @@ args, as shown in the command below. If these values are not specified, the script will default to use `warmup_steps=10` and `steps=50`. 
``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/resnet50_int8_pretrained_model.pb \ + --in-graph /home//resnet50_int8_pretrained_model.pb \ --model-name resnet50 \ --framework tensorflow \ --precision int8 \ --mode inference \ --batch-size=128 \ --benchmark-only \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --docker-image intel/intel-optimized-tensorflow:2.1.0 -- warmup_steps=50 steps=500 ``` The tail of the log output when the script completes should look @@ -123,19 +120,11 @@ Log location outside container: {--output-dir value}/benchmark_resnet50_inferenc Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location.. -5. To return to where you started from: -``` -$ popd -``` - ## FP32 Inference Instructions -1. Store the path to the current directory and download the pre-trained model. +1. Download the pre-trained model. ``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/resnet50_fp32_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb ``` 2. Clone the @@ -161,17 +150,17 @@ If using dummy data for inference, `--data-location` flag is not required. Other * To measure online inference, set `--batch-size=1` and run the script as shown: ``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/resnet50_fp32_pretrained_model.pb \ + --in-graph /home//resnet50_fp32_pretrained_model.pb \ --model-name resnet50 \ --framework tensorflow \ --precision fp32 \ --mode inference \ --batch-size=1 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The log file is saved to the value of `--output-dir`. @@ -196,17 +185,17 @@ Log location outside container: {--output-dir value}/benchmark_resnet50_inferenc * To measure batch inference, set `--batch-size=128` and run the launch script as shown: ``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/resnet50_fp32_pretrained_model.pb \ + --in-graph /home//resnet50_fp32_pretrained_model.pb \ --model-name resnet50 \ --framework tensorflow \ --precision fp32 \ --mode inference \ --batch-size=128 \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The log file is saved to the value of `--output-dir`. 
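The online and batch inference commands above differ only in `--batch-size`. If you plan to script several such runs, a thin Python wrapper around `launch_benchmark.py` can help; the sketch below is illustrative only and is not part of this patch. The helper function name and the placeholder graph path are assumptions, while the flag names and values mirror the README commands above.

```python
# Illustrative helper (not part of this patch): drives the launch_benchmark.py
# invocations shown above from Python instead of typing them by hand.
# Flag names and values mirror the README; the graph path is a placeholder.
import subprocess


def run_resnet50_fp32_inference(in_graph, batch_size,
                                docker_image="intel/intel-optimized-tensorflow:2.1.0"):
    """Run one ResNet50 FP32 inference benchmark via launch_benchmark.py."""
    cmd = [
        "python", "launch_benchmark.py",
        "--in-graph", in_graph,
        "--model-name", "resnet50",
        "--framework", "tensorflow",
        "--precision", "fp32",
        "--mode", "inference",
        "--batch-size", str(batch_size),
        "--socket-id", "0",
        "--docker-image", docker_image,
    ]
    # Run from the benchmarks directory, as the README does with `cd`.
    subprocess.run(cmd, cwd="models/benchmarks", check=True)


# Online inference (batch size 1) and batch inference (batch size 128),
# matching the two commands above.
run_resnet50_fp32_inference("/path/to/resnet50_fp32_pretrained_model.pb", 1)
run_resnet50_fp32_inference("/path/to/resnet50_fp32_pretrained_model.pb", 128)
```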
@@ -232,10 +221,10 @@ Log location outside container: {--output-dir value}/benchmark_resnet50_inferenc * To measure the model accuracy, use the `--accuracy-only` flag and pass the ImageNet dataset directory from step 3 as the `--data-location`: ``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/resnet50_fp32_pretrained_model.pb \ + --in-graph /home//resnet50_fp32_pretrained_model.pb \ --model-name resnet50 \ --framework tensorflow \ --precision fp32 \ @@ -243,8 +232,8 @@ $ python launch_benchmark.py \ --accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --data-location $MODEL_WORK_DIR/dataset/ImageNetData_directory \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --data-location /home//dataset/ImageNetData_directory \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The log file is saved to the value of `--output-dir`. @@ -265,10 +254,10 @@ output can only be used with real data. For example, the command below is the same as the accuracy test above, except with the `--output-results` flag added: ``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/resnet50_fp32_pretrained_model.pb \ + --in-graph /home//resnet50_fp32_pretrained_model/freezed_resnet50.pb \ --model-name resnet50 \ --framework tensorflow \ --precision fp32 \ @@ -277,8 +266,8 @@ $ python launch_benchmark.py \ --output-results \ --batch-size 100 \ --socket-id 0 \ - --data-location $MODEL_WORK_DIR/dataset/ImageNetData_directory \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --data-location /home//dataset/ImageNetData_directory \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The results file will be written to the `models/benchmarks/common/tensorflow/logs` directory, unless another @@ -300,8 +289,3 @@ ILSVRC2012_val_00021512.JPEG,424,424 Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. - -5. To return to where you started from: -``` -$ popd -``` diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md index 41791df10..5c7b21fa2 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md @@ -4,24 +4,24 @@ This document has instructions for how to run ResNet50 (v1.5) for the following precisions: * [Int8 inference](#int8-inference-instructions) * [FP32 inference](#fp32-inference-instructions) +* [FP32 training](#fp32-training-instructions) Original ResNet model has multiple versions which have shown better accuracy -and/or batch inference performance. As mentioned in TensorFlow's [official ResNet +and/or batch inference and training performance. As mentioned in TensorFlow's [official ResNet model page](https://github.com/tensorflow/models/tree/master/official/resnet), 3 different versions of the original ResNet model exists - ResNet50v1, ResNet50v1.5, and ResNet50v2. As a side note, ResNet50v1.5 is also in MLPerf's [cloud inference benchmark for -image classification](https://github.com/mlperf/inference/tree/master/cloud/image_classification). +image +classification](https://github.com/mlperf/inference/tree/master/cloud/image_classification) +and [training benchmark](https://github.com/mlperf/training). ## Int8 Inference Instructions 1. 
Download the full ImageNet dataset and convert to the TF records format. -* Store the path to the current directory and clone the tensorflow/models repository: +* Clone the tensorflow/models repository as `tensorflow-models`. This is to avoid conflict with Intel's `models` repo: ``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - -$ git clone https://github.com/tensorflow/models.git tf_models +$ git clone https://github.com/tensorflow/models.git tensorflow-models ``` The TensorFlow models repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) @@ -29,9 +29,9 @@ to download, process and convert the ImageNet dataset to the TF records format. * The ImageNet dataset directory location is only required to calculate the model accuracy. -2. Download the pre-trained model and store the path to the current directory. +2. Download the pre-trained model. ``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/resnet50v1_5_int8_pretrained_model.pb +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50v1_5_int8_pretrained_model.pb ``` 3. Clone the @@ -45,26 +45,26 @@ $ git clone https://github.com/IntelAI/models.git The optimized ResNet50v1.5 model files are attached to the [intelai/models](https://github.com/intelai/models) repo and located at `models/models/image_recognition/tensorflow/resnet50v1_5/`. - The docker image (`gcr.io/deeplearning-platform-release/tf-cpu.1-15`) + The docker image (`intel/intel-optimized-tensorflow:2.1.0`) used in the commands above were built using [TensorFlow](git@github.com:tensorflow/tensorflow.git) master for TensorFlow - version 1.14. + version 2.1.0. * Calculate the model accuracy, the required parameters parameters include: the `ImageNet` dataset location (from step 1), the pre-trained `resnet50v1_5_int8_pretrained_model.pb` input graph file (from step 2), and the `--accuracy-only` flag. ``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --data-location $MODEL_WORK_DIR/dataset/FullImageNetData_directory \ - --in-graph $MODEL_WORK_DIR/resnet50v1_5_int8_pretrained_model.pb \ + --data-location /home//dataset/FullImageNetData_directory \ + --in-graph resnet50v1_5_int8_pretrained_model.pb \ --model-name resnet50v1_5 \ --framework tensorflow \ --precision int8 \ --mode inference \ --batch-size=100 \ --accuracy-only \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The log file is saved to the value of `--output-dir`. @@ -93,17 +93,17 @@ args, as shown in the command below. If these values are not specified, the script will default to use `warmup_steps=10` and `steps=50`. 
``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/resnet50v1_5_int8_pretrained_model.pb \ + --in-graph resnet50v1_5_int8_pretrained_model.pb \ --model-name resnet50v1_5 \ --framework tensorflow \ --precision int8 \ --mode inference \ --batch-size=128 \ --benchmark-only \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --docker-image intel/intel-optimized-tensorflow:2.1.0 -- warmup_steps=50 steps=500 ``` The tail of the log output when the benchmarking completes should look @@ -122,21 +122,12 @@ Log location outside container: {--output-dir value}/benchmark_resnet50_inferenc Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. -5. To return to where you started from: -``` -$ popd -``` - - ## FP32 Inference Instructions -1. Store the path to the current directory. Download the pre-trained model. +1. Download the pre-trained model. If you would like to get a pre-trained model for ResNet50v1.5, ``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - $ wget https://zenodo.org/record/2535873/files/resnet50_v1.pb ``` @@ -161,17 +152,17 @@ If benchmarking uses dummy data for inference, `--data-location` flag is not req * To measure online inference, set `--batch-size=1` and run the model script as shown: ``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/resnet50_v1.pb \ + --in-graph resnet50_v1.pb \ --model-name resnet50v1_5 \ --framework tensorflow \ --precision fp32 \ --mode inference \ --batch-size=1 \ - --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --socket-id=0 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The log file is saved to the value of `--output-dir`. @@ -199,17 +190,17 @@ Log location outside container: {--output-dir value}/benchmark_resnet50_inferenc * To measure batch inference, set `--batch-size=128` and run the model script as shown: ``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/resnet50_v1.pb \ + --in-graph resnet50_v1.pb \ --model-name resnet50v1_5 \ --framework tensorflow \ --precision fp32 \ --mode inference \ --batch-size=128 \ - --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --socket-id=0 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The log file is saved to the value of `--output-dir`. @@ -238,19 +229,19 @@ Log location outside container: {--output-dir value}/benchmark_resnet50_inferenc * To measure the model accuracy, use the `--accuracy-only` flag and pass the ImageNet dataset directory from step 3 as the `--data-location`: ``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/resnet50_v1.pb \ + --in-graph resnet50_v1.pb \ --model-name resnet50v1_5 \ --framework tensorflow \ --precision fp32 \ --mode inference \ --accuracy-only \ --batch-size 100 \ - --socket-id 0 \ - --data-location $MODEL_WORK_DIR/dataset/ImageNetData_directory \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --socket-id=0 \ + --data-location /home//dataset/ImageNetData_directory \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The log file is saved to the value of `--output-dir`. @@ -274,10 +265,10 @@ output can only be used with real data. 
For example, the command below is the same as the accuracy test above, except with the `--output-results` flag added: ``` -$ cd models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/resnet50_v1.pb \ + --in-graph resnet50_v1.pb \ --model-name resnet50v1_5 \ --framework tensorflow \ --precision fp32 \ @@ -285,9 +276,9 @@ $ python launch_benchmark.py \ --accuracy-only \ --output-results \ --batch-size 100 \ - --socket-id 0 \ - --data-location $MODEL_WORK_DIR/dataset/ImageNetData_directory \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --socket-id=0 \ + --data-location /home//dataset/ImageNetData_directory \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The results file will be written to the `models/benchmarks/common/tensorflow/logs` directory, unless another @@ -311,7 +302,50 @@ ILSVRC2012_val_00014735.JPEG,31,31 Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. -5. To return to where you started from: +## FP32 Training Instructions + +1. Download the full ImageNet dataset and convert to the TF records format. + +* Clone the tensorflow/models repository as `tensorflow-models`. This is to avoid conflict with Intel's `models` repo: +``` +$ git clone https://github.com/tensorflow/models.git tensorflow-models ``` -$ popd +The TensorFlow models repo provides +[scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) +to download, process and convert the ImageNet dataset to the TF records format. + +2. Clone the +[intelai/models](https://github.com/intelai/models) +repository +``` +$ git clone https://github.com/IntelAI/models.git +``` + +3. Run the following command to start ResNet50v1.5 FP32 training run. +``` +$ python launch_benchmark.py \ + --model-name=resnet50v1_5 \ + --precision=fp32 \ + --mode=training \ + --framework tensorflow \ + --checkpoint \ + --data-location=/home//dataset/ImageNetData_directory +``` + +This run will take considerable amount of time since it is running for +convergence (90 epochs). + +If you want to do a trial run, add +``` +-- steps= +``` +argument to the command. + +If you run the script for more than 100 steps, you should see training loss +decreasing like below: + +``` +I0816 basic_session_run_hooks.py:262] loss = 8.442491, step = 0 +I0816 basic_session_run_hooks.py:260] loss = 8.373407, step = 100 (174.484 sec) +... ``` diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/__init__.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/__init__.py similarity index 99% rename from benchmarks/object_detection/tensorflow/faster_rcnn/__init__.py rename to benchmarks/image_recognition/tensorflow/resnet50v1_5/training/__init__.py index c4fdb7d61..8cb0c8d8d 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/__init__.py +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/__init__.py @@ -15,5 +15,3 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - -# diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/__init__.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/common_resnet50/__init__.py similarity index 100% rename from models/language_translation/tensorflow/gnmt/inference/fp32/scripts/__init__.py rename to benchmarks/image_recognition/tensorflow/resnet50v1_5/training/common_resnet50/__init__.py diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/config.json b/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/common_resnet50/config.json similarity index 100% rename from benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/config.json rename to benchmarks/image_recognition/tensorflow/resnet50v1_5/training/common_resnet50/config.json diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/common_resnet50/resnet50_model_init.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/common_resnet50/resnet50_model_init.py new file mode 100644 index 000000000..bbbf4731e --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/common_resnet50/resnet50_model_init.py @@ -0,0 +1,107 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from common.base_model_init import BaseModelInitializer +from common.base_model_init import set_env_var + +import os +from argparse import ArgumentParser +import time + + +class ResNet50ModelInitializer(BaseModelInitializer): + """initialize mode and run benchmark""" + + def __init__(self, args, custom_args=[], platform_util=None): + super(ResNet50ModelInitializer, self).__init__( + args, custom_args, platform_util) + + self.benchmark_command = "" + if not platform_util: + raise ValueError("Did not find any platform info.") + + # use default batch size if -1 + if self.args.batch_size == -1: + self.args.batch_size = 64 + + # set num_inter_threads and num_intra_threads + self.set_num_inter_intra_threads() + + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument("--steps", dest='steps', + type=int, default=112590, + help="number of steps") + arg_parser.add_argument("--train_epochs", dest='trainepochs', + type=int, default=72, + help="number of epochs") + arg_parser.add_argument("--epochs_between_evals", dest='epochsbtwevals', + type=int, default=1, + help="number of epochs between eval") + + self.args = arg_parser.parse_args(self.custom_args, namespace=self.args) + + # Set KMP env vars, if they haven't already been set, but override the default KMP_BLOCKTIME value + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) + + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) + + benchmark_script = os.path.join( + self.args.intelai_models, self.args.mode, + "mlperf_resnet/imagenet_main.py") + + # We need to change directory to model source to avoid python + # module name conflicts. + #self.benchmark_command = "cd " + self.args.model_source_dir + \ + # "/models && " + self.get_command_prefix(args.socket_id) + \ + # self.python_exe + " " + benchmark_script + + self.benchmark_command = "PYTHONPATH=$PYTHONPATH:" + \ + os.path.join(self.args.intelai_models, self.args.mode) + \ + " " + self.get_command_prefix(args.socket_id) + \ + self.python_exe + " " + benchmark_script + + # Model requires random_seed. Just setting it to a random value. 
+ random_seed = 2 + self.benchmark_command = \ + self.benchmark_command + \ + " " + str(random_seed) + \ + " --batch_size=" + str(self.args.batch_size) + \ + " --max_train_steps=" + str(self.args.steps) + \ + " --train_epochs=" + str(self.args.trainepochs) + \ + " --epochs_between_evals=" + str(self.args.epochsbtwevals) + \ + " --inter_op_parallelism_threads " + str(self.args.num_inter_threads) + \ + " --intra_op_parallelism_threads " + str(self.args.num_intra_threads) + \ + " --version 1 --resnet_size 50" + + # if the data location and checkpoint directory is not empty, then include the arg + if self.args.data_location and os.listdir(self.args.data_location): + self.benchmark_command += " --data_dir=" + \ + self.args.data_location + if self.args.checkpoint: + self.benchmark_command += " --model_dir=" + \ + self.args.checkpoint + + def run(self): + if self.benchmark_command: + self.run_command(self.benchmark_command) diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/fp32/__init__.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/fp32/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/fp32/model_init.py similarity index 69% rename from benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/model_init.py rename to benchmarks/image_recognition/tensorflow/resnet50v1_5/training/fp32/model_init.py index 90c883b9a..acc1864b2 100644 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/training/fp32/model_init.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2019 Intel Corporation +# Copyright (c) 2018 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,11 +18,14 @@ # -from object_detection.tensorflow.ssd_vgg16.inference.ssd_vgg16_model_init import SSDVGG16ModelInitializer +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from ..common_resnet50.resnet50_model_init import ResNet50ModelInitializer -class ModelInitializer(SSDVGG16ModelInitializer): - """Model initializer for SSD-VGG16 Int8 inference""" +class ModelInitializer(ResNet50ModelInitializer): + """Initialize FP32 model and run benchmark""" def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) diff --git a/benchmarks/image_segmentation/__init__.py b/benchmarks/image_segmentation/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/image_segmentation/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
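For readers tracing how extra arguments such as a trial-run step count reach the new `ResNet50ModelInitializer`, the standalone sketch below mimics, in isolation, the argparse pattern used in `resnet50_model_init.py` above. It is illustrative only: the function name and sample values are assumptions, and how the launcher normalizes the README's `-- steps=<N>` form into dashed flags before they reach the initializer is outside this sketch.

```python
# Minimal, standalone sketch of the custom-arg handling used by
# ResNet50ModelInitializer above: extra arguments arrive as a list of
# custom args, are parsed into the existing args namespace, and are then
# appended to the training command string.
from argparse import ArgumentParser, Namespace


def build_train_flags(custom_args, batch_size=64):
    parser = ArgumentParser(description="Parse args")
    # Defaults match the values defined in resnet50_model_init.py above.
    parser.add_argument("--steps", dest="steps", type=int, default=112590)
    parser.add_argument("--train_epochs", dest="trainepochs", type=int, default=72)
    parser.add_argument("--epochs_between_evals", dest="epochsbtwevals", type=int, default=1)
    args = parser.parse_args(custom_args, namespace=Namespace(batch_size=batch_size))
    return (" --batch_size=" + str(args.batch_size) +
            " --max_train_steps=" + str(args.steps) +
            " --train_epochs=" + str(args.trainepochs) +
            " --epochs_between_evals=" + str(args.epochsbtwevals))


# A trial run limited to 100 steps would contribute flags like:
print(build_train_flags(["--steps=100"]))
# -> " --batch_size=64 --max_train_steps=100 --train_epochs=72 --epochs_between_evals=1"
```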
-# - -# diff --git a/benchmarks/image_segmentation/tensorflow/__init__.py b/benchmarks/image_segmentation/tensorflow/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/image_segmentation/tensorflow/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md b/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md deleted file mode 100644 index 0dc3f76f9..000000000 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md +++ /dev/null @@ -1,105 +0,0 @@ -## Mask R-CNN ## - -This document has instructions for how to run Mask R-CNN for the -following modes/precisions: -* [FP32 inference](#fp32-inference-instructions) - -Instructions and scripts for model training and inference. - -## FP32 Inference Instructions - -1. Download the [MS COCO 2014 dataset](http://cocodataset.org/#download). - -2. Clone the [Mask R-CNN model repository](https://github.com/matterport/Mask_RCNN). -It is used as external model directory for dependencies. -Clone the `MS COCO API` repository in the Mask_RCNN directory that you just cloned. -you can get the `MS COCO API` from the [MS COCO API fork with fixes for Python3](https://github.com/waleedka/coco), -or from the original [MS COCO API repository](https://github.com/cocodataset/cocoapi) -and use [this pull request for Python3 fixes](https://github.com/cocodataset/cocoapi/pull/50). -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - -$ git clone https://github.com/matterport/Mask_RCNN.git -$ cd Mask_RCNN -$ git checkout 3deaec5d902d16e1daf56b62d5971d428dc920bc - -$ git clone https://github.com/waleedka/coco.git -$ cd coco -$ git checkout f83e9552d4ca57e15a16dca2efe3dcd80693358a -``` - -3. Download pre-trained COCO weights `mask_rcnn_coco.h5)` from the -[Mask R-CNN repository release page](https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5), -and place it in the `MaskRCNN` directory (from step 2) . -``` -$ wget -q https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5 -``` - -4. Clone this [intelai/models](https://github.com/IntelAI/models) -repository: - -``` -$ git clone https://github.com/IntelAI/models.git -``` - -This repository includes launch scripts for running an optimized version of the Mask R-CNN model code. - -5. Navigate to the `benchmarks` directory in your local clone of -the [intelai/models](https://github.com/IntelAI/models) repo from step 4. -The `launch_benchmark.py` script in the `benchmarks` directory is -used for starting a model run in a optimized TensorFlow docker -container. It has arguments to specify which model, framework, mode, -precision, and docker image to use, along with your path to the external model directory -for `--model-source-dir` (from step 2) and `--data-location` (from step 1). 
- - -Run for batch and online inference with `--batch-size=1` : -``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ - --model-source-dir $MODEL_WORK_DIR/Mask_RCNN \ - --model-name maskrcnn \ - --framework tensorflow \ - --precision fp32 \ - --mode inference \ - --batch-size 1 \ - --socket-id 0 \ - --data-location $MODEL_WORK_DIR/COCO2014 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 -``` - -5. Log files are located at the value of `--output-dir`. - -Below is a sample log file tail when running for batch -and online inference: -``` -Running per image evaluation... -Evaluate annotation type *bbox* -DONE (t=0.23s). -Accumulating evaluation results... -DONE (t=0.14s). - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.442 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.612 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.483 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.216 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.474 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.621 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.373 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.461 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.473 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.237 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.500 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.654 -Batch size: 1 -Time spent per BATCH: 609.6943 ms -Total samples/sec: 1.6402 samples/s -Total time: 35.407243490219116 -Log location outside container: {--output-dir value}/benchmark_maskrcnn_inference_fp32_20190111_205935.log -``` - -6. To return to where you started from: -``` -$ popd -``` diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/__init__.py b/benchmarks/image_segmentation/tensorflow/maskrcnn/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/__init__.py b/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/__init__.py b/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/config.json b/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/config.json deleted file mode 100644 index 23d5de76e..000000000 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/config.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "optimization_parameters": { - "KMP_AFFINITY": "granularity=fine,compact,1,0", - "KMP_BLOCKTIME": 1, - "KMP_SETTINGS": 1, - "KMP_HW_SUBSET": "1T" - } -} diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py b/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py deleted file mode 100644 index fcfb8dfa1..000000000 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py +++ /dev/null @@ -1,59 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var - -import os - - -class ModelInitializer(BaseModelInitializer): - """initialize model and run benchmark""" - - def __init__(self, args, custom_args=[], platform_util=None): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) - - # set num_inter_threads and num_intra_threads - self.set_num_inter_intra_threads() - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - benchmark_script = os.path.join( - self.args.intelai_models, "coco.py") - self.benchmark_command = self.get_command_prefix(args.socket_id) + \ - self.python_exe + " " + benchmark_script + " evaluate " - - set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) - - self.benchmark_command = self.benchmark_command + \ - " --dataset=" + str(self.args.data_location) + \ - " --num_inter_threads " + str(self.args.num_inter_threads) + \ - " --num_intra_threads " + str(self.args.num_intra_threads) + \ - " --nw 5 --nb 50 --model=coco" + \ - " --infbs " + str(self.args.batch_size) - - def run(self): - if self.benchmark_command: - self.run_command(self.benchmark_command) diff --git a/benchmarks/language_modeling/__init__.py b/benchmarks/language_modeling/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/language_modeling/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/language_modeling/tensorflow/__init__.py b/benchmarks/language_modeling/tensorflow/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/language_modeling/tensorflow/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/README.md b/benchmarks/language_modeling/tensorflow/lm-1b/README.md deleted file mode 100644 index 1bc4d0158..000000000 --- a/benchmarks/language_modeling/tensorflow/lm-1b/README.md +++ /dev/null @@ -1,108 +0,0 @@ -# LM-1B - -This document has instructions for how to run LM-1B for the -following modes/platforms: -* [FP32 inference](#fp32-inference-instructions) - -Instructions and scripts for model training and inference for -other platforms are coming later. - -## FP32 Inference Instructions - -1. Store the path to the current directory and clone [mlperf/inference](https://github.com/mlperf/inference.git) -with the current SHA from master of the repo on 6/26/2019: -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - -$ git clone https://github.com/mlperf/inference.git -$ cd inference -$ git checkout 41eb3e489233e83e544cd25148aca177b95d7bea -``` - -To prepare the checkpoint and dataset, run the `benchmark.py` script -from the mlperf inference repo. Since this requires python3 and -TensorFlow to be installed, the following instructions show how to run -a docker container with your cloned mlperf inference repo mounted as a -volume: -``` -$ docker run --volume $MODEL_WORK_DIR/inference:/inference -it gcr.io/deeplearning-platform-release/tf-cpu.1-15 /bin/bash -``` -In the docker container, run: -``` -$ cd /inference/others/cloud/language_modeling/ -$ python3 benchmark.py -``` - -2. Clone this [intelai/models](https://github.com/IntelAI/models) -repository: - -``` -$ git clone https://github.com/IntelAI/models.git -``` - -3. Next, navigate to the `benchmarks` directory in your local clone of -the [intelai/models](https://github.com/IntelAI/models) repo (from step 2). -The `launch_benchmark.py` script in the `benchmarks` directory is -used for starting a model run in a optimized TensorFlow docker -container. It has arguments to specify which model, framework, mode, -precision, and docker image to use, and the checkpoint directory. - -Substitute the `--model-source-dir` to `/inference/cloud/language_modeling`. -Before running, ensure that you have run the script to prepare checkpoint files and the dataset -from Step 1. - -LM-1B can run for online or batch inference. Use one of the following examples below, depending on -your use case. - -For online inference (using `--socket-id 0` and `--batch-size 1`): - -``` -$ python launch_benchmark.py \ - --model-name lm-1b \ - --precision fp32 \ - --mode inference \ - --framework tensorflow \ - --batch-size 1 \ - --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/inference/others/cloud/language_modeling - -``` - -For batch inference (using `--socket-id 0` and `--batch-size 1024`): - -``` -$ python launch_benchmark.py \ - --model-name lm-1b \ - --precision fp32 \ - --mode inference \ - --framework tensorflow \ - --batch-size 1024 \ - --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/inference/others/cloud/language_modeling \ - -- steps=4 \ -``` - -Note that the `--verbose` flag can be added to any of the above commands -to get additional debug output. - -4. By default, the log file is saved to the -`models/benchmarks/common/tensorflow/logs` directory. The user can specify a -different directory using `--output-dir`. - -Example log tail when running for online or batch inference: -``` -Running warmup... -Running benchmark... 
-Number samples: 4234 -Longest latency was: 2.9153692722320557 seconds. Average latency was:2.891982913017273 -Perplexity: 40.110043230980665, target is 40.209 . -Ran inference with batch size 1024 -``` - -5. To return to where you started from: -``` -$ popd -``` diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/__init__.py b/benchmarks/language_modeling/tensorflow/lm-1b/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/language_modeling/tensorflow/lm-1b/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/inference/__init__.py b/benchmarks/language_modeling/tensorflow/lm-1b/inference/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/language_modeling/tensorflow/lm-1b/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/__init__.py b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py deleted file mode 100644 index 19ca8e6df..000000000 --- a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py +++ /dev/null @@ -1,77 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -import os -from argparse import ArgumentParser - -from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var - - -class ModelInitializer(BaseModelInitializer): - """Model initializer for LM-1B FP32 inference""" - - def __init__(self, args, custom_args, platform_util=None): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) - - self.cmd = self.get_command_prefix(self.args.socket_id) - - self.set_num_inter_intra_threads() - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) - - if self.args.socket_id != -1: - if self.args.num_cores != -1: - self.cmd += "--physcpubind=0-" + \ - (str(self.args.num_cores - 1)) + " " - self.cmd += self.python_exe + " " - - run_script = os.path.join(self.args.model_source_dir, - "benchmark.py") - - # Model args - arg_parser = ArgumentParser(description='process custom_args') - - arg_parser.add_argument('-S', '--steps', help='Number of steps', - dest="steps", - default="100") - self.args = arg_parser.parse_args(self.custom_args, - namespace=self.args) - - # Model parameter control - cmd_args = " -b=" + str(self.args.batch_size) + \ - " -I=" + str(self.args.steps) + \ - " --inter=" + \ - str(self.args.num_inter_threads) + \ - " --intra=" + \ - str(self.args.num_intra_threads) - - self.cmd = self.cmd + run_script + cmd_args - - def run(self): - original_dir = os.getcwd() - os.chdir(self.args.model_source_dir) - self.run_command(self.cmd) - - os.chdir(original_dir) diff --git a/benchmarks/language_translation/tensorflow/gnmt/README.md b/benchmarks/language_translation/tensorflow/gnmt/README.md deleted file mode 100644 index 0353b20fb..000000000 --- a/benchmarks/language_translation/tensorflow/gnmt/README.md +++ /dev/null @@ -1,280 +0,0 @@ -# GNMT - -This document has instructions for how to run GNMT for the -following modes/platforms: -* [FP32 inference](#fp32-inference-instructions) -* [FP32 training](#fp32-training-instructions) - -## FP32 Inference Instructions - -1. Store the path to the current directory and clone this [intelai/models](https://github.com/IntelAI/models) -repository: - -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - -$ git clone https://github.com/IntelAI/models.git -``` - -This repository includes launch scripts for running -an optimized version of the GNMT model code. - -2. 
Download the pre-trained model. -``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/gnmt_4layer_fp32_pretrained_model.tar.gz -``` - -3. To run GNMT inference, you will WMT16 German-English data. You can -download the dataset using the script provided on nmt github. - -``` -$ git clone https://github.com/tensorflow/nmt.git -Cloning into 'nmt'... -remote: Enumerating objects: 1247, done. -remote: Total 1247 (delta 0), reused 0 (delta 0), pack-reused 1247 -Receiving objects: 100% (1247/1247), 1.23 MiB | 7.72 MiB/s, done. -Resolving deltas: 100% (891/891), done. - -$ nmt/scripts/wmt16_en_de.sh $MODEL_WORK_DIR/wmt16 -``` - -After the script has completed, you should have a directory with the -dataset looks like: - -``` -$ ls $MODEL_WORK_DIR/wmt16/ -bpe.32000 newstest2010.tok.de newstest2012.tok.en newstest2015.de train.de -data newstest2010.tok.en newstest2013.de newstest2015.en train.en -mosesdecoder newstest2011.de newstest2013.en newstest2015.tok.bpe.32000.de train.tok.bpe.32000.de -newstest2009.de newstest2011.en newstest2013.tok.bpe.32000.de newstest2015.tok.bpe.32000.en train.tok.bpe.32000.en -newstest2009.en newstest2011.tok.bpe.32000.de newstest2013.tok.bpe.32000.en newstest2015.tok.de train.tok.clean.bpe.32000.de -newstest2009.tok.bpe.32000.de newstest2011.tok.bpe.32000.en newstest2013.tok.de newstest2015.tok.en train.tok.clean.bpe.32000.en -newstest2009.tok.bpe.32000.en newstest2011.tok.de newstest2013.tok.en newstest2016.de train.tok.clean.de -newstest2009.tok.de newstest2011.tok.en newstest2014.de newstest2016.en train.tok.clean.en -newstest2009.tok.en newstest2012.de newstest2014.en newstest2016.tok.bpe.32000.de train.tok.de -newstest2010.de newstest2012.en newstest2014.tok.bpe.32000.de newstest2016.tok.bpe.32000.en train.tok.en -newstest2010.en newstest2012.tok.bpe.32000.de newstest2014.tok.bpe.32000.en newstest2016.tok.de vocab.bpe.32000 -newstest2010.tok.bpe.32000.de newstest2012.tok.bpe.32000.en newstest2014.tok.de newstest2016.tok.en vocab.bpe.32000.de -newstest2010.tok.bpe.32000.en newstest2012.tok.de newstest2014.tok.en subword-nmt vocab.bpe.32000.en -``` - -4. Next, navigate to the `benchmarks` directory in your local clone of -the [intelai/models](https://github.com/IntelAI/models) repo from step 1. -The `launch_benchmark.py` script in the `benchmarks` directory is -used for starting a model run in a optimized TensorFlow docker -container. It has arguments to specify which model, framework, mode, -platform, and docker image to use, along with your path to the dataset -that you generated in step 3. - -Substitute in your own `--data-location` (from step 3), `--checkpoint` pre-trained -model file path (from step 2), and the name/tag for your docker image. - -GNMT can be run for online and batch inference. Use one of -the following examples below, depending on your use case. 
- -For online inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`): - -``` -$ cd models/benchmarks - -$ python launch_benchmark.py \ ---model-name gnmt \ ---precision fp32 \ ---mode inference \ ---framework tensorflow \ ---benchmark-only \ ---batch-size 1 \ ---socket-id 0 \ ---checkpoint $MODEL_WORK_DIR/gnmt_checkpoints \ ---data-location $MODEL_WORK_DIR/wmt16 \ ---docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ ---infer_mode=beam_search -``` - -For batch inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 32`): - -``` -$ python launch_benchmark.py \ ---model-name gnmt \ ---precision fp32 \ ---mode inference \ ---framework tensorflow \ ---benchmark-only \ ---batch-size 32 \ ---socket-id 0 \ ---checkpoint $MODEL_WORK_DIR/gnmt_checkpoints \ ---data-location $MODEL_WORK_DIR/wmt16 \ ---docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ ---infer_mode=beam_search -``` - -6. The log file is saved to the -`models/benchmarks/common/tensorflow/logs` directory. Below are -examples of what the tail of your log file should look like for the -different configs. - -Example log tail when running for online inference: -``` - dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/basic_lstm_cell/bias:0, (4096,), /device:CPU:0 - dynamic_seq2seq/decoder/output_projection/kernel:0, (1024, 36548), - loaded infer model parameters from /checkpoints/translate.ckpt, time 1.09s -# Start decoding - decoding to output /workspace/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/out_dir/output_infer - done, num sentences 2169, num translations per input 1, time 1108s, Wed Feb 6 01:36:13 2019. - The latency of the model is 511.2466 ms/sentences - bleu: 29.2 -Ran inference with batch size 1 -Log location outside container: {--output-dir value}/benchmark_gnmt_inference_fp32_20190206_011740.log -``` - -Example log tail when running for batch inference: -``` - dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/basic_lstm_cell/bias:0, (4096,), /device:CPU:0 - dynamic_seq2seq/decoder/output_projection/kernel:0, (1024, 36548), - loaded infer model parameters from /checkpoints/translate.ckpt, time 1.08s -# Start decoding - decoding to output /workspace/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/out_dir/output_infer - done, num sentences 2169, num translations per input 1, time 302s, Wed Feb 6 01:48:30 2019. - The throughput of the model is 7.1780 sentences/s - bleu: 29.2 -Ran inference with batch size 32 -Log location outside container: {--output-dir value}/benchmark_gnmt_inference_fp32_20190206_014324.log -``` - -Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands -to get additional debug output or change the default output location.. - -7. To return to where you started from: -``` -$ popd -``` - -## FP32 Training Instructions - -These instructions will run training on two CPU sockets on a single physical machine. - -1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: - -``` -$ git clone https://github.com/IntelAI/models.git -``` - -This repository includes launch scripts for running an optimized version of the GNMT model code. - -2. Download and install the [Intel(R) MPI Library for Linux](https://software.intel.com/en-us/mpi-library/choose-download/linux). - -The l_mpi_2019.3.199 and l_mpi_2019.4.243 were verified and suggested. Once you have the l_mpi_2019.3.199.tgz downloaded, -unzip it into /home//l_mpi directory. 
Make sure to accept the installation license and change the value of "ACCEPT_EULA" -to "accept" in /home//l_mpi/l_mpi_2019.3.199/silent.cfg, before starting the silent installation. The software is -installed by default to "/opt/intel" location in docker. If want to run the training in a container, please keep the default -installation location. - -``` -$ tar -zxvf l_mpi_2019.3.199.tgz -C /home//l_mpi -$ cd /home//l_mpi/l_mpi_2019.3.199 -$ vim silent.cfg -``` - -3. To run GNMT training, use the WMT16 English-German dataset. You can -download the dataset using the script provided on nmt github. - -``` -$git clone https://github.com/tensorflow/nmt.git -Cloning into 'nmt'... -remote: Enumerating objects: 1247, done. -remote: Total 1247 (delta 0), reused 0 (delta 0), pack-reused 1247 -Receiving objects: 100% (1247/1247), 1.23 MiB | 7.72 MiB/s, done. -Resolving deltas: 100% (891/891), done. - -$cd nmt -$git checkout b278487980832417ad8ac701c672b5c3dc7fa553 -$nmt/scripts/wmt16_en_de.sh /home//wmt16 -``` - -After the script has completed, you should have a directory with the -dataset that looks like: - -``` -$ ls /home//wmt16/ -bpe.32000 newstest2010.tok.de newstest2012.tok.en newstest2015.de train.de -data newstest2010.tok.en newstest2013.de newstest2015.en train.en -mosesdecoder newstest2011.de newstest2013.en newstest2015.tok.bpe.32000.de train.tok.bpe.32000.de -newstest2009.de newstest2011.en newstest2013.tok.bpe.32000.de newstest2015.tok.bpe.32000.en train.tok.bpe.32000.en -newstest2009.en newstest2011.tok.bpe.32000.de newstest2013.tok.bpe.32000.en newstest2015.tok.de train.tok.clean.bpe.32000.de -newstest2009.tok.bpe.32000.de newstest2011.tok.bpe.32000.en newstest2013.tok.de newstest2015.tok.en train.tok.clean.bpe.32000.en -newstest2009.tok.bpe.32000.en newstest2011.tok.de newstest2013.tok.en newstest2016.de train.tok.clean.de -newstest2009.tok.de newstest2011.tok.en newstest2014.de newstest2016.en train.tok.clean.en -newstest2009.tok.en newstest2012.de newstest2014.en newstest2016.tok.bpe.32000.de train.tok.de -newstest2010.de newstest2012.en newstest2014.tok.bpe.32000.de newstest2016.tok.bpe.32000.en train.tok.en -newstest2010.en newstest2012.tok.bpe.32000.de newstest2014.tok.bpe.32000.en newstest2016.tok.de vocab.bpe.32000 -newstest2010.tok.bpe.32000.de newstest2012.tok.bpe.32000.en newstest2014.tok.de newstest2016.tok.en vocab.bpe.32000.de -newstest2010.tok.bpe.32000.en newstest2012.tok.de newstest2014.tok.en subword-nmt vocab.bpe.32000.en -``` - -4. Next, navigate to the `benchmarks` directory in your local clone of -the [intelai/models](https://github.com/IntelAI/models) repo from step 1. -The `launch_benchmark.py` script in the `benchmarks` directory is -used for starting a model run in a optimized TensorFlow docker -container. It has arguments to specify which model, framework, mode, -platform, and docker image to use, along with your path to the dataset -that you generated in step 3. 
- -Substitute in your own `--data-location` (from step 3), `--volume` Intel(R) MPI -package path (from step 2),`--num-processes` the number of processes to run on, -`--num-processes-per-node` the number of processes to launch on each node, `--shm-size` -the size of docker /dev/shm and the name/tag for your docker image, `--src` the source -language suffix, `--tgt` the target language suffix, `--vocab_reefix` the vocab prefix, -expect files with src/tgt suffixes, `--train_prefix` the train prefix, expect files -with src/tgt suffixes,`--dev_prefix` the dev prefix, expect files with src/tgt suffixes, -`--test_prefix` the test prefix, expect files with src/tgt suffixes. The value of -docker `--shm-size` needs to be set as 4g bytes at least. - -GNMT can be run in docker. Use one of the following examples below, depending -on your use case. - -``` -python launch_benchmark.py \ - --model-name gnmt \ - --mode training \ - --framework tensorflow \ - --precision fp32 \ - --batch-size 512 \ - --benchmark-only \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --volume /home//l_mpi/l_mpi_2019.3.199:/l_mpi \ - --shm-size 4g \ - --num-processes 2 \ - --num-processes-per-node 1 \ - --num-inter-threads 1 \ - --num-intra-threads 28 \ - --num-train-steps 340000 \ - --output-dir /home//output \ - --data-location /home//wmt16 \ - --num_units=1024 \ - --dropout=0.2 \ - --src=de \ - --tgt=en \ - --vocab_prefix=vocab.bpe.32000 \ - --train_prefix=train.tok.clean.bpe.32000 \ - --dev_prefix=newstest2013.tok.bpe.32000 \ - --test_prefix=newstest2015.tok.bpe.32000 \ - --hparams_path=nmt/standard_hparams/wmt16_gnmt_4_layer_multi_instances.json -``` - -5. The log file is saved to the`models/benchmarks/common/tensorflow/logs` directory. -Below are examples of what the tail of your log file should look like. - -Example log tail when running: -``` - step 100 lr 0.5 step-time 11.82s wps 2.46K ppl 23753.45 gN 202.07 bleu 0.00, Thu Jul 11 22:47:09 2019 rank 1 - step 100 lr 0.5 step-time 11.82s wps 2.46K ppl 23747.96 gN 202.07 bleu 0.00, Thu Jul 11 22:47:09 2019 rank 0 - step 200 lr 0.5 step-time 11.54s wps 2.51K ppl 2316.64 gN 41.70 bleu 0.00, Thu Jul 11 23:06:23 2019 rank 0 - step 200 lr 0.5 step-time 11.54s wps 2.51K ppl 2317.43 gN 41.70 bleu 0.00, Thu Jul 11 23:06:23 2019 rank 1 - step 300 lr 0.0625 step-time 11.52s wps 2.51K ppl 998.43 gN 13.73 bleu 0.00, Thu Jul 11 23:25:35 2019 rank 1 - step 300 lr 0.0625 step-time 11.52s wps 2.51K ppl 999.89 gN 13.73 bleu 0.00, Thu Jul 11 23:25:35 2019 rank 0 -``` - -Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands -to get additional debug output or change the default output location.. - diff --git a/benchmarks/language_translation/tensorflow/gnmt/__init__.py b/benchmarks/language_translation/tensorflow/gnmt/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/language_translation/tensorflow/gnmt/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/language_translation/tensorflow/gnmt/inference/__init__.py b/benchmarks/language_translation/tensorflow/gnmt/inference/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/language_translation/tensorflow/gnmt/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/__init__.py b/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/config.json b/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/config.json deleted file mode 100644 index 4d0e2acf5..000000000 --- a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "optimization_parameters": { - "KMP_AFFINITY": "granularity=fine,compact,1,0", - "KMP_BLOCKTIME": 1, - "KMP_SETTINGS": 1 - } -} diff --git a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py deleted file mode 100644 index 915cf456f..000000000 --- a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py +++ /dev/null @@ -1,128 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# - -import os -from argparse import ArgumentParser - -from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var - - -class ModelInitializer(BaseModelInitializer): - """Model initializer for GNMT FP32 inference""" - - def __init__(self, args, custom_args=[], platform_util=None): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.cmd = self.get_command_prefix(self.args.socket_id) - - if self.args.socket_id != -1 and self.args.num_cores != -1: - self.cmd += "--physcpubind=0-" + \ - (str(self.args.num_cores - 1)) + " " - self.cmd += "{} ".format(self.python_exe) - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - # use default batch size if -1 - if self.args.batch_size == -1: - self.args.batch_size = 32 - - # set num_inter_threads and num_intra_threads (override inter threads to 2) - self.set_num_inter_intra_threads() - - DATA_DIR = os.path.join(self.args.intelai_models, - self.args.precision, "wmt16") - HPARAM_DIR = os.path.join(self.args.intelai_models, self.args.mode, - self.args.precision, "standard_hparams") - - set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) - - if self.args.data_location is None: - data_dir = DATA_DIR - else: - data_dir = self.args.data_location - - arg_parser = ArgumentParser(description="process custom_args") - arg_parser.add_argument( - "-S", "--src", help="source lanuage", - dest="src", default="de") - arg_parser.add_argument( - "-T", "--tgt", help="target lanuage", - dest="tgt", default="en") - arg_parser.add_argument( - "-HPMS", "--hparams_path", help="hparameter files location", - dest="hparams_path", - default=HPARAM_DIR + "/wmt16_gnmt_4_layer_internal.json") - arg_parser.add_argument( - "-VP", "--vocab_prefix", - help="prefix of vocabulary file", - dest="vocab_prefix", - default=data_dir + "/vocab.bpe.32000") - arg_parser.add_argument( - "-IF", "--inference_input_file", - help="inference input file with path", - dest="inference_input_file", - default=data_dir + "/newstest2015.tok.bpe.32000.de") - arg_parser.add_argument( - "-OF", "--inference_output_file", - help="inference output file with path", - dest="inference_output_file", - default=self.args.output_dir + "/output_infer") - arg_parser.add_argument( - "-RF", "--inference_ref_file", - help="inference ref file with path", - dest="inference_ref_file", - default=data_dir + "/newstest2015.tok.bpe.32000.en") - arg_parser.add_argument( - "--infer_mode", type=str, default="greedy", - choices=["greedy", "sample", "beam_search"], - help="Which type of decoder to use during inference.", - dest="infer_mode") - - self.args = arg_parser.parse_args(self.custom_args, namespace=self.args) - - # Model parameter control - - cmd_args = " --src=" + self.args.src + " --tgt=" + self.args.tgt + \ - " --hparams_path=" + self.args.hparams_path + \ - " --out_dir=" + self.args.output_dir + \ - " --vocab_prefix=" + self.args.vocab_prefix + \ - " --ckpt=" + (self.args.checkpoint + "/translate.ckpt") + \ - " --infer_batch_size=" + str(self.args.batch_size) + \ - " --inference_input_file=" + \ - self.args.inference_input_file + \ - " --inference_output_file=" + \ - self.args.inference_output_file + \ - " --inference_ref_file=" + self.args.inference_ref_file + \ - " --num_inter_threads=" + \ - str(self.args.num_inter_threads) + \ - " --num_intra_threads=" + \ - str(self.args.num_intra_threads) + \ - " 
--infer_mode=" + self.args.infer_mode - - run_script = os.path.join(self.args.intelai_models, self.args.mode, - self.args.precision, "nmt.py") - - self.cmd = self.cmd + run_script + cmd_args - - def run(self): - if self.cmd: - self.run_command(self.cmd) diff --git a/benchmarks/language_translation/tensorflow/gnmt/training/__init__.py b/benchmarks/language_translation/tensorflow/gnmt/training/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/language_translation/tensorflow/gnmt/training/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/language_translation/tensorflow/gnmt/training/fp32/__init__.py b/benchmarks/language_translation/tensorflow/gnmt/training/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/language_translation/tensorflow/gnmt/training/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/language_translation/tensorflow/gnmt/training/fp32/config.json b/benchmarks/language_translation/tensorflow/gnmt/training/fp32/config.json deleted file mode 100644 index 4d0e2acf5..000000000 --- a/benchmarks/language_translation/tensorflow/gnmt/training/fp32/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "optimization_parameters": { - "KMP_AFFINITY": "granularity=fine,compact,1,0", - "KMP_BLOCKTIME": 1, - "KMP_SETTINGS": 1 - } -} diff --git a/benchmarks/language_translation/tensorflow/gnmt/training/fp32/model_init.py b/benchmarks/language_translation/tensorflow/gnmt/training/fp32/model_init.py deleted file mode 100644 index acc6b19f6..000000000 --- a/benchmarks/language_translation/tensorflow/gnmt/training/fp32/model_init.py +++ /dev/null @@ -1,169 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -import os -from argparse import ArgumentParser -from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var - - -class ModelInitializer(BaseModelInitializer): - """Model initializer for GNMT FP32 training""" - - def __init__(self, args, custom_args=[], platform_util=None): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - # set num_inter_threads and num_intra_threads (override inter threads to 2) - self.set_num_inter_intra_threads() - - DATA_DIR = os.path.join(self.args.intelai_models, self.args.mode, - self.args.precision, "wmt16") - HPARAM_DIR = os.path.join(self.args.intelai_models, self.args.mode, - self.args.precision, "standard_hparams") - - set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) - - if self.args.data_location is None: - data_dir = DATA_DIR - else: - data_dir = self.args.data_location - # Train parater control - arg_parser = ArgumentParser(description="process custom_args") - arg_parser.add_argument( - "-s", "--src", help="source lanuage", - dest="src", default="de") - arg_parser.add_argument( - "-t", "--tgt", help="target lanuage", - dest="tgt", default="en") - arg_parser.add_argument( - "-VP", "--vocab_prefix", - help="prefix of vocabulary file", - dest="vocab_prefix", - default=data_dir + "/vocab.bpe.32000") - arg_parser.add_argument( - "-TP", "--train_prefix", - help="prefix of train file", - dest="train_prefix", - default=data_dir + "/train.tok.clean.bpe.32000") - arg_parser.add_argument( - "-DP", "--dev_prefix", - help="prefix of dev file", - dest="dev_prefix", - default=data_dir + "/newstest2013.tok.bpe.32000") - arg_parser.add_argument( - "-TSP", "--test_prefix", - help="prefix of test file", - dest="test_prefix", - default=data_dir + "/newstest2015.tok.bpe.32000") - arg_parser.add_argument( - "-OD", "--output_dir", - help="output directory", - dest="output_dir", - default=self.args.output_dir) - arg_parser.add_argument( - "-NU", "--num_units", - help="number of units", - dest="num_units", - default=1024) - arg_parser.add_argument( - "-DO", "--dropout", - help="dropout", - dest="dropout", - default=0.2) - arg_parser.add_argument( - "-BS", "--batch_size", - help="batch size", - dest="batch_size", - default=512) - arg_parser.add_argument( - "-NP", "--num_processes", - help="number of processes", - dest="num_processes", - default=2) - arg_parser.add_argument( - "-NPPN", "--num_processes_per_node", - help="number of processes per node", - dest="num_processes_per_node", - default=1) - arg_parser.add_argument( - "-NT", "--num_inter_threads", - help="number of inter threads", - dest="num_inter_threads", - default=1) - arg_parser.add_argument( - "-NAT", "--num_intra_threads", - help="number of intra threads", - dest="num_intra_threads", - default=28) - arg_parser.add_argument( - "-NTS", "--num_train_steps", - help="number of train steps", - dest="num_train_steps", - default=340000) - arg_parser.add_argument( - "-HPMS", "--hparams_path", help="hparameter files location", - dest="hparams_path", - default=HPARAM_DIR + "/wmt16_gnmt_4_layer_multi_instances.json") - self.args = arg_parser.parse_args(self.custom_args, namespace=self.args) - - # Model parameter control - - cmd_args = " --src=" + 
self.args.src + " --tgt=" + self.args.tgt + \ - " --vocab_prefix=" + os.path.join(data_dir, self.args.vocab_prefix) + \ - " --train_prefix=" + os.path.join(data_dir, self.args.train_prefix) + \ - " --dev_prefix=" + os.path.join(data_dir, self.args.dev_prefix) + \ - " --test_prefix=" + os.path.join(data_dir, self.args.test_prefix) + \ - " --out_dir=" + self.args.output_dir + \ - " --num_units=" + str(self.args.num_units) + \ - " --dropout=" + str(self.args.dropout) + \ - " --batch_size=" + str(self.args.batch_size) + \ - " --num_inter_threads=" + \ - str(self.args.num_inter_threads) + \ - " --num_intra_threads=" + \ - str(self.args.num_intra_threads) + \ - " --num_train_steps=" + str(self.args.num_train_steps) + \ - " --hparams_path=" + self.args.hparams_path - - self.run_script_dir = os.path.join(self.args.intelai_models, self.args.mode, self.args.precision, "nmt") - multi_instance_param_list = ["-genv:I_MPI_ASYNC_PROGRESS=1", - "-genv:I_MPI_FABRICS=shm", - "-genv:I_MPI_PIN_DOMAIN=socket", - "-genv:I_MPI_ASYNC_PROGRESS_PIN={},{}".format(0, self.args.num_intra_threads), - "-genv:OMP_NUM_THREADS={}".format(self.args.num_intra_threads)] - self.cmd = self.get_multi_instance_train_prefix(multi_instance_param_list) - self.cmd += "{} ".format(self.python_exe) - run_script = "-m nmt.nmt " - self.cmd = self.cmd + run_script + cmd_args - - def run(self): - if self.cmd: - # The generate.py script expects that we run from the model source - # directory. Save off the current working directory so that we can - # restore it when the script is done. - original_dir = os.getcwd() - os.chdir(self.run_script_dir) - # Run benchmarking - self.run_command(self.cmd) - # Change context back to the original dir - os.chdir(original_dir) diff --git a/benchmarks/language_translation/tensorflow/mlperf_gnmt/README.md b/benchmarks/language_translation/tensorflow/mlperf_gnmt/README.md new file mode 100644 index 000000000..70aef10e9 --- /dev/null +++ b/benchmarks/language_translation/tensorflow/mlperf_gnmt/README.md @@ -0,0 +1,86 @@ +# MLPerf GNMT + +This document has instructions for how to run GNMT for the +following modes/platforms: +* [FP32 inference](#fp32-inference-instructions) + +## FP32 Inference Instructions + +1. Clone the intelai/models repo. +This repo has the launch script for running the model, which we will +use in a later step. +``` +git clone https://github.com/IntelAI/models.git +``` + +2. Download the GNMT benchmarking data. +``` +wget https://zenodo.org/record/2531868/files/gnmt_inference_data.zip +unzip gnmt_inference_data.zip +``` + +3. Download the pretrained model: +``` +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mlperf_gnmt_fp32_pretrained_model.pb +``` + +4. Build `tensorflow-addons` `v0.5.2` before you start the next step. +Clone the tensorflow-addons repo, build the pip package, and install it: +``` +pip install intel-tensorflow==2.1.0 +git clone -b v0.5.2 https://github.com/tensorflow/addons.git +cd addons +sed -i 's;\${PYTHON_VERSION:=python} -m pip install $QUIET_FLAG -r $REQUIREMENTS_TXT;PYTHON_VERSION=python;' configure.sh +sh configure.sh +bazel build --enable_runfiles build_pip_pkg +bazel-bin/build_pip_pkg artifacts +pip install artifacts/tensorflow_addons-*.whl +``` + +5. Navigate to the `benchmarks` directory in your local clone of +the [intelai/models](https://github.com/IntelAI/models) repo from step 1. + +Substitute in your own `--data-location` (from step 2) and the `--in-graph` pre-trained +model file path (from step 3).
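+ +Optionally, before launching the benchmark, you can verify that the `tensorflow-addons` wheel built in step 4 imports cleanly alongside TensorFlow (a quick sanity check, assuming the `pip install` commands above completed without errors): +``` +python -c "import tensorflow as tf, tensorflow_addons as tfa; print(tf.__version__, tfa.__version__)" +```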
+ +For online inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`): +``` +python launch_benchmark.py \ +--model-name mlperf_gnmt \ +--framework tensorflow \ +--precision fp32 \ +--mode inference \ +--batch-size 1 \ +--socket-id 0 \ +--data-location /home/<user>/nmt/data \ +--in-graph /home/<user>/mlperf_gnmt_fp32_pretrained_model.pb \ +--benchmark-only +``` + +For batch inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 32`): +``` +python launch_benchmark.py \ +--model-name mlperf_gnmt \ +--framework tensorflow \ +--precision fp32 \ +--mode inference \ +--batch-size 32 \ +--socket-id 0 \ +--data-location /home/<user>/nmt/data \ +--in-graph /home/<user>/mlperf_gnmt_fp32_pretrained_model.pb \ +--benchmark-only +``` + +For the accuracy test (using `--accuracy-only`, `--socket-id 0` and `--batch-size 32`): +``` +python launch_benchmark.py \ +--model-name mlperf_gnmt \ +--framework tensorflow \ +--precision fp32 \ +--mode inference \ +--batch-size 32 \ +--socket-id 0 \ +--data-location /home/<user>/nmt/data \ +--in-graph /home/<user>/mlperf_gnmt_fp32_pretrained_model.pb \ +--accuracy-only +``` diff --git a/benchmarks/language_translation/tensorflow/mlperf_gnmt/__init__.py b/benchmarks/language_translation/tensorflow/mlperf_gnmt/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/__init__.py b/benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/__init__.py b/benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/config.json b/benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/config.json new file mode 100644 index 000000000..c67a8f22a --- /dev/null +++ b/benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/model_init.py new file mode 100644 index 000000000..1d3cb6311 --- /dev/null +++ b/benchmarks/language_translation/tensorflow/mlperf_gnmt/inference/fp32/model_init.py @@ -0,0 +1,73 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# +# + +import os +from argparse import ArgumentParser + +from common.base_model_init import BaseModelInitializer +from common.base_model_init import set_env_var + + +class ModelInitializer(BaseModelInitializer): + """Model initializer for GNMT FP32 inference""" + + def __init__(self, args, custom_args=[], platform_util=None): + super(ModelInitializer, self).__init__(args, custom_args, platform_util) + self.cmd = self.get_command_prefix(self.args.socket_id) + + if self.args.socket_id != -1 and self.args.num_cores != -1: + self.cmd += "--physcpubind=0-" + \ + (str(self.args.num_cores - 1)) + " " + self.cmd += "{} ".format(self.python_exe) + + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) + + # use default batch size if -1 + if self.args.batch_size == -1: + self.args.batch_size = 32 + + # set num_inter_threads and num_intra_threads (override inter threads to 2) + self.set_num_inter_intra_threads() + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) + arg_parser = ArgumentParser(description="process custom_args") + self.args = arg_parser.parse_args(self.custom_args, namespace=self.args) + src_vocab_file = os.path.join(self.args.data_location, "vocab.bpe.32000.en") + tgt_vocab_file = os.path.join(self.args.data_location, "vocab.bpe.32000.de") + inference_input_file = os.path.join(self.args.data_location, "newstest2014.tok.bpe.32000.en") + inference_ref_file = os.path.join(self.args.data_location, "newstest2014.tok.bpe.32000.de") + + cmd_args = " --in_graph=" + self.args.input_graph + \ + " --batch_size=" + str(self.args.batch_size) + \ + " --num_inter_threads=" + str(self.args.num_inter_threads) + \ + " --num_intra_threads=" + str(self.args.num_intra_threads) + \ + " --src_vocab_file=" + src_vocab_file + \ + " --tgt_vocab_file=" + tgt_vocab_file + \ + " --inference_input_file=" + inference_input_file + \ + " --inference_ref_file=" + inference_ref_file + + run_script = os.path.join(self.args.intelai_models, + self.args.precision, "run_inference.py") + + self.cmd = self.cmd + run_script + cmd_args + + def run(self): + if self.cmd: + self.run_command(self.cmd) diff --git a/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md b/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md new file mode 100644 index 000000000..df46e175d --- /dev/null +++ b/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md @@ -0,0 +1,90 @@ +# Transformer Language Translation (LT) Official + +This document has instructions for how to run Transformer Language official from TensorFlow models +for the following modes/platforms: +* [FP32 inference](#fp32-inference-instructions) + +Instructions and scripts for model inference for other platforms are coming later. + +## FP32 Inference Instructions + +1. Download and extract the frozen graph of the model and necessary data files. + +``` +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/transformer_lt_official_fp32_pretrained_model.tar.gz +$ tar -xzvf transformer_lt_official_fp32_pretrained_model.tar.gz +$ ls -l transformer_lt_official_fp32_pretrained_model/* +transformer_lt_official_fp32_pretrained_model/graph: +total 241540 +-rwx------. 1 user group 247333269 Mar 15 17:29 fp32_graphdef.pb + +transformer_lt_official_fp32_pretrained_model/data: +total 1064 +-rw-r--r--. 1 user group 359898 Feb 20 16:05 newstest2014.en +-rw-r--r--. 
1 user group 399406 Feb 20 16:05 newstest2014.de +-rw-r--r--. 1 user group 324025 Mar 15 17:31 vocab.txt +``` + +2. Clone this [intelai/models](https://github.com/IntelAI/models) +repository: + +``` +$ git clone https://github.com/IntelAI/models.git +``` + +3. Next, navigate to the `benchmarks` directory in your local clone of +the [intelai/models](https://github.com/IntelAI/models) repo (from step 2). +The `launch_benchmark.py` script in the `benchmarks` directory is +used for starting a model run in an optimized TensorFlow docker +container. It has arguments to specify which model, framework, mode, +precision, and docker image to use, along with your path to the dataset location (from step 1). + +Transformer LT official can run for online or batch inference. Use one of the following examples below, depending on +your use case. + +For online inference (using `--socket-id 0` and `--batch-size 1`): + +``` +python launch_benchmark.py \ + --model-name transformer_lt_official \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --batch-size 1 \ + --socket-id 0 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home/<user>/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \ + --data-location /home/<user>/transformer_lt_official_fp32_pretrained_model/data \ + -- file=newstest2014.en \ + file_out=translate.txt \ + reference=newstest2014.de \ + vocab_file=vocab.txt +``` + +For batch inference (using `--socket-id 0` and `--batch-size 64`): + +``` +python launch_benchmark.py \ + --model-name transformer_lt_official \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --batch-size 64 \ + --socket-id 0 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph /home/<user>/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \ + --data-location /home/<user>/transformer_lt_official_fp32_pretrained_model/data \ + -- file=newstest2014.en \ + file_out=translate.txt \ + reference=newstest2014.de \ + vocab_file=vocab.txt + +``` + +Note that the `--verbose` flag can be added to any of the above commands +to get additional debug output. +The `--num-inter-threads` and `--num-intra-threads` values can be set to different numbers, depending on +the CPU in the system, to achieve the best performance. + +4. The log file and the default translated results are saved to the `models/benchmarks/common/tensorflow/logs` directory.
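+ +As an optional check after the run completes, you can confirm that the translated output was written; assuming the default `file_out=translate.txt` and the default output directory, the first few translated sentences can be viewed with: +``` +head -n 3 models/benchmarks/common/tensorflow/logs/translate.txt +```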
+ diff --git a/benchmarks/content_creation/__init__.py b/benchmarks/language_translation/tensorflow/transformer_lt_official/__init__.py similarity index 100% rename from benchmarks/content_creation/__init__.py rename to benchmarks/language_translation/tensorflow/transformer_lt_official/__init__.py diff --git a/benchmarks/content_creation/tensorflow/__init__.py b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/__init__.py similarity index 100% rename from benchmarks/content_creation/tensorflow/__init__.py rename to benchmarks/language_translation/tensorflow/transformer_lt_official/inference/__init__.py diff --git a/benchmarks/content_creation/tensorflow/draw/__init__.py b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/__init__.py similarity index 100% rename from benchmarks/content_creation/tensorflow/draw/__init__.py rename to benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/__init__.py diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/config.json b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/config.json similarity index 100% rename from benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/config.json rename to benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/config.json diff --git a/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py new file mode 100644 index 000000000..f1d784f75 --- /dev/null +++ b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py @@ -0,0 +1,113 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +import os +from argparse import ArgumentParser + +from common.base_model_init import BaseModelInitializer +from common.base_model_init import set_env_var + + +class ModelInitializer(BaseModelInitializer): + """Model initializer for Transformer LT FP32 inference""" + + def __init__(self, args, custom_args, platform_util=None): + super(ModelInitializer, self).__init__(args, custom_args, platform_util) + + self.cmd = self.get_command_prefix(self.args.socket_id) + self.bleu_params = "" + + self.set_num_inter_intra_threads() + + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) + + MODEL_EXEC_DIR = os.path.join(self.args.intelai_models, self.args.mode, self.args.precision) + + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) + + if self.args.socket_id != -1: + if self.args.num_cores != -1: + self.cmd += "--physcpubind=0-" + \ + (str(self.args.num_cores - 1)) + " " + self.cmd += self.python_exe + + run_script = os.path.join(MODEL_EXEC_DIR, "infer_ab.py") + + # Model args + arg_parser = ArgumentParser(description='process custom_args') + arg_parser.add_argument('--param_set', + help='hparameter setting', + dest="param_set", + default="big") + arg_parser.add_argument('--vocab_file', + help='input vocable file for translation', + dest="vocab_file", + default="vocab.txt") + arg_parser.add_argument('--in_graph', + help='input fp32 frozen graph file for inference', + dest="fp32_graph", + default="fp32_graphdef.pb") + arg_parser.add_argument('--file', + help='decode input file with path', + dest="decode_from_file", + default="") + arg_parser.add_argument('--file_out', + help='inference output file name', + dest="decode_to_file", + default="translate.txt") + arg_parser.add_argument('--reference', + help='inference ref file with path', + dest="reference", + default="") + + self.args = arg_parser.parse_args(self.custom_args, + namespace=self.args) + + # Model parameter control + translate_file = os.path.join(self.args.output_dir, + self.args.decode_to_file) + cmd_args = " --param_set=" + self.args.param_set + \ + " --in_graph=" + self.args.fp32_graph + \ + " --batch_size=" + \ + (str(self.args.batch_size) + if self.args.batch_size != -1 else "1") + \ + " --file=" + self.args.decode_from_file + \ + " --file_out=" + translate_file + \ + " --vocab_file=" + self.args.vocab_file +\ + " --num_inter=" + str(self.args.num_inter_threads) +\ + " --num_intra=" + str(self.args.num_intra_threads) + + self.bleu_params += " --translation=" + translate_file + \ + " --reference=" + self.args.reference + + self.cmd += " " + run_script + cmd_args + compute_bleu_script = os.path.join(MODEL_EXEC_DIR, "compute_bleu.py") + self.bleucmd = self.python_exe + " " + compute_bleu_script \ + + self.bleu_params + + def run(self): + original_dir = os.getcwd() + #os.chdir(self.args.model_source_dir) + self.run_command(self.cmd) + + # calculate the bleu number after inference is done + os.system(self.bleucmd) + os.chdir(original_dir) diff --git a/benchmarks/language_translation/tensorflow/transformer_lt_official/requirements.txt b/benchmarks/language_translation/tensorflow/transformer_lt_official/requirements.txt new file mode 100644 index 000000000..b159414b3 --- /dev/null +++ b/benchmarks/language_translation/tensorflow/transformer_lt_official/requirements.txt @@ -0,0 +1,2 @@ +Cython +pandas diff --git a/benchmarks/launch_benchmark.py b/benchmarks/launch_benchmark.py old mode 100644 
new mode 100755 index cc2d0a42d..0f98cf04d --- a/benchmarks/launch_benchmark.py +++ b/benchmarks/launch_benchmark.py @@ -184,9 +184,6 @@ def get_env_vars(self, benchmark_scripts, use_case, intelai_models): "NUM_INTER_THREADS": args.num_inter_threads, "NUM_INTRA_THREADS": args.num_intra_threads, "DATA_NUM_INTER_THREADS": args.data_num_inter_threads, - "NUM_PROCESSES": args.num_processes, - "NUM_PROCESSES_PER_NODE": args.num_processes_per_node, - "NUM_TRAIN_STEPS": args.num_train_steps, "DATA_NUM_INTRA_THREADS": args.data_num_intra_threads, "BENCHMARK_ONLY": args.benchmark_only, "ACCURACY_ONLY": args.accuracy_only, @@ -194,17 +191,19 @@ def get_env_vars(self, benchmark_scripts, use_case, intelai_models): "DISABLE_TCMALLOC": args.disable_tcmalloc, "TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD": args.tcmalloc_large_alloc_report_threshold, "DOCKER": str(args.docker_image is not None), - "PYTHON_EXE": sys.executable if not args.docker_image else "python" + "PYTHON_EXE": sys.executable if not args.docker_image else "python", + "MPI_NUM_PROCESSES": args.mpi, + "MPI_NUM_PROCESSES_PER_SOCKET": args.num_mpi } # Add custom model args as env vars) - for custom_arg in args.model_args + self.unknown_args: + for custom_arg in args.model_args: if "=" not in custom_arg: raise ValueError("Expected model args in the format " "`name=value` but received: {}". format(custom_arg)) split_arg = custom_arg.split("=") - split_arg[0] = split_arg[0].replace("-", "_").lstrip('_') + split_arg[0] = split_arg[0].replace("-", "_") env_var_dict[split_arg[0]] = split_arg[1] # Set the default value for NOINSTALL, if it's not explicitly set by the user @@ -226,59 +225,18 @@ def run_bare_metal(self, benchmark_scripts, intelai_models, env_var_dict): checkpoint_path = args.checkpoint dataset_path = args.data_location - # To Launch Tensorflow Serving benchmark we need only --in-graph arg. - # It does not support checkpoint files. 
- if args.framework == "tensorflow_serving": - if args.docker_image: - raise ValueError("--docker-image arg is not supported with tensorflow serving benchmarking, " - "as script automatically builds image and supplies it.") + mount_external_models_source = args.model_source_dir + mount_intelai_models = intelai_models - if checkpoint_path: - raise ValueError("--checkpoint-path arg is not supported with tensorflow serving benchmarking") - - if args.mode != "inference": - raise ValueError("--mode arg should be set to inference") - - if in_graph_path: - env_var_dict["IN_GRAPH"] = in_graph_path - else: - raise ValueError("--in-graph arg is required to run tensorflow serving benchmarking") - - for env_var_name in env_var_dict: - os.environ[env_var_name] = str(env_var_dict[env_var_name]) - - # We need this env to be set for the platform util - os.environ["PYTHON_EXE"] = str(sys.executable if not args.docker_image else "python") - - # Get Platformutil - platform_util_obj = None or platform_util.PlatformUtil(self.args) - - # Configure num_inter_threads and num_intra_threads - base_obj = BaseModelInitializer(args=self.args, custom_args=[], platform_util=platform_util_obj) - base_obj.set_num_inter_intra_threads() - - # Update num_inter_threads and num_intra_threads in env dictionary - env_var_dict["NUM_INTER_THREADS"] = self.args.num_inter_threads - env_var_dict["NUM_INTRA_THREADS"] = self.args.num_intra_threads - - # Set OMP_NUM_THREADS - env_var_dict["OMP_NUM_THREADS"] = self.args.num_intra_threads - - else: - mount_intelai_models = intelai_models - - # Add env vars with bare metal settings - env_var_dict["MOUNT_INTELAI_MODELS_SOURCE"] = mount_intelai_models - - if in_graph_path: - env_var_dict["IN_GRAPH"] = in_graph_path + # Add env vars with bare metal settings + env_var_dict["MOUNT_EXTERNAL_MODELS_SOURCE"] = mount_external_models_source + env_var_dict["MOUNT_INTELAI_MODELS_SOURCE"] = mount_intelai_models - if checkpoint_path: - env_var_dict["CHECKPOINT_DIRECTORY"] = checkpoint_path + if in_graph_path: + env_var_dict["IN_GRAPH"] = in_graph_path - if args.model_source_dir: - mount_external_models_source = args.model_source_dir - env_var_dict["MOUNT_EXTERNAL_MODELS_SOURCE"] = mount_external_models_source + if checkpoint_path: + env_var_dict["CHECKPOINT_DIRECTORY"] = checkpoint_path if dataset_path: env_var_dict["DATASET_LOCATION"] = dataset_path diff --git a/benchmarks/object_detection/__init__.py b/benchmarks/object_detection/__init__.py index fd96ab7af..c4fdb7d61 100644 --- a/benchmarks/object_detection/__init__.py +++ b/benchmarks/object_detection/__init__.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2019 Intel Corporation +# Copyright (c) 2018 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md deleted file mode 100644 index e496796a5..000000000 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md +++ /dev/null @@ -1,332 +0,0 @@ -# Faster R-CNN (ResNet50) - -This document has instructions for how to run Faster R-CNN for the -following modes/precisions: -* [FP32 inference](#fp32-inference-instructions) -* [Int8 inference](#int8-inference-instructions) - -Instructions and scripts for the Faster R-CNN ResNet50 model training and inference -for other precisions are coming later. - -## FP32 Inference Instructions - -1. 
Store the path to the current directory: -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR -``` - -2. Clone the `tensorflow/models` and `cocoapi` repositories: - -``` -$ git clone https://github.com/tensorflow/models.git tf_models -$ cd tf_models -$ git clone https://github.com/cocodataset/cocoapi.git - -``` - -The TensorFlow models repo will be used for running inference as well as -converting the coco dataset to the TF records format. - -3. Download and unzip the 2017 validation -[COCO dataset](http://cocodataset.org/#home) images: - -``` -$ cd $MODEL_WORK_DIR -$ mkdir val -$ cd val -$ wget http://images.cocodataset.org/zips/val2017.zip -$ unzip val2017.zip -``` - -4. Download and unzip the coco dataset annotations file: -``` -$ cd $MODEL_WORK_DIR -$ mkdir annotations -$ cd annotations -$ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -$ unzip annotations_trainval2017.zip -``` - -Since we are only using the validation dataset in this example, we will -create an empty directory and empty annotations json file to pass as the -train and test directories in the next step. - -``` -$ cd $MODEL_WORK_DIR -$ mkdir empty_dir - -$ cd annotations -$ echo "{ \"images\": {}, \"categories\": {}}" > empty.json -$ cd $MODEL_WORK_DIR -``` - -5. Now that you have the raw COCO dataset and annotations files, we need to convert it to the -TF records format in order to use it with the inference script. We will -do this by running the `create_coco_tf_record.py` file in the TensorFlow -models repo. - -Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). -Follow the steps below to navigate to the proper directory and point the -script to the raw COCO dataset files that you have downloaded in step 2 -and the annotations files that you downloaded and created in step 3. -The `--output_dir` is the location where the TF record files will be -located after the script has completed. - -``` - -# We are going to use an older version of the conversion script to checkout the git commit -$ cd tf_models -$ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 - -$ cd research/object_detection/dataset_tools/ -$ python create_coco_tf_record.py --logtostderr \ - --train_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --val_image_dir="$MODEL_WORK_DIR/val/val2017" \ - --test_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --train_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --val_annotations_file="$MODEL_WORK_DIR/annotations/annotations/instances_val2017.json" \ - --testdev_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --output_dir="$MODEL_WORK_DIR/output" - -$ ll $MODEL_WORK_DIR/output -total 1598276 --rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record --rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record --rw-rw-r--. 1 818336740 Nov 2 21:46 coco_val.record -``` - -The `coco_val.record` file is what we will use in this inference example. - -For the accuracy test, a patch is required in the cloned models repo until [this issue](https://github.com/tensorflow/models/issues/5411) -gets fixed in the TensorFlow repository. -Go back to the main models directory and get the specified SHA that we are using for the model, the patch will be applied automatically: -``` -$ cd $MODEL_WORK_DIR/tf_models -$ git checkout 20da786b078c85af57a4c88904f7889139739ab0 -``` - -6. Download and extract the pre-trained model. 
-``` -$ cd $MODEL_WORK_DIR -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/faster_rcnn_fp32_pretrained_model.pb -``` - -7. Clone the [intelai/models](https://github.com/intelai/models) repo. -This repo has the launch script for running the model. - -``` -$ git clone https://github.com/IntelAI/models.git -Cloning into 'models'... -remote: Enumerating objects: 11, done. -remote: Counting objects: 100% (11/11), done. -remote: Compressing objects: 100% (7/7), done. -remote: Total 11 (delta 3), reused 4 (delta 0), pack-reused 0 -Receiving objects: 100% (11/11), done. -Resolving deltas: 100% (3/3), done. -``` - -8. Run the `launch_benchmark.py` script from the intelai/models repo, -with the appropriate parameters. To run on single socket use `--socket_id` switch, -by default it will be using all available sockets. Optional parameter `steps` -(default value = 5000) can be added at the end of command after `--` as shown below: - -Run batch and online inference using the following command. -The `--data-location` is the path to the directory that contains the raw coco dataset -validation images which you downloaded and unzipped: - -``` -$ cd $MODEL_WORK_DIR/models/benchmarks - -$ python launch_benchmark.py \ - --data-location $MODEL_WORK_DIR/val/val2017 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --model-name faster_rcnn \ - --framework tensorflow \ - --precision fp32 \ - --mode inference \ - --socket-id 0 \ - --in-graph $MODEL_WORK_DIR/faster_rcnn_fp32_pretrained_model.pb \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --benchmark-only \ - -- steps=5000 -``` - -Or for accuracy where the `--data-location` is the path the directory -where your `coco_val.record` file is located and the `--in-graph` is -the pre-trained graph located in the pre-trained model directory (from step 5): -``` -$ python launch_benchmark.py \ - --model-name faster_rcnn \ - --mode inference \ - --precision fp32 \ - --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --data-location $MODEL_WORK_DIR/output/coco_val.record \ - --in-graph $MODEL_WORK_DIR/faster_rcnn_fp32_pretrained_model.pb \ - --accuracy-only -``` - -9. The log file is saved to the value of `--output-dir`. - -Below is a sample log file tail when running for batch -and online inference: - -``` -Time spent : 167.353 seconds. -Time spent per BATCH: 0.167 seconds. -Received these standard args: Namespace(accuracy_only=False, batch_size=1, benchmark_only=False, checkpoint='/checkpoints', data_location='/dataset', framework='tensorflow', input_graph=None, intelai_models='/workspace/intelai_models', mode='inference', model_args=[], model_name='faster_rcnn', model_source_dir='/workspace/models', num_cores=-1, num_inter_threads=2, num_intra_threads=56, precision='fp32', socket_id=0, use_case='object_detection', verbose=True) -Received these custom args: ['--config_file=pipeline.config'] -Run model here. 
-current directory: /workspace/models/research -Running: numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir /workspace/models/research/object_detection/log/eval -PYTHONPATH: :/workspace/intelai_models:/workspace/models/research:/workspace/models/research/slim:/workspace/models -RUNCMD: python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --data-location=/dataset --socket-id 0 --verbose --checkpoint=/checkpoints --config_file=pipeline.config -Batch Size: 1 -Ran inference with batch size 1 -Log location outside container: {--output-dir value}/benchmark_faster_rcnn_inference.log -``` - -And here is a sample log file tail when running for accuracy: -``` -DONE (t=1.35s). - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.316 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.489 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.355 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.271 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.380 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.383 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.383 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 -Ran inference with batch size 1 -Log location outside container: {--output-dir value}/benchmark_faster_rcnn_inference_fp32_20190114_205714.log -``` - -10. To return to where you started from: -``` -$ popd -``` - -## Int8 Inference Instructions - -These instructions use the TCMalloc memory allocator, which produces -better performance results for Int8 precision models with smaller batch sizes. -If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` -when calling `launch_benchmark.py` and the script will run without TCMalloc. - -1. Please follow the steps from the -[Faster R-CNN FP32 instructions](#fp32-inference-instructions) written -above for cloning dependecy repositories and getting the coco dataset: -* Performance bechmarking uses the raw coco dataset images. Follow steps -1, 2 and 3 from the FP32 instructions. -* Accuracy testing requires the coco dataset to be in the TF records -format. Follow steps 1, 2, 3, 4, and 5 from the FP32 instructions. - -2. Download the pre-trained model. -``` -$ cd $MODEL_WORK_DIR -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/faster_rcnn_int8_pretrained_model.pb -``` - -3. Clone the [intelai/models](https://github.com/intelai/models) repo. -This repo has the launch script for running the model. - -``` -$ git clone https://github.com/IntelAI/models.git -Cloning into 'models'... -remote: Enumerating objects: 11, done. -remote: Counting objects: 100% (11/11), done. -remote: Compressing objects: 100% (7/7), done. 
-remote: Total 11 (delta 3), reused 4 (delta 0), pack-reused 0 -Receiving objects: 100% (11/11), done. -Resolving deltas: 100% (3/3), done. -``` - -4. Run the `launch_benchmark.py` script from the intelai/models repo, -with the appropriate parameters. To run on single socket use `--socket_id` switch, -by default it will be using all available sockets. Optional parameter `steps` -(default value = 5000) can be added at the end of command after `--` as shown below: - -Run batch and online inference using the following command. -The `--data-location` is the path to the directory that contains the raw coco dataset -validation images which you downloaded and unzipped: - -``` -$ cd $MODEL_WORK_DIR/models/benchmarks - -$ python launch_benchmark.py \ - --data-location $MODEL_WORK_DIR/val/val2017 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --model-name faster_rcnn \ - --framework tensorflow \ - --precision int8 \ - --mode inference \ - --socket-id 0 \ - --in-graph $MODEL_WORK_DIR/faster_rcnn_int8_pretrained_model.pb \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --benchmark-only \ - -- steps=5000 -``` - -Or for accuracy where the `--data-location` is the path the directory -where your `coco_val.record` file is located and the `--in-graph` is -the pre-trained graph model: -``` -$ python launch_benchmark.py \ - --model-name faster_rcnn \ - --mode inference \ - --precision int8 \ - --framework tensorflow \ - --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --data-location $MODEL_WORK_DIR/output/coco_val.record \ - --in-graph $MODEL_WORK_DIR/faster_rcnn_int8_pretrained_model.pb \ - --accuracy-only -``` - -5. The log file is saved to the value of `--output-dir`. - -Below is a sample log file tail when running for batch -and online inference: - -``` -Step 4950: 0.0722849369049 seconds -Step 4960: 0.0763049125671 seconds -Step 4970: 0.070191860199 seconds -Step 4980: 0.0755469799042 seconds -Step 4990: 0.0742928981781 seconds -Avg. Duration per Step:0.0760930150986 -Ran inference with batch size -1 -Log location outside container: {--output-dir value}/benchmark_faster_rcnn_inference_int8_20190117_232539.log -``` - -And here is a sample log file tail when running for accuracy: -``` -Accumulating evaluation results... -DONE (t=1.34s). - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.310 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.479 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.351 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.310 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.267 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.372 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.375 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.375 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 -Ran inference with batch size -1 -Log location outside container: {--output-dir value}/benchmark_faster_rcnn_inference_int8_20190117_231937.log -``` - -6. 
To return to where you started from: -``` -$ popd -``` \ No newline at end of file diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/__init__.py b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/__init__.py deleted file mode 100644 index c4fdb7d61..000000000 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/config.json b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/config.json deleted file mode 100644 index 273b45b40..000000000 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "optimization_parameters": { - "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", - "KMP_BLOCKTIME": 1, - "KMP_SETTINGS": 1 - } -} diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py deleted file mode 100644 index 834a3136d..000000000 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py +++ /dev/null @@ -1,95 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# - -import argparse -import os -import sys - -from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var - - -class ModelInitializer (BaseModelInitializer): - def run_inference_sanity_checks(self, args, custom_args): - if args.batch_size != -1 and args.batch_size != 1: - sys.exit("Faster R-CNN inference supports 'batch-size=1' " + - "only, please modify via the '--batch_size' flag.") - - def __init__(self, args, custom_args, platform_util=None): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) - - self.research_dir = os.path.join(self.args.model_source_dir, - "research") - self.run_inference_sanity_checks(self.args, self.custom_args) - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - # set num_inter_threads and num_intra_threds - self.set_num_inter_intra_threads(num_inter_threads=self.args.num_inter_threads, - num_intra_threads=self.args.num_intra_threads) - omp_num_threads = self.args.num_intra_threads if self.args.num_cores == -1 else self.args.num_cores - set_env_var("OMP_NUM_THREADS", omp_num_threads) - - self.parse_custom_args() - - if self.args.accuracy_only: - accuracy_script = os.path.join( - self.args.intelai_models, self.args.mode, self.args.precision, - "coco_accuracy.sh") - if not os.path.exists(accuracy_script): - raise ValueError("Unable to locate the Faster R-CNN accuracy " - "script: {}".format(accuracy_script)) - self.run_cmd = "sh {} {} {} {}".format( - accuracy_script, self.args.input_graph, - self.args.data_location, self.args.model_source_dir) - else: - benchmark_script = os.path.join( - self.args.intelai_models, self.args.mode, self.args.precision, - "run_frozen_graph_rcnn.py") - self.command_prefix = \ - self.get_command_prefix(self.args.socket_id) + self.python_exe + " " + \ - benchmark_script - - self.run_cmd = \ - self.command_prefix + \ - " --num-inter-threads " + \ - str(self.args.num_inter_threads) + \ - " --num-intra-threads " + \ - str(self.args.num_intra_threads) + \ - " -g " + self.args.input_graph + \ - " -d " + self.args.data_location + \ - " -n " + str(self.args.steps) - - def parse_custom_args(self): - if self.custom_args: - parser = argparse.ArgumentParser() - parser.add_argument("-n", "--steps", - help="Run for n number of steps", - type=int, default=None) - self.args = parser.parse_args(self.custom_args, - namespace=self.args) - - def run(self): - original_dir = os.getcwd() - os.chdir(self.research_dir) - self.run_command(self.run_cmd) - os.chdir(original_dir) diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/config.json b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/config.json deleted file mode 100644 index 6f1228ba7..000000000 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "optimization_parameters": { - "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", - "KMP_BLOCKTIME": 0, - "KMP_SETTINGS": 1 - } -} diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py deleted file mode 100644 index 3d848ca41..000000000 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py +++ /dev/null @@ -1,114 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel 
Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import os - -from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var - - -class ModelInitializer(BaseModelInitializer): - RFCN_PERF_SCRIPT = "run_frozen_graph_rcnn.py" - RFCN_ACCURACY_SCRIPT = "coco_int8.sh" - perf_script_path = "" - accuracy_script_path = "" - - def __init__(self, args, custom_args=[], platform_util=None): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) - - self.perf_script_path = os.path.join( - self.args.intelai_models, self.args.mode, self.args.precision, - self.RFCN_PERF_SCRIPT) - self.accuracy_script_path = os.path.join( - self.args.intelai_models, self.args.mode, self.args.precision, - self.RFCN_ACCURACY_SCRIPT) - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - # set num_inter_threads and num_intra_threds - self.set_num_inter_intra_threads(num_inter_threads=self.args.num_inter_threads, - num_intra_threads=self.args.num_intra_threads) - omp_num_threads = self.args.num_intra_threads if self.args.num_cores == -1 else self.args.num_cores - set_env_var("OMP_NUM_THREADS", omp_num_threads) - - self.parse_args() - self.validate_args() - - def validate_args(self): - if not (self.args.batch_size == -1 or self.args.batch_size == 1): - raise ValueError( - "Batch Size specified: {}. faster RCNN inference only supports " - "batch size = 1".format(self.args.batch_size)) - - if not os.path.exists(self.perf_script_path)\ - and self.args.bechmark_only: - raise ValueError("Unable to locate the faster RCNN perf script: {}". - format(self.perf_script_path)) - - if not os.path.exists(self.accuracy_script_path)\ - and self.args.accuracy_only: - raise ValueError("Unable to locate the faster RCNN accuracy script: " - "{}".format(self.accuracy_script_path)) - - if not self.args.model_source_dir or not os.path.isdir( - self.args.model_source_dir): - raise ValueError("Unable to locate TensorFlow models at {}". 
- format(self.args.model_source_dir)) - - def parse_args(self): - if self.custom_args: - parser = argparse.ArgumentParser() - parser.add_argument("-n", "--steps", - help="Run for n number of steps", - type=int, default=None) - self.args = parser.parse_args(self.custom_args, - namespace=self.args) - - def run_perf_command(self): - command = self.get_command_prefix(self.args.socket_id) - command += " {} ".format(self.python_exe) + self.perf_script_path - command += " -g " + self.args.input_graph - if self.custom_args: - command += " -n " + str(self.args.steps) - if self.args.socket_id != -1: - command += " -x " - command += \ - " -d " + self.args.data_location + \ - " --num-inter-threads " + str(self.args.num_inter_threads) + \ - " --num-intra-threads " + str(self.args.num_intra_threads) - self.run_command(command) - - def run_accuracy_command(self): - command = "{} {} {} {}".format(self.accuracy_script_path, - self.args.input_graph, - self.args.data_location, - self.args.model_source_dir) - self.run_command(command) - - def run(self): - # Run script from the tensorflow models research directory - original_dir = os.getcwd() - os.chdir(os.path.join(self.args.model_source_dir, "research")) - if self.args.accuracy_only: - self.run_accuracy_command() - else: - self.run_perf_command() - os.chdir(original_dir) diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/requirements.txt b/benchmarks/object_detection/tensorflow/faster_rcnn/requirements.txt deleted file mode 100644 index 2d861605e..000000000 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -Cython -contextlib2 -pillow>=6.2.2 -lxml -jupyter -matplotlib -pycocotools -numpy < 1.18.0 diff --git a/benchmarks/object_detection/tensorflow/rfcn/README.md b/benchmarks/object_detection/tensorflow/rfcn/README.md index 7962bcdfa..2db8d9d1b 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/README.md +++ b/benchmarks/object_detection/tensorflow/rfcn/README.md @@ -1,7 +1,7 @@ # R-FCN (ResNet101) This document has instructions for how to run R-FCN for the -following modes/precisions: +following FP32 and Int8 modes/precisions: * [Int8 inference](#int8-inference-instructions) * [FP32 inference](#fp32-inference-instructions) @@ -15,38 +15,35 @@ better performance results for Int8 precision models with smaller batch sizes. If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` when calling `launch_benchmark.py` and the script will run without TCMalloc. -1. Store the path to the current directory: -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR -``` - -2. Clone the [tensorflow/models](https://github.com/tensorflow/models) and [cocodataset/cocoapi](https://github.com/cocodataset/cocoapi) repositories: +1. Clone the [tensorflow/models](https://github.com/tensorflow/models) as `tensorflow-models` and [cocodataset/cocoapi](https://github.com/cocodataset/cocoapi) repositories: ``` -$ git clone https://github.com/tensorflow/models.git tf_models -$ cd tf_models +$ git clone https://github.com/tensorflow/models.git tensorflow-models +$ cd tensorflow-models +$ git checkout 6c21084503b27a9ab118e1db25f79957d5ef540b +$ git apply models/object_detection/tensorflow/rfcn/inference/tf-2.0.patch $ git clone https://github.com/cocodataset/cocoapi.git + ``` The TensorFlow models repo will be used for installing dependencies and running inference as well as converting the coco dataset to the TF records format. -3. Download the 2017 validation +2. 
Download the 2017 validation [COCO dataset](http://cocodataset.org/#home) and annotations: ``` -$ cd $MODEL_WORK_DIR $ mkdir val $ cd val $ wget http://images.cocodataset.org/zips/val2017.zip $ unzip val2017.zip +$ cd .. -$ cd $MODEL_WORK_DIR $ mkdir annotations $ cd annotations $ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip $ unzip annotations_trainval2017.zip +$ cd .. ``` Since we are only using the validation dataset in this example, we will @@ -54,15 +51,14 @@ create an empty directory and empty annotations json file to pass as the train and test directories in the next step. ``` -$ cd $MODEL_WORK_DIR $ mkdir empty_dir $ cd annotations $ echo "{ \"images\": {}, \"categories\": {}}" > empty.json -$ cd $MODEL_WORK_DIR +$ cd .. ``` -4. Now that you have the raw COCO dataset, we need to convert it to the +3. Now that you have the raw COCO dataset, we need to convert it to the TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. @@ -76,82 +72,77 @@ located after the script has completed. ``` # We are going to use an older version of the conversion script to checkout the git commit -$ cd tf_models +$ cd models $ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 $ cd research/object_detection/dataset_tools/ $ python create_coco_tf_record.py --logtostderr \ - --train_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --val_image_dir="$MODEL_WORK_DIR/val/val2017" \ - --test_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --train_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --val_annotations_file="$MODEL_WORK_DIR/annotations/annotations/instances_val2017.json" \ - --testdev_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --output_dir="$MODEL_WORK_DIR/output" - -$ ll $MODEL_WORK_DIR/output + --train_image_dir="/home//coco/empty_dir" \ + --val_image_dir="/home//coco/val/val2017" \ + --test_image_dir="/home//coco/empty_dir" \ + --train_annotations_file="/home//coco/annotations/empty.json" \ + --val_annotations_file="/home//coco/annotations/instances_val2017.json" \ + --testdev_annotations_file="/home//coco/annotations/empty.json" \ + --output_dir="/home//coco/output" + +$ ll /home//coco/output total 1598276 -rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record -rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record -rw-rw-r--. 1 818336740 Nov 2 21:46 coco_val.record + +# Go back to the main models directory and get master code +$ cd /home//tensorflow-models +$ git checkout master ``` The `coco_val.record` file is what we will use in this inference example. -For the accuracy test, a patch is required in the cloned models repo until [this issue](https://github.com/tensorflow/models/issues/5411) -gets fixed in the TensorFlow repository. -Go back to the main models directory and get the specified SHA that we are using for the model, the patch will be applied automatically: -``` -$ cd $MODEL_WORK_DIR/tf_models -$ git checkout 20da786b078c85af57a4c88904f7889139739ab0 -``` - -5. Download the pretrained model: +4. Download the pretrained model: + Int8 Graph ``` -$ cd $MODEL_WORK_DIR -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/rfcn_resnet101_int8_coco_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/rfcn_resnet101_int8_coco_pretrained_model.pb ``` -6. Clone the [intelai/models](https://github.com/intelai/models) repo +5. 
Clone the [intelai/models](https://github.com/intelai/models) repo and then run the scripts for either batch/online inference performance or accuracy. ``` $ git clone https://github.com/IntelAI/models.git +$ cd models/benchmarks ``` Run for batch and online inference where the `--data-location` -is the path to the directory with the raw coco validation images: -``` -$ cd $MODEL_WORK_DIR/models/benchmarks +is the path to the directory with the raw coco validation images and the +`--in-graph` is the Int8 pre-trained graph (from step 4): -$ python launch_benchmark.py \ +``` +python launch_benchmark.py \ --model-name rfcn \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --data-location $MODEL_WORK_DIR/val/val2017 \ - --in-graph $MODEL_WORK_DIR/rfcn_resnet101_int8_coco_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --model-source-dir /home//tensorflow-models \ + --data-location /home//val/val2017 \ + --in-graph /home//rfcn_resnet101_int8_coco_pretrained_model.pb \ --verbose \ --benchmark-only \ - -- steps=500 + -- number_of_steps=500 ``` Or for accuracy where the `--data-location` is the path the directory -where your `coco_val.record-00000-of-00001` file is located: +where your `coco_val.record` file is located: ``` -$ cd $MODEL_WORK_DIR/models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name rfcn \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --data-location $MODEL_WORK_DIR/output/coco_val.record \ - --in-graph $MODEL_WORK_DIR/rfcn_resnet101_int8_coco_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --model-source-dir /home//tensorflow-models \ + --data-location /home//coco/output/coco_val.record \ + --in-graph /home//rfcn_resnet101_int8_coco_pretrained_model.pb \ --accuracy-only \ -- split="accuracy_message" ``` @@ -159,7 +150,7 @@ $ python launch_benchmark.py \ Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. -7. Log files are located at the value of `--output-dir` (or +6. Log files are located at the value of `--output-dir` (or `models/benchmarks/common/tensorflow/logs` if no path has been specified): Below is a sample log file tail when running for batch @@ -181,64 +172,57 @@ And here is a sample log file tail when running for accuracy: ``` ... Accumulating evaluation results... -DONE (t=1.44s). - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.320 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.497 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.361 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.320 +DONE (t=1.91s). 
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.327 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.506 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.365 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.327 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.267 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.369 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.372 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.372 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.271 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.376 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.380 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.380 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 Ran inference with batch size -1 Log location outside container: {--output-dir}/benchmark_rfcn_inference_int8_20190227_194752.log ``` -8. To return to where you started from: -``` -$ popd -``` - - ## FP32 Inference Instructions -1. Store the path to the current directory: -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR -``` +These instructions use the TCMalloc memory allocator, which produces +better performance results for FP32 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. -2. Clone the `tensorflow/models` and `cocoapi` repositories: +1. Clone the [tensorflow/models](https://github.com/tensorflow/models) as `tensorflow-models` and [cocodataset/cocoapi](https://github.com/cocodataset/cocoapi) repositories: ``` -$ git clone https://github.com/tensorflow/models.git tf_models -$ cd tf_models +$ git clone https://github.com/tensorflow/models.git tensorflow-models +$ cd tensorflow-models +$ git checkout 6c21084503b27a9ab118e1db25f79957d5ef540b +$ git apply models/object_detection/tensorflow/rfcn/inference/tf-2.0.patch $ git clone https://github.com/cocodataset/cocoapi.git - ``` -The TensorFlow models repo will be used for running inference as well as +The TensorFlow models repo will be used for installing dependencies and running inference as well as converting the coco dataset to the TF records format. -3. Download the 2017 validation +2. Download the 2017 validation [COCO dataset](http://cocodataset.org/#home) and annotations: ``` -$ cd $MODEL_WORK_DIR $ mkdir val $ cd val $ wget http://images.cocodataset.org/zips/val2017.zip $ unzip val2017.zip - -$ cd $MODEL_WORK_DIR +$ cd .. $ mkdir annotations $ cd annotations $ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip $ unzip annotations_trainval2017.zip +$ cd .. ``` Since we are only using the validation dataset in this example, we will @@ -246,15 +230,13 @@ create an empty directory and empty annotations json file to pass as the train and test directories in the next step. ``` -$ cd $MODEL_WORK_DIR $ mkdir empty_dir - $ cd annotations $ echo "{ \"images\": {}, \"categories\": {}}" > empty.json -$ cd $MODEL_WORK_DIR +$ cd .. ``` -4. 
Now that you have the raw COCO dataset, we need to convert it to the +3. Now that you have the raw COCO dataset, we need to convert it to the TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. @@ -266,108 +248,91 @@ The `--output_dir` is the location where the TF record files will be located after the script has completed. ``` - # We are going to use an older version of the conversion script to checkout the git commit -$ cd tf_models +$ cd tensorflow-models $ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 - $ cd research/object_detection/dataset_tools/ $ python create_coco_tf_record.py --logtostderr \ - --train_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --val_image_dir="$MODEL_WORK_DIR/val/val2017" \ - --test_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --train_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --val_annotations_file="$MODEL_WORK_DIR/annotations/annotations/instances_val2017.json" \ - --testdev_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --output_dir="$MODEL_WORK_DIR/output" - -$ ll $MODEL_WORK_DIR/output + --train_image_dir="/home//coco/empty_dir" \ + --val_image_dir="/home//coco/val/val2017" \ + --test_image_dir="/home//coco/empty_dir" \ + --train_annotations_file="/home//coco/annotations/empty.json" \ + --val_annotations_file="/home//coco/annotations/instances_val2017.json" \ + --testdev_annotations_file="/home//coco/annotations/empty.json" \ + --output_dir="/home//coco/output" +$ ll /home//coco/output total 1598276 -rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record -rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record -rw-rw-r--. 1 818336740 Nov 2 21:46 coco_val.record +# Go back to the main models directory and get master code +$ cd /home//models +$ git checkout master ``` The `coco_val.record` file is what we will use in this inference example. -For the accuracy test, a patch is required in the cloned models repo until [this issue](https://github.com/tensorflow/models/issues/5411) -gets fixed in the TensorFlow repository. -Go back to the main models directory and get the specified SHA that we are using for the model, the patch will be applied automatically: -``` -$ cd $MODEL_WORK_DIR/tf_models -$ git checkout 20da786b078c85af57a4c88904f7889139739ab0 -``` - -5. Download and extract the pretrained model: +4. Download the pretrained model: + FP32 Graph ``` -cd $MODEL_WORK_DIR -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/rfcn_resnet101_fp32_coco_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/rfcn_resnet101_fp32_coco_pretrained_model.tar.gz +$ tar -xzvf rfcn_resnet101_fp32_coco_pretrained_model.tar.gz ``` -6. Clone the [intelai/models](https://github.com/intelai/models) repo. -This repo has the launch script for running the model. -``` -$ git clone https://github.com/IntelAI/models.git -Cloning into 'models'... -remote: Enumerating objects: 11, done. -remote: Counting objects: 100% (11/11), done. -remote: Compressing objects: 100% (7/7), done. -remote: Total 11 (delta 3), reused 4 (delta 0), pack-reused 0 -Receiving objects: 100% (11/11), done. -Resolving deltas: 100% (3/3), done. -``` +5. Clone the [intelai/models](https://github.com/intelai/models) repo +and then run the scripts for either batch/online inference performance or accuracy. -7. 
Clone the [intelai/models](https://github.com/intelai/models) repo -and then run the scripts for either batch/online inference performance or accuracy. ``` $ git clone https://github.com/IntelAI/models.git +$ cd models/benchmarks ``` + Run for batch and online inference where the `--data-location` -is the path to the directory with the raw coco validation images: -``` -$ cd $MODEL_WORK_DIR/models/benchmarks +is the path to the directory with the raw coco validation images and the +`--in-graph` is the FP32 pre-trained graph (from step 4): -$ python launch_benchmark.py \ +``` +python launch_benchmark.py \ --model-name rfcn \ --mode inference \ --precision fp32 \ --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --data-location $MODEL_WORK_DIR/val/val2017 \ - --in-graph $MODEL_WORK_DIR/rfcn_resnet101_fp32_coco_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --model-source-dir /home//tensorflow-models \ + --data-location /home//val/val2017 \ + --in-graph /home//rfcn_resnet101_fp32_coco_pretrained_model \ --verbose \ --benchmark-only \ - -- steps=500 + -- number_of_steps=500 ``` Or for accuracy where the `--data-location` is the path the directory where your `coco_val.record` file is located and the `--in-graph` is the pre-trained graph located in the pre-trained model directory (from step 4): ``` -$ cd $MODEL_WORK_DIR/models/benchmarks - -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name rfcn \ --mode inference \ --precision fp32 \ --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --data-location $MODEL_WORK_DIR/output/coco_val.record \ - --in-graph $MODEL_WORK_DIR/rfcn_resnet101_fp32_coco_pretrained_model.pb \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --model-source-dir /home//tensorflow-models \ + --data-location /home//coco/output/coco_val.record \ + --in-graph /home//rfcn_resnet101_fp32_coco_pretrained_model.pb \ --accuracy-only \ -- split="accuracy_message" ``` -8. Log files are located at the value of `--output-dir` (or -`models/benchmarks/common/tensorflow/logs` if no path has been specified): +Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands +to get additional debug output or change the default output location. -Below is a sample log file tail when running for batch and -online inference: +6. Log files are located at the value of `--output-dir` (or +`models/benchmarks/common/tensorflow/logs` if no path has been specified): +Below is a sample log file tail when running for batch +and online inference: ``` Average time per step: 0.262 sec Received these standard args: Namespace(accuracy_only=False, batch_size=1, benchmark_only=False, checkpoint='/checkpoints', data_location='/dataset', framework='tensorflow', input_graph=None, intelai_models='/workspace/intelai_models', mode='inference', model_args=[], model_name='rfcn', model_source_dir='/workspace/models', num_cores=-1, num_inter_threads=2, num_intra_threads=56, precision='fp32, socket_id=0, use_case='object_detection', verbose=True) @@ -399,9 +364,4 @@ DONE (t=1.19s). Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_rfcn_inference_fp32_20181221_211905.log -``` - -9. 
To return to where you started from: -``` -$ popd ``` \ No newline at end of file diff --git a/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/config.json b/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/config.json index d7f51a4c2..6f1228ba7 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/config.json +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/config.json @@ -1,5 +1,6 @@ { "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", "KMP_BLOCKTIME": 0, "KMP_SETTINGS": 1 } diff --git a/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py index b2d22b136..703088482 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py @@ -29,9 +29,6 @@ class ModelInitializer(BaseModelInitializer): accuracy_script = "coco_mAP.sh" accuracy_script_path = "" - benchmark_script = "run_rfcn_inference.py" - perf_script_path = "" - command = [] def run_inference_sanity_checks(self, args, custom_args): if args.batch_size != -1 and args.batch_size != 1: @@ -44,87 +41,105 @@ def __init__(self, args, custom_args, platform_util): self.accuracy_script_path = os.path.join( self.args.intelai_models, self.args.mode, self.args.precision, self.accuracy_script) - self.perf_script_path = os.path.join( + self.benchmark_script = os.path.join( self.args.intelai_models, self.args.mode, - self.args.precision, self.benchmark_script) + self.args.precision, "run_rfcn_inference.py") # Set KMP env vars, if they haven't already been set config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") self.set_kmp_vars(config_file_path) - # set num_inter_threads and num_intra_threds - self.set_num_inter_intra_threads(num_inter_threads=self.args.num_inter_threads, - num_intra_threads=self.args.num_intra_threads) - self.omp_num_threads = self.args.num_intra_threads if self.args.num_cores == -1 else self.args.num_cores - set_env_var("OMP_NUM_THREADS", self.omp_num_threads) + # Set num_inter_threads and num_intra_threads + self.set_num_inter_intra_threads() - self.parse_custom_args() self.run_inference_sanity_checks(self.args, self.custom_args) + self.parse_custom_args() self.research_dir = os.path.join(self.args.model_source_dir, "research") - def run_benchmark(self): - # Get the command previx, but numactl is added later - self.command.append(self.get_command_prefix(self.args.socket_id, numactl=False)) + def parse_custom_args(self): + if self.custom_args: + parser = argparse.ArgumentParser() + mutex_group = parser.add_mutually_exclusive_group() + mutex_group.add_argument("-x", "--number_of_steps", + help="Run for n number of steps", + type=int, default=None) + mutex_group.add_argument( + "-v", "--visualize", + help="Whether to visualize the output image", + action="store_true") + parser.add_argument("-q", "--split", + help="Location of accuracy data", + type=str, default=None) + self.args = parser.parse_args(self.custom_args, namespace=self.args) + else: + raise ValueError("Custom parameters are missing...") + + def run_perf_command(self): + # Get the command previx, but numactl is added later in run_perf_command() + command = [] + num_cores = str(self.platform_util.num_cores_per_socket) + if self.args.num_cores != -1: + num_cores = str(self.args.num_cores) + + set_env_var("OMP_NUM_THREADS", num_cores) if 
self.args.socket_id != -1: - self.command.append("numactl") + command.append("numactl") if self.args.socket_id: socket_id = self.args.socket_id else: socket_id = "0" if self.args.num_cores != -1: - self.command.append("-C") - self.command.append("+0") + command.append("-C") + command.append("+0") i = 1 while i < self.args.num_cores: - self.command.append(",{}".format(i)) + command.append(",{}".format(i)) i += i - self.command.append("-N") - self.command.append("{}".format(socket_id)) - self.command.append("-m") - self.command.append("{}".format(socket_id)) - - self.command += (self.python_exe, self.perf_script_path) - self.command += ("-m", self.args.model_source_dir) - self.command += ("-g", self.args.input_graph) - self.command += ("--num-intra-threads", str(self.args.num_intra_threads)) - self.command += ("--num-inter-threads", str(self.args.num_inter_threads)) - if self.args.steps: - self.command += ("-x", "{}".format(self.args.steps)) + command.append("-N") + command.append("{}".format(socket_id)) + command.append("-m") + command.append("{}".format(socket_id)) + + command += (self.python_exe, self.benchmark_script) + command += ("-m", self.args.model_source_dir) + command += ("-g", self.args.input_graph) + command += ("--num-intra-threads", str(self.args.num_intra_threads)) + command += ("--num-inter-threads", str(self.args.num_inter_threads)) + if self.args.number_of_steps: + command += ("-x", "{}".format(self.args.number_of_steps)) + if self.args.visualize: + command += ("-v") if self.args.data_location: - self.command += ("-d", self.args.data_location) - self.run_command(" ".join(self.command)) - - def parse_custom_args(self): - if self.custom_args: - parser = argparse.ArgumentParser() - parser.add_argument("-q", "--split", - help="Location of accuracy data", - type=str, default=None) - parser.add_argument("-x", "--steps", - help="Run for n number of steps", - type=int, default=None) - - self.args = parser.parse_args(self.custom_args, - namespace=self.args) + command += ("-d", self.args.data_location) + self.run_command(" ".join(command)) def run_accuracy_command(self): if not os.path.exists(self.accuracy_script_path): - raise ValueError("Unable to locate the R-FCN accuracy script: {}".format(self.accuracy_script_path)) - if not self.args.data_location or not os.path.exists(self.args.data_location): - raise ValueError("Unable to locate the coco data record file at {}".format(self.args.tf_record_file)) - if not self.args.split: + raise ValueError("Unable to locate the R-FCN accuracy script: " + "{}".format(self.accuracy_script_path)) + command = "FROZEN_GRAPH=" + self.args.input_graph + + if self.args.data_location and os.path.exists( + self.args.data_location): + command += " TF_RECORD_FILE=" + self.args.data_location + else: + raise ValueError( + "Unable to locate the coco data record file at {}".format( + self.args.tf_record_file)) + + if self.args.split: + command += " SPLIT=" + self.args.split + else: raise ValueError("Must specify SPLIT parameter") - command = self.get_command_prefix(self.args.socket_id, numactl=False) - command += " {} {} {} {} {}".format(self.accuracy_script_path, - self.args.input_graph, - self.args.data_location, - self.args.model_source_dir, - self.args.split) + command += " TF_MODELS_ROOT={}".format( + self.args.model_source_dir) + + command += " " + self.accuracy_script_path self.run_command(command) def run(self): @@ -133,5 +148,5 @@ def run(self): if self.args.accuracy_only: self.run_accuracy_command() else: - self.run_benchmark() + 
self.run_perf_command() os.chdir(original_dir) diff --git a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/__init__.py b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/__init__.py index 8cb0c8d8d..c4fdb7d61 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/__init__.py +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/__init__.py @@ -15,3 +15,5 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +# diff --git a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py index d2c78ffb8..b94f966d2 100755 --- a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +# from __future__ import absolute_import from __future__ import division @@ -50,23 +51,20 @@ def __init__(self, args, custom_args=[], platform_util=None): if self.args.intelai_models in sys.path: sys.path.remove(self.args.intelai_models) + self.parse_args() + # Set KMP env vars, if they haven't already been set config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") self.set_kmp_vars(config_file_path) - # set num_inter_threads and num_intra_threds - self.set_num_inter_intra_threads(num_inter_threads=self.args.num_inter_threads, - num_intra_threads=self.args.num_intra_threads) - omp_num_threads = self.args.num_intra_threads if self.args.num_cores == -1 else self.args.num_cores - set_env_var("OMP_NUM_THREADS", omp_num_threads) - - self.parse_args() + # Set num_inter_threads and num_intra_threads + self.set_num_inter_intra_threads() def parse_args(self): if self.custom_args: parser = argparse.ArgumentParser() mutex_group = parser.add_mutually_exclusive_group() - mutex_group.add_argument("-x", "--steps", + mutex_group.add_argument("-x", "--number_of_steps", help="Run for n number of steps", type=int, default=None) mutex_group.add_argument( @@ -114,6 +112,11 @@ def validate_args(self): def run_perf_command(self): # Get the command previx, but numactl is added later in run_perf_command() self.command.append(self.get_command_prefix(self.args.socket_id, numactl=False)) + num_cores = str(self.platform_util.num_cores_per_socket) + if self.args.num_cores != -1: + num_cores = str(self.args.num_cores) + + set_env_var("OMP_NUM_THREADS", num_cores) if self.args.socket_id != -1: self.command.append("numactl") @@ -140,10 +143,10 @@ def run_perf_command(self): self.command += ("-g", self.args.input_graph) self.command += ("--num-intra-threads", str(self.args.num_intra_threads)) self.command += ("--num-inter-threads", str(self.args.num_inter_threads)) - if self.args.steps: - self.command += ("-x", "{}".format(self.args.steps)) + if self.args.number_of_steps: + self.command += ("-x", "{}".format(self.args.number_of_steps)) if self.args.visualize: - self.command += "-v" + self.command += ("-v") if self.args.timeline: self.command += ("-t", self.args.timeline) if self.args.data_location: @@ -151,24 +154,31 @@ def run_perf_command(self): if self.args.evaluate_tensor: self.command += ("-e", self.args.evaluate_tensor) if self.args.print_accuracy: - self.command += "-p" + self.command += ("-p") self.run_command(" ".join(self.command)) def run_accuracy_command(self): # already validated by parent 
self.command = self.get_command_prefix(self.args.socket_id, numactl=False) + self.command += "FROZEN_GRAPH=" + self.args.input_graph - if not self.args.data_location or not os.path.exists(self.args.data_location): - raise ValueError("Unable to locate the coco data record file at {}".format(self.args.data_location)) + if self.args.data_location and os.path.exists( + self.args.data_location): + self.command += " TF_RECORD_FILE=" + self.args.data_location + else: + raise ValueError( + "Unable to locate the coco data record file at {}".format( + self.args.tf_record_file)) - if not self.args.split: + if self.args.split: + self.command += " SPLIT=" + self.args.split + else: raise ValueError("Must specify SPLIT parameter") - self.command += " {} {} {} {} {}".format(self.accuracy_script_path, - self.args.input_graph, - self.args.data_location, - self.args.model_source_dir, - self.args.split) + self.command += " TF_MODELS_ROOT={}".format( + self.args.model_source_dir) + + self.command += " " + self.accuracy_script_path self.run_command(self.command) def run(self): diff --git a/benchmarks/object_detection/tensorflow/rfcn/requirements.txt b/benchmarks/object_detection/tensorflow/rfcn/requirements.txt index 2d861605e..a698abc6d 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/requirements.txt +++ b/benchmarks/object_detection/tensorflow/rfcn/requirements.txt @@ -1,3 +1,4 @@ +numpy==1.17.4 Cython contextlib2 pillow>=6.2.2 @@ -5,4 +6,3 @@ lxml jupyter matplotlib pycocotools -numpy < 1.18.0 diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md index 298cb10f1..06d89dbde 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md @@ -15,58 +15,54 @@ better performance results for Int8 precision models with smaller batch sizes. If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` when calling `launch_benchmark.py` and the script will run without TCMalloc. -1. Store the path to the current directory: +1. Clone the [tensorflow/models](https://github.com/tensorflow/models) +repository as `tensorflow-models` at the specified SHA and clone the +[cocoapi repo](git clone https://github.com/cocodataset/cocoapi.git) in +the models directory: ``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR -``` - -2. Clone the [tensorflow/models](https://github.com/tensorflow/models) and [cocodataset/cocoapi](https://github.com/cocodataset/cocoapi) repositories: - -``` -$ git clone https://github.com/tensorflow/models.git tf_models -$ cd tf_models +$ git clone https://github.com/tensorflow/models.git tensorflow-models +$ cd tensorflow-models +$ git checkout 20da786b078c85af57a4c88904f7889139739ab0 $ git clone https://github.com/cocodataset/cocoapi.git ``` The TensorFlow models repo will be used for running inference as well as converting the coco dataset to the TF records format. -3. Download the 2017 validation +2. Download the 2017 validation [COCO dataset](http://cocodataset.org/#home) and annotations: - ``` -$ cd $MODEL_WORK_DIR $ mkdir val $ cd val $ wget http://images.cocodataset.org/zips/val2017.zip $ unzip val2017.zip +$ cd .. +``` -$ cd $MODEL_WORK_DIR +If you would like to run the model for batch and online inference, the +validation dataset is all that you will need. If you would like to get +accuracy metrics, then continue the instructions below to generate the +TF record file as well. 
+``` $ mkdir annotations $ cd annotations $ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip $ unzip annotations_trainval2017.zip +$ cd .. ``` -If you would like to run the model for batch and online inference, the -validation dataset is all that you will need. If you would like to get -accuracy metrics, then continue the instructions below to generate the -TF record file as well. Otherwise, skip to Step 5. Since we are only using the validation dataset in this example, we will create an empty directory and empty annotations json file to pass as the train and test directories in the next step. - ``` -$ cd $MODEL_WORK_DIR $ mkdir empty_dir $ cd annotations $ echo "{ \"images\": {}, \"categories\": {}}" > empty.json -$ cd $MODEL_WORK_DIR +$ cd .. ``` -4. Now that you have the raw COCO dataset, we need to convert it to the -TF records format in order to use it with the inference script. We will +3. Now that you have the raw COCO dataset, we need to convert it to the +TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. @@ -77,60 +73,59 @@ The `--output_dir` is the location where the TF record files will be located after the script has completed. ``` - # We are going to use an older version of the conversion script to checkout the git commit -$ cd tf_models +$ cd tensorflow-models $ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 $ cd research/object_detection/dataset_tools/ $ python create_coco_tf_record.py --logtostderr \ - --train_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --val_image_dir="$MODEL_WORK_DIR/val/val2017" \ - --test_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --train_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --val_annotations_file="$MODEL_WORK_DIR/annotations/annotations/instances_val2017.json" \ - --testdev_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --output_dir="$MODEL_WORK_DIR/output" - -$ ll $MODEL_WORK_DIR/output + --train_image_dir="/home//coco/empty_dir" \ + --val_image_dir="/home//coco/val/val2017" \ + --test_image_dir="/home//coco/empty_dir" \ + --train_annotations_file="/home//coco/annotations/empty.json" \ + --val_annotations_file="/home//coco/annotations/instances_val2017.json" \ + --testdev_annotations_file="/home//coco/annotations/empty.json" \ + --output_dir="/home//coco/output" + +$ ll /home/myuser/coco/output total 1598276 -rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record -rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record -rw-rw-r--. 1 818336740 Nov 2 21:46 coco_val.record # Go back to the main models directory and get the specified SHA that we are using for SSD-MobileNet -$ cd $MODEL_WORK_DIR/tf_models +$ cd /home//tensorflow-models $ git checkout 20da786b078c85af57a4c88904f7889139739ab0 ``` -5. Download the pretrained model: +4. Download the pretrained model: ``` -$ cd $MODEL_WORK_DIR -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/ssdmobilenet_int8_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/ssdmobilenet_int8_pretrained_model.pb ``` -6. Clone the [intelai/models](https://github.com/intelai/models) repo +5. Clone the [intelai/models](https://github.com/intelai/models) repo and then run the scripts for either batch/online inference performance or accuracy. 
``` -$ git clone https://github.com/IntelAI/models.git -$ cd $MODEL_WORK_DIR/models/benchmarks +$ git clone git@github.com:IntelAI/models.git +$ cd benchmarks ``` Run for batch and online inference where the `--data-location` -is the path to the directory with the unzipped coco validation images: +is the path to the tf record file that you generated in step 2: ``` -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name ssd-mobilenet \ --mode inference \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --data-location $MODEL_WORK_DIR/val/val2017 \ - --in-graph $MODEL_WORK_DIR/ssdmobilenet_int8_pretrained_model.pb \ + --num-intra-threads 28 \ + --num-inter-threads 1 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0:latest \ + --data-location /home//coco/output/coco_val.record \ + --in-graph /home//ssdmobilenet_int8_pretrained_model.pb \ --benchmark-only \ --batch-size 1 ``` @@ -138,16 +133,17 @@ $ python launch_benchmark.py \ Or for accuracy where the `--data-location` is the path to the tf record file that you generated in step 2: ``` -$ python launch_benchmark.py \ +python launch_benchmark.py \ --model-name ssd-mobilenet \ --mode inference \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --data-location $MODEL_WORK_DIR/output/coco_val.record \ - --in-graph $MODEL_WORK_DIR/ssdmobilenet_int8_pretrained_model.pb \ + --num-intra-threads 28 \ + --num-inter-threads 1 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0:latest \ + --data-location /home//coco/output/coco_val.record \ + --in-graph /home//ssdmobilenet_int8_pretrained_model.pb \ --accuracy-only \ --batch-size 1 ``` @@ -155,7 +151,7 @@ $ python launch_benchmark.py \ Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. -7. The log file is saved to the value of `--output-dir`. +6. The log file is saved to the value of `--output-dir`. Below is a sample log file tail when running for batch and online inference: @@ -194,26 +190,15 @@ Ran inference with batch size 1 Log location outside container: /benchmark_ssd-mobilenet_inference_int8_20181204_185432.log ``` -8. To return to where you started from: -``` -$ popd -``` - ## FP32 Inference Instructions -1. Store the path to the current directory: -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR -``` - -2. Clone the `tensorflow/models` repository with the specified SHA, +1. Clone the `tensorflow/models` repository as `tensorflow-models` with the specified SHA, since we are using an older version of the models repo for SSD-MobileNet. ``` -$ git clone https://github.com/tensorflow/models.git tf_models -$ cd tf_models +$ git clone https://github.com/tensorflow/models.git tensorflow-models +$ cd tensorflow-models $ git checkout 20da786b078c85af57a4c88904f7889139739ab0 $ git clone https://github.com/cocodataset/cocoapi.git ``` @@ -221,26 +206,25 @@ $ git clone https://github.com/cocodataset/cocoapi.git The TensorFlow models repo will be used for running inference as well as converting the coco dataset to the TF records format. -3. Follow the TensorFlow models object detection +2. 
Follow the TensorFlow models object detection [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#installation) to get your environment setup with the required dependencies. -4. Download the 2017 validation +3. Download the 2017 validation [COCO dataset](http://cocodataset.org/#home) and annotations: ``` -$ cd $MODEL_WORK_DIR $ mkdir val $ cd val $ wget http://images.cocodataset.org/zips/val2017.zip $ unzip val2017.zip +$ cd .. -$ cd $MODEL_WORK_DIR $ mkdir annotations $ cd annotations $ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip $ unzip annotations_trainval2017.zip -$ cd $MODEL_WORK_DIR +$ cd .. ``` Since we are only using the validation dataset in this example, we will @@ -252,10 +236,10 @@ $ mkdir empty_dir $ cd annotations $ echo "{ \"images\": {}, \"categories\": {}}" > empty.json -$ cd $MODEL_WORK_DIR +$ cd .. ``` -5. Now that you have the raw COCO dataset, we need to convert it to the +4. Now that you have the raw COCO dataset, we need to convert it to the TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. @@ -269,33 +253,33 @@ located after the script has completed. ``` # We are going to use an older version of the conversion script to checkout the git commit -$ cd tf_models +$ cd tensorflow-models $ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 $ cd research/object_detection/dataset_tools/ $ python create_coco_tf_record.py --logtostderr \ - --train_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --val_image_dir="$MODEL_WORK_DIR/val/val2017" \ - --test_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --train_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --val_annotations_file="$MODEL_WORK_DIR/annotations/annotations/instances_val2017.json" \ - --testdev_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --output_dir="$MODEL_WORK_DIR/output" - -$ ll $MODEL_WORK_DIR/output + --train_image_dir="/home//coco/empty_dir" \ + --val_image_dir="/home//coco/val/val2017" \ + --test_image_dir="/home//coco/empty_dir" \ + --train_annotations_file="/home//coco/annotations/empty.json" \ + --val_annotations_file="/home//coco/annotations/instances_val2017.json" \ + --testdev_annotations_file="/home//coco/annotations/empty.json" \ + --output_dir="/home//coco/output" + +$ ll /home//coco/output total 1598276 -rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record -rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record -rw-rw-r--. 1 818336740 Nov 2 21:46 coco_val.record # Go back to the main models directory and checkout the SHA that we are using for SSD-MobileNet -$ cd $MODEL_WORK_DIR/tf_models +$ cd /home//tensorflow-models $ git checkout 20da786b078c85af57a4c88904f7889139739ab0 ``` The `coco_val.record` file is what we will use in this inference example. -6. Download and extract the pre-trained SSD-MobileNet model from the +5. Download and extract the pre-trained SSD-MobileNet model from the [TensorFlow detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md#coco-trained-models). The downloaded .tar file includes a `frozen_inference_graph.pb` which we will be using when running inference. @@ -328,12 +312,11 @@ total 58132 drwxr-sr-x. 3 4096 Feb 1 2018 saved_model ``` -7. Clone the [intelai/models](https://github.com/intelai/models) repo. +6. Clone the [intelai/models](https://github.com/intelai/models) repo. 
This repo has the launch script for running the model, which we will use in the next step. ``` -$ cd $MODEL_WORK_DIR $ git clone https://github.com/IntelAI/models.git Cloning into 'models'... remote: Enumerating objects: 11, done. @@ -344,7 +327,7 @@ Receiving objects: 100% (11/11), done. Resolving deltas: 100% (3/3), done. ``` -8. Next, navigate to the `benchmarks` directory of the +7. Next, navigate to the `benchmarks` directory of the [intelai/models](https://github.com/intelai/models) repo that was just cloned in the previous step. SSD-MobileNet can be run for testing batch and online inference, or testing accuracy. Note that we are running @@ -357,18 +340,19 @@ downloaded in step 5 as the `--in-graph`, and use the `--benchmark-only` flag: ``` -$ cd $MODEL_WORK_DIR/models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --data-location $MODEL_WORK_DIR/output/coco_val.record \ - --in-graph $MODEL_WORK_DIR/ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ + --data-location /home//coco/output/coco_val.record \ + --in-graph /home//ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb \ --model-name ssd-mobilenet \ --framework tensorflow \ --precision fp32 \ --mode inference \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --num-intra-threads 28 \ + --num-inter-threads 1 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0:latest \ --benchmark-only ``` @@ -379,19 +363,20 @@ the path to the frozen graph that you downloaded in step 5 as the ``` $ python launch_benchmark.py \ - --data-location $MODEL_WORK_DIR/output/coco_val.record \ - --in-graph $MODEL_WORK_DIR/ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ + --data-location /home//coco/output/coco_val.record \ + --in-graph /home//ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb \ --model-name ssd-mobilenet \ --framework tensorflow \ --precision fp32 \ --mode inference \ --socket-id 0 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --num-intra-threads 28 \ + --num-inter-threads 1 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0:latest \ --accuracy-only ``` -9. The log file is saved to the value of `--output-dir`. +8. The log file is saved to the value of `--output-dir`. Below is a sample log file tail when running for performance: @@ -421,8 +406,3 @@ Below is a sample log file tail when testing accuracy: Ran inference with batch size -1 Log location outside container: {--output-dir value}/benchmark_ssd-mobilenet_inference_fp32_20190123_225145.log ``` - -10. To return to where you started from: -``` -$ popd -``` diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py deleted file mode 100644 index e1e22eb93..000000000 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py +++ /dev/null @@ -1,137 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Infers detections on a TFRecord of TFExamples given an inference graph. - -Example usage: - ./infer_detections \ - --input_tfrecord_paths=/path/to/input/tfrecord1,/path/to/input/tfrecord2 \ - --output_tfrecord_path_prefix=/path/to/output/detections.tfrecord \ - --inference_graph=/path/to/frozen_weights_inference_graph.pb - -The output is a TFRecord of TFExamples. Each TFExample from the input is first -augmented with detections from the inference graph and then copied to the -output. - -The input and output nodes of the inference graph are expected to have the same -types, shapes, and semantics, as the input and output nodes of graphs produced -by export_inference_graph.py, when run with --input_type=image_tensor. - -The script can also discard the image pixels in the output. This greatly -reduces the output size and can potentially accelerate reading data in -subsequent processing steps that don't require the images (e.g. computing -metrics). -""" - -import itertools -import tensorflow as tf -from object_detection.inference import detection_inference -import numpy as np -import time - - -tf.flags.DEFINE_string('input_tfrecord_paths', None, - 'A comma separated list of paths to input TFRecords.') -tf.flags.DEFINE_string('output_tfrecord_path', None, - 'Path to the output TFRecord.') -tf.flags.DEFINE_string('inference_graph', None, - 'Path to the inference graph with embedded weights.') -tf.flags.DEFINE_boolean('discard_image_pixels', False, - 'Discards the images in the output TFExamples. This' - ' significantly reduces the output size and is useful' - ' if the subsequent tools don\'t need access to the' - ' images (e.g. 
when computing evaluation measures).') -tf.flags.DEFINE_integer('num_inter_threads', None, - 'Number of inter op threads') -tf.flags.DEFINE_integer('num_intra_threads', None, - 'Number of intra op threads') - -FLAGS = tf.flags.FLAGS - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - - required_flags = ['input_tfrecord_paths', 'output_tfrecord_path', - 'inference_graph', 'num_inter_threads', - 'num_intra_threads'] - for flag_name in required_flags: - if not getattr(FLAGS, flag_name): - raise ValueError('Flag --{} is required'.format(flag_name)) - - with tf.Session(config=tf.ConfigProto( - inter_op_parallelism_threads=FLAGS.num_inter_threads, - intra_op_parallelism_threads=FLAGS.num_intra_threads)) as sess: - input_tfrecord_paths = [ - v for v in FLAGS.input_tfrecord_paths.split(',') if v] - tf.logging.info('Reading input from %d files', len(input_tfrecord_paths)) - serialized_example_tensor, image_tensor = detection_inference.build_input( - input_tfrecord_paths) - tf.logging.info('Reading graph and building model...') - (detected_boxes_tensor, detected_scores_tensor, - detected_labels_tensor) = detection_inference.build_inference_graph( - image_tensor, FLAGS.inference_graph) - - tf.logging.info('Running inference and writing output to {}'.format( - FLAGS.output_tfrecord_path)) - sess.run(tf.local_variables_initializer()) - tf.train.start_queue_runners() - - latency = [] - with tf.python_io.TFRecordWriter( - FLAGS.output_tfrecord_path) as tf_record_writer: - try: - for counter in itertools.count(): - tf.logging.log_every_n( - tf.logging.INFO, - 'Processed %d images... moving average latency %d ms', - 200, counter + 1, np.mean(latency[-200:])) - start = time.time() - tf_example = detection_inference.\ - infer_detections_and_add_to_example( - serialized_example_tensor, detected_boxes_tensor, - detected_scores_tensor, detected_labels_tensor, - FLAGS.discard_image_pixels) - duration = time.time() - start - latency.append(duration * 1000) - tf_record_writer.write(tf_example.SerializeToString()) - except tf.errors.OutOfRangeError: - tf.logging.info('Finished processing records') - latency = np.array(latency) - print("Latency: min = {:.1f}, max = {:.1f}, mean= {:.1f}, median " - "= {:.1f}".format(latency.min(), latency.max(), latency.mean(), - np.median(latency))) - - -if __name__ == '__main__': - tf.app.run() diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py index 4fa9fa85e..66ebd4f29 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2018 Intel Corporation +# Copyright (c) 2019 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -21,69 +21,42 @@ import os import sys -from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var +from common.base_model_init import BaseModelInitializer, set_env_var class ModelInitializer(BaseModelInitializer): - def run_inference_sanity_checks(self, args, custom_args): - if not args.input_graph: - sys.exit("Please provide a path to the frozen graph directory" - " via the '--in-graph' flag.") - if not args.data_location: - sys.exit("Please provide a path to the data directory via the " - "'--data-location' flag.") - if args.socket_id == -1 and args.num_cores == -1: - print("***Warning***: Running inference on all cores could degrade" - " performance. Pass a '--socket-id' to specify running on a" - " single socket instead.\n") + # SSD-MobileNet Int8 inference model initialization + args = None + custom_args = [] - def __init__(self, args, custom_args, platform_util): + def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.run_inference_sanity_checks(self.args, self.custom_args) - self.research_dir = os.path.join(args.model_source_dir, "research") - # Set KMP env vars, if they haven't already been set config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") self.set_kmp_vars(config_file_path) - # set num_inter_threads and num_intra_threads (override inter threads to 2) - self.set_num_inter_intra_threads(num_inter_threads=2) + benchmark_script = os.path.join(self.args.intelai_models, self.args.mode, + self.args.precision, "infer_detections.py") + self.command_prefix = self.get_command_prefix(self.args.socket_id) + \ + "{} {}".format(self.python_exe, benchmark_script) set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) - if self.args.accuracy_only: - # get accuracy test command - script_path = os.path.join( - self.args.benchmark_dir, self.args.use_case, - self.args.framework, self.args.model_name, self.args.mode, - "ssdmobilenet_accuracy.sh") - self.run_cmd = "sh {} {} {}".format( - script_path, self.args.input_graph, self.args.data_location) - elif self.args.benchmark_only: - # get benchmark command - benchmark_script = os.path.join( - self.args.benchmark_dir, self.args.use_case, - self.args.framework, self.args.model_name, self.args.mode, - self.args.precision, "infer_detections.py") - - # get command with numactl - self.run_cmd = self.get_command_prefix( - self.args.socket_id) + "{} {}".format(self.python_exe, benchmark_script) + self.command_prefix += " -g {0}".format(self.args.input_graph) + self.command_prefix += " -i 1000" + self.command_prefix += " -w 200" + self.command_prefix += " -a {0}".format(self.args.num_intra_threads) + self.command_prefix += " -e {0}".format(self.args.num_inter_threads) + if self.args.data_location: + self.command_prefix += " -d {0}".format(self.args.data_location) - output_tf_record_path = os.path.join(os.path.dirname( - self.args.data_location), "SSD-mobilenet-out.tfrecord") - - self.run_cmd += " --input_tfrecord_paths={} " \ - "--output_tfrecord_path={} --inference_graph={} " \ - "--discard_image_pixels=True " \ - "--num_inter_threads={} --num_intra_threads={}".\ - format(self.args.data_location, output_tf_record_path, - self.args.input_graph, self.args.num_inter_threads, - self.args.num_intra_threads) + if self.args.accuracy_only: + self.command_prefix += " -r" + assert self.args.data_location, "accuracy must provide the data." 
+ else: + # Did not support multi-batch accuracy check. + self.command_prefix += " -b {0}".format(self.args.batch_size) def run(self): - original_dir = os.getcwd() - os.chdir(self.research_dir) - self.run_command(self.run_cmd) - os.chdir(original_dir) + # Run script from the tensorflow models research directory + self.run_command(self.command_prefix) \ No newline at end of file diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py index 42cc05d8b..66ebd4f29 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py @@ -36,44 +36,27 @@ def __init__(self, args, custom_args=[], platform_util=None): config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") self.set_kmp_vars(config_file_path) - # set num_inter_threads and num_intra_threads (override inter threads to 2) - self.set_num_inter_intra_threads(num_inter_threads=2) - - # remove intelai models path, so that imports don't conflict - if "MOUNT_BENCHMARK" in os.environ and \ - os.environ["MOUNT_BENCHMARK"] in sys.path: - sys.path.remove(os.environ["MOUNT_BENCHMARK"]) - if self.args.intelai_models in sys.path: - sys.path.remove(self.args.intelai_models) - threads_per_socket = platform_util.num_cores_per_socket * \ - platform_util.num_threads_per_core - - if self.args.benchmark_only: - benchmark_script = os.path.join( - self.args.intelai_models, self.args.mode, self.args.precision, - "run_frozen_graph_ssdmob.py") - self.command_prefix = self.get_command_prefix(self.args.socket_id) + \ - "{} {}".format(self.python_exe, benchmark_script) - set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) - - self.command_prefix = "{} -g {} -n 5000 -d {} --num-inter-threads {} --num-intra-threads {}".format( - self.command_prefix, self.args.input_graph, self.args.data_location, - self.args.num_inter_threads, self.args.num_intra_threads) - - if self.args.socket_id != -1: - self.command_prefix += " -x" + benchmark_script = os.path.join(self.args.intelai_models, self.args.mode, + self.args.precision, "infer_detections.py") + self.command_prefix = self.get_command_prefix(self.args.socket_id) + \ + "{} {}".format(self.python_exe, benchmark_script) + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) + + self.command_prefix += " -g {0}".format(self.args.input_graph) + self.command_prefix += " -i 1000" + self.command_prefix += " -w 200" + self.command_prefix += " -a {0}".format(self.args.num_intra_threads) + self.command_prefix += " -e {0}".format(self.args.num_inter_threads) + if self.args.data_location: + self.command_prefix += " -d {0}".format(self.args.data_location) + + if self.args.accuracy_only: + self.command_prefix += " -r" + assert self.args.data_location, "accuracy must provide the data." else: - set_env_var("OMP_NUM_THREADS", threads_per_socket) - accuracy_script = os.path.join( - self.args.intelai_models, self.args.mode, self.args.precision, - "coco_int8.sh") - self.command_prefix = "sh {} {} {}".format( - accuracy_script, self.args.input_graph, - self.args.data_location) + # Did not support multi-batch accuracy check. 
+ self.command_prefix += " -b {0}".format(self.args.batch_size) def run(self): # Run script from the tensorflow models research directory - original_dir = os.getcwd() - os.chdir(os.path.join(self.args.model_source_dir, "research")) - self.run_command(self.command_prefix) - os.chdir(original_dir) + self.run_command(self.command_prefix) \ No newline at end of file diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_accuracy.sh b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_accuracy.sh deleted file mode 100644 index 48fbae9c0..000000000 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_accuracy.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -########## Variables to be defined - run it in research folder - -SPLIT=ssdmob - -FROZEN_GRAPH=$1 -TF_RECORD_FILES=$2 - -python -m object_detection.inference.infer_detections \ - --input_tfrecord_paths=$TF_RECORD_FILES \ - --output_tfrecord_path=${SPLIT}_detections.tfrecord \ - --inference_graph=$FROZEN_GRAPH \ - --discard_image_pixels=True - -mkdir -p ${SPLIT}_eval_metrics - -echo " -label_map_path: '${PWD}/object_detection/data/mscoco_label_map.pbtxt' -tf_record_input_reader: { input_path: '${SPLIT}_detections.tfrecord' } -" > ${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt - -echo " -metrics_set: 'coco_detection_metrics' -" > ${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt - -python -m object_detection.metrics.offline_eval_map_corloc \ - --eval_dir=${SPLIT}_eval_metrics \ - --eval_config_path=${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt \ - --input_config_path=${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/requirements.txt b/benchmarks/object_detection/tensorflow/ssd-mobilenet/requirements.txt index df0d63adb..ca3b3698d 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/requirements.txt +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/requirements.txt @@ -4,5 +4,5 @@ pillow>=6.2.2 lxml jupyter matplotlib -numpy==1.16.0 +numpy==1.17.4 pycocotools diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md index ca05f503c..8fdeaef3d 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md @@ -4,26 +4,19 @@ This document has instructions for how to run SSD-ResNet34 for the following modes/precisions: * [FP32 inference](#fp32-inference-instructions) * [INT8 inference](#int8-inference-instructions) -* [FP32 Training](#fp32-training-instructions) Instructions and scripts for model training and inference for other precisions are coming later. ## FP32 Inference Instructions -1. 
Store the path to the current directory: -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR -``` - -2. Clone the `tensorflow/models` repository with the specified SHA, +1. Clone the `tensorflow/models` repository as `tensorflow-models` with the specified SHA, since we are using an older version of the models repo for SSD-ResNet34. ``` -$ git clone https://github.com/tensorflow/models.git tf_models -$ cd tf_models +$ git clone https://github.com/tensorflow/models.git tensorflow-models +$ cd tensorflow-models $ git checkout f505cecde2d8ebf6fe15f40fb8bc350b2b1ed5dc $ git clone https://github.com/cocodataset/cocoapi.git ``` @@ -31,26 +24,25 @@ $ git clone https://github.com/cocodataset/cocoapi.git The TensorFlow models repo will be used for running inference as well as converting the coco dataset to the TF records format. -3. Follow the TensorFlow models object detection +2. Follow the TensorFlow models object detection [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#installation) to get your environment setup with the required dependencies. -4. Download the 2017 validation +3. Download the 2017 validation [COCO dataset](http://cocodataset.org/#home) and annotations: ``` -cd $MODEL_WORK_DIR $ mkdir val $ cd val $ wget http://images.cocodataset.org/zips/val2017.zip $ unzip val2017.zip -$ cd $MODEL_WORK_DIR +$ cd .. $ mkdir annotations $ cd annotations $ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip $ unzip annotations_trainval2017.zip -$ cd $MODEL_WORK_DIR +$ cd .. ``` Since we are only using the validation dataset in this example, we will @@ -62,10 +54,10 @@ $ mkdir empty_dir $ cd annotations $ echo "{ \"images\": {}, \"categories\": {}}" > empty.json -$ cd $MODEL_WORK_DIR +$ cd .. ``` -5. Now that you have the raw COCO dataset, we need to convert it to the +4. Now that you have the raw COCO dataset, we need to convert it to the TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. @@ -79,39 +71,42 @@ located after the script has completed. ``` # We are going to use an older version of the conversion script to checkout the git commit -$ cd tf_models +$ cd tensorflow-models $ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 $ cd research/object_detection/dataset_tools/ $ python create_coco_tf_record.py --logtostderr \ - --train_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --val_image_dir="$MODEL_WORK_DIR/val/val2017" \ - --test_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --train_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --val_annotations_file="$MODEL_WORK_DIR/annotations/annotations/instances_val2017.json" \ - --testdev_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --output_dir="$MODEL_WORK_DIR/output" - -$ ll $MODEL_WORK_DIR/output + --train_image_dir="/home//coco/empty_dir" \ + --val_image_dir="/home//coco/val/val2017" \ + --test_image_dir="/home//coco/empty_dir" \ + --train_annotations_file="/home//coco/annotations/empty.json" \ + --val_annotations_file="/home//coco/annotations/instances_val2017.json" \ + --testdev_annotations_file="/home//coco/annotations/empty.json" \ + --output_dir="/home//coco/output" + +$ ll /home//coco/output total 1598276 -rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record -rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record -rw-rw-r--. 
1 818336740 Nov 2 21:46 coco_val.record # Go back to the main models directory and checkout the SHA that we are using for SSD-ResNet34 -$ cd $MODEL_WORK_DIR/tf_models +$ cd /home//tensorflow-models $ git checkout f505cecde2d8ebf6fe15f40fb8bc350b2b1ed5dc ``` The `coco_val.record` file is what we will use in this inference example. +``` +$ mv /home//coco/output/coco_val.record /home//coco/output/validation-00000-of-00001 +``` -6. Download the pretrained model: +5. Download the pretrained model: ``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/ssd_resnet34_fp32_bs1_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/ssd_resnet34_fp32_bs1_pretrained_model.pb ``` -7. Clone the [intelai/models](https://github.com/intelai/models) repo. +6. Clone the [intelai/models](https://github.com/intelai/models) repo. This repo has the launch script for running the model, which we will use in the next step. @@ -119,11 +114,11 @@ use in the next step. $ git clone https://github.com/IntelAI/models.git ``` -8. Next, navigate to the `benchmarks` directory of the +7. Next, navigate to the `benchmarks` directory of the [intelai/models](https://github.com/intelai/models) repo that was just cloned in the previous step. SSD-ResNet34 can be run for batch and online inference, or accuracy. Note that we are running -SSD-ResNet34 with a TensorFlow 1.14 docker image. +SSD-ResNet34 with a TensorFlow 2.1.0 docker image. To run for batch and online inference, use the following command, the path to the frozen graph that you downloaded in step 5 as @@ -131,18 +126,18 @@ the `--in-graph`, and use the `--benchmark-only` flag: ``` -$ cd $MODEL_WORK_DIR/models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/ssd_resnet34_fp32_bs1_pretrained_model.pb \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ + --in-graph /home//ssd_resnet34_fp32_bs1_pretrained_model.pb \ + --model-source-dir /home//tensorflow-models \ --model-name ssd-resnet34 \ --framework tensorflow \ --precision fp32 \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --benchmark-only ``` @@ -153,20 +148,20 @@ the path to the frozen graph that you downloaded in step 5 as the ``` $ python launch_benchmark.py \ - --data-location $MODEL_WORK_DIR/output/ \ - --in-graph $MODEL_WORK_DIR/ssd_resnet34_fp32_bs1_pretrained_model.pb \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ + --data-location /home//coco/output/ \ + --in-graph /home//ssd_resnet34_fp32_bs1_pretrained_model.pb \ + --model-source-dir /home//tensorflow-models \ --model-name ssd-resnet34 \ --framework tensorflow \ --precision fp32 \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --accuracy-only ``` -9. The log file is saved to the value of `--output-dir`. +8. The log file is saved to the value of `--output-dir`. Below is a sample log file tail when running for performance: @@ -194,26 +189,15 @@ Below is a sample log file tail when testing accuracy: Current AP: 0.21082 ``` -10. To return to where you started from: -``` -$ popd -``` - ## INT8 Inference Instructions -1. Store the path to the current directory: -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR -``` - -2. 
Clone the `tensorflow/models` repository with the specified SHA, +1. Clone the `tensorflow/models` repository as `tensorflow-models` with the specified SHA, since we are using an older version of the models repo for SSD-ResNet34. ``` -$ git clone https://github.com/tensorflow/models.git tf_models -$ cd tf_models +$ git clone https://github.com/tensorflow/models.git tensorflow-models +$ cd tensorflow-models $ git checkout f505cecde2d8ebf6fe15f40fb8bc350b2b1ed5dc $ git clone https://github.com/cocodataset/cocoapi.git ``` @@ -221,26 +205,25 @@ $ git clone https://github.com/cocodataset/cocoapi.git The TensorFlow models repo will be used for running inference as well as converting the coco dataset to the TF records format. -3. Follow the TensorFlow models object detection +2. Follow the TensorFlow models object detection [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#installation) to get your environment setup with the required dependencies. -4. Download the 2017 validation +3. Download the 2017 validation [COCO dataset](http://cocodataset.org/#home) and annotations: ``` -cd $MODEL_WORK_DIR $ mkdir val $ cd val $ wget http://images.cocodataset.org/zips/val2017.zip $ unzip val2017.zip +$ cd .. -$ cd $MODEL_WORK_DIR $ mkdir annotations $ cd annotations $ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip $ unzip annotations_trainval2017.zip -$ cd $MODEL_WORK_DIR +$ cd .. ``` Since we are only using the validation dataset in this example, we will @@ -252,10 +235,10 @@ $ mkdir empty_dir $ cd annotations $ echo "{ \"images\": {}, \"categories\": {}}" > empty.json -$ cd $MODEL_WORK_DIR +$ cd .. ``` -5. Now that you have the raw COCO dataset, we need to convert it to the +4. Now that you have the raw COCO dataset, we need to convert it to the TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. @@ -268,39 +251,42 @@ located after the script has completed. ``` # We are going to use an older version of the conversion script to checkout the git commit -$ cd tf_models +$ cd tensorflow-models $ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 $ cd research/object_detection/dataset_tools/ $ python create_coco_tf_record.py --logtostderr \ - --train_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --val_image_dir="$MODEL_WORK_DIR/val/val2017" \ - --test_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --train_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --val_annotations_file="$MODEL_WORK_DIR/annotations/annotations/instances_val2017.json" \ - --testdev_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --output_dir="$MODEL_WORK_DIR/output" - -$ ll $MODEL_WORK_DIR/output + --train_image_dir="/home//coco/empty_dir" \ + --val_image_dir="/home//coco/val/val2017" \ + --test_image_dir="/home//coco/empty_dir" \ + --train_annotations_file="/home//coco/annotations/empty.json" \ + --val_annotations_file="/home//coco/annotations/instances_val2017.json" \ + --testdev_annotations_file="/home//coco/annotations/empty.json" \ + --output_dir="/home//coco/output" + +$ ll /home//coco/output total 1598276 -rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record -rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record -rw-rw-r--. 
1 818336740 Nov 2 21:46 coco_val.record # Go back to the main models directory and checkout the SHA that we are using for SSD-ResNet34 -$ cd $MODEL_WORK_DIR/tf_models +$ cd /home//tensorflow-models $ git checkout f505cecde2d8ebf6fe15f40fb8bc350b2b1ed5dc ``` The `coco_val.record` file is what we will use in this inference example. +``` +$ mv /home//coco/output/coco_val.record /home//coco/output/validation-00000-of-00001 +``` -6. Download the pretrained model: +5. Download the pretrained model: ``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/ssd_resnet34_int8_bs1_pretrained_model.pb +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/ssd_resnet34_int8_bs1_pretrained_model.pb ``` -7. Clone the [intelai/models](https://github.com/intelai/models) repo. +6. Clone the [intelai/models](https://github.com/intelai/models) repo. This repo has the launch script for running the model, which we will use in the next step. @@ -308,10 +294,10 @@ use in the next step. $ git clone https://github.com/IntelAI/models.git ``` -8. Next, navigate to the `benchmarks` directory of the +7. Next, navigate to the `benchmarks` directory of the [intelai/models](https://github.com/intelai/models) repo that was just cloned in the previous step. SSD-ResNet34 can be run for testing batch or online inference, or testing accuracy. Note that we are running -SSD-ResNet34 with a TensorFlow 1.14 docker image. +SSD-ResNet34 with a TensorFlow 2.1.0 docker image. To run for batch and online inference, use the following command, the path to the frozen graph that you downloaded in step 5 as @@ -319,18 +305,18 @@ the `--in-graph`, and use the `--benchmark-only` flag: ``` -$ cd $MODEL_WORK_DIR/models/benchmarks +$ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph $MODEL_WORK_DIR/ssd_resnet34_int8_bs1_pretrained_model.pb \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ + --in-graph /home//ssd_resnet34_int8_bs1_pretrained_model.pb \ + --model-source-dir /home//tensorflow-models \ --model-name ssd-resnet34 \ --framework tensorflow \ --precision int8 \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --benchmark-only ``` @@ -341,20 +327,20 @@ the path to the frozen graph that you downloaded in step 5 as the ``` $ python launch_benchmark.py \ - --data-location $MODEL_WORK_DIR/output/ \ - --in-graph $MODEL_WORK_DIR/ssd_resnet34_int8_bs1_pretrained_model.pb \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ + --data-location /home//coco/output/ \ + --in-graph /home//ssd_resnet34_int8_bs1_pretrained_model.pb \ + --model-source-dir /home//tensorflow-models \ --model-name ssd-resnet34 \ --framework tensorflow \ --precision int8 \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --accuracy-only ``` -9. The log file is saved to the value of `--output-dir`. +8. The log file is saved to the value of `--output-dir`. 
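Before reviewing the sample logs below, it can be worth confirming that the renamed validation TF record from step 4 is readable. A minimal sketch, assuming a local TensorFlow 2.x install with eager execution and the output path used above:

```python
# Count the serialized examples in the renamed validation file as a quick
# sanity check before launching the accuracy run. The path below is a
# placeholder for your own coco/output directory.
import tensorflow as tf

val_record = "coco/output/validation-00000-of-00001"
num_examples = sum(1 for _ in tf.data.TFRecordDataset(val_record))
print("validation examples:", num_examples)  # COCO 2017 val should report 5000
```

If the count is off, re-run the conversion from step 4 before spending time on a full accuracy pass.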
Below is a sample log file tail when testing performance: @@ -367,7 +353,7 @@ Total samples/sec: 83.1635 samples/s Below is a sample log file tail when testing accuracy: ``` - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.204 + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.204 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.360 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.208 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.051 @@ -381,164 +367,3 @@ Below is a sample log file tail when testing accuracy: Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.484 Current AP: 0.20408 ``` - -10. To return to where you started from: -``` -$ popd -``` - -## FP32 Training Instructions - -1. Store the path to the current directory: -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR -``` - -2. Clone the `tensorflow/models` repository with the specified SHA, since we are using an older version of the models repository for SSD-ResNet34. - - ```bash - $ git clone https://github.com/tensorflow/models.git tf_models - $ cd tf_models - $ git checkout f505cecde2d8ebf6fe15f40fb8bc350b2b1ed5dc - ``` - - The TensorFlow models repository will be used for running training as well as converting the coco dataset to the TF records format. - -3. Follow the TensorFlow models object detection [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#installation) to get your environment setup with the required dependencies. - -4. Download the 2017 train [COCO dataset](http://cocodataset.org/#home): - - ```bash - $ cd $MODEL_WORK_DIR - $ mkdir train - $ cd train - $ wget http://images.cocodataset.org/zips/train2017.zip - $ unzip train2017.zip - - $ cd $MODEL_WORK_DIR - $ mkdir val - $ cd val - $ wget http://images.cocodataset.org/zips/val2017.zip - $ unzip val2017.zip - - $ cd $MODEL_WORK_DIR - $ mkdir annotations - $ cd annotations - $ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip - $ unzip annotations_trainval2017.zip - ``` - - Since we are only using the train and validation dataset in this example, we will create an empty directory and empty annotations json file to pass as the test directories in the next step. - - ``` - $ cd $MODEL_WORK_DIR - $ mkdir empty_dir - - $ cd annotations - $ echo "{ \"images\": {}, \"categories\": {}}" > empty.json - $ cd $MODEL_WORK_DIR - ``` - -5. Now that you have the raw COCO dataset, we need to convert it to the TF records format in order to use it with the training script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repository. 
- - ```bash - # We are going to use an older version of the conversion script to checkout the git commit - $ cd models - $ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 - - $ cd research/object_detection/dataset_tools/ - $ python create_coco_tf_record.py --logtostderr \ - --train_image_dir="$MODEL_WORK_DIR/train2017" \ - --val_image_dir="$MODEL_WORK_DIR/val2017" \ - --test_image_dir="$MODEL_WORK_DIR/empty_dir" \ - --train_annotations_file="$MODEL_WORK_DIR/annotations/instances_train2017.json" \ - --val_annotations_file="$MODEL_WORK_DIR/annotations/instances_val2017.json" \ - --testdev_annotations_file="$MODEL_WORK_DIR/annotations/empty.json" \ - --output_dir="$MODEL_WORK_DIR/output" - - # Go back to the main models directory and checkout the SHA that we are using for SSD-ResNet34 - $ cd $MODEL_WORK_DIR/tf_models - $ git checkout f505cecde2d8ebf6fe15f40fb8bc350b2b1ed5dc - ``` - - The `coco_train.record-*-of-*` files are what we will use in this training example. - -6. Clone the [intelai/models](https://github.com/intelai/models) repository. This repository has the launch script for running the model, which we will use in the next step. - - ```bash - $ cd $MODEL_WORK_DIR - $ git clone https://github.com/IntelAI/models.git - ``` - -7. Download and install the [Intel(R) MPI Library for Linux](https://software.intel.com/en-us/mpi-library/choose-download/linux). Once you have the l_mpi_2019.3.199.tgz downloaded, unzip it into /home//l_mpi directory. Make sure to accpet the installation license and **change the value of "ACCEPT_EULA" to "accept" in /home//l_mpi/l_mpi_2019.3.199/silent.cfg**, before start the silent installation. - - The software is installed by default to "/opt/intel" location. If want to run the training in docker, please keep the default installation location. - - ```bash - $ tar -zxvf l_mpi_2019.3.199.tgz -C $MODEL_WORK_DIR/l_mpi - $ cd $MODEL_WORK_DIR/l_mpi/l_mpi_2019.3.199 - # change the value of "ACCEPT_EULA" to "accept" - $ vim silent.cfg - ``` - -8. Next, navigate to the `benchmarks` directory of the [intelai/models](https://github.com/intelai/models) repository that was just cloned in the previous step. Note that we are running SSD-ResNet34 with a TensorFlow 1.14-pre-rc0 docker image. - - To run for training, use the following command, but replace in your path to the unzipped coco dataset images from step 3 for the `--data-location`, `--volume` Intel(R) MPI package path,`--num_processes` the number of MPI processes, `--processes_per_node` the number of processes to launch on each node. - - ```bash - $ cd $MODEL_WORK_DIR/models/benchmarks/ - - $ python launch_benchmark.py \ - --data-location /lustre/dataset/tensorflow/coco \ - --model-source-dir $MODEL_WORK_DIR/models \ - --model-name ssd-resnet34 \ - --framework tensorflow \ - --precision fp32 \ - --mode training \ - --num-train-steps 500 \ - --num-processes 2 \ - --num-processes-per-node 1 \ - --num-cores 27 \ - --num-inter-threads 1 \ - --num-intra-threads 27 \ - --batch-size=32 \ - --weight_decay=1e-4 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14-pre-rc0-devel-mkl-py3 \ - --volume $MODEL_WORK_DIR/l_mpi/l_mpi_2019.3.199:/l_mpi \ - --shm-size 4g - ``` - -9. The log file is saved to the value of `--output-dir`. 
- - Below is a sample log file tail when running for training: - - ```bash - TensorFlow: 1.14 - Model: ssd300 - Dataset: coco - Mode: training - SingleSess: False - Batch size: 64 global - 32 per device - Num batches: 500 - Num epochs: 0.27 - Devices: ['horovod/cpu:0', 'horovod/cpu:1'] - NUMA bind: False - Data format: NCHW - Optimizer: sgd - Variables: horovod - Horovod on: cpu - - - Step Img/sec total_loss - 1 images/sec: 21.6 +/- 0.0 (jitter = 0.0) 52.921 - 10 images/sec: 22.3 +/- 0.1 (jitter = 0.2) 44.674 - 20 images/sec: 22.3 +/- 0.1 (jitter = 0.2) 43.106 - 30 images/sec: 22.3 +/- 0.0 (jitter = 0.2) 34.703 - 40 images/sec: 22.3 +/- 0.0 (jitter = 0.2) 30.737 - 50 images/sec: 22.3 +/- 0.0 (jitter = 0.2) 28.466 - - ``` - - diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py index cb3c4086a..db20c2414 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py @@ -47,12 +47,9 @@ def __init__(self, args, custom_args, platform_util): config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") self.set_kmp_vars(config_file_path) - self.set_num_inter_intra_threads(num_inter_threads=self.args.num_inter_threads, - num_intra_threads=self.args.num_intra_threads) + self.set_num_inter_intra_threads() - omp_num_threads = platform_util.num_cores_per_socket - - set_env_var("OMP_NUM_THREADS", omp_num_threads if self.args.num_cores == -1 else self.args.num_cores) + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) self.model_dir = os.path.join(self.args.intelai_models, self.args.mode, self.args.precision) diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/model_init.py index cb3c4086a..db20c2414 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/model_init.py +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/model_init.py @@ -47,12 +47,9 @@ def __init__(self, args, custom_args, platform_util): config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") self.set_kmp_vars(config_file_path) - self.set_num_inter_intra_threads(num_inter_threads=self.args.num_inter_threads, - num_intra_threads=self.args.num_intra_threads) + self.set_num_inter_intra_threads() - omp_num_threads = platform_util.num_cores_per_socket - - set_env_var("OMP_NUM_THREADS", omp_num_threads if self.args.num_cores == -1 else self.args.num_cores) + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) self.model_dir = os.path.join(self.args.intelai_models, self.args.mode, self.args.precision) diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/requirements.txt b/benchmarks/object_detection/tensorflow/ssd-resnet34/requirements.txt index 8bef9dd21..02eea85c2 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/requirements.txt +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/requirements.txt @@ -1,4 +1,9 @@ +numpy==1.17.4 Cython -pycocotools -numpy<1.18.0 +contextlib2 +pillow>=6.2.2 +lxml +jupyter matplotlib +pycocotools +tensorflow-addons==0.5.0 diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/training/__init__.py b/benchmarks/object_detection/tensorflow/ssd-resnet34/training/__init__.py deleted file mode 
100644 index fd96ab7af..000000000 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/training/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/__init__.py b/benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/config.json b/benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/config.json deleted file mode 100644 index 273b45b40..000000000 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "optimization_parameters": { - "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", - "KMP_BLOCKTIME": 1, - "KMP_SETTINGS": 1 - } -} diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/model_init.py b/benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/model_init.py deleted file mode 100644 index 07837171b..000000000 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/training/fp32/model_init.py +++ /dev/null @@ -1,90 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# - -import os -import sys -import argparse - -from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var - - -def run_training_checks(args): - if not args.data_location: - sys.exit("Please provide a path to the data directory via the '--data-location' flag.") - - -class ModelInitializer(BaseModelInitializer): - - def __init__(self, args, custom_args, platform_util): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) - - run_training_checks(self.args) - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - self.set_num_inter_intra_threads() - - # Train parameter parser - parser = argparse.ArgumentParser(description="process custom_args") - parser.add_argument('--weight_decay', type=float, default=1e-4) - parser.add_argument('--num_warmup_batches', type=int, default=20) - parser.add_argument('--num_train_steps', type=int, default=500, help='number of training batches') - parser.add_argument('--num_processes', type=int, default=2, help='number of process') - parser.add_argument('--num_processes_per_node', type=int, default=1, help='number of process per node') - parser.add_argument('--num_inter_threads', type=int, default=1, help='number of inter-threads') - parser.add_argument('--num_intra_threads', type=int, default=28, help='number of intra-threads') - self.args = parser.parse_args(self.custom_args, namespace=self.args) - - omp_num_threads = platform_util.num_cores_per_socket - - set_env_var("OMP_NUM_THREADS", omp_num_threads if self.args.num_cores == -1 else self.args.num_cores) - - cmd_args = " --data_dir {0}".format(self.args.data_location) - cmd_args += " --batch_size {0}".format(self.args.batch_size) - cmd_args += " --weight_decay {0}".format(self.args.weight_decay) - cmd_args += " --num_warmup_batches {0}".format(self.args.num_warmup_batches) - cmd_args += " --num_batches {0}".format(self.args.num_train_steps) - cmd_args += " --num_inter_threads {0}".format(self.args.num_inter_threads) - cmd_args += " --num_intra_threads {0}".format(self.args.num_intra_threads) - cmd_args += " --model=ssd300 --data_name coco" - cmd_args += " --mkl=True --device=cpu --data_format=NCHW" - cmd_args += " --variable_update=horovod --horovod_device=cpu" - - multi_instance_param_list = ["-genv:I_MPI_PIN_DOMAIN=socket", - "-genv:I_MPI_FABRICS=shm", - "-genv:I_MPI_ASYNC_PROGRESS=1", - "-genv:I_MPI_ASYNC_PROGRESS_PIN={},{}".format(0, self.args.num_intra_threads), - "-genv:OMP_NUM_THREADS={}".format(self.args.num_intra_threads)] - self.cmd = self.get_multi_instance_train_prefix(multi_instance_param_list) - self.cmd += "{} ".format(self.python_exe) - - self.training_script_dir = os.path.join('/tmp/benchmark_ssd-resnet34/scripts/tf_cnn_benchmarks') - training_script = os.path.join(self.training_script_dir, 'tf_cnn_benchmarks.py') - - self.cmd = self.cmd + training_script + cmd_args - - def run(self): - original_dir = os.getcwd() - os.chdir(self.training_script_dir) - # Run benchmarking - self.run_command(self.cmd) - os.chdir(original_dir) diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md deleted file mode 100644 index 2a3269475..000000000 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md +++ /dev/null @@ -1,306 +0,0 @@ -# SSD-VGG16 - -This document has instructions for how to run SSD-VGG16 for the -following 
modes/precisions: -* [Int8 inference](#int8-inference-instructions) -* [FP32 inference](#fp32-inference-instructions) - -Instructions and scripts for model training and inference -other precisions are coming later. - -## Int8 Inference Instructions - -These instructions use the TCMalloc memory allocator, which produces -better performance results for Int8 precision models with smaller batch sizes. -If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` -when calling `launch_benchmark.py` and the script will run without TCMalloc. - -1. Store the path to the current directory: -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR -``` - -2. Clone the [original model](https://github.com/HiKapok/SSD.TensorFlow) repository: -``` -$ git clone https://github.com/HiKapok/SSD.TensorFlow.git -$ cd SSD.TensorFlow -$ git checkout 2d8b0cb9b2e70281bf9dce438ff17ffa5e59075c -``` - -3. Clone the [intelai/models](https://github.com/intelai/models) repository. -It will be used to run the SSD-VGG16 model accuracy and inference performance tests. -``` -$ cd $MODEL_WORK_DIR -$ git clone https://github.com/IntelAI/models.git -``` - - -4. Download the 2017 validation images file: -[COCO dataset](http://cocodataset.org/#home) and annotations: -This is required if you would like to run the accuracy test, -or batch/online inference with real data. - -``` -$ wget http://images.cocodataset.org/zips/val2017.zip -$ unzip val2017.zip -``` - -Download the validation annotations file: -``` -$ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -$ unzip annotations_trainval2017.zip -``` - -5. Convert the COCO dataset to TF records format: - -We provide a script `generate_coco_records.py` to convert the raw dataset to the TF records required pattern. -* Some dependencies are required to be installed to run the script such as `python3`, `Tensorflow` and `tqdm`, also, the `SSD.TensorFlow/dataset` from the original model directory (from step 1). - -Follow the steps below get the COCO TF records: - -* Copy the `generate_coco_records.py` script from `models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py` -from the `models` directory (step 2) to `SSD.TensorFlow/dataset` in the original model directory (step 1). - -``` -$ cp $MODEL_WORK_DIR/models/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py $MODEL_WORK_DIR/SSD.TensorFlow/dataset -``` - -* Create directory for the output TF records: -``` -$ cd $MODEL_WORK_DIR -$ mkdir tf_records -``` - -* Run the script to generate the TF records with the required prefix `val`, COCO raw dataset and annotation file (step 3): -``` -$ cd $MODEL_WORK_DIR/SSD.TensorFlow/dataset -$ python generate_coco_records.py \ ---image_path $MODEL_WORK_DIR/val2017/ \ ---annotations_file $MODEL_WORK_DIR/annotations/instances_val2017.json \ ---output_prefix val \ ---output_path $MODEL_WORK_DIR/tf_records/ -``` - -Now, you can use the `$MODEL_WORK_DIR/tf_records/` as the dataset location to run inference with real data, and test the model accuracy. -``` -$ ls -l $MODEL_WORK_DIR/tf_records -total 792084 --rw-r--r--. 1 170038836 Mar 17 21:35 val-00000-of-00005 --rw-r--r--. 1 167260232 Mar 17 21:35 val-00001-of-00005 --rw-r--r--. 1 167326957 Mar 17 21:35 val-00002-of-00005 --rw-r--r--. 1 166289231 Mar 17 21:35 val-00003-of-00005 --rw-r--r--. 1 140168531 Mar 17 21:35 val-00004-of-00005 -``` - -6. 
Download the pretrained model: - -``` -$ cd $MODEL_WORK_DIR -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/ssdvgg16_int8_pretrained_model.pb -``` - -7. Navigate to the `benchmarks` directory (step 2), and run the model scripts for either batch or online -inference or accuracy. -``` -$ cd models/benchmarks -``` - -* Run the model for batch or online inference where the `--model-source-dir` is the model source directory from step 1, -and the `--in-graph` is the pretrained model graph from step 5. -If you specify the `--data-location` which is the path to the tf record file that you generated in step 4, -the model will run with real data, otherwise dummy data will be used: -``` -$ python launch_benchmark.py \ - --model-name ssd_vgg16 \ - --mode inference \ - --precision int8 \ - --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/SSD.TensorFlow \ - --data-location $MODEL_WORK_DIR/tf_records \ - --in-graph $MODEL_WORK_DIR/ssdvgg16_int8_pretrained_model.pb \ - --batch-size 1 \ - --socket-id 0 \ - --num-inter-threads 11 \ - --num-intra-threads 21 \ - --data-num-inter-threads 21 \ - --data-num-intra-threads 28 \ - -- warmup-steps=100 steps=500 -``` - -* For the accuracy test: - - * Clone the customized [cocoapi repo](https://github.com/waleedka/coco) in -the model directory `SSD.TensorFlow` from step 1. - ``` - $ cd MODEL_WORK_DIR - $ git clone https://github.com/waleedka/coco.git - ``` - * The `--data-location` is required, which is the path to the tf record file that you generated in step 4. - * Copy the annotation file `instances_val2017.json` (from step 3) to the dataset directory `$MODEL_WORK_DIR/tf_records/`. - * Use the `--accuracy-only` flag: -``` -$ python launch_benchmark.py \ - --model-name ssd_vgg16 \ - --mode inference \ - --precision int8 \ - --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/SSD.TensorFlow \ - --data-location $MODEL_WORK_DIR/tf_records \ - --in-graph $MODEL_WORK_DIR/ssdvgg16_int8_pretrained_model.pb \ - --accuracy-only \ - --batch-size 1 -``` - ->Notes: ->* For batch and online inference, we recommend the provided values for the arguments: `--num-inter-threads=11`, `--num-intra-threads=21`, `--data-num-inter-threads=21`, - `--data-num-intra-threads=28` for optimized performance on `28-cores Cascade Lake (CLX)` machine. - ->* SSD-VGG16 model accuracy test works only with the `Python3` based docker images. - ->* The `--verbose` or `--output-dir` flag can be added to any of the above commands -to get additional debug output or change the default output location. - -8. The log file is saved to the value of `--output-dir`. 
- -Below is a sample log file tail when running the model for batch -and online inference, the following results are based on CLX 28-cores with hyper-threading enabled: - -``` -Batch size = 1 -Throughput: 30.382 images/sec -Latency: 32.915 ms -Ran inference with batch size 1 -Log location outside container: {--output-dir value}/benchmark_ssd_vgg16_inference_int8_20190417_231832.log -``` - -And here is a sample log file tail when running for accuracy: - -``` - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.231 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.386 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.243 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.058 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.265 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.391 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.224 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.330 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.355 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.091 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.420 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.558 -``` - -9. To return to where you started from: -``` -$ popd -``` - -## FP32 Inference Instructions - -Use the steps 1, 2 ,3, 4 and 5 as above. - -6. Download the pretrained model: -``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/ssdvgg16_fp32_pretrained_model.pb -``` - -7. Navigate to the `benchmarks` directory (step 2), and run the model scripts for either batch -and online inference or accuracy. -``` -$ cd models/benchmarks -``` - -* Run the model for batch and online inference where the `--model-source-dir` is the model source directory from step 1, -and the `--in-graph` is the pretrained model graph from step 5, -if you specify the `--data-location` which is the path to the tf record file that you generated in step 4, -the benchmark will run with real data, otherwise dummy data will be used: -``` -$ python launch_benchmark.py \ - --data-location $MODEL_WORK_DIR/tf_records \ - --in-graph $MODEL_WORK_DIR/ssdvgg16_fp32_pretrained_model.pb \ - --model-source-dir $MODEL_WORK_DIR/SSD.TensorFlow \ - --model-name ssd_vgg16 \ - --framework tensorflow \ - --precision fp32 \ - --mode inference \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --batch-size 1 \ - --socket-id 0 \ - --num-inter-threads 11 \ - --num-intra-threads 21 \ - --data-num-inter-threads 21 \ - --data-num-intra-threads 28 \ - -- warmup-steps=100 steps=500 -``` - -* For the accuracy test: - - * Clone the customized [cocoapi repo](https://github.com/waleedka/coco) in -the model directory `SSD.TensorFlow` from step 1. - ``` - $ git clone https://github.com/waleedka/coco.git - - ``` - * The `--data-location` is required, which is the path to the tf record file that you generated in step 3. - * Copy the annotation file `instances_val2017.json` (from step 4) to the dataset directory `$MODEL_WORK_DIR/tf_records/`. 
- * Use the `--accuracy-only` flag: -``` -$ python launch_benchmark.py \ - --model-name ssd_vgg16 \ - --mode inference \ - --precision fp32 \ - --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir $MODEL_WORK_DIR/SSD.TensorFlow \ - --data-location $MODEL_WORK_DIR/tf_records \ - --in-graph $MODEL_WORK_DIR/ssdvgg16_fp32_pretrained_model.pb \ - --accuracy-only \ - --batch-size 1 -``` - ->Notes: ->* For batch and online inference, we recommend the provided values for the arguments: `--num-inter-threads=11`, `--num-intra-threads=21`, `--data-num-inter-threads=21`, - `--data-num-intra-threads=28` for optimized performance on `28-cores Cascade Lake (CLX)` machine. - ->* SSD-VGG16 model accuracy test works only with the `Python3` based docker images. - ->* The `--verbose` or `--output-dir` flag can be added to any of the above commands -to get additional debug output or change the default output location. - -7. The log file is saved to the value of `--output-dir`. - -Below is a sample log file tail when running batch and online inference, -the following results are based on CLX 28-cores with hyper-threading enabled: - -``` -Batch size = 1 -Throughput: 15.662 images/sec -Latency: 63.848 ms -Ran inference with batch size 1 -Log location outside container: {--output-dir value}/benchmark_ssd_vgg16_inference_fp32_20190417_232130.log -``` - -Below is a sample log file tail when testing accuracy: - -``` - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.236 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.391 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.248 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.058 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.264 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.399 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.227 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.334 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.358 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.091 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.423 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564 -``` - -8. To return to where you started from: -``` -$ popd -``` diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/__init__.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/__init__.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/config.json b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/config.json deleted file mode 100644 index 14d129748..000000000 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "optimization_parameters": { - "KMP_SETTINGS": 1, - "TF_ENABLE_WINOGRAD_NONFUSED": 1 - } -} diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/__init__.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/model_init.py deleted file mode 100644 index fa6537fa6..000000000 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/model_init.py +++ /dev/null @@ -1,28 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# - -from object_detection.tensorflow.ssd_vgg16.inference.ssd_vgg16_model_init import SSDVGG16ModelInitializer - - -class ModelInitializer(SSDVGG16ModelInitializer): - """Model initializer for SSD-VGG16 FP32 inference""" - - def __init__(self, args, custom_args=[], platform_util=None): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/__init__.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/ssd_vgg16_model_init.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/ssd_vgg16_model_init.py deleted file mode 100644 index d01a5c083..000000000 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/ssd_vgg16_model_init.py +++ /dev/null @@ -1,107 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import argparse - -from common.base_model_init import BaseModelInitializer, set_env_var - - -class SSDVGG16ModelInitializer(BaseModelInitializer): - """Common model initializer for SSD-VGG16 inference""" - - def run_inference_sanity_checks(self, args, custom_args): - if not args.input_graph: - sys.exit("Please provide a path to the frozen graph directory" - " via the '--in-graph' flag.") - if not args.data_location and self.args.accuracy_only: - sys.exit("For accuracy test, please provide a path to the data directory via the " - "'--data-location' flag.") - if args.batch_size != -1 and args.batch_size != 1: - sys.exit("SSD-VGG16 inference supports 'batch-size=1' " + - "only, please modify via the '--batch_size' flag.") - - def __init__(self, args, custom_args, platform_util): - super(SSDVGG16ModelInitializer, self).__init__(args, custom_args, platform_util) - - self.parse_custom_args() - self.run_inference_sanity_checks(self.args, self.custom_args) - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - self.set_num_inter_intra_threads(num_inter_threads=self.args.num_inter_threads, - num_intra_threads=self.args.num_intra_threads) - - omp_num_threads = str(int(platform_util.num_cores_per_socket / 2))\ - if self.args.precision == "int8" else platform_util.num_cores_per_socket - - set_env_var("OMP_NUM_THREADS", omp_num_threads - if self.args.num_cores == -1 else self.args.num_cores) - - script_path = os.path.join( - self.args.intelai_models, self.args.mode, "eval_ssd.py") - - self.run_cmd = self.get_command_prefix( - self.args.socket_id) + "{} {}".format(self.python_exe, script_path) - - self.run_cmd += " --input-graph={} " \ - " --num-inter-threads={} --num-intra-threads={} ". \ - format(self.args.input_graph, self.args.num_inter_threads, - self.args.num_intra_threads) - - if self.args.data_num_inter_threads: - self.run_cmd += " --data-num-inter-threads={} ".format( - self.args.data_num_inter_threads) - - if self.args.data_num_intra_threads: - self.run_cmd += " --data-num-intra-threads={} ".format( - self.args.data_num_intra_threads) - - if self.args.benchmark_only: - self.run_cmd += " --warmup-steps={} --steps={} ". 
\ - format(self.args.warmup_steps, self.args.steps) - - # if the data location directory is not empty, then include the arg - if self.args.data_location and os.listdir(self.args.data_location): - self.run_cmd += " --data-location={} ".format(self.args.data_location) - - if self.args.accuracy_only: - self.run_cmd += "--accuracy-only " - - def parse_custom_args(self): - if self.custom_args: - parser = argparse.ArgumentParser() - parser.add_argument("--warmup-steps", type=int, default=10, - help="number of warmup steps") - parser.add_argument("--steps", type=int, default=50, - help="number of steps") - - self.args = parser.parse_args(self.custom_args, - namespace=self.args) - - def run(self): - self.run_command(self.run_cmd) diff --git a/benchmarks/recommendation/tensorflow/ncf/README.md b/benchmarks/recommendation/tensorflow/ncf/README.md deleted file mode 100644 index e485e3ad9..000000000 --- a/benchmarks/recommendation/tensorflow/ncf/README.md +++ /dev/null @@ -1,137 +0,0 @@ -## Neural Collaborative Filtering (NCF) ## - -This document has instructions for how to run NCF for the -following modes/precisions: -* [FP32 inference](#fp32-inference-instructions) - -Instructions and scripts for model training and inference. - -## FP32 Inference Instructions - -1. Dataset - -This model uses official tensorflow models repo, where [ncf](https://github.com/tensorflow/models/tree/master/official/recommendation) -model automatically downloads movielens ml-1m dataset as default if the `--data-location` flag is not set. -If you want to download movielens 1M dataset and provide that path to `--data-location`, check this [reference](https://grouplens.org/datasets/movielens/1m/) - -2. Clone the official `tensorflow/models` repository with tag `v1.11` and make a small change to `data_async_generation.py`, commenting out a line that causes a crash in the model script. Store the path to the current directory. - -``` -$ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`} -$ pushd $MODEL_WORK_DIR - -$ git clone https://github.com/tensorflow/models.git tf_models -$ cd tf_models -$ git checkout v1.11 -$ sed -i.bak 's/atexit.register/# atexit.register/g' official/recommendation/data_async_generation.py -``` - -3. Now clone `IntelAI/models` repository, then navigate to the `benchmarks` folder: - -``` -$ cd $MODEL_WORK_DIR -$ git clone https://github.com/IntelAI/models.git -$ cd models/benchmarks -``` - -4. Download and extract the pre-trained model. -``` -$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/ncf_fp32_pretrained_model.tar.gz -$ tar -xzvf ncf_fp32_pretrained_model.tar.gz -``` - -5. Run the `launch_benchmark.py` script with the appropriate parameters. -* `--model-source-dir` - Path to official tensorflow models from step2. -* `--checkpoint` - Path to checkpoint directory for the Pre-trained model from step4 - - -For batch inference, `--batch-size 256`, `--socket-id 0`, `--checkpoint` path from step5, `--model-source-dir` path from step2 - -``` -$ python launch_benchmark.py \ - --checkpoint $MODEL_WORK_DIR/models/benchmarks/ncf_trained_movielens_1m \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --model-name ncf \ - --socket-id 0 \ - --batch-size 256 \ - --framework tensorflow \ - --precision fp32 \ - --mode inference \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 -``` - -The tail of batch inference log, looks as below. -``` -... 
-2018-11-12 19:42:44.851050: step 22900, 931259.2 recommendations/sec, 0.27490 msec/batch -2018-11-12 19:42:44.880778: step 23000, 855571.2 recommendations/sec, 0.29922 msec/batch -2018-11-12 19:42:44.910551: step 23100, 870836.8 recommendations/sec, 0.29397 msec/batch -2018-11-12 19:42:44.940675: sE1112 19:42:45.420336 140101437536000 tf_logging.py:110] CRITICAL - Iteration 1: HR = 0.2248, NDCG = 0.1132 -tep 23200, 867319.7 recommendations/sec, 0.29516 msec/batch -2018-11-12 19:42:44.971828: step 23300, 867319.7 recommendations/sec, 0.29516 msec/batch -2018-11-12 19:42:45.002699: step 23400, 861751.1 recommendations/sec, 0.29707 msec/batch -2018-11-12 19:42:45.033635: step 23500, 873671.1 recommendations/sec, 0.29302 msec/batch -Average recommendations/sec across 23594 steps: 903932.8 (0.28381 msec/batch) -... -``` - -For online inference, `--batch-size 1`, `--socket-id 0`, `--checkpoint` path from step5, `--model-source-dir` path from step2 - -``` -$ python launch_benchmark.py \ - --checkpoint $MODEL_WORK_DIR/models/benchmarks/ncf_trained_movielens_1m \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --model-name ncf \ - --socket-id 0 \ - --batch-size 1 \ - --framework tensorflow \ - --precision fp32 \ - --mode inference \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 -``` - -The tail of online inference log, looks as below. -``` -... -2018-11-12 20:24:24.986641: step 6039100, 4629.5 recommendations/sec, 0.21601 msec/batch -2018-11-12 20:24:25.010239: step 6039200, 4369.1 recommendations/sec, 0.22888 msec/batch -2018-11-12 20:24:25.033854: step 6039300, 4583.9 recommendations/sec, 0.21815 msec/batch -2018-11-12 20:24:25.057516: step 6039400, 4696.9 recommendations/sec, 0.21291 msec/batch -2018-11-12 20:24:25.080979: step 6039500, 4788.0 recommendations/sec, 0.20885 msec/batch -2018-11-12 20:24:25.104498: step 6039600, 4405.8 recommendations/sec, 0.22697 msec/batch -2018-11-12 20:24:25.128331: step 6039700, 4364.5 recommendations/sec, 0.22912 msec/batch -2018-11-12 20:24:25.151892: step 6039800, 4485.9 recommendations/sec, 0.22292 msec/batch -2018-11-12 20:24:25.175342: step 6039900, 4675.9 recommendations/sec, 0.21386 msec/batch -2018-11-12 20:24:25.198717: step 6040000, 4905.6 recommendations/sec, 0.20385 msec/batch -Average recommendations/sec across 6040001 steps: 4573.0 (0.21920 msec/batch) -... -``` -For Accuracy, `--batch-size 256`, `--socket-id 0`, `--checkpoint` path from step5, `--model-source-dir` path from step2 - -``` -$ python launch_benchmark.py \ - --checkpoint $MODEL_WORK_DIR/models/benchmarks/ncf_trained_movielens_1m \ - --model-source-dir $MODEL_WORK_DIR/tf_models \ - --model-name ncf \ - --socket-id 0 \ - --accuracy-only \ - --batch-size 256 \ - --framework tensorflow \ - --precision fp32 \ - --mode inference \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 -``` - -The tail of accuracy log, looks as below. -HR: Hit Ratio (HR) -NDCG: Normalized Discounted Cumulative Gain -``` -... -E0104 20:03:50.940653 140470332344064 tf_logging.py:110] CRITICAL - Iteration 1: HR = 0.2290, NDCG = 0.1148 -... -``` - -6. 
To return to where you started from: -``` -$ popd -``` \ No newline at end of file diff --git a/benchmarks/recommendation/tensorflow/ncf/__init__.py b/benchmarks/recommendation/tensorflow/ncf/__init__.py deleted file mode 100644 index c4fdb7d61..000000000 --- a/benchmarks/recommendation/tensorflow/ncf/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/recommendation/tensorflow/ncf/inference/__init__.py b/benchmarks/recommendation/tensorflow/ncf/inference/__init__.py deleted file mode 100644 index c4fdb7d61..000000000 --- a/benchmarks/recommendation/tensorflow/ncf/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/__init__.py b/benchmarks/recommendation/tensorflow/ncf/inference/fp32/__init__.py deleted file mode 100644 index c4fdb7d61..000000000 --- a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# diff --git a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/config.json b/benchmarks/recommendation/tensorflow/ncf/inference/fp32/config.json deleted file mode 100644 index 273b45b40..000000000 --- a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "optimization_parameters": { - "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", - "KMP_BLOCKTIME": 1, - "KMP_SETTINGS": 1 - } -} diff --git a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py b/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py deleted file mode 100644 index 7326ec540..000000000 --- a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py +++ /dev/null @@ -1,78 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var - -import os - - -class ModelInitializer(BaseModelInitializer): - """initialize mode and run benchmark""" - - def __init__(self, args, custom_args=[], platform_util=None): - super(ModelInitializer, self).__init__(args, custom_args, platform_util) - - self.benchmark_command = "" - - # use default batch size if -1 - if self.args.batch_size == -1: - self.args.batch_size = 256 - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - - # set num_inter_threads and num_intra_threads - self.set_num_inter_intra_threads() - - benchmark_script = os.path.join( - self.args.intelai_models, self.args.mode, self.args.precision, - "ncf_main.py") - - self.benchmark_command = self.get_command_prefix(args.socket_id) + \ - self.python_exe + " " + benchmark_script - - set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) - - self.benchmark_command = self.benchmark_command + \ - " --data_dir=" + str(args.data_location) + \ - " --model_dir=" + str(args.checkpoint) + \ - " --intra_op_parallelism_threads=" + str( - self.args.num_intra_threads) + \ - " --inter_op_parallelism_threads=" + str( - self.args.num_inter_threads) + \ - " --batch_size=" + str(self.args.batch_size) + \ - " --inference_only" - - if self.args.benchmark_only: - self.benchmark_command = self.benchmark_command + \ - " --benchmark_only" - - if self.args.accuracy_only: - self.benchmark_command = self.benchmark_command + \ - " --accuracy_only" - - def run(self): - if self.benchmark_command: - self.run_command(self.benchmark_command) diff --git a/benchmarks/recommendation/tensorflow/wide_deep/README.md b/benchmarks/recommendation/tensorflow/wide_deep/README.md index 24ea28d66..de3ea238c 100644 --- a/benchmarks/recommendation/tensorflow/wide_deep/README.md +++ b/benchmarks/recommendation/tensorflow/wide_deep/README.md @@ 
-9,21 +9,21 @@ for other precisions are coming later. ## FP32 Inference Instructions
-1. Store path to current directory and then clone `tensorflow/models`
+1. Clone `tensorflow/models` as `tensorflow-models`
 ```
- # We are going to use an older version of the tensorflow model repo.
- $ MODEL_WORK_DIR=${MODEL_WORK_DIR:=`pwd`}
- $ pushd $MODEL_WORK_DIR
-
- $ git clone https://github.com/tensorflow/models.git tf_models
- $ cd tf_models
- $ git checkout 6ff0a53f81439d807a78f8ba828deaea3aaaf269
+ # We are going to use a branch based on an older version of the tensorflow model repo,
+ # since we need to use the log utils on that branch, which were removed from
+ # the latest master.
+ $ git clone https://github.com/tensorflow/models.git tensorflow-models
+ $ cd tensorflow-models
+ $ git fetch origin pull/7461/head:wide-deep-tf2
+ $ git checkout wide-deep-tf2
 ```
 2. Download and extract the pre-trained model.
 ```
- $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/wide_deep_fp32_pretrained_model.tar.gz
+ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/wide_deep_fp32_pretrained_model.tar.gz
 $ tar -xzvf wide_deep_fp32_pretrained_model.tar.gz
 ```
@@ -32,7 +32,6 @@ This repo has the launch script for running the model, which we will use in the next step.
 ```
- $ cd $MODEL_WORK_DIR
 $ git clone https://github.com/IntelAI/models.git
 ```
 4. Download and preprocess the [income census data](https://archive.ics.uci.edu/ml/datasets/Census+Income) by running
@@ -42,43 +41,51 @@ use in the next step. using `--http_proxy` and `--https_proxy` arguments.
 ```
 $ cd models
- $ python benchmarks/recommendation/tensorflow/wide_deep/inference/fp32/data_download.py --data_dir $MODEL_WORK_DIR/widedeep_dataset
+ $ python benchmarks/recommendation/tensorflow/wide_deep/inference/fp32/data_download.py --data_dir /home//widedeep_dataset
 ```
 5. How to run
 * Running the model in online inference mode, set `--batch-size` = `1`
 ```
- $ cd $MODEL_WORK_DIR/models/benchmarks
+ $ cd /home//models/benchmarks
- $ python launch_benchmark.py \
- --framework tensorflow \
- --model-source-dir $MODEL_WORK_DIR/tf_models \
- --precision fp32 \
- --mode inference \
- --model-name wide_deep \
- --batch-size 1 \
- --data-location $MODEL_WORK_DIR/widedeep_dataset \
- --checkpoint $MODEL_WORK_DIR/tf_models/wide_deep_fp32_pretrained_model \
- --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \
- --verbose
+ $ python launch_benchmark.py \
+ --framework tensorflow \
+ --model-source-dir /home//path/to/tensorflow-models \
+ --precision fp32 \
+ --mode inference \
+ --model-name wide_deep \
+ --batch-size 1 \
+ --data-location /home//widedeep_dataset \
+ --checkpoint /home//path/to/wide_deep_fp32_pretrained_model \
+ --docker-image intel/intel-optimized-tensorflow:2.1.0 \
+ --verbose
 ```
+ The three locations used here (model-source-dir, data-location, checkpoint)
+ work better with docker when they are located on the local disk, and they
+ should be specified as absolute paths.
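One way to satisfy the absolute-path note above is to resolve each location with `realpath` before launching (a minimal sketch; the directory names are placeholders for wherever you cloned and extracted things in the earlier steps):

```
$ MODELS_SRC=$(realpath tensorflow-models)                      # cloned in step 1
$ DATASET_DIR=$(realpath widedeep_dataset)                      # created in step 4
$ CHECKPOINT_DIR=$(realpath wide_deep_fp32_pretrained_model)    # extracted in step 2
$ echo $MODELS_SRC $DATASET_DIR $CHECKPOINT_DIR
```

The resulting values can then be passed to `--model-source-dir`, `--data-location`, and `--checkpoint` in the commands above and below.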
+
+ * Running the model in batch inference mode, set `--batch-size` = `1024`
 ```
- $ cd $MODEL_WORK_DIR/models/benchmarks
- $ python launch_benchmark.py \
- --framework tensorflow \
- --model-source-dir $MODEL_WORK_DIR/tf_models \
- --precision fp32 \
- --mode inference \
- --model-name wide_deep \
- --batch-size 1024 \
- --data-location $MODEL_WORK_DIR/widedeep_dataset \
- --checkpoint $MODEL_WORK_DIR/tf_models/wide_deep_fp32_pretrained_model \
- --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \
- --verbose
+ $ python launch_benchmark.py \
+ --framework tensorflow \
+ --model-source-dir /home//path/to/tensorflow-models \
+ --precision fp32 \
+ --mode inference \
+ --model-name wide_deep \
+ --batch-size 1024 \
+ --data-location /home//path/to/dataset \
+ --checkpoint /home//path/to/wide_deep_fp32_pretrained_model \
+ --docker-image intel/intel-optimized-tensorflow:2.1.0 \
+ --verbose
 ```
+ The three locations used here (model-source-dir, data-location, checkpoint)
+ work better with docker when they are located on the local disk, and they
+ should be specified as absolute paths.
+
 6. The log file is saved to the value of `--output-dir`. The tail of the log output when the script completes should look
@@ -102,10 +109,5 @@ use in the next step.
 search path: /workspace/benchmarks/*/tensorflow/wide_deep/inference/fp32/model_init.py
 Using model init: /workspace/benchmarks/classification/tensorflow/wide_deep/inference/fp32/model_init.py
 PYTHONPATH: :/workspace/models
- RUNCMD: python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --model-name=wide_deep --precision=fp32 --mode=inference --model-source-dir=/workspace/tf_models --intelai-models=/workspace/intelai_models --batch-size=1 --data-location=/dataset --checkpoint=/checkpoints
+ RUNCMD: python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --model-name=wide_deep --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=1 --data-location=/dataset --checkpoint=/checkpoints
 ```
-
-7.
To return to where you started from: -``` -$ popd -``` \ No newline at end of file diff --git a/benchmarks/recommendation/tensorflow/wide_deep/inference/fp32/data_download.py b/benchmarks/recommendation/tensorflow/wide_deep/inference/fp32/data_download.py index 2886de396..f1a41cbee 100644 --- a/benchmarks/recommendation/tensorflow/wide_deep/inference/fp32/data_download.py +++ b/benchmarks/recommendation/tensorflow/wide_deep/inference/fp32/data_download.py @@ -41,6 +41,7 @@ def download_and_clean_file(filename, url): """Downloads data from url, and makes changes to match the CSV format.""" proxies = {} + print(filename) if ARGS.http_proxy: proxies['http'] = ARGS.http_proxy if ARGS.https_proxy: @@ -48,15 +49,15 @@ def download_and_clean_file(filename, url): try: request = requests.get(url, stream=True, proxies=proxies) request.raise_for_status() - with open(filename, 'w') as eval_file: + with open(filename, 'wb') as eval_file: for line in request.iter_lines(): line = line.strip() - line = line.replace(', ', ',') - if not line or ',' not in line: + line = line.replace(b', ', b',') + if not line or b',' not in line: continue if line[-1] == '.': line = line[:-1] - line += '\n' + line += b'\n' eval_file.write(line) except requests.exceptions.HTTPError as err: print(err) diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md index 9ffd3b5f0..b848e0953 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md @@ -25,7 +25,7 @@ following modes/precisions: Note: The dataset does not contain the eval.txt file required for measuring model accuracy. So, download the evaluation dataset for accuracy measurement from https://storage.googleapis.com/dataset-uploader/criteo-kaggle/large_version/eval.csv - + Download the train dataset from https://storage.googleapis.com/dataset-uploader/criteo-kaggle/large_version/train.csv 3. Pre-process the downloaded dataset to tfrecords using [preprocess_csv_tfrecords.py](/models/recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py) Copy the eval.csv and test.csv into your current working directory (i.e. root of models repo) and launch @@ -76,7 +76,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. ``` $ cd $MODEL_WORK_DIR - $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/wide_deep_int8_pretrained_model.pb + $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/wide_deep_int8_pretrained_model.pb ``` 2. Run Accuracy test @@ -93,15 +93,15 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --batch-size 1000 \ --socket-id 0 \ --accuracy-only \ - --docker-image docker.io/intelaipg/intel-optimized-tensorflow:nightly-latestprs-bdw \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --in-graph $MODEL_WORK_DIR/wide_deep_int8_pretrained_model.pb \ - --data-location $MODEL_WORK_DIR/eval_preprocessed_eval.tfrecords + --data-location $MODEL_WORK_DIR/models/eval_preprocessed_eval.tfrecords ``` 3. Run Performance test * Running in online inference mode, set `--batch-size 1` - + ``` $ cd $MODEL_WORK_DIR/models/benchmarks @@ -113,47 +113,87 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--benchmark-only \
 --batch-size 1 \
 --socket-id 0 \
- --docker-image docker.io/intelaipg/intel-optimized-tensorflow:nightly-latestprs-bdw \
+ --docker-image intel/intel-optimized-tensorflow:2.1.0 \
 --in-graph $MODEL_WORK_DIR/wide_deep_int8_pretrained_model.pb \
- --data-location $MODEL_WORK_DIR/eval_preprocessed_eval.tfrecords \
+ --data-location $MODEL_WORK_DIR/models/eval_preprocessed_eval.tfrecords \
 --num-intra-threads 1 --num-inter-threads 1 --num-cores 1 \
 -- num_omp_threads=1
 ```
 * Running in batch inference mode, set `--batch-size 512`
- ```
- $ cd $MODEL_WORK_DIR/models/benchmarks
-
- $ python launch_benchmark.py \
- --model-name wide_deep_large_ds \
- --precision int8 \
- --mode inference \
- --framework tensorflow \
- --benchmark-only \
- --batch-size 512 \
- --socket-id 0 \
- --docker-image docker.io/intelaipg/intel-optimized-tensorflow:nightly-latestprs-bdw \
- --in-graph $MODEL_WORK_DIR/wide_deep_int8_pretrained_model.pb \
- --data-location $MODEL_WORK_DIR/eval_preprocessed_eval.tfrecords \
- --num-intra-threads 28 --num-inter-threads 1 --num-cores 28 \
- -- num_omp_threads=16
- ```
- * The log file is saved to the value of `--output-dir`. The tail of the log output when the script completes
- should look something like this:
- ```
- --------------------------------------------------
- Total test records : 2000000
- Batch size is : 512
- Number of batches : 3907
- Classification accuracy (%) : 77.6405
- No of correct predictions : 1552720
- Inference duration (seconds) : 4.2531
- Avergare Latency (ms/batch) : 1.1173
- Throughput is (records/sec) : 696784.187
- --------------------------------------------------
- Ran inference with batch size 512
- Log location outside container: {--output-dir value}/benchmark_wide_deep_large_ds_inference_int8_20190225_061815.log
- ```
+ Case 1: Disabling the `use_parallel_batches` option. In this case the batches are inferred in sequential order. By default `use_parallel_batches` is disabled. KMP variables can also be set by using the arguments shown below.
+
+
+ ```
+ $ cd $MODEL_WORK_DIR/models/benchmarks
+
+ $ python launch_benchmark.py \
+ --model-name wide_deep_large_ds \
+ --precision int8 \
+ --mode inference \
+ --framework tensorflow \
+ --benchmark-only \
+ --batch-size 512 \
+ --socket-id 0 \
+ --docker-image intel/intel-optimized-tensorflow:2.1.0 \
+ --in-graph $MODEL_WORK_DIR/wide_deep_int8_pretrained_model.pb \
+ --data-location $MODEL_WORK_DIR/models/eval_preprocessed_eval.tfrecords \
+ --num-intra-threads 28 --num-inter-threads 1 --num-cores 28 \
+ -- num_omp_threads=16 kmp_block_time=0 kmp_settings=1 kmp_affinity="noverbose,warnings,respect,granularity=core,none"
+ ```
+ * The log file is saved to the value of `--output-dir`. The tail of the log output when the script completes
+ should look something like this:
+
+ ```
+ --------------------------------------------------
+ Total test records : 2000000
+ Batch size is : 512
+ Number of batches : 3907
+ Classification accuracy (%) : 77.636
+ Inference duration (seconds) : 36.2106
+ Average Latency (ms/batch) : 3.0946
+ Throughput is (records/sec) : 165446.881
+ --------------------------------------------------
+
+ Ran inference with batch size 512
+ Log location outside container: {--output-dir value}/benchmark_wide_deep_large_ds_inference_int8_20190225_061815.log
+ ```
+
+ Case 2: Enabling the `use_parallel_batches` option. In this case multiple batches are inferred in parallel. The number of batches to execute in parallel is given by the `num_parallel_batches` argument.
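A reasonable starting point for `num_parallel_batches` (an assumption drawn from the 28-core example command that follows, not a documented requirement) is the number of physical cores on the socket being used, which can be read from `lscpu`:

```
$ lscpu | grep 'Core(s) per socket'
$ CORES_PER_SOCKET=$(lscpu | awk -F: '/Core\(s\) per socket/ {gsub(/ /, "", $2); print $2}')
$ echo "candidate num_parallel_batches: $CORES_PER_SOCKET"
```

The full launch command for this case is shown below.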
+
+ ```
+ $ cd $MODEL_WORK_DIR/models/benchmarks
+
+ $ python launch_benchmark.py \
+ --model-name wide_deep_large_ds \
+ --precision int8 \
+ --mode inference \
+ --framework tensorflow \
+ --benchmark-only \
+ --batch-size 512 \
+ --socket-id 0 \
+ --docker-image intel/intel-optimized-tensorflow:2.1.0 \
+ --in-graph $MODEL_WORK_DIR/wide_deep_int8_pretrained_model.pb \
+ --data-location $MODEL_WORK_DIR/models/eval_preprocessed_eval.tfrecords \
+ --num-intra-threads 1 --num-inter-threads 28 --num-cores 28 \
+ -- num_omp_threads=1 use_parallel_batches=True num_parallel_batches=28 kmp_block_time=0 kmp_settings=1 kmp_affinity="noverbose,warnings,respect,granularity=core,none"
+ ```
+
+ * The log file is saved to the value of `--output-dir`. The tail of the log output when the script completes
+ should look something like this:
+
+ ```
+ --------------------------------------------------
+ Total test records : 2000000
+ Batch size is : 512
+ Number of batches : 3907
+ Inference duration (seconds) : 1.9365
+ Average Latency (ms/batch) : 13.8807
+ Throughput is (records/sec) : 1032799.599
+ --------------------------------------------------
+ Ran inference with batch size 512
+ Log location outside container: {--output-dir value}/benchmark_wide_deep_large_ds_inference_int8_20190225_061815.log
+ ```
 4. To return to where you started from:
 ```
 $ popd
@@ -165,7 +205,7 @@ $ popd
 ```
 $ cd $MODEL_WORK_DIR
- $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/wide_deep_fp32_pretrained_model.pb
+ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/wide_deep_fp32_pretrained_model.pb
 ```
 2. Run Accuracy test
@@ -174,7 +214,7 @@ $ popd
 ```
 $ cd $MODEL_WORK_DIR/models/benchmarks
- $ python launch_benchmark.py \
+ $ python launch_benchmark.py \
 --model-name wide_deep_large_ds \
 --precision fp32 \
 --mode inference \
@@ -182,9 +222,9 @@ $ popd
 --batch-size 1000 \
 --socket-id 0 \
 --accuracy-only \
- --docker-image docker.io/intelaipg/intel-optimized-tensorflow:latest \
+ --docker-image intel/intel-optimized-tensorflow:2.1.0 \
 --in-graph $MODEL_WORK_DIR/wide_deep_fp32_pretrained_model.pb \
- --data-location $MODEL_WORK_DIR/dataset_preprocessed_eval.tfrecords
+ --data-location $MODEL_WORK_DIR/models/eval_preprocessed_eval.tfrecords
 ```
 3.
Run Performance test
@@ -194,7 +234,7 @@ $ popd
 ```
 $ cd $MODEL_WORK_DIR/models/benchmarks
- $ python launch_benchmark.py \
+ $ python launch_benchmark.py \
 --model-name wide_deep_large_ds \
 --precision fp32 \
 --mode inference \
@@ -202,46 +242,81 @@ $ popd
 --benchmark-only \
 --batch-size 1 \
 --socket-id 0 \
- --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \
+ --docker-image intel/intel-optimized-tensorflow:2.1.0 \
 --in-graph $MODEL_WORK_DIR/wide_deep_fp32_pretrained_model.pb \
- --data-location $MODEL_WORK_DIR/eval_preprocessed_eval.tfrecord \
+ --data-location $MODEL_WORK_DIR/models/eval_preprocessed_eval.tfrecords \
 --num-intra-threads 1 --num-inter-threads 1 --num-cores 1 \
 -- num_omp_threads=1
 ```
 * Running in batch inference mode, set `--batch-size 512`
- ```
- $ cd $MODEL_WORK_DIR/models/benchmarks
- $ python launch_benchmark.py \
- --model-name wide_deep_large_ds \
- --precision fp32 \
- --mode inference \
- --framework tensorflow \
- --benchmark-only \
- --batch-size 512 \
- --socket-id 0 \
- --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \
- --in-graph $MODEL_WORK_DIR/wide_deep_fp32_pretrained_model.pb \
- --data-location $MODEL_WORK_DIR/eval_preprocessed_eval.tfrecords \
- --num-intra-threads 28 --num-inter-threads 1 --num-cores 28 \
- -- num_omp_threads=20
- ```
- * The log file is saved to the value of `--output-dir`. The tail of the log output when the script completes
- should look something like this:
- ```
- --------------------------------------------------
- Total test records : 2000000
- Batch size is : 512
- Number of batches : 3907
- Classification accuracy (%) : 77.6693
- No of correct predictions : 1553386
- Inference duration (seconds) : 5.6724
- Avergare Latency (ms/batch) : 1.4902
- Throughput is (records/sec) : 343560.261
- --------------------------------------------------
- Ran inference with batch size 512
- Log location outside container: {--output-dir value}/benchmark_wide_deep_large_ds_inference_fp32_20190225_062206.log
- ```
+ Case 1: Disabling the `use_parallel_batches` option
+
+ ```
+ $ cd $MODEL_WORK_DIR/models/benchmarks
+
+ $ python launch_benchmark.py \
+ --model-name wide_deep_large_ds \
+ --precision fp32 \
+ --mode inference \
+ --framework tensorflow \
+ --benchmark-only \
+ --batch-size 512 \
+ --socket-id 0 \
+ --docker-image intel/intel-optimized-tensorflow:2.1.0 \
+ --in-graph $MODEL_WORK_DIR/wide_deep_fp32_pretrained_model.pb \
+ --data-location $MODEL_WORK_DIR/models/eval_preprocessed_eval.tfrecords \
+ --num-intra-threads 28 --num-inter-threads 1 --num-cores 28 \
+ -- num_omp_threads=20 kmp_block_time=0 kmp_settings=1 kmp_affinity="noverbose,warnings,respect,granularity=core,none"
+ ```
+ The tail of the log output when the script completes should look something like this:
+ ```
+ --------------------------------------------------
+ Total test records : 2000000
+ Batch size is : 512
+ Number of batches : 3907
+ Classification accuracy (%) : 77.6693
+ No of correct predictions : 1553386
+ Inference duration (seconds) : 7.7408
+ Average Latency (ms/batch) : 2.0337
+ Throughput is (records/sec) : 251755.95
+ --------------------------------------------------
+ ```
+
+ Case 2: Enabling the `use_parallel_batches` option.
+ + ``` + cd /home//models/benchmarks + + python launch_benchmark.py + --model-name wide_deep_large_ds \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --benchmark-only \ + --batch-size 512 \ + --socket-id 0 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ + --in-graph $MODEL_WORK_DIR/wide_deep_fp32_pretrained_model.pb \ + --data-location $MODEL_WORK_DIR/models/eval_preprocessed_eval.tfrecords \ + --num-intra-threads 1 --num-inter-threads 28 --num-cores 28 \ + -- num_omp_threads=1 use_parallel_batches=True num_parallel_batches=28 kmp_block_time=0 kmp_settings=1 kmp_affinity="noverbose,warnings,respect,granularity=core,none" + ``` + + * The log file is saved to the value of `--output-dir`. The tail of the log output when the script completes + should look something like this: + + ``` + -------------------------------------------------- + Total test records : 2000000 + Batch size is : 512 + Number of batches : 3907 + Classification accuracy (%) : 77.6693 + No of correct predictions : 1553386 + Inference duration (seconds) : 3.362 + Average Latency (ms/batch) : 24.0985 + Throughput is (records/sec) : 594891.931 + -------------------------------------------------- + Ran inference with batch size 512 + Log location outside container: {--output-dir value}/benchmark_wide_deep_large_ds_inference_fp32_20190225_062206.log + ``` 4. To return to where you started from: ``` @@ -268,6 +343,6 @@ $ popd --framework tensorflow \ --batch-size 512 \ --data-location /root/dataset \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py index a242ab30b..526a5d574 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py @@ -16,8 +16,6 @@ # limitations under the License. 
# -# - from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -31,29 +29,43 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - # Set the num_inter_threads and num_intra_threads - self.set_num_inter_intra_threads(num_inter_threads=platform_util.num_cores_per_socket, - num_intra_threads=1) - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) - + def parse_args(self): parser = argparse.ArgumentParser() parser.add_argument("--num_omp_threads", dest='num_omp_threads', - type=str, default="1", + type=str, default=None, help="number of omp threads") + parser.add_argument("--use_parallel_batches", dest='use_parallel_batches', + type=str, default="False", + help="Enable to batches in parallel") + parser.add_argument("--num_parallel_batches",dest='num_parallel_batches', default="1", + type=str, help="num of parallel batches.Default is 1") + parser.add_argument('--kmp_block_time', dest='kmp_block_time', + help='number of kmp block time.', + type=str, default=None) + parser.add_argument('--kmp_affinity', dest='kmp_affinity', + help='kmp affinity value', + type=str, default=None) + parser.add_argument('--kmp_settings', dest='kmp_settings', + help='kmp settings', + type=str, default=None) self.args = parser.parse_args(self.custom_args, namespace=self.args) + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path,kmp_settings=str(self.args.kmp_settings),kmp_blocktime=str(self.args.kmp_block_time),kmp_affinity=str(self.args.kmp_affinity)) def run_benchmark(self): - benchmark_script = os.path.join(self.args.intelai_models, - self.args.mode, "inference.py") - + enable_parallel_batches = getattr(self.args, 'use_parallel_batches') script_args_list = ["input_graph", "batch_size", "num_inter_threads", "num_intra_threads", "accuracy_only", "data_location", "num_omp_threads"] + if enable_parallel_batches=='True': + benchmark_script = os.path.join(self.args.intelai_models, + self.args.mode, "parallel_inference.py") + script_args_list.append("num_parallel_batches") + else: + benchmark_script = os.path.join(self.args.intelai_models, + self.args.mode, "inference.py") command_prefix = self.get_command_prefix(-1) if self.args.socket_id != -1 and self.args.num_cores != -1: command_prefix = command_prefix + " numactl --physcpubind=0-{} --membind={} ".\ diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py index f3c22ac6c..1722c167f 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py @@ -16,8 +16,6 @@ # limitations under the License. 
# -# - from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -31,29 +29,43 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - # Set the num_inter_threads and num_intra_threads - self.set_num_inter_intra_threads(num_inter_threads=platform_util.num_cores_per_socket, - num_intra_threads=1) - - # Set KMP env vars, if they haven't already been set - config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") - self.set_kmp_vars(config_file_path) def parse_args(self): parser = argparse.ArgumentParser() parser.add_argument("--num_omp_threads", dest='num_omp_threads', - type=str, default="1", + type=str, default=None, help="number of omp threads") + parser.add_argument("--use_parallel_batches", dest='use_parallel_batches', + type=str, default="False", + help="Enable to run batches in parallel") + parser.add_argument("--num_parallel_batches",dest='num_parallel_batches', default="1", + type=str, help="num of parallel batches.Default is 1") + parser.add_argument('--kmp_block_time', dest='kmp_block_time', + help='number of kmp block time.', + type=str, default=None) + parser.add_argument('--kmp_affinity', dest='kmp_affinity', + help='kmp affinity value', + type=str, default=None) + parser.add_argument('--kmp_settings', dest='kmp_settings', + help='kmp settings', + type=str, default=None) self.args = parser.parse_args(self.custom_args, namespace=self.args) + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path,kmp_settings=str(self.args.kmp_settings),kmp_blocktime=str(self.args.kmp_block_time),kmp_affinity=str(self.args.kmp_affinity)) def run_benchmark(self): - benchmark_script = os.path.join(self.args.intelai_models, - self.args.mode, "inference.py") - + enable_parallel_batches = getattr(self.args, 'use_parallel_batches') script_args_list = ["input_graph", "batch_size", "num_inter_threads", "num_intra_threads", "accuracy_only", "data_location", "num_omp_threads"] + if enable_parallel_batches=='True': + benchmark_script = os.path.join(self.args.intelai_models, + self.args.mode, "parallel_inference.py") + script_args_list.append("num_parallel_batches") + else: + benchmark_script = os.path.join(self.args.intelai_models, + self.args.mode, "inference.py") command_prefix = self.get_command_prefix(-1) if self.args.socket_id != -1 and self.args.num_cores != -1: command_prefix = command_prefix + " numactl --physcpubind=0-{} --membind={} ".\ diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/training/fp32/model_init.py b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/training/fp32/model_init.py index 24f20dcae..3254dfca7 100644 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/training/fp32/model_init.py +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/training/fp32/model_init.py @@ -39,7 +39,6 @@ def parse_args(self): help="number of train steps") self.args = parser.parse_args(self.custom_args, namespace=self.args) - def run_benchmark(self): benchmark_script = os.path.join(self.args.intelai_models, self.args.mode, "train.py") diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/__init__.py b/benchmarks/reinforcement/__init__.py similarity index 93% rename from benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/__init__.py 
rename to benchmarks/reinforcement/__init__.py index 13ab7d92e..199f25228 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/__init__.py +++ b/benchmarks/reinforcement/__init__.py @@ -1,7 +1,7 @@
 #
 # -*- coding: utf-8 -*-
 #
-# Copyright (c) 2019 Intel Corporation
+# Copyright (c) 2020 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/benchmarks/reinforcement/tensorflow/__init__.py b/benchmarks/reinforcement/tensorflow/__init__.py new file mode 100644 index 000000000..e67e74189 --- /dev/null +++ b/benchmarks/reinforcement/tensorflow/__init__.py @@ -0,0 +1,17 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# \ No newline at end of file
diff --git a/benchmarks/reinforcement/tensorflow/minigo/README.md b/benchmarks/reinforcement/tensorflow/minigo/README.md new file mode 100644 index 000000000..bc86793d8 --- /dev/null +++ b/benchmarks/reinforcement/tensorflow/minigo/README.md @@ -0,0 +1,265 @@
+# MiniGo
+
+This document has instructions for how to run MiniGo for the
+following modes/precisions:
+* [FP32 training](#fp32-training-instructions)
+
+Instructions and scripts for model training and inference
+for other precisions are coming later.
+
+## FP32 training Instructions
+0. The Minigo project will install a specific TensorFlow version, which may change your Python environment. We therefore recommend using `conda` or `virtualenv` to create a separate environment for running the Minigo project.
+
+1. Clone the `minigo` repository at the specified SHA,
+since we are using a specific version of the repo for
+MiniGo.
+The minigo repo will be used for running training as well as
+for downloading required files from Google Cloud Storage.
+
+```
+$ git clone --single-branch https://github.com/tensorflow/minigo.git --branch mlperf.0.6
+$ cd minigo
+$ git checkout 60ecb12f29582227a473fdc7cd09c2605f42bcd6
+```
+
+2. Obtain Minigo `checkpoint` and `target` data from Google Cloud Storage
+
+2.1 (Optional) Install gsutil
+
+If you have installed `gsutil` before, please skip this step.
You can run `gsutil --version` to
+check whether gsutil is already installed.
+
+```bash
+$ wget https://storage.googleapis.com/pub/gsutil.tar.gz
+$ tar xfz gsutil.tar.gz -C $HOME
+$ export PATH=${PATH}:$HOME/gsutil
+```
+
+2.2 Download the `checkpoint` and `target` folders and copy them to the `minigo/ml_perf` directory
+```bash
+# under minigo directory
+$ gsutil cp -r gs://minigo-pub/ml_perf/0.6/checkpoint ml_perf/
+$ gsutil cp -r gs://minigo-pub/ml_perf/0.6/target ml_perf/
+# organize target folders
+$ cd ml_perf/target
+$ mkdir 9
+$ mv target* ./9
+$ cd ../../
+# organize checkpoint folders
+$ cd ml_perf/checkpoint/
+$ mv ./work_dir/work_dir/* ./work_dir/
+$ rm -rf ./work_dir/work_dir/
+$ mkdir 9
+$ mv ./golden_chunks ./9
+$ mv ./work_dir ./9
+$ cd ../../../
+
+```
+
+The organized `checkpoint` folders are shown below.
+
+```
+.
+└── 9
+    ├── golden_chunks
+    │   ├── 000000-000000.tfrecord.zz
+    │   ├── 000000-000001.tfrecord.zz
+    │   ├── 000000-000002.tfrecord.zz
+    │   ├── 000000-000003.tfrecord.zz
+    │   ├── 000000-000004.tfrecord.zz
+    │   ├── 000000-000005.tfrecord.zz
+    │   ├── 000000-000006.tfrecord.zz
+    │   ├── 000000-000007.tfrecord.zz
+    │   ├── 000000-000008.tfrecord.zz
+    │   └── 000000-000009.tfrecord.zz
+    └── work_dir
+        ├── checkpoint
+        ├── model.ckpt-9383.data-00000-of-00001
+        ├── model.ckpt-9383.index
+        └── model.ckpt-9383.meta
+```
+
+The organized `target` folders are shown below.
+```
+.
+└── 9
+    ├── target.data-00000-of-00001
+    ├── target.index
+    └── target.meta
+```
+
+3. Install the MPI kits
+
+3.1 Install Intel MPI
+
+If you have installed Intel MPI before, please skip this step.
+
+Download and install the [Intel(R) MPI Library for Linux](https://software.intel.com/en-us/mpi-library/choose-download/linux).
+Once you have l_mpi_2019.3.199.tgz downloaded, extract it into the /home/\/l_mpi directory.
+
+For a silent installation, **change the value of "ACCEPT_EULA" to "accept"
+in /home/\/l_mpi/l_mpi_2019.3.199/silent.cfg** before starting the silent installation.
+Run `sh install.sh --silent silent.cfg` to complete the installation. The software is installed by default to the "/opt/intel" location.
+
+Otherwise, you can perform a custom installation: run `sh install.sh` and choose the custom options.
+
+```bash
+$ tar -zxvf l_mpi_2019.3.199.tgz -C /home//l_mpi
+$ cd /home//l_mpi/l_mpi_2019.3.199
+
+# 1. Silent installation
+# change the value of "ACCEPT_EULA" to "accept"
+$ vim silent.cfg
+$ sh install.sh --silent silent.cfg
+
+# 2. Custom installation
+$ sh install.sh
+# Follow the instructions and complete the installation
+```
+
+3.2 Install mpi4py
+```bash
+# set the necessary environmental variables
+$ source //intel/compilers_and_libraries/linux/bin/compilervars.sh intel64
+$ export PATH=//intel/impi/2019.3.199/intel64/bin/:$PATH
+$ export MPICC=//intel/impi/2019.3.199/intel64/bin/mpiicc
+$ export CC=icc
+$ export CPPFLAGS=-DOMPI_WANT_MPI_INTERFACE_WARNING=0
+# install the mpi4py package
+$ pip install mpi4py
+```
+
+
+4. Install essential tools
+
+Check that you have installed all the tools below before starting training.
+
+4.1 Install gcc
+
+The project has been tested with gcc 8.2.0. We recommend running the project with gcc > 7.2.0.
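If a sufficiently new compiler is already installed, the source build below can be skipped; a quick check (a minimal sketch):

```
$ gcc --version | head -n 1    # anything newer than 7.2.0 should be fine
$ g++ --version | head -n 1
```

Otherwise, build gcc from source as shown below.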
+```
+$ git clone -b releases/gcc-8.2.0 https://github.com/gcc-mirror/gcc.git
+$ cd gcc
+$ ./configure --prefix=/path/to/gcc
+$ make && make install
+$ export PATH=/path/to/gcc/bin:$PATH
+$ export LD_LIBRARY_PATH=/path/to/gcc/lib:$LD_LIBRARY_PATH
+
+```
+
+4.2 Install bazel 0.22.0
+
+Currently, only bazel release 0.22.0 works with Minigo training.
+
+```
+$ wget https://github.com/bazelbuild/bazel/releases/download/0.22.0/bazel-0.22.0-installer-linux-x86_64.sh
+$ chmod 755 bazel-0.22.0-installer-linux-x86_64.sh
+$ ./bazel-0.22.0-installer-linux-x86_64.sh --prefix=//bazel
+$ rm /root/.bazelrc
+$ export PATH=//bazel/bin:$PATH
+```
+
+4.3 Install zlib
+
+```
+$ wget https://www.zlib.net/zlib-1.2.11.tar.gz
+$ tar -xzf zlib-1.2.11.tar.gz
+$ cd zlib-1.2.11
+$ ./configure --prefix=/path/to/zlib
+$ make && make install
+$ export C_INCLUDE_PATH=/path/to/zlib/include:$C_INCLUDE_PATH
+$ export CPLUS_INCLUDE_PATH=/path/to/zlib/include:$CPLUS_INCLUDE_PATH
+$ export LD_LIBRARY_PATH=/path/to/zlib/lib:$LD_LIBRARY_PATH
+
+```
+
+5. Clone the [intelai/models](https://github.com/intelai/models) repository.
+This repository has the launch script for running the model, which we will use in the next step.
+
+```bash
+$ git clone https://github.com/IntelAI/models.git
+```
+
+6. Set environment variables
+```
+$ source //intel/compilers_and_libraries/linux/bin/compilervars.sh intel64
+$ export LD_LIBRARY_PATH=//intel/compilers_and_libraries_2019.3.199/linux/mpi/intel64/libfabric/lib:$LD_LIBRARY_PATH
+$ export FI_PROVIDER=tcp
+
+```
+
+7. Next, navigate to the `benchmarks` directory of the
+[intelai/models](https://github.com/intelai/models) repo that was just
+cloned in the previous step. MiniGo can be run for training.
+
+
+7.1 Single-node training
+You may run the script below to execute single-node training. The flag `model-source-dir` points to the repository cloned in step 1. The flag `steps` sets the number of iterations to run (1 training, 1 selfplay and 1 eval per step);
+the default value for `steps` is 30. The flag `quantization` controls whether int8 quantization is applied, and its default value is True.
+
+```
+$ cd /home//models/benchmarks
+$ python launch_benchmark.py \
+    --model-source-dir /home//minigo \
+    --model-name minigo \
+    --framework tensorflow \
+    --precision fp32 \
+    --mode training \
+    -- steps=30 quantization=True
+
+```
+
+7.2 Multi-node training (normal mode)
+
+First, prepare `node_list.txt` to contain all node addresses, one IP address per line. Then copy the `node_list.txt` file to the `benchmarks` directory of [intelai/models].
+
+```
+# file node_list.txt
+192.168.30.81
+192.168.30.82
+192.168.30.83
+192.168.30.84
+192.168.30.85
+
+# Caution: no blank space after the ip address
+# For the example node_list.txt
+# cat node_list.txt | wc -l => 5
+
+```
+
+Second, the host node where the program is launched must be able to SSH to all other hosts without any prompts. Verify that you can ssh to every other server without entering a password. To learn more about setting up passwordless authentication, please see [this page](http://www.linuxproblem.org/art_9.html).
+Also ensure that port 52175 on each node is not occupied by any other process or blocked by a firewall.
+
+Third, add the `multi_node` flag to specify distributed training, and the `num-train-nodes` flag to specify the number of training nodes. The evaluation nodes and the selfplay nodes share the rest of the nodes given in `node_list.txt`.
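Before launching, it can help to confirm that every address in `node_list.txt` is reachable over passwordless SSH (a minimal sketch; it assumes `node_list.txt` sits in the current directory as described above):

```
$ while read -r node; do ssh -n -o BatchMode=yes -o ConnectTimeout=5 "$node" hostname || echo "FAILED: $node"; done < node_list.txt
```

Once every node responds, launch the multi-node training as shown below.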
+```
+$ cd /home//models/benchmarks
+$ python launch_benchmark.py \
+    --model-source-dir /home//minigo \
+    --model-name minigo \
+    --framework tensorflow \
+    --precision fp32 \
+    --mode training \
+    -- steps=30 quantization=True num-train-nodes=2 multi_node=True
+
+```
+
+Fourth, if you run on a large-scale system (typically more than 32 nodes), add the `large_scale` flag to enable large-scale mode, and the `num-eval-nodes` flag to specify the number of evaluation nodes. The selfplay nodes are the rest of the nodes given in `node_list.txt`. A typical ratio of train, eval and selfplay nodes is 8 : 4 : 48. There is also an additional node for orchestrating, so a total of 8+4+48+1=61 nodes is needed to achieve this ratio.
+```
+$ cd /home//models/benchmarks
+$ python launch_benchmark.py \
+    --model-source-dir /home//minigo \
+    --model-name minigo \
+    --framework tensorflow \
+    --precision fp32 \
+    --mode training \
+    -- steps=30 quantization=True num-train-nodes=8 num-eval-nodes=4 multi_node=True large_scale=True
+
+```
+
+8. Generally, the model converges in ~20 steps (average of 10 runs).
+The log files are saved in `/home//minigo/results/$HOSTNAME`.
+Below is a sample of the outputs in `/home//minigo/results/$HOSTNAME`:
+
+```
+ data eval.log flags models mpi rl_loop.log selfplay.log sgf train.log work_dir
+```
diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/__init__.py b/benchmarks/reinforcement/tensorflow/minigo/__init__.py similarity index 99% rename from benchmarks/object_detection/tensorflow/faster_rcnn/inference/__init__.py rename to benchmarks/reinforcement/tensorflow/minigo/__init__.py index c4fdb7d61..8cb0c8d8d 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/__init__.py +++ b/benchmarks/reinforcement/tensorflow/minigo/__init__.py @@ -15,5 +15,3 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
-#
diff --git a/benchmarks/reinforcement/tensorflow/minigo/requirements.txt b/benchmarks/reinforcement/tensorflow/minigo/requirements.txt new file mode 100644 index 000000000..ac55193ad --- /dev/null +++ b/benchmarks/reinforcement/tensorflow/minigo/requirements.txt @@ -0,0 +1,18 @@
+absl-py
+autopep8>=1.3
+fire
+google.cloud.logging
+google.cloud.bigtable
+grpcio-tools
+keras
+numpy>=1.14.0
+protobuf
+sgf==0.5
+six
+tqdm>=4.17
+pyasn1>=0.4.1
+setuptools>=34.0.0
+
+oauth2client==4.1
+
+horovod==0.15.1
diff --git a/benchmarks/reinforcement/tensorflow/minigo/training/__init__.py b/benchmarks/reinforcement/tensorflow/minigo/training/__init__.py new file mode 100644 index 000000000..e67e74189 --- /dev/null +++ b/benchmarks/reinforcement/tensorflow/minigo/training/__init__.py @@ -0,0 +1,17 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# \ No newline at end of file diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/int8/__init__.py b/benchmarks/reinforcement/tensorflow/minigo/training/fp32/__init__.py similarity index 93% rename from models/object_detection/tensorflow/faster_rcnn/inference/int8/__init__.py rename to benchmarks/reinforcement/tensorflow/minigo/training/fp32/__init__.py index 13ab7d92e..199f25228 100644 --- a/models/object_detection/tensorflow/faster_rcnn/inference/int8/__init__.py +++ b/benchmarks/reinforcement/tensorflow/minigo/training/fp32/__init__.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2019 Intel Corporation +# Copyright (c) 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/benchmarks/reinforcement/tensorflow/minigo/training/fp32/model_init.py b/benchmarks/reinforcement/tensorflow/minigo/training/fp32/model_init.py new file mode 100644 index 000000000..ad1690fbf --- /dev/null +++ b/benchmarks/reinforcement/tensorflow/minigo/training/fp32/model_init.py @@ -0,0 +1,78 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018-2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from common.base_model_init import BaseModelInitializer + +import os + + +import argparse + + + +class ModelInitializer(BaseModelInitializer): + """Model initializer for minigo""" + + def __init__(self, args, custom_args=[], platform_util=None): + super(ModelInitializer, self).__init__(args, custom_args, platform_util) + + arg_parser = argparse.ArgumentParser(description='Parse additional args') + + arg_parser.add_argument( + "--quantization", help="quantization flag", + dest="quantization", default="False") + arg_parser.add_argument( + "--large-scale", help="train on large scale", + dest="large_scale", default="False") + arg_parser.add_argument( + "--num-train-nodes", help="number of train nodes", + dest="num_train_nodes", default=0, type=int) + arg_parser.add_argument( + "--num-eval-nodes", help="number of evaluation nodes", + dest="num_eval_nodes", default=0, type=int) + arg_parser.add_argument( + "--multi-node", help="train on large scale", + dest="multi_node", default="False") + + self.additional_args, unknown_args = arg_parser.parse_known_args(custom_args) + + if self.additional_args.large_scale == "True" and self.additional_args.multi_node == "True": + # multi-node training mode with large scale + self.cmd = "./run_mn.sh " + self.cmd += " {0}".format(self.additional_args.num_train_nodes) + self.cmd += " {0}".format(self.additional_args.num_eval_nodes) + self.cmd += " {0}".format(self.additional_args.quantization) + elif self.additional_args.large_scale == "False" and self.additional_args.multi_node == "True": + # multi-node training mode + self.cmd = "./run_mn.sh " + self.cmd += " {0}".format(self.additional_args.num_train_nodes) + 
self.cmd += " {0}".format(self.additional_args.quantization) + else: + # single-node training mode + self.cmd = "./run.sh " + self.cmd += " {0}".format(self.additional_args.quantization) + + def run(self): + org_path = os.getcwd() + os.chdir(self.args.model_source_dir) + self.run_command(self.cmd) + os.chdir(org_path) diff --git a/docs/README.md b/docs/README.md index 401cdf150..82a602699 100644 --- a/docs/README.md +++ b/docs/README.md @@ -10,7 +10,7 @@ * Inference with Intel® Optimization of Tensorflow: * [Image Recognition](/docs/image_recognition/tensorflow/Tutorial.md) (ResNet50, ResNet101, and InceptionV3) - * [Object Detection](/docs/object_detection/tensorflow/Tutorial.md) (SSD-VGG16) + * [Language Translation](/docs/language_translation/tensorflow/Tutorial.md) (Transformer-LT) * [Recommendation Systems](/docs/recommendation/tensorflow/Tutorial.md) (Wide and Deep) * Model Quantization and Optimization * [Image Recognition](/docs/image_recognition/quantization/Tutorial.md) (ResNet50) diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index e00be9a51..4bcb5db7a 100644 --- a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -31,6 +31,9 @@ Below the general description is an [index of links](#model-scripts-for-tensorfl * InceptionV3: [init](/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py) | [inference](/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py) | [preprocessing](/models/image_recognition/tensorflow/inceptionv3/fp32/preprocessing.py) +* Language Translation + * Transformer-LT: [init](/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py) | + [inference](/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/infer_ab.py) * Recommendation Systems * Wide and Deep: [init](/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py) | [inference](/models/recommendation/tensorflow/wide_deep_large_ds/inference/inference.py) | @@ -51,9 +54,8 @@ optional arguments: -r [MODEL_SOURCE_DIR], --model-source-dir [MODEL_SOURCE_DIR] Specify the models source directory from your local machine - -p {fp32,int8,bfloat16}, --precision {fp32,int8,bfloat16} - Specify the model precision to use: fp32, int8, or - bfloat16 + -p {fp32,int8}, --precision {fp32,int8} + Specify the model precision to use: fp32, int8 -mo {training,inference}, --mode {training,inference} Specify the type training or inference -m MODEL_NAME, --model-name MODEL_NAME @@ -71,6 +73,8 @@ optional arguments: used when this value is set. If used in conjunction with --num-cores, all cores will be allocated on the single socket. + --num-instances NUM_INSTANCES + Specify the number of instances to run. -n NUM_CORES, --num-cores NUM_CORES Specify the number of cores to use. If the parameter is not specified or is -1, all cores will be used. 
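+
+A hypothetical example of combining the new `--num-instances` flag with an inference run (the other flags mirror the existing examples in this document; adjust the paths and model settings to your environment):
+```
+python launch_benchmark.py \
+    --model-name resnet50 \
+    --framework tensorflow \
+    --precision fp32 \
+    --mode inference \
+    --num-instances 2 \
+    --in-graph /home//resnet50_fp32_pretrained_model.pb \
+    --docker-image intel/intel-optimized-tensorflow:2.1.0
+```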
@@ -161,7 +165,7 @@ $ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --data-location /home//Imagenet_Validation \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --volume /home//custom_folder_1:/custom_folder_1 \ --volume /home//custom_folder_2:/custom_folder_2 ``` @@ -198,7 +202,7 @@ Below is an example showing how to use the `--debug` flag: --batch-size=1 \ --socket-id 0 \ --data-location /home//Imagenet_Validation \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --debug # ls diff --git a/docs/image_recognition/quantization/Tutorial.md b/docs/image_recognition/quantization/Tutorial.md index 72409a1db..f383e20ce 100644 --- a/docs/image_recognition/quantization/Tutorial.md +++ b/docs/image_recognition/quantization/Tutorial.md @@ -1,11 +1,9 @@ -# Image Recognition Model Optimization and Quantization with ResNet50 +# Image Recognition Model Optimization and Quantization with ResNet50 and ResNet50v1.5 Content: * [Goal](#goal) * [Prerequisites](#prerequisites) -* [Install and Build TensorFlow Tools](#install-and-build-tensorflow-tools) -* [Floating point 32-bits Model Optimization](#fp32-model-optimization) -* [Floating point 32-bits Model Quantization to 8-bits Precision](#fp32-model-quantization-to-int8-precision) +* [Floating point 32-bits Model Quantization to 8-bits Precision](#floating-point-32-bits-model-quantization-to-8-bits-precision) * [Performance Evaluation](#performance-evaluation) ## Goal @@ -20,214 +18,164 @@ Usually, there will be some loss in performance, but it has to be within the [ac More resources: [Post-training quantization for mobile and IOT](https://www.tensorflow.org/lite/performance/post_training_quantization), and [TensorFlow graph transform tool user guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms). +This tutorial provides a step-by-step guide for ResNet50 and ResNet50v1.5 models conversion from Floating Point 32-bits (FP32) precision to 8-bits Precision (INT8) using [Intel® AI Quantization Tools for TensorFlow](https://github.com/IntelAI/tools). ## Prerequisites -* The ResNet50 model topology graph (the `model graph_def` as `.pb` or `.pbtxt` file) -and the `checkpoint files` are required to start this tutorial. +* The binary installed [Intel® optimizations for TensorFlow 2.1.0](https://pypi.org/project/intel-tensorflow/). +``` + $ pip install intel-tensorflow==2.1.0 + $ pip install intel-quantization +``` + +* The source release repository of [Model Zoo](https://github.com/IntelAI/models) for Intel® Architecture. +``` + $ cd ~ + $ git clone https://github.com/IntelAI/models.git +``` +* The source release repository of [Intel® AI Quantization Tools for TensorFlow](https://github.com/IntelAI/tools). +``` + $ cd ~ + $ git clone https://github.com/IntelAI/tools.git +``` + +* The frozen FP32 pre-trained model and the ImageNet dataset will be required for fully automatic quantization. +The TensorFlow models repository provides +[scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) +to download, process, and convert the ImageNet dataset to the TFRecord format. 
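+
+An optional sanity check (not required by the steps below): confirm that the TensorFlow 2.1.0 build installed above is the one picked up by your Python environment before starting the quantization run.
+```
+ $ python -c "import tensorflow as tf; print(tf.__version__)"
+ 2.1.0
+```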
+ -## Install and Build TensorFlow Tools +## Floating point 32-bits Model Quantization to 8-bits Precision -* Clone the TensorFlow tools repository, and follow the [instructions](https://github.com/IntelAI/tools/tree/master/tensorflow_quantization) -for how to build the TensorFlow tools using Docker. +In this section, we assume that the ImageNet dataset is available, and also you can download the FP32 pre-trained model as shown in [ResNet50](#resnet50) and [ResNet50v1.5](#resnet50v1.5) instructions. + +[Intel® AI Quantization Tools for TensorFlow](https://github.com/IntelAI/tools) repository provides a python script which fully automates the quantization steps for ResNet50 and ResNet50v1.5. +The quantization script requires the input parameters of pre-trained model, dataset path to match with your local environment. +And then simply specify the model name and execute the python script, you will get the fully automatic quantization conversion from FP32 to INT8. ``` -$ git clone https://github.com/IntelAI/tools.git + $ cd /home//tools + $ python api/examples/quantize_model_zoo.py \ + --model model \ + --in_graph /home//fp32_pretrained_model.pb \ + --out_graph /home//output.pb \ + --data_location /home//dataset \ + --models_zoo_location /home//models ``` -## FP32 Model Optimization -In this section, we assume that a trained model topology graph (the model graph_def as .pb or .pbtxt file) and the checkpoint files are available. - * The `model graph_def` is used in `step 1` to get the possible **input and output node names** of the graph. - * Both of the `model graph_def` and the `checkpoint file` are required in `step 2` to get the **model frozen graph**. - * The `model frozen graph`, **optimized** (based on the graph structure and operations, etc.) in `step 3`. - -We also assume that you are in the TensorFlow root directory (`/workspace/tensorflow/` inside the docker container) to execute the following steps. - -1. Find out the possible input and output node names of the graph - From the TensorFlow/tools root directory, run: - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/summarize_graph \ - --in_graph=/workspace/tensorflow/resnet50.pbtxt \ - --print_structure=false >& model_nodes.txt - ``` - - In the `model_nodes.txt` file, look for the input and output nodes names such as: - ``` - Found 1 possible inputs: (name=input, type=float(1), shape=[?,224,224,3]) - Found 1 possible outputs: (name=predict, op=Softmax) - ``` -2. Freeze the graph where the checkpoint values are converted into constants in the graph: - * The `--input_graph` is the model topology graph_def, and the checkpoint file are required. - * The `--output_node_names` are obtained from step 1. - >Note: `--input_graph` can be in either binary `pb` or text `pbtxt` format, - and the `--input_binary` flag will be enabled or disabled accordingly. - ``` - $ python tensorflow/python/tools/freeze_graph.py \ - --output_graph= /workspace/tensorflow/resnet50_frozen_fp32_graph.pb \ - --input_binary=False \ - --output_binary=True \ - --input_checkpoint=/workspace/tensorflow/resnet50_model.ckpt \ - --in_graph=/workspace/tensorflow/resnet50.pbtxt \ - --output_node_names=‘predict’ - ``` -3. Optimize the FP32 frozen graph: - * Set the `--in_graph` to the path of the model frozen graph (from step 2), - * The `--inputs` and `--outputs` are the graph input and output node names (from step 1). - * `--transforms` to be set based on the model graph structure (to remove unused nodes, combine operations, etc). 
- ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/tensorflow/freezed_resnet50.pb \ - --out_graph=/workspace/tensorflow/optimized_resnet50_fp32_graph.pb \ - --inputs='input'; \ - --outputs='predict'; \ - --transforms='strip_unused_nodes remove_nodes(op=Identity, op=CheckNumerics) - fold_constants(ignore_errors=true) fold_batch_norms fold_old_batch_norms' - ``` -4. [Evaluate the model performance](#accuracy-for-fp32-optimized-graph) using the the optimized graph `optimized_resnet50_fp32_graph.pb` and check the model accuracy. - -## FP32 Model Quantization to Int8 Precision -In this section, our objective is to quantize the output [FP32 optimized graph](#fp32-model-optimization) of the previous section -to `Int8` precision. -In case you did not do the FP32 model optimization by yourself, please follow the [instructions](/benchmarks/image_recognition/tensorflow/resnet50/README.md#fp32-inference-instructions) to download the Intel optimized -ResNet50 pre-trained model graph. - -The following steps show how to convert the `FP32` model to `Int8` precision to reduce the model size: - -5. Convert the FP32-graph to a dynamic range Int8-graph using the output node names (from step 1) - - ``` - $ python tensorflow/tools/quantization/quantize_graph.py \ - --input=/workspace/tensorflow/optimized_resnet50_fp32_graph.pb \ - --output=/workspace/tensorflow/int8_dynamic_range_resnet50_graph.pb \ - --output_node_names='predict' \ - --mode=eightbit \ - --intel_cpu_eightbitize=True - ``` - - [Evaluate the output int8 graph performance](#accuracy-for-int8-optimized-graph) - to check the loss in performance after the model quantization. +The `quantize_model_zoo.py` script executes the following steps to optimize and quantize a FP32 model: +1) Optimize fp32_frozen_graph based on the graph structure and operations, etc. +2) Quantize graph: The FP32-graph is converted to a dynamic range INT8 graph using the output node names. +3) Calibration: It converts the dynamic re-quantization range (`RequantizationRangeOp`) in the initially quantized graph to static (constants). +4) Fuse `RequantizeOp` with fused quantized convolutions, and generate the final +optimized INT8 graph. + +For tuning the pre-defined graph quantization parameters such as +(`INPUT_NODE_LIST`, `OUTPUT_NODE_LIST`, `EXCLUDED_OPS_LIST`, `EXCLUDED_NODE_LIST`, enable or disable `PER_CHANNEL_FLAG`), please check the [models.json](https://github.com/IntelAI/tools/blob/master/api/config/models.json) file, and the [quantization API documentation](https://github.com/IntelAI/tools/tree/master/api#integration-with-model-zoo-for-intel-architecture). + + +## ResNet50 + +* Download the FP32 ResNet50 pre-trained model to a location of your choice or as suggested: +``` + $ cd /home//tools/api/models/resnet50 + $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb +``` +* Run the automatic quantization script with the input parameters of pre-trained model, dataset path to match with your local environment. +And then, you will get the quantized ResNet50 INT8 pre-trained model saved in `/home//tools/api/models/resnet50/resnet50_int8.pb` as specified. 
+``` + $ cd /home//tools + $ python api/examples/quantize_model_zoo.py \ + --model resnet50 \ + --in_graph /home//tools/api/models/resnet50/resnet50_fp32_pretrained_model.pb \ + --out_graph /home//tools/api/models/resnet50/resnet50_int8.pb \ + --data_location /home//imagenet \ + --models_zoo_location /home//models +``` + +* An example for the log output when the graph quantization run completes: +``` + Model Config: MODEL_NAME:resnet50 + Model Config: LAUNCH_BENCHMARK_PARAMS:{'LAUNCH_BENCHMARK_SCRIPT': 'benchmarks/launch_benchmark.py', 'LAUNCH_BENCHMARK_CMD': ['--model-name resnet50', '--framework tensorflow', '--precision int8', '--mode inference', '--batch-size 100', '--accuracy-only'], 'IN_GRAPH': '--in-graph {}', 'DATA_LOCATION': '--data-location {}'} + Model Config: QUANTIZE_GRAPH_CONVERTER_PARAMS:{'INPUT_NODE_LIST': ['input'], 'OUTPUT_NODE_LIST': ['predict'], 'EXCLUDED_OPS_LIST': [], 'EXCLUDED_NODE_LIST': [], 'PER_CHANNEL_FLAG': False} + Model Config: Supported models - ['resnet50', 'resnet50v1_5', 'resnet101', 'mobilenet_v1', 'ssd_mobilenet', 'ssd_resnet34', 'faster_rcnn', 'rfcn', 'inceptionv3'] + Inference Calibration Command: python /home//models/benchmarks/launch_benchmark.py --model-name resnet50 --framework tensorflow --precision int8 --mode inference --batch-size 100 --accuracy-only --data-location /home//imagenet --in-graph {} + ... - The log snippet for the dynamic range Int8 model accuracy: - ``` - ... - Processed 5100 images. (Top1 accuracy, Top5 accuracy) = (0.6665, 0.8506) - Processed 5200 images. (Top1 accuracy, Top5 accuracy) = (0.6683, 0.8523) - Processed 5300 images. (Top1 accuracy, Top5 accuracy) = (0.6698, 0.8538) - ... - ``` - -6. Convert from dynamic to static re-quantization range. -The following steps are to freeze the re-quantization range (also known as -calibration): - - In order to facilitate this section for the user, we attached a sample of the [`resnet50_min_max_log.txt` file](/docs/image_recognition/quantization/resnet50_min_max_log.txt). - In case you decided to use it then you can skip the first two steps `Insert the logging op` and `Generate calibration data`. - - * Insert the logging op: - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/int8_resnet50_graph.pb \ - --out_graph=/workspace/quantization/logged_int8_resnet50.pb \ - --transforms='insert_logging(op=RequantizationRange, show_name=true, message="__requant_min_max:")' - ``` - - * Generate calibration data: - * **Generate a data subset of the ImageNet dataset for calibration**, follow [instructions](/benchmarks/image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) - and run inference for accuracy (using `--accuracy-only`, `--in-graph=/home//optimized_resnet50_fp32_graph.pb` (from step 3), - `--docker-image=intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl` and `-- calibration_only=True`). - - > Note: - > - `-- calibration_only=True` is a custom argument to be added at the end of the inference command as formatted (with a white space after `--`). - > - This step works only with `--docker-image=intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl`, or an image generated using [TensorFlow](https://github.com/tensorflow/tensorflow) commit `7878f58d38915ba895670d3a550571bebd8c787c` or older. - - We run inference while generating calibration data to be able to pick images that are correctly classified with high confidence for calibration. 
- The `optimized_resnet50_fp32_graph.pb` is used as the ResNet50 trained model at this step. - A snippet of the ResNet50 inference results while generating the calibration data: - ``` - Processed 10 images. (Top1 accuracy, Top5 accuracy) = (1.0000, 1.0000) - Processed 20 images. (Top1 accuracy, Top5 accuracy) = (1.0000, 1.0000) - Processed 30 images. (Top1 accuracy, Top5 accuracy) = (1.0000, 1.0000) - Processed 40 images. (Top1 accuracy, Top5 accuracy) = (1.0000, 1.0000) - Processed 50 images. (Top1 accuracy, Top5 accuracy) = (1.0000, 1.0000) - Processed 60 images. (Top1 accuracy, Top5 accuracy) = (1.0000, 1.0000) - Processed 70 images. (Top1 accuracy, Top5 accuracy) = (1.0000, 1.0000) - Processed 80 images. (Top1 accuracy, Top5 accuracy) = (1.0000, 1.0000) - Processed 90 images. (Top1 accuracy, Top5 accuracy) = (1.0000, 1.0000) - Processed 100 images. (Top1 accuracy, Top5 accuracy) = (1.0000, 1.0000) - ``` - The calibration data `calibration-1-of-1` will be created in the current directory. - ``` - $ mkdir dataset && cp calibration-1-of-1 dataset - - ``` - - * **Generate the `resnet50_min_max_log.txt` file**, follow [instructions](/benchmarks/image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) - to run inference (using `--batch_size=10`, `--data-location=/home//dataset`, `--in-graph=/home//logged_int8_resnet50.pb`, - `--accuracy-only`, and `-- calibrate=True`), and **store the output log in `resnet50_min_max_log.txt` file**. - - >Note: - `-- calibrate=True` is a custom argument to be added at the end of the inference command as formatted (with a white space after `--`). - - * The `resnet50_min_max_log.txt` file is used in the following step. We suggest that you store the `resnet50_min_max_log.txt` in the same location specified in - the [start quantization process](https://github.com/IntelAI/tools/tree/master/tensorflow_quantization) section, - which will be mounted inside the container to `/workspace/quantization`. + ;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[0][2.67806506] + ;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[0][23.9200363] + ;v0/mpool0/MaxPool_eightbit_max_v0/conv0/Relu__print__;__max:[5.72005272];v0/mpool0/MaxPool_eightbit_min_v0/conv0/Relu__print__;__min:[-0] + ... - * Run the calibration data replace the - `RequantizationRangeOp` with constants in the original quantized graph (the output of step 1): - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/int8_dynamic_range_resnet50_graph.pb \ - --out_graph=/workspace/quantization/freezed_range_int8_resnet50.pb \ - --transforms='freeze_requantization_ranges(min_max_log_file="/workspace/quantization/resnet50_min_max_log.txt")' - ``` - - [Evaluate the output int8 graph performance](#accuracy-for-int8-optimized-graph) - to check the loss in performance after this step. - A snippet of the inference log for accuracy: - ``` - ... - Processed 5100 images. (Top1 accuracy, Top5 accuracy) = (0.6529, 0.8647) - Processed 5200 images. (Top1 accuracy, Top5 accuracy) = (0.6540, 0.8654) - Processed 5300 images. (Top1 accuracy, Top5 accuracy) = (0.6555, 0.8664) - ... - ``` - -7. 
Fuse `RequantizeOp` with fused quantized convolutions, and generate the final -optimized Int8 graph - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/freezed_range_int8_resnet50.pb \ - --out_graph=/workspace/quantization/final_int8_resnet50.pb \ - --outputs='predict' \ - --transforms='fuse_quantized_conv_and_requantize strip_unused_nodes' - ``` - Check the final quantized ResNet50 model `final_int8_resnet50.pb` performance in - the [Accuracy for Int8 Optimized Graph](#accuracy-for-int8-optimized-graph) section. - + Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7386, 0.9168) + Iteration time: 1.3564 ms + Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7387, 0.9169) + Iteration time: 1.3461 ms + Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7387, 0.9169) + Ran inference with batch size 100 + Log location outside container: /home//models/benchmarks/common/tensorflow/logs/benchmark_resnet50_inference_int8_20200401_115400.log + I0401 12:05:21.515716 139714463500096 graph_converter.py:195] Converted graph file is saved to: /home//output.pb +``` -## Performance Evaluation +## ResNet50v1.5 -Validating the model performance is required after each step to verify if the output graph achieves the accuracy target. -* The model accuracy is used as a performance measure. -* The accuracy target is the optimized FP32 model accuracy values. -* The quantized `Int8` graph accuracy should not drop more than ~0.5-1%. +* Download the FP32 ResNet50v1.5 pre-trained model to a location of your choice or as suggested: +``` + $ mkdir /home//tools/api/models/resnet50v1_5 && cd /home//tools/api/models/resnet50v1_5 + $ wget https://zenodo.org/record/2535873/files/resnet50_v1.pb +``` +* Run the automatic quantization script with the input parameters of pre-trained model, dataset path to match with your local environment. +And then, you will get the quantized ResNet50v1.5 INT8 pre-trained model saved in `/home//tools/api/models/resnet50v1_5/resnet50v1_5_int8.pb` as specified. +``` + $ cd /home//tools + $ python api/examples/quantize_model_zoo.py \ + --model resnet50v1_5 \ + --in_graph /home//tools/api/models/resnet50v1_5/resnet50_v1.pb \ + --out_graph /home//tools/api/models/resnet50v1_5/resnet50v1_5_int8.pb \ + --data_location /home//imagenet \ + --models_zoo_location /home//models +``` +* An example for the log output when the graph quantization run completes: +``` +Model Config: MODEL_NAME:resnet50v1_5 +Model Config: LAUNCH_BENCHMARK_PARAMS:{'LAUNCH_BENCHMARK_SCRIPT': 'benchmarks/launch_benchmark.py', 'LAUNCH_BENCHMARK_CMD': ['--model-name resnet50v1_5', '--framework tensorflow', '--precision int8', '--mode inference', '--batch-size 100', '--accuracy-only'], 'IN_GRAPH': '--in-graph {}', 'DATA_LOCATION': '--data-location {}'} +Model Config: QUANTIZE_GRAPH_CONVERTER_PARAMS:{'INPUT_NODE_LIST': ['input_tensor'], 'OUTPUT_NODE_LIST': ['ArgMax', 'softmax_tensor'], 'EXCLUDED_OPS_LIST': [], 'EXCLUDED_NODE_LIST': [], 'PER_CHANNEL_FLAG': True} +Model Config: Supported models - ['resnet50', 'resnet50v1_5', 'resnet101', 'mobilenet_v1', 'ssd_mobilenet', 'ssd_resnet34', 'faster_rcnn', 'rfcn', 'inceptionv3'] +Inference Calibration Command: python /home//models/benchmarks/launch_benchmark.py --model-name resnet50v1_5 --framework tensorflow --precision int8 --mode inference --batch-size 100 --accuracy-only --data-location /home//imagenet --in-graph {} +... 
+ +;resnet_model/conv2d_5/Conv2D_eightbit_requant_range__print__;__requant_min_max:[0][16.3215694] +;resnet_model/conv2d_6/Conv2D_eightbit_requant_range__print__;__requant_min_max:[0][13.4745159] +;resnet_model/conv2d_7/Conv2D_eightbit_requant_range__print__;__requant_min_max:[0][14.5196199] +... + +Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7628, 0.9299) +Iteration time: 1.8439 ms +Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7627, 0.9298) +Iteration time: 1.8366 ms +Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7628, 0.9298) +Ran inference with batch size 100 +Log location outside container: /home//models/benchmarks/common/tensorflow/logs/benchmark_resnet50v1_5_inference_int8_20200402_125005.log +I0402 13:07:13.125293 140357697517376 graph_converter.py:195] Converted graph file is saved to: api/models/resnet50v1_5/resnet50v1_5_int8.pb +``` -This section explains how to run ResNet50 inference and calculate the model accuracy using the [Intel Model Zoo](https://github.com/IntelAI/models). +## Performance Evaluation -Clone the [IntelAI/models](https://github.com/IntelAI/models) repository, -and follow the [documented steps](/benchmarks/image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) -to run `ResNet50` inference performance for both FP32 and Int8 cases. +Verify the quantized model performance: -**Note that the script should be run outside of the quantization docker container -and that some inputs to the script are slightly different for `FP32` and `Int8` models (i.e. `--precision` and `--docker-image`).** +* Run inference using the final quantized graph and calculate the accuracy. +* Typically, the accuracy target is the optimized FP32 model accuracy values. +* The quantized INT8 graph accuracy should not drop more than ~0.5-1%. +### ResNet50 Accuracy Evaluation: +Check [IntelAI/models](https://github.com/IntelAI/models) repository and [ResNet50 README](/benchmarks/image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) +for TensorFlow models inference benchmarks with different precisions. -### Accuracy for FP32 Optimized Graph -Clone the [IntelAI/models](https://github.com/IntelAI/models) repository and follow the steps to run the FP32 -script to calculate `accuracy` and use the optimized FP32 graph in `--in-graph`. +#### FP32 +Follow the steps in [ResNet50 README](/benchmarks/image_recognition/tensorflow/resnet50/README.md#fp32-inference-instructions) to run the FP32 +script to calculate `accuracy` and use the FP32 graph in `--in-graph`. ``` - $ git clone https://github.com/IntelAI/models.git $ cd /home//models/benchmarks $ python launch_benchmark.py \ - --in-graph /home///optimized_resnet50_fp32_graph.pb \ + --in-graph /home//tools/api/models/resnet50/resnet50_fp32_pretrained_model.pb \ --model-name resnet50 \ --framework tensorflow \ --precision fp32 \ @@ -235,30 +183,32 @@ script to calculate `accuracy` and use the optimized FP32 graph in `--in-graph`. --accuracy-only \ --batch-size=100 \ --socket-id 0 \ - --data-location /home// \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --data-location /home//imagenet \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 ``` The tail of the log output when the accuracy run completes should look something like this: ``` ... - Processed 4800 images. (Top1 accuracy, Top5 accuracy) = (0.7533, 0.9225) - Processed 4900 images. (Top1 accuracy, Top5 accuracy) = (0.7531, 0.9227) - Processed 5000 images. 
(Top1 accuracy, Top5 accuracy) = (0.7550, 0.9230) - Processed 5100 images. (Top1 accuracy, Top5 accuracy) = (0.7545, 0.9224) - Processed 5200 images. (Top1 accuracy, Top5 accuracy) = (0.7544, 0.9215) + Processed 49600 images. (Top1 accuracy, Top5 accuracy) = (0.7422, 0.9184) + Iteration time: 0.3590 ms + Processed 49700 images. (Top1 accuracy, Top5 accuracy) = (0.7423, 0.9184) + Iteration time: 0.3608 ms + Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7424, 0.9184) + Iteration time: 0.3555 ms + Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7425, 0.9185) + Iteration time: 0.3561 ms + Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7425, 0.9185) ... ``` -### Accuracy for Int8 Optimized Graph +#### INT8 -Clone the [IntelAI/models](https://github.com/IntelAI/models) repository and follow the steps to run the Int8 -script to calculate `accuracy` and use the Int8 graph in `--in-graph`. +Follow the steps in [ResNet50 README](/benchmarks/image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) +to run the INT8 script to calculate `accuracy` and use the path to the `resnet50_int8.pb` INT8 graph in `--in-graph`. ``` - $ git clone https://github.com/IntelAI/models.git $ cd /home//models/benchmarks - $ python launch_benchmark.py \ - --in-graph /home///final_resnet50_Int8_graph.pb \ + --in-graph /home//tools/api/models/resnet50/resnet50_int8.pb \ --model-name resnet50 \ --framework tensorflow \ --precision int8 \ @@ -266,22 +216,96 @@ script to calculate `accuracy` and use the Int8 graph in `--in-graph`. --accuracy-only \ --batch-size=100 \ --socket-id 0 \ - --data-location /home// \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl + --data-location /home//imagenet \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 + ``` +The tail of the log output when the accuracy run completes should look something like this: + ``` + ... + Processed 49600 images. (Top1 accuracy, Top5 accuracy) = (0.7369, 0.9159) + Iteration time: 0.1961 ms + Processed 49700 images. (Top1 accuracy, Top5 accuracy) = (0.7370, 0.9160) + Iteration time: 0.1967 ms + Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7371, 0.9159) + Iteration time: 0.1952 ms + Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7371, 0.9160) + Iteration time: 0.1968 ms + Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7371, 0.9160) + ... ``` + + +### ResNet50v1.5 Accuracy Evaluation: +Check [IntelAI/models](https://github.com/IntelAI/models) repository and [ResNet50v1.5 README](/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md#int8-inference-instructions) +for TensorFlow models inference benchmarks with different precisions. + +#### FP32 +Follow the steps in [ResNet50v1.5 README](/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md#fp32-inference-instructions) to run the FP32 +script to calculate `accuracy` and use the FP32 graph in `--in-graph`. + ``` + $ cd /home//models/benchmarks + $ python launch_benchmark.py \ + --in-graph /home//tools/api/models/resnet50v1_5/resnet50_v1.pb \ + --model-name resnet50v1_5 \ + --framework tensorflow \ + --precision fp32 \ + --mode inference \ + --accuracy-only \ + --batch-size=100 \ + --socket-id 0 \ + --data-location /home//imagenet \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 + ``` The tail of the log output when the accuracy run completes should look something like this: ``` ... - Processed 4500 images. 
(Top1 accuracy, Top5 accuracy) = (0.7384, 0.9207) - Processed 4600 images. (Top1 accuracy, Top5 accuracy) = (0.7387, 0.9209) - Processed 4700 images. (Top1 accuracy, Top5 accuracy) = (0.7383, 0.9211) - Processed 4800 images. (Top1 accuracy, Top5 accuracy) = (0.7375, 0.9208) - Processed 4900 images. (Top1 accuracy, Top5 accuracy) = (0.7382, 0.9212) - Processed 5000 images. (Top1 accuracy, Top5 accuracy) = (0.7378, 0.9210) - Processed 5100 images. (Top1 accuracy, Top5 accuracy) = (0.7380, 0.9214) - Processed 5200 images. (Top1 accuracy, Top5 accuracy) = (0.7387, 0.9219) - Processed 5300 images. (Top1 accuracy, Top5 accuracy) = (0.7387, 0.9221) - Processed 5400 images. (Top1 accuracy, Top5 accuracy) = (0.7376, 0.9213) - Processed 5500 images. (Top1 accuracy, Top5 accuracy) = (0.7373, 0.9211) + Processed 49600 images. (Top1 accuracy, Top5 accuracy) = (0.7647, 0.9306) + Iteration time: 0.4688 ms + Processed 49700 images. (Top1 accuracy, Top5 accuracy) = (0.7647, 0.9306) + Iteration time: 0.4694 ms + Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7648, 0.9307) + Iteration time: 0.4664 ms + Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7647, 0.9307) + Iteration time: 0.4650 ms + Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7648, 0.9308) ... - ``` \ No newline at end of file + ``` + +#### INT8 + +Follow the steps in [ResNet50v1.5 README](/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md#int8-inference-instructions) +to run the INT8 script to calculate `accuracy` and use the path to the `resnet50v1_5_int8.pb` INT8 graph in `--in-graph`. + ``` + $ cd /home//models/benchmarks + $ python launch_benchmark.py \ + --in-graph /home//tools/api/models/resnet50v1_5/resnet50v1_5_int8.pb \ + --model-name resnet50v1_5 \ + --framework tensorflow \ + --precision int8 \ + --mode inference \ + --accuracy-only \ + --batch-size=100 \ + --socket-id 0 \ + --data-location /home//imagenet \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 + ``` +The tail of the log output when the accuracy run completes should look something like this: + ``` + ... + Processed 49600 images. (Top1 accuracy, Top5 accuracy) = (0.7614, 0.9298) + Iteration time: 0.2126 ms + Processed 49700 images. (Top1 accuracy, Top5 accuracy) = (0.7614, 0.9298) + Iteration time: 0.2125 ms + Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7614, 0.9298) + Iteration time: 0.2128 ms + Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7614, 0.9298) + Iteration time: 0.2122 ms + Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7616, 0.9298) + ... + ``` + + +## +Check [Intel® AI Quantization Tools for TensorFlow](https://github.com/IntelAI/tools/tree/master/api#quantization-python-programming-api-quick-start) +for more details about the quantization scripts, procedures with different models. And for [Docker support](https://github.com/IntelAI/tools/tree/master/api#docker-support). 
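+
+As a quick check against the accuracy criterion above, using the sample logs in this tutorial: ResNet50 Top-1 accuracy goes from 0.7425 (FP32) to 0.7371 (INT8), a drop of about 0.5 points, and ResNet50v1.5 goes from 0.7648 (FP32) to 0.7616 (INT8), a drop of about 0.3 points; both stay within the expected ~0.5-1% tolerance.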
+ diff --git a/docs/image_recognition/quantization/resnet50_min_max_log.txt b/docs/image_recognition/quantization/resnet50_min_max_log.txt deleted file mode 100644 index 4f668a545..000000000 --- a/docs/image_recognition/quantization/resnet50_min_max_log.txt +++ /dev/null @@ -1,727 +0,0 @@ -2019-02-24 03:51:33.625180: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations: AVX512F -To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags. - -User settings: - - KMP_AFFINITY=granularity=fine,verbose,compact,1,0 - KMP_BLOCKTIME=0 - KMP_SETTINGS=1 - OMP_NUM_THREADS=28 - -Effective settings: - - KMP_ABORT_DELAY=0 - KMP_ADAPTIVE_LOCK_PROPS='1,1024' - KMP_ALIGN_ALLOC=64 - KMP_ALL_THREADPRIVATE=448 - KMP_ATOMIC_MODE=2 - KMP_BLOCKTIME=0 - KMP_CPUINFO_FILE: value is not defined - KMP_DETERMINISTIC_REDUCTION=false - KMP_DEVICE_THREAD_LIMIT=2147483647 - KMP_DISP_HAND_THREAD=false - KMP_DISP_NUM_BUFFERS=7 - KMP_DUPLICATE_LIB_OK=false - KMP_FORCE_REDUCTION: value is not defined - KMP_FOREIGN_THREADS_THREADPRIVATE=true - KMP_FORKJOIN_BARRIER='2,2' - KMP_FORKJOIN_BARRIER_PATTERN='hyper,hyper' - KMP_FORKJOIN_FRAMES=true - KMP_FORKJOIN_FRAMES_MODE=3 - KMP_GTID_MODE=3 - KMP_HANDLE_SIGNALS=false - KMP_HOT_TEAMS_MAX_LEVEL=1 - KMP_HOT_TEAMS_MODE=0 - KMP_INIT_AT_FORK=true - KMP_INIT_WAIT=2048 - KMP_ITT_PREPARE_DELAY=0 - KMP_LIBRARY=throughput - KMP_LOCK_KIND=queuing - KMP_MALLOC_POOL_INCR=1M - KMP_NEXT_WAIT=1024 - KMP_NUM_LOCKS_IN_BLOCK=1 - KMP_PLAIN_BARRIER='2,2' - KMP_PLAIN_BARRIER_PATTERN='hyper,hyper' - KMP_REDUCTION_BARRIER='1,1' - KMP_REDUCTION_BARRIER_PATTERN='hyper,hyper' - KMP_SCHEDULE='static,balanced;guided,iterative' - KMP_SETTINGS=true - KMP_SPIN_BACKOFF_PARAMS='4096,100' - KMP_STACKOFFSET=64 - KMP_STACKPAD=0 - KMP_STACKSIZE=4M - KMP_STORAGE_MAP=false - KMP_TASKING=2 - KMP_TASKLOOP_MIN_TASKS=0 - KMP_TASK_STEALING_CONSTRAINT=1 - KMP_TEAMS_THREAD_LIMIT=112 - KMP_TOPOLOGY_METHOD=all - KMP_USER_LEVEL_MWAIT=false - KMP_VERSION=false - KMP_WARNINGS=true - OMP_AFFINITY_FORMAT='OMP: pid %P tid %T thread %n bound to OS proc set {%a}' - OMP_ALLOCATOR=omp_default_mem_alloc - OMP_CANCELLATION=false - OMP_DEFAULT_DEVICE=0 - OMP_DISPLAY_AFFINITY=false - OMP_DISPLAY_ENV=false - OMP_DYNAMIC=false - OMP_MAX_ACTIVE_LEVELS=2147483647 - OMP_MAX_TASK_PRIORITY=0 - OMP_NESTED=false - OMP_NUM_THREADS='28' - OMP_PLACES: value is not defined - OMP_PROC_BIND='intel' - OMP_SCHEDULE='static' - OMP_STACKSIZE=4M - OMP_TARGET_OFFLOAD=DEFAULT - OMP_THREAD_LIMIT=2147483647 - OMP_TOOL=enabled - OMP_TOOL_LIBRARIES: value is not defined - OMP_WAIT_POLICY=PASSIVE - KMP_AFFINITY='verbose,warnings,respect,granularity=fine,compact,1,0' - -2019-02-24 03:51:33.698482: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2500000000 Hz -2019-02-24 03:51:33.703226: I tensorflow/compiler/xla/service/service.cc:162] XLA service 0x46e8cb0 executing computations on platform Host. Devices: -2019-02-24 03:51:33.703289: I tensorflow/compiler/xla/service/service.cc:169] StreamExecutor device (0): , -OMP: Info #212: KMP_AFFINITY: decoding x2APIC ids. 
-OMP: Info #210: KMP_AFFINITY: Affinity capable, using global cpuid leaf 11 info -OMP: Info #154: KMP_AFFINITY: Initial OS proc set respected: 0-27,56-83 -OMP: Info #156: KMP_AFFINITY: 56 available OS procs -OMP: Info #157: KMP_AFFINITY: Uniform topology -OMP: Info #179: KMP_AFFINITY: 1 packages x 28 cores/pkg x 2 threads/core (28 total cores) -OMP: Info #214: KMP_AFFINITY: OS proc to physical thread map: -OMP: Info #171: KMP_AFFINITY: OS proc 0 maps to package 0 core 0 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 56 maps to package 0 core 0 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 1 maps to package 0 core 1 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 57 maps to package 0 core 1 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 2 maps to package 0 core 2 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 58 maps to package 0 core 2 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 3 maps to package 0 core 3 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 59 maps to package 0 core 3 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 4 maps to package 0 core 4 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 60 maps to package 0 core 4 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 5 maps to package 0 core 5 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 61 maps to package 0 core 5 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 6 maps to package 0 core 6 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 62 maps to package 0 core 6 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 7 maps to package 0 core 8 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 63 maps to package 0 core 8 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 8 maps to package 0 core 9 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 64 maps to package 0 core 9 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 9 maps to package 0 core 10 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 65 maps to package 0 core 10 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 10 maps to package 0 core 11 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 66 maps to package 0 core 11 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 11 maps to package 0 core 12 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 67 maps to package 0 core 12 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 12 maps to package 0 core 13 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 68 maps to package 0 core 13 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 13 maps to package 0 core 14 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 69 maps to package 0 core 14 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 14 maps to package 0 core 16 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 70 maps to package 0 core 16 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 15 maps to package 0 core 17 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 71 maps to package 0 core 17 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 16 maps to package 0 core 18 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 72 maps to package 0 core 18 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 17 maps to package 0 core 19 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 73 maps to package 0 core 19 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 18 maps to package 0 core 20 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 74 maps to package 0 core 20 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 19 maps to package 0 core 21 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 75 maps to package 0 core 21 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 20 maps to package 0 core 22 thread 0 -OMP: Info #171: 
KMP_AFFINITY: OS proc 76 maps to package 0 core 22 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 21 maps to package 0 core 24 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 77 maps to package 0 core 24 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 22 maps to package 0 core 25 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 78 maps to package 0 core 25 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 23 maps to package 0 core 26 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 79 maps to package 0 core 26 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 24 maps to package 0 core 27 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 80 maps to package 0 core 27 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 25 maps to package 0 core 28 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 81 maps to package 0 core 28 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 26 maps to package 0 core 29 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 82 maps to package 0 core 29 thread 1 -OMP: Info #171: KMP_AFFINITY: OS proc 27 maps to package 0 core 30 thread 0 -OMP: Info #171: KMP_AFFINITY: OS proc 83 maps to package 0 core 30 thread 1 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 856867 thread 0 bound to OS proc set 0 -2019-02-24 03:51:33.708913: I tensorflow/core/common_runtime/process_util.cc:92] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance. -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857055 thread 1 bound to OS proc set 1 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857054 thread 2 bound to OS proc set 2 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857130 thread 3 bound to OS proc set 3 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857131 thread 4 bound to OS proc set 4 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857132 thread 5 bound to OS proc set 5 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857133 thread 6 bound to OS proc set 6 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857134 thread 7 bound to OS proc set 7 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857136 thread 9 bound to OS proc set 9 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857135 thread 8 bound to OS proc set 8 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857137 thread 10 bound to OS proc set 10 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857138 thread 11 bound to OS proc set 11 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857139 thread 12 bound to OS proc set 12 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857140 thread 13 bound to OS proc set 13 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857141 thread 14 bound to OS proc set 14 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857142 thread 15 bound to OS proc set 15 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857144 thread 17 bound to OS proc set 17 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857143 thread 16 bound to OS proc set 16 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857145 thread 18 bound to OS proc set 18 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857146 thread 19 bound to OS proc set 19 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857147 thread 20 bound to OS proc set 20 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857148 thread 21 bound to OS proc set 21 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857149 thread 22 bound to OS proc set 22 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857150 thread 23 bound to OS proc set 23 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857151 thread 24 bound to OS proc set 24 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857152 
thread 25 bound to OS proc set 25 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857153 thread 26 bound to OS proc set 26 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857154 thread 27 bound to OS proc set 27 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857155 thread 28 bound to OS proc set 56 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857156 thread 29 bound to OS proc set 57 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857157 thread 30 bound to OS proc set 58 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857158 thread 31 bound to OS proc set 59 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857159 thread 32 bound to OS proc set 60 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857160 thread 33 bound to OS proc set 61 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857161 thread 34 bound to OS proc set 62 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857162 thread 35 bound to OS proc set 63 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857163 thread 36 bound to OS proc set 64 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857164 thread 37 bound to OS proc set 65 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857165 thread 38 bound to OS proc set 66 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857166 thread 39 bound to OS proc set 67 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857167 thread 40 bound to OS proc set 68 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857168 thread 41 bound to OS proc set 69 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857170 thread 43 bound to OS proc set 71 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857169 thread 42 bound to OS proc set 70 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857171 thread 44 bound to OS proc set 72 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857172 thread 45 bound to OS proc set 73 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857173 thread 46 bound to OS proc set 74 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857174 thread 47 bound to OS proc set 75 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857176 thread 49 bound to OS proc set 77 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857175 thread 48 bound to OS proc set 76 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857178 thread 51 bound to OS proc set 79 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857177 thread 50 bound to OS proc set 78 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857180 thread 53 bound to OS proc set 81 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857179 thread 52 bound to OS proc set 80 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857182 thread 55 bound to OS proc set 83 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857181 thread 54 bound to OS proc set 82 -OMP: Info #250: KMP_AFFINITY: pid 856867 tid 857183 thread 56 bound to OS proc set 0 -;v0/resnet_v10/conv2/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-5.75943518][3.43590856] -;v0/resnet_v10/conv1/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.63552189][5.20797968] -;v0/resnet_v10/conv3/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.44367445][1.50843954] -;v0/resnet_v10/conv4/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.09641588][0.620731175] -;v0/resnet_v11/conv5/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.58065629][1.9690367] -;v0/resnet_v11/conv6/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.10732758][1.07088137] -;v0/resnet_v11/conv7/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.720322669][0.584654391] 
-;v0/resnet_v12/conv8/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.82983959][1.59795547] -;v0/resnet_v12/conv9/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.07721686][2.7219286] -;v0/resnet_v12/conv10/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.824896395][1.17806804] -;v0/resnet_v13/conv12/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.68710172][2.05018425] -;v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.08479333][2.34671474] -;v0/resnet_v13/conv13/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.74582064][1.47217679] -;v0/resnet_v13/conv14/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.767614484][0.768975317] -;v0/resnet_v14/conv15/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.38195038][1.04463243] -;v0/resnet_v14/conv16/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.22890556][1.38218594] -;v0/resnet_v14/conv17/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.755090952][0.555853] -;v0/resnet_v15/conv18/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.44007051][1.30686033] -;v0/resnet_v15/conv19/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.76842725][1.49112535] -;v0/resnet_v15/conv20/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.685404897][0.691328943] -;v0/resnet_v16/conv21/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.46722758][1.60906434] -;v0/resnet_v16/conv22/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.91237974][1.34849739] -;v0/resnet_v16/conv23/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.91459769][0.710091949] -;v0/resnet_v17/conv25/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.00188637][1.0281744] -;v0/resnet_v17/conv24/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.40868723][1.92530346] -;v0/resnet_v17/conv26/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.26170683][1.07963037] -;v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.563055336][0.386614472] -;v0/resnet_v18/conv28/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.931854308][1.63107157] -;v0/resnet_v18/conv29/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.05295455][0.906698883] -;v0/resnet_v18/conv30/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.486810505][0.379112273] -;v0/resnet_v19/conv31/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.12151337][1.10355508] -;v0/resnet_v19/conv32/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.884071469][0.802521229] -;v0/resnet_v19/conv33/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.369527668][0.456924617] -;v0/resnet_v110/conv34/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.951665819][1.00867164] -;v0/resnet_v110/conv35/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.37289476][1.27979386] -;v0/resnet_v110/conv36/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.47585988][0.539690733] -;v0/resnet_v111/conv37/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.925006092][1.01132202] -;v0/resnet_v111/conv38/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.43408775][1.61128783] 
-;v0/resnet_v111/conv39/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.420987248][0.446346313] -;v0/resnet_v112/conv40/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.870877624][1.22235119] -;v0/resnet_v112/conv41/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.75652683][1.3355633] -;v0/resnet_v112/conv42/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.638834357][0.69960618] -;v0/resnet_v113/conv44/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.50880206][1.30513251] -;v0/resnet_v113/conv43/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.863726854][1.03802192] -;v0/resnet_v113/conv45/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.04755533][1.08165133] -;v0/resnet_v113/conv46/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.519336462][0.521550357] -;v0/resnet_v114/conv47/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.938252091][0.961293697] -;v0/resnet_v114/conv48/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.35280454][1.66555119] -;v0/resnet_v114/conv49/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.601236701][0.479966611] -;v0/resnet_v115/conv50/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.10111856][1.53641939] -;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.73484385][1.27774036] -;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.346945226][1.05037677] -;v0/resnet_v10/conv2/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-5.58576441][3.77917576] -;v0/resnet_v10/conv1/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.03489828][5.53160858] -;v0/resnet_v10/conv3/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.12922645][1.61958063] -;v0/resnet_v10/conv4/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.39951527][0.666388] -;v0/resnet_v11/conv5/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.97404814][1.70148897] -;v0/resnet_v11/conv6/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.5419085][0.995642364] -;v0/resnet_v11/conv7/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.642435133][0.490712851] -;v0/resnet_v12/conv8/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.81167531][1.83374763] -;v0/resnet_v12/conv9/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.6374867][2.12278914] -;v0/resnet_v12/conv10/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.771982789][1.0978564] -;v0/resnet_v13/conv12/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.59927905][1.97339714] -;v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.97971308][2.23172021] -;v0/resnet_v13/conv13/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.68665683][1.60510576] -;v0/resnet_v13/conv14/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.77672565][0.91534096] -;v0/resnet_v14/conv15/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.37646627][1.16208231] -;v0/resnet_v14/conv16/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.27117527][1.28882182] -;v0/resnet_v14/conv17/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.722269714][0.608139575] 
-;v0/resnet_v15/conv18/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.33475959][1.37341464] -;v0/resnet_v15/conv19/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.92478073][1.5073787] -;v0/resnet_v15/conv20/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.654854][1.12937808] -;v0/resnet_v16/conv21/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.40110707][1.84397352] -;v0/resnet_v16/conv22/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.35186696][2.30587268] -;v0/resnet_v16/conv23/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.31773663][1.0363518] -;v0/resnet_v17/conv25/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.01125431][1.04744375] -;v0/resnet_v17/conv24/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.49349618][1.78900957] -;v0/resnet_v17/conv26/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.32378852][1.34338892] -;v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.575021803][0.390675694] -;v0/resnet_v18/conv28/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.886818886][1.808025] -;v0/resnet_v18/conv29/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.20522571][1.17789519] -;v0/resnet_v18/conv30/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.634451568][0.473963261] -;v0/resnet_v19/conv31/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.14735711][1.10365224] -;v0/resnet_v19/conv32/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.0946238][0.882442653] -;v0/resnet_v19/conv33/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.418271244][0.419754267] -;v0/resnet_v110/conv34/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.09480166][1.16525459] -;v0/resnet_v110/conv35/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.40698469][1.22870779] -;v0/resnet_v110/conv36/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.499442399][0.715690732] -;v0/resnet_v111/conv37/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.12793684][1.02507889] -;v0/resnet_v111/conv38/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.979544044][1.38436377] -;v0/resnet_v111/conv39/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.42539981][0.501355529] -;v0/resnet_v112/conv40/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.04130507][1.72741926] -;v0/resnet_v112/conv41/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.93083799][1.35715735] -;v0/resnet_v112/conv42/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.802216351][0.668622494] -;v0/resnet_v113/conv44/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.12197351][1.28832662] -;v0/resnet_v113/conv43/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.04652143][1.11900795] -;v0/resnet_v113/conv45/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.24288583][1.27491951] -;v0/resnet_v113/conv46/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.576737046][0.490032971] -;v0/resnet_v114/conv47/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.08394134][0.971928] -;v0/resnet_v114/conv48/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.23888671][1.57955968] 
-;v0/resnet_v114/conv49/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.706261039][0.519110858] -;v0/resnet_v115/conv50/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.45806885][1.42268085] -;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.47865725][1.45635724] -;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.34142977][0.834839702] -;v0/resnet_v10/conv2/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.44281054][2.50282359] -;v0/resnet_v10/conv1/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.94945526][4.56847954] -;v0/resnet_v10/conv3/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.18642533][1.10852134] -;v0/resnet_v10/conv4/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.93572408][0.571474433] -;v0/resnet_v11/conv5/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.37062109][1.41979241] -;v0/resnet_v11/conv6/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.866130292][1.23526812] -;v0/resnet_v11/conv7/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.8143332][0.74882412] -;v0/resnet_v12/conv8/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.58574653][1.27776086] -;v0/resnet_v12/conv9/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.52691936][1.51249671] -;v0/resnet_v12/conv10/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.664629281][0.902260542] -;v0/resnet_v13/conv12/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.50967228][1.55636787] -;v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.66126466][1.75100172] -;v0/resnet_v13/conv13/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.51077735][1.28412569] -;v0/resnet_v13/conv14/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.7247262][0.641063273] -;v0/resnet_v14/conv15/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.21975708][1.00493956] -;v0/resnet_v14/conv16/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.983035684][1.33646619] -;v0/resnet_v14/conv17/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.22569096][1.11639142] -;v0/resnet_v15/conv18/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.26527238][1.18347418] -;v0/resnet_v15/conv19/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.68618488][1.52540779] -;v0/resnet_v15/conv20/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.725297928][1.18673348] -;v0/resnet_v16/conv21/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.27979636][1.90331686] -;v0/resnet_v16/conv22/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.29123545][2.43409657] -;v0/resnet_v16/conv23/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.36530149][1.1908114] -;v0/resnet_v17/conv25/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.981600404][1.3581053] -;v0/resnet_v17/conv24/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.72068107][1.63887501] -;v0/resnet_v17/conv26/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.04709578][1.11724222] -;v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.462388873][0.494641274] 
-;v0/resnet_v18/conv28/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.848142147][1.57988036] -;v0/resnet_v18/conv29/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.33802378][0.974486411] -;v0/resnet_v18/conv30/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.470014185][0.412943095] -;v0/resnet_v19/conv31/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.00776887][1.16103256] -;v0/resnet_v19/conv32/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.51883101][1.24759769] -;v0/resnet_v19/conv33/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.515327][0.44173035] -;v0/resnet_v110/conv34/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.846155643][1.0066117] -;v0/resnet_v110/conv35/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.2057718][1.0990268] -;v0/resnet_v110/conv36/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.485455602][0.529824674] -;v0/resnet_v111/conv37/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.907775819][1.10239029] -;v0/resnet_v111/conv38/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.18720603][1.75495398] -;v0/resnet_v111/conv39/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.485440284][0.509888887] -;v0/resnet_v112/conv40/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.929531217][1.96240103] -;v0/resnet_v112/conv41/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.80158246][1.40802681] -;v0/resnet_v112/conv42/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.848680556][0.855534613] -;v0/resnet_v113/conv44/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.16548431][1.24748588] -;v0/resnet_v113/conv43/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.892552435][1.02439654] -;v0/resnet_v113/conv45/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.38158929][1.2278198] -;v0/resnet_v113/conv46/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.427276015][0.443516642] -;v0/resnet_v114/conv47/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.841789424][1.05298924] -;v0/resnet_v114/conv48/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.41122878][1.442662] -;v0/resnet_v114/conv49/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.489688694][0.519702196] -;v0/resnet_v115/conv50/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.34052479][1.40631223] -;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.4758395][1.44152474] -;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.304487377][1.30448401] -;v0/resnet_v10/conv2/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.11060238][2.87369084] -;v0/resnet_v10/conv1/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.77514219][5.30395937] -;v0/resnet_v10/conv3/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.33106434][1.196293] -;v0/resnet_v10/conv4/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.935548902][0.602392614] -;v0/resnet_v11/conv5/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.56946409][1.75627887] -;v0/resnet_v11/conv6/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.981468558][0.841647923] 
-;v0/resnet_v11/conv7/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.506155074][0.460454524] -;v0/resnet_v12/conv8/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.75538158][1.70256782] -;v0/resnet_v12/conv9/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.55484796][1.6938436] -;v0/resnet_v12/conv10/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.736209273][0.940517] -;v0/resnet_v13/conv12/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.42796779][1.78947353] -;v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.6399796][1.79320168] -;v0/resnet_v13/conv13/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.40930331][1.3039366] -;v0/resnet_v13/conv14/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.651814][0.67419225] -;v0/resnet_v14/conv15/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.29022264][1.00639808] -;v0/resnet_v14/conv16/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.20030749][1.25252628] -;v0/resnet_v14/conv17/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.587718][0.626583576] -;v0/resnet_v15/conv18/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.16951191][1.26530957] -;v0/resnet_v15/conv19/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.6214][1.45352149] -;v0/resnet_v15/conv20/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.715503633][0.939441204] -;v0/resnet_v16/conv21/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.24784][1.87065661] -;v0/resnet_v16/conv22/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.62459707][2.91558313] -;v0/resnet_v16/conv23/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.48560166][0.896476805] -;v0/resnet_v17/conv25/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.00902295][1.57955325] -;v0/resnet_v17/conv24/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.52751434][1.99196827] -;v0/resnet_v17/conv26/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.29865563][1.19726241] -;v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.54102844][0.420932889] -;v0/resnet_v18/conv28/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.882282495][1.61646938] -;v0/resnet_v18/conv29/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.25666988][1.24988043] -;v0/resnet_v18/conv30/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.679191291][0.334688604] -;v0/resnet_v19/conv31/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.09293962][1.06818759] -;v0/resnet_v19/conv32/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.957719207][0.685736477] -;v0/resnet_v19/conv33/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.372783929][0.393118203] -;v0/resnet_v110/conv34/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.8560763][1.04347813] -;v0/resnet_v110/conv35/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.51699603][1.46748197] -;v0/resnet_v110/conv36/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.488790929][0.529162884] -;v0/resnet_v111/conv37/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.975488544][1.20069242] 
-;v0/resnet_v111/conv38/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.1219362][1.1878525] -;v0/resnet_v111/conv39/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.378106654][0.651139081] -;v0/resnet_v112/conv40/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.06345284][1.39578056] -;v0/resnet_v112/conv41/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.36450183][1.21595871] -;v0/resnet_v112/conv42/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.720568597][0.681373358] -;v0/resnet_v113/conv44/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.03546441][1.62791181] -;v0/resnet_v113/conv43/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.963418424][1.20513487] -;v0/resnet_v113/conv45/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.16171658][1.22034895] -;v0/resnet_v113/conv46/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.471699][0.520357192] -;v0/resnet_v114/conv47/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.02577865][0.807502389] -;v0/resnet_v114/conv48/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.17442703][1.49876273] -;v0/resnet_v114/conv49/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.49850288][0.400045037] -;v0/resnet_v115/conv50/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.23175037][1.28971672] -;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.20616078][1.0266583] -;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.289144844][0.756919324] -;v0/resnet_v10/conv2/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.88568449][3.22792768] -;v0/resnet_v10/conv1/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.59095097][5.72576809] -;v0/resnet_v10/conv3/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.62914944][1.11597562] -;v0/resnet_v10/conv4/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.14344835][0.596615672] -;v0/resnet_v11/conv5/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.54603457][2.27800369] -;v0/resnet_v11/conv6/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.09427476][1.25711322] -;v0/resnet_v11/conv7/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.664334536][0.516881943] -;v0/resnet_v12/conv8/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.82790422][1.51486099] -;v0/resnet_v12/conv9/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.6113627][1.9622463] -;v0/resnet_v12/conv10/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.774277031][1.07640123] -;v0/resnet_v13/conv12/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.57606637][1.97948325] -;v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.95328641][2.52416539] -;v0/resnet_v13/conv13/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.40229452][1.41875923] -;v0/resnet_v13/conv14/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.742325783][0.805856645] -;v0/resnet_v14/conv15/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.26482785][1.09492409] -;v0/resnet_v14/conv16/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.11287153][1.19268537] 
-;v0/resnet_v14/conv17/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.59677732][0.719728887] -;v0/resnet_v15/conv18/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.24360132][1.08732188] -;v0/resnet_v15/conv19/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.58129394][1.62534511] -;v0/resnet_v15/conv20/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.66589427][0.758983314] -;v0/resnet_v16/conv21/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.3936758][2.01517344] -;v0/resnet_v16/conv22/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.69839215][1.7762785] -;v0/resnet_v16/conv23/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.823517382][0.723294318] -;v0/resnet_v17/conv25/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.9658764][1.06713283] -;v0/resnet_v17/conv24/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.30694115][2.09633899] -;v0/resnet_v17/conv26/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.35698402][1.33498824] -;v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.520135403][0.440737754] -;v0/resnet_v18/conv28/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.885253][2.05588365] -;v0/resnet_v18/conv29/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.70881641][1.14186382] -;v0/resnet_v18/conv30/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.710008323][0.420312673] -;v0/resnet_v19/conv31/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.23497605][1.24185574] -;v0/resnet_v19/conv32/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.30509567][0.909225106] -;v0/resnet_v19/conv33/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.476253152][0.424572229] -;v0/resnet_v110/conv34/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.874041796][0.954642355] -;v0/resnet_v110/conv35/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.48129535][0.97538203] -;v0/resnet_v110/conv36/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.447237849][0.57939589] -;v0/resnet_v111/conv37/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.893094659][1.02162695] -;v0/resnet_v111/conv38/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.14698362][1.16969097] -;v0/resnet_v111/conv39/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.402298927][0.459685713] -;v0/resnet_v112/conv40/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.04608262][1.56823051] -;v0/resnet_v112/conv41/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.70552957][1.48831975] -;v0/resnet_v112/conv42/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.699458778][0.661363959] -;v0/resnet_v113/conv44/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.13192344][1.16559303] -;v0/resnet_v113/conv43/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.931751311][1.25302839] -;v0/resnet_v113/conv45/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.17141557][1.20505726] -;v0/resnet_v113/conv46/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.456391305][0.443486601] -;v0/resnet_v114/conv47/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.828044891][1.22565579] 
-;v0/resnet_v114/conv48/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.11665368][1.46994638] -;v0/resnet_v114/conv49/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.497601181][0.522098184] -;v0/resnet_v115/conv50/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.30524313][1.91761816] -;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.88635671][1.92693794] -;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.356743425][1.05536437] -;v0/resnet_v10/conv2/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.61416][3.11280107] -;v0/resnet_v10/conv1/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.18323421][5.95803833] -;v0/resnet_v10/conv3/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.66973853][1.34627759] -;v0/resnet_v10/conv4/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.16336179][0.666640639] -;v0/resnet_v11/conv5/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.20986414][1.65369797] -;v0/resnet_v11/conv6/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.998955905][1.2186538] -;v0/resnet_v11/conv7/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.778871715][0.679018378] -;v0/resnet_v12/conv8/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.74122608][1.44462812] -;v0/resnet_v12/conv9/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.44279385][1.89457905] -;v0/resnet_v12/conv10/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.965224147][1.02991188] -;v0/resnet_v13/conv12/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.52419889][1.92583573] -;v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.00354695][2.4411087] -;v0/resnet_v13/conv13/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.4945358][1.52526283] -;v0/resnet_v13/conv14/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.882729709][0.839008093] -;v0/resnet_v14/conv15/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.40108418][1.03312254] -;v0/resnet_v14/conv16/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.13628161][3.31805038] -;v0/resnet_v14/conv17/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.26526785][3.93192625] -;v0/resnet_v15/conv18/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.85986233][1.9111433] -;v0/resnet_v15/conv19/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.9572562][7.1141448] -;v0/resnet_v15/conv20/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-5.60210943][5.29091692] -;v0/resnet_v16/conv21/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.33857393][4.43425417] -;v0/resnet_v16/conv22/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.02181888][7.49538612] -;v0/resnet_v16/conv23/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-6.97133684][5.78117466] -;v0/resnet_v17/conv25/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.2639432][5.3414464] -;v0/resnet_v17/conv24/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.95319223][2.29997] -;v0/resnet_v17/conv26/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.00888634][5.20611858] 
-;v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.69472456][2.2488997] -;v0/resnet_v18/conv28/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.55093372][5.18658257] -;v0/resnet_v18/conv29/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.29742503][3.41687059] -;v0/resnet_v18/conv30/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.82021403][1.90366125] -;v0/resnet_v19/conv31/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.62868261][4.47339869] -;v0/resnet_v19/conv32/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.32918191][4.84198332] -;v0/resnet_v19/conv33/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.35049391][2.75453949] -;v0/resnet_v110/conv34/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.74342][5.60196781] -;v0/resnet_v110/conv35/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.38739312][5.82117224] -;v0/resnet_v110/conv36/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.47332168][1.83790684] -;v0/resnet_v111/conv37/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.98121464][6.33186579] -;v0/resnet_v111/conv38/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.39565516][4.16202068] -;v0/resnet_v111/conv39/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.99362326][1.94654655] -;v0/resnet_v112/conv40/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.80085516][5.00398254] -;v0/resnet_v112/conv41/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.81045282][5.44377804] -;v0/resnet_v112/conv42/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.52229047][2.14948368] -;v0/resnet_v113/conv44/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.17810464][1.10764074] -;v0/resnet_v113/conv43/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.933679283][1.26655352] -;v0/resnet_v113/conv45/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.28328717][1.48404419] -;v0/resnet_v113/conv46/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.450555533][0.452631205] -;v0/resnet_v114/conv47/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.10988784][1.07834661] -;v0/resnet_v114/conv48/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.4684329][1.50729132] -;v0/resnet_v114/conv49/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.501977503][0.408521205] -;v0/resnet_v115/conv50/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.934378445][1.37620819] -;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.30085874][1.26284444] -;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.398190498][0.893309116] -;v0/resnet_v10/conv2/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.90090656][3.66339827] -;v0/resnet_v10/conv1/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.90466189][5.18431473] -;v0/resnet_v10/conv3/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.68160319][1.17002857] -;v0/resnet_v10/conv4/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.106148][0.620109379] -;v0/resnet_v11/conv5/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.74902368][1.57100797] 
-;v0/resnet_v11/conv6/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.803579092][0.722664237] -;v0/resnet_v11/conv7/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.574244082][0.486224622] -;v0/resnet_v12/conv8/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.82494473][1.54078221] -;v0/resnet_v12/conv9/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.40112126][1.94129252] -;v0/resnet_v12/conv10/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.677609086][0.902351797] -;v0/resnet_v13/conv12/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.52799273][2.02943611] -;v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.82513714][1.82887888] -;v0/resnet_v13/conv13/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.05910158][1.3719157] -;v0/resnet_v13/conv14/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.624048114][0.715833127] -;v0/resnet_v14/conv15/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.21174705][1.12130916] -;v0/resnet_v14/conv16/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.11129761][1.2323221] -;v0/resnet_v14/conv17/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.57105875][1.43649042] -;v0/resnet_v15/conv18/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.29588354][1.30428636] -;v0/resnet_v15/conv19/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.89407218][1.5131489] -;v0/resnet_v15/conv20/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.10530233][1.05870843] -;v0/resnet_v16/conv21/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.30870032][1.93537426] -;v0/resnet_v16/conv22/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.38844347][2.00554633] -;v0/resnet_v16/conv23/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.02810574][0.791944] -;v0/resnet_v17/conv25/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.993314624][1.07680011] -;v0/resnet_v17/conv24/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.33883202][1.82820857] -;v0/resnet_v17/conv26/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.10397863][1.35471499] -;v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.603086591][0.511982262] -;v0/resnet_v18/conv28/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.800626934][1.68927562] -;v0/resnet_v18/conv29/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.27554882][1.08101308] -;v0/resnet_v18/conv30/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.668885052][0.453903407] -;v0/resnet_v19/conv31/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.33282125][1.39549875] -;v0/resnet_v19/conv32/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.12386763][0.649615526] -;v0/resnet_v19/conv33/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.425400287][0.321278334] -;v0/resnet_v110/conv34/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.927556634][1.11679173] -;v0/resnet_v110/conv35/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.29108381][1.03515482] -;v0/resnet_v110/conv36/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.480029732][0.542360902] 
-;v0/resnet_v111/conv37/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.954746306][0.984010041] -;v0/resnet_v111/conv38/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.29352224][1.31652212] -;v0/resnet_v111/conv39/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.462044179][0.387033463] -;v0/resnet_v112/conv40/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.853172839][1.61093104] -;v0/resnet_v112/conv41/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.8041122][1.20438528] -;v0/resnet_v112/conv42/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.716440916][0.756617546] -;v0/resnet_v113/conv44/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.925331056][1.13324261] -;v0/resnet_v113/conv43/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.906496525][1.44400156] -;v0/resnet_v113/conv45/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.22445571][1.23552155] -;v0/resnet_v113/conv46/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.425581753][0.412563354] -;v0/resnet_v114/conv47/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.975463629][1.00844491] -;v0/resnet_v114/conv48/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.26098871][1.60355067] -;v0/resnet_v114/conv49/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.58918345][0.4894678] -;v0/resnet_v115/conv50/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.06941962][1.37577426] -;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.3333025][1.49486125] -;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.305293679][0.769064903] -;v0/resnet_v10/conv2/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.75725317][3.01207042] -;v0/resnet_v10/conv1/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.21370029][5.17486668] -;v0/resnet_v10/conv3/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.94377756][1.71933043] -;v0/resnet_v10/conv4/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.03485513][0.654445052] -;v0/resnet_v11/conv5/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.22178507][1.86535048] -;v0/resnet_v11/conv6/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.63824773][1.31444776] -;v0/resnet_v11/conv7/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.666594744][0.585671246] -;v0/resnet_v12/conv8/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.8172003][1.55111682] -;v0/resnet_v12/conv9/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.71566653][1.95697534] -;v0/resnet_v12/conv10/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.721360385][1.13738668] -;v0/resnet_v13/conv12/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.63624716][1.90127349] -;v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.98574793][2.18644071] -;v0/resnet_v13/conv13/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.51347816][1.5947597] -;v0/resnet_v13/conv14/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.657982][0.750054419] -;v0/resnet_v14/conv15/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.27283764][1.0472188] 
-;v0/resnet_v14/conv16/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.12362254][1.67250967] -;v0/resnet_v14/conv17/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.15209436][1.9745084] -;v0/resnet_v15/conv18/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.33703196][1.3116194] -;v0/resnet_v15/conv19/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.76092041][2.36821651] -;v0/resnet_v15/conv20/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.85271668][1.7615068] -;v0/resnet_v16/conv21/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.37395227][2.32514763] -;v0/resnet_v16/conv22/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.93566513][2.6953373] -;v0/resnet_v16/conv23/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.69007027][1.39085388] -;v0/resnet_v17/conv25/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.00678873][1.13436401] -;v0/resnet_v17/conv24/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.38781929][1.90962505] -;v0/resnet_v17/conv26/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.2412442][1.22290671] -;v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.663186729][0.433356702] -;v0/resnet_v18/conv28/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.95832026][2.15463829] -;v0/resnet_v18/conv29/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.58990884][1.33241582] -;v0/resnet_v18/conv30/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.594862223][0.5173195] -;v0/resnet_v19/conv31/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.17232859][1.24870813] -;v0/resnet_v19/conv32/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.30059433][0.852134883] -;v0/resnet_v19/conv33/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.560698509][0.358376324] -;v0/resnet_v110/conv34/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.949972451][1.09847796] -;v0/resnet_v110/conv35/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.2506305][1.19866836] -;v0/resnet_v110/conv36/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.48800683][0.578629911] -;v0/resnet_v111/conv37/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.879816][1.22891736] -;v0/resnet_v111/conv38/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.04758978][1.25133026] -;v0/resnet_v111/conv39/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.421158344][0.682906389] -;v0/resnet_v112/conv40/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.04316366][2.19171953] -;v0/resnet_v112/conv41/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.03447962][1.50098] -;v0/resnet_v112/conv42/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.645033419][0.780501902] -;v0/resnet_v113/conv44/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.13911474][1.17989135] -;v0/resnet_v113/conv43/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.04487693][1.16453469] -;v0/resnet_v113/conv45/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.43325555][1.4169997] -;v0/resnet_v113/conv46/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.496590227][0.436709017] 
-;v0/resnet_v114/conv47/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.892589331][1.08696377] -;v0/resnet_v114/conv48/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.40052092][1.42885625] -;v0/resnet_v114/conv49/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.642720699][0.468676984] -;v0/resnet_v115/conv50/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.35432708][1.37172294] -;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.33128023][1.16624808] -;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.325628638][1.03345191] -;v0/resnet_v10/conv2/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.79372025][3.2666955] -;v0/resnet_v10/conv1/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.07322264][4.93434048] -;v0/resnet_v10/conv3/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.64135015][1.42532492] -;v0/resnet_v10/conv4/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.13530314][0.60038358] -;v0/resnet_v11/conv5/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.48166907][1.80129182] -;v0/resnet_v11/conv6/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.24496746][0.884974122] -;v0/resnet_v11/conv7/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.648400366][0.53816247] -;v0/resnet_v12/conv8/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.81274545][1.39919615] -;v0/resnet_v12/conv9/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.67847419][1.65659547] -;v0/resnet_v12/conv10/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.708594501][1.04415751] -;v0/resnet_v13/conv12/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.64675367][2.0561235] -;v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.91494429][1.92832673] -;v0/resnet_v13/conv13/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.98749602][1.40850019] -;v0/resnet_v13/conv14/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.653677762][0.808636487] -;v0/resnet_v14/conv15/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.55690169][0.984876454] -;v0/resnet_v14/conv16/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.07066429][1.22749114] -;v0/resnet_v14/conv17/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.607724965][0.612767339] -;v0/resnet_v15/conv18/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.3138684][1.26634896] -;v0/resnet_v15/conv19/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.73019886][1.41890061] -;v0/resnet_v15/conv20/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.657149255][0.757130742] -;v0/resnet_v16/conv21/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.42808521][2.05491757] -;v0/resnet_v16/conv22/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.82730973][1.57526231] -;v0/resnet_v16/conv23/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.911267221][0.769645] -;v0/resnet_v17/conv25/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.00837851][1.18119514] -;v0/resnet_v17/conv24/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.31879735][1.65257013] 
-;v0/resnet_v17/conv26/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.08541226][1.111305] -;v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.533977747][0.492067635] -;v0/resnet_v18/conv28/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.813677311][1.52405667] -;v0/resnet_v18/conv29/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.19907117][1.05136299] -;v0/resnet_v18/conv30/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.536006689][0.416334569] -;v0/resnet_v19/conv31/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.27603114][1.42808652] -;v0/resnet_v19/conv32/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.08407402][0.963464141] -;v0/resnet_v19/conv33/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.40220508][0.638989389] -;v0/resnet_v110/conv34/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.845989823][1.14943433] -;v0/resnet_v110/conv35/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.37689281][1.13927412] -;v0/resnet_v110/conv36/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.475551605][0.519900084] -;v0/resnet_v111/conv37/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.10892475][0.974682] -;v0/resnet_v111/conv38/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.41514647][1.43834281] -;v0/resnet_v111/conv39/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.398008972][0.590463638] -;v0/resnet_v112/conv40/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.873371542][1.32158434] -;v0/resnet_v112/conv41/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.44471264][1.14847386] -;v0/resnet_v112/conv42/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.739494443][0.624114394] -;v0/resnet_v113/conv44/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.15658557][1.25570917] -;v0/resnet_v113/conv43/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.888308883][1.03205156] -;v0/resnet_v113/conv45/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.12025487][1.2330718] -;v0/resnet_v113/conv46/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.448830247][0.503454506] -;v0/resnet_v114/conv47/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.08839881][0.987764776] -;v0/resnet_v114/conv48/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.12537217][1.35726976] -;v0/resnet_v114/conv49/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.521582305][0.509963751] -;v0/resnet_v115/conv50/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.2228663][1.34219229] -;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.56795228][2.64074159] -;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.336564869][1.54051161] -;v0/resnet_v10/conv2/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-5.96055079][4.22171402] -;v0/resnet_v10/conv1/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.76893067][5.81977367] -;v0/resnet_v10/conv3/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.343189][2.05322623] -;v0/resnet_v10/conv4/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.43724322][0.70678246] 
-;v0/resnet_v11/conv5/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.20922565][2.65648413] -;v0/resnet_v11/conv6/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.975474119][1.2302084] -;v0/resnet_v11/conv7/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.7435413][0.569358587] -;v0/resnet_v12/conv8/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.09674501][1.88101411] -;v0/resnet_v12/conv9/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.88447452][2.03233242] -;v0/resnet_v12/conv10/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.2320497][1.06085789] -;v0/resnet_v13/conv12/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.75268817][2.2860086] -;v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.11649656][2.6228497] -;v0/resnet_v13/conv13/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.68657625][1.42877555] -;v0/resnet_v13/conv14/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.920021236][0.907022953] -;v0/resnet_v14/conv15/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.40140581][1.03680682] -;v0/resnet_v14/conv16/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.07193506][3.73468208] -;v0/resnet_v14/conv17/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.80103254][4.43050241] -;v0/resnet_v15/conv18/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.07251525][1.98044181] -;v0/resnet_v15/conv19/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.79900193][8.57474422] -;v0/resnet_v15/conv20/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-6.75664377][6.38790607] -;v0/resnet_v16/conv21/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.78325057][4.79216] -;v0/resnet_v16/conv22/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.92663217][10.3216963] -;v0/resnet_v16/conv23/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-9.56002522][7.92618179] -;v0/resnet_v17/conv25/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.23777366][7.37546301] -;v0/resnet_v17/conv24/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.77396703][3.04502892] -;v0/resnet_v17/conv26/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-5.19724178][9.33259583] -;v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-6.44685364][4.02765942] -;v0/resnet_v18/conv28/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.72022939][8.90416241] -;v0/resnet_v18/conv29/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.58583724][6.84438562] -;v0/resnet_v18/conv30/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-5.68286705][3.89168477] -;v0/resnet_v19/conv31/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-3.01607227][8.166008] -;v0/resnet_v19/conv32/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.5950278][10.0043745] -;v0/resnet_v19/conv33/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.95501041][5.68928432] -;v0/resnet_v110/conv34/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.96661019][10.209343] -;v0/resnet_v110/conv35/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.93904316][12.093955] 
-;v0/resnet_v110/conv36/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-5.16923][3.8723011] -;v0/resnet_v111/conv37/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.05513048][11.8921309] -;v0/resnet_v111/conv38/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.58725524][8.86899948] -;v0/resnet_v111/conv39/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-4.22780132][4.16967058] -;v0/resnet_v112/conv40/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-7.46910858][9.56942749] -;v0/resnet_v112/conv41/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.24646974][11.5950613] -;v0/resnet_v112/conv42/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-5.39486551][4.64164352] -;v0/resnet_v113/conv44/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.41026187][1.28409362] -;v0/resnet_v113/conv43/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.01562774][1.24366593] -;v0/resnet_v113/conv45/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.19231975][1.3584007] -;v0/resnet_v113/conv46/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.591259658][0.517282248] -;v0/resnet_v114/conv47/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.10568106][0.967194498] -;v0/resnet_v114/conv48/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.21551704][1.37404895] -;v0/resnet_v114/conv49/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.521544099][0.441248208] -;v0/resnet_v115/conv50/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.24544978][1.42927349] -;v0/resnet_v115/conv51/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.45858741][1.28942311] -;v0/resnet_v115/conv52/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-0.427192688][1.1711427] diff --git a/docs/image_recognition/tensorflow/Tutorial.md b/docs/image_recognition/tensorflow/Tutorial.md index dc2b3a511..7bbbabf59 100644 --- a/docs/image_recognition/tensorflow/Tutorial.md +++ b/docs/image_recognition/tensorflow/Tutorial.md @@ -13,7 +13,7 @@ There are 2 main setbacks for CNNs for performance: 1. Deeply layering convolutions causes the number of training parameters to increase drastically. 2. Linear convolution filters cannot learn size-invariant features without using separate filter for each size regime. -ResNet models use gate and skip logic to address issue #1 and lower the number of parameters, similar to a recurrent neural network (RNN). The InceptionV3 model utilizes “network in network” mini perceptrons to convert linear convolutions into non-linear convolutions in a compact step, addressing issue #2. InceptionV3 also includes optimization that factor and vectorize the convolutions, further increasing the speed of the network. +ResNet models use gate and skip logic to address issue 1 and lower the number of parameters, similar to a recurrent neural network (RNN). The InceptionV3 model utilizes “network in network” mini perceptrons to convert linear convolutions into non-linear convolutions in a compact step, addressing issue 2. InceptionV3 also includes optimization that factor and vectorize the convolutions, further increasing the speed of the network. ## Recommended Settings @@ -127,11 +127,11 @@ git clone https://github.com/IntelAI/models.git any other directory of your choice. 
```
-wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/resnet50_fp32_pretrained_model.pb
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb

-wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/resnet101_fp32_pretrained_model.pb
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet101_fp32_pretrained_model.pb

-wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/inceptionv3_fp32_pretrained_model.pb
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv3_fp32_pretrained_model.pb
```
Refer to the following Readme files to get the latest locations of the pretrained models:
a. [ResNet50](/benchmarks/image_recognition/tensorflow/resnet50)
@@ -141,7 +141,7 @@ c. [InceptionV3](/benchmarks/image_recognition/tensorflow/inceptionv3)
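The files fetched above are frozen TensorFlow graphs (`.pb` files). If you would like to sanity-check a downloaded graph before running the benchmark, the sketch below is one way to do it. This is an added illustration rather than part of the official steps: it assumes TensorFlow is installed in your local Python environment, and the file name is only an example that you should replace with the model you actually downloaded.

```python
# Optional sanity check for a downloaded frozen graph; not required by the tutorial.
# Assumes TensorFlow (1.15 or 2.x) is installed locally.
import tensorflow as tf

PB_FILE = "resnet50_fp32_pretrained_model.pb"  # example name; use your downloaded file

graph_def = tf.compat.v1.GraphDef()
with tf.io.gfile.GFile(PB_FILE, "rb") as f:
    graph_def.ParseFromString(f.read())  # fails loudly if the download is truncated or corrupt

with tf.Graph().as_default():
    # Importing verifies that the graph's nodes and attributes are resolvable.
    tf.compat.v1.import_graph_def(graph_def, name="")

print("Parsed %d nodes from %s" % (len(graph_def.node), PB_FILE))
```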
3. (optional) Download and setup a data directory that has image files in TFRecord format if you are inferring on a real dataset. You can refer to [ImageNet](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) or [Coco Dataset](http://cocodataset.org/#home) which have images converted to TFRecords, or you can run the [build_image_data.py](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/inception/inception/data/build_image_data.py) script to convert raw images into TFRecords. -4. Install [Docker](https://docs.docker.com/v17.09/engine/installation/) since the tutorial runs on a Docker container. +4. Install [Docker](https://docs.docker.com/install/) since the tutorial runs on a Docker container. ### Run inference @@ -180,7 +180,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for ResNet50. The --batch-size 1 \ --benchmark-only \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 *Real data* @@ -194,7 +194,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for ResNet50. The --batch-size 1 \ --benchmark-only \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 3.1.2 ResNet101 @@ -209,7 +209,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for ResNet50. The --mode inference \ --batch-size 1 \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 *Real data* @@ -222,7 +222,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for ResNet50. The --mode inference \ --batch-size 1 \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 3.1.3 InceptionV3 @@ -239,7 +239,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for InceptionV3. T --batch-size 1 \ --benchmark-only \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 *Real data* @@ -253,7 +253,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for InceptionV3. T --batch-size 1 \ --benchmark-only \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 3.2. *Best Batch inference*(batch_size=128) @@ -273,7 +273,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for ResNet50. The --batch-size 128 \ --benchmark-only \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 *Real data* @@ -287,7 +287,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for ResNet50. The --batch-size 128 \ --benchmark-only \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 3.2.2 ResNet101 @@ -302,7 +302,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for ResNet50. The --mode inference \ --batch-size 128 \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 *Real data* @@ -315,7 +315,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for ResNet50. 
The --mode inference \ --batch-size 128 \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 3.2.3 InceptionV3 @@ -329,10 +329,10 @@ Note: As per the recommended settings `socket-id` is set to 0 for InceptionV3. T --framework tensorflow \ --precision fp32 \ --mode inference \ - --batch-size 1 \ + --batch-size 128 \ --benchmark-only \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 *Real data* @@ -343,10 +343,10 @@ Note: As per the recommended settings `socket-id` is set to 0 for InceptionV3. T --framework tensorflow \ --precision fp32 \ --mode inference \ - --batch-size 1 \ + --batch-size 128 \ --benchmark-only \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest + --docker-image intel/intel-optimized-tensorflow:2.1.0 Example Output @@ -366,7 +366,7 @@ Note: As per the recommended settings `socket-id` is set to 0 for InceptionV3. T The logs are captured in a directory outside of the container.
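Each run also saves that console summary to a log file in the captured log directory. As an added illustration (not one of the official steps), the sketch below gathers the reported throughput across runs; it assumes the default log location `~/models/benchmarks/common/tensorflow/logs` and that each summary line contains the word `Throughput`, so adjust both to match your setup.

```python
# Added sketch: collect the throughput reported in benchmark log files.
# Assumes logs are written to ~/models/benchmarks/common/tensorflow/logs and that
# the summary line of each run contains the word "Throughput"; adjust as needed.
import glob
import os

log_dir = os.path.expanduser("~/models/benchmarks/common/tensorflow/logs")

for path in sorted(glob.glob(os.path.join(log_dir, "*.log"))):
    with open(path, errors="ignore") as log_file:
        throughput_lines = [line.strip() for line in log_file if "Throughput" in line]
    if throughput_lines:
        print(os.path.basename(path))
        for line in throughput_lines:
            print("  " + line)
```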
-4. If you want to run the model script interactively within the docker container, run ```launch_benchmark.py``` with ```--debug``` flag. This will launch a docker container based on the ```--docker_image```, +4. If you want to run the model script interactively within the docker container, run ```launch_benchmark.py``` with ```--debug``` flag. This will launch a docker container based on the ```--docker-image```, performs necessary installs, runs the ```launch_benchmark.py``` script and does not terminate the container process. As an example, this step will demonstrate ResNet50 Real Time inference on Synthetic Data use case, you can implement the same strategy on different use cases demoed in Step 3. @@ -378,7 +378,7 @@ you can implement the same strategy on different use cases demoed in Step 3. --mode inference \ --batch-size 1 \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:latest \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --debug Example Output diff --git a/docs/language_translation/tensorflow/Tutorial.md b/docs/language_translation/tensorflow/Tutorial.md new file mode 100644 index 000000000..1bd7e0469 --- /dev/null +++ b/docs/language_translation/tensorflow/Tutorial.md @@ -0,0 +1,246 @@ +# Language Translation with Transformer-LT + + +## Goal +This tutorial will introduce CPU performance considerations of the deep learning Transformer-LT model for language translation and how to use Intel® Optimizations for TensorFlow to improve inference time on CPUs. +This tutorial will also provide code examples to use Intel Model Zoo's pretrained English to German model that can be copy/pasted for quick off-the-ground implementation on real data. + +## Background +Language Translation with deep learning is a computationally expensive endeavor. This tutorial will show you how to reduce the inference runtime of your Transformer-LT network, a popular topology solution to translation. +It is based on an encoder-decoder architecture with an added attention mechanism. The encoder is used to encode the original sentence to a meaningful fixed-length vector, and the decoder is responsible for extracting the context data from the vector. +The encoder and decoder process the inputs and outputs, which are in the form of a time sequence. + +In a traditional encoder/decoder model, each element in the context vector is treated equally. This is typically not the ideal solution. +For instance, when you translate the phrase “I travel by train” from English into Chinese, the word “I” has a greater influence than other words when producing its counterpart in Chinese. +Thus, the attention mechanism was introduced to differentiate contributions of each element in the source sequence to their counterpart in the destination sequence, through the use of a hidden matrix. +This matrix contains weights of each element in the source sequence when producing elements in the destination sequence. + + +## Recommended Settings +In addition to TensorFlow optimizations that use the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN) to utilize instruction sets appropriately, the runtime settings also significantly contribute to improved performance. +Tuning these options to optimize CPU workloads is vital to optimize performance of TensorFlow on Intel® processors. +Below are the set of run-time options tested empirically on Transformer-LT and recommended by Intel: + + +| Run-time options | Recommendations | +| ------------- | ------------- | +| Batch Size | 64. 
Regardless of the hardware | +| Hyperthreading | Enabled. Turn on in BIOS. Requires a restart. | +|intra_op_parallelism_threads |# physical cores | +|inter_op_parallelism_threads | 1 | +|NUMA Controls| --cpunodebind=0 --membind=0 | +|KMP_AFFINITY| KMP_AFFINITY=granularity=fine,verbose,compact,1,0| +|KMP_BLOCKTIME| 1 | +|OMP_NUM_THREADS |physical cores| + +Note 1: Refer to this [link](https://software.intel.com/en-us/articles/maximize-tensorflow-performance-on-cpu-considerations-and-recommendations-for-inference) to learn more about the run time options. + +Note 2: You can remove `verbose` from `KMP_AFFINITY` setting to avoid verbose output at runtime. + +Run the following commands to get your processor information: + +a. #physical cores per socket : `lscpu | grep "Core(s) per socket" | cut -d':' -f2 | xargs` + +b. #all physical cores: `lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l` + +Below is a code snippet you can incorporate into your existing TensorFlow application to set the best settings. +You can either set them in the CLI or in the Python script. Note that inter and intra_op_parallelism_threads settings can only be set +in the Python script. + +```bash +export OMP_NUM_THREADS=physical cores +export KMP_AFFINITY="granularity=fine,verbose,compact,1,0" +export KMP_BLOCKTIME=1 +export KMP_SETTINGS=1 +``` +(or) +``` +import os +os.environ["KMP_BLOCKTIME"] = "1" +os.environ["KMP_SETTINGS"] = "1" +os.environ["KMP_AFFINITY"]= "granularity=fine,verbose,compact,1,0" +os.environ["OMP_NUM_THREADS"]= <# physical cores> +config = tf.ConfigProto() +config.intra_op_parallelism_threads = <# physical cores> +config.inter_op_parallelism_threads = 1 +tf.Session(config=config) +``` + +## Hands-on Tutorial +This section shows how to measure inference performance on Intel's Model Zoo pretrained model (or your pretrained model) by setting the above-discussed run time flags. +### FP32 inference + +### Initial Setup + +1. Clone IntelAI models and download into your home directory, skip this step if you already have Intel AI models installed. + +```bash +cd ~ +git clone https://github.com/IntelAI/models.git +``` + +2. Skip to step 3 if you already have a pretrained model or download the file `transformer_lt_official_fp32_pretrained_model.tar.gz` into your ~/transformer_LT_german location. +``` +mkdir ~/transformer_LT_german +cd ~/transformer_LT_german +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/transformer_lt_official_fp32_pretrained_model.tar.gz +tar -xzvf transformer_lt_official_fp32_pretrained_model.tar.gz +``` +Refer to the Transformer LT Official [README](/benchmarks/language_translation/tensorflow/transformer_lt_official) to get the latest location of the pretrained model. + +3. After extraction, you should see the following folders and files in the `transformer_lt_official_fp32_pretrained_model` directory: +``` +$ ls -l transformer_lt_official_fp32_pretrained_model/* + +transformer_lt_official_fp32_pretrained_model/data: +total 1064 +-rw-r--r--. 1 359898 Feb 20 16:05 newstest2014.en +-rw-r--r--. 1 399406 Feb 20 16:05 newstest2014.de +-rw-r--r--. 1 324025 Mar 15 17:31 vocab.txt + +transformer_lt_official_fp32_pretrained_model/graph: +total 241540 +-rwx------. 1 247333269 Mar 15 17:29 fp32_graphdef.pb + +``` +`newstest2014.en`: Input file with English text
+`newstest2014.de`: German translation of the input file for measuring accuracy
+`vocab.txt`: The vocabulary file used by the model
+`fp32_graphdef.pb`: Pretrained model
+
+Or, if you have your own model and data, make sure your folder structure matches the one depicted below so the pretrained model can be run with the Intel Model Zoo scripts.
+
+```
+├─ transformer_LT_german
+│   ├── transformer_pretrained_model
+│   ├── data
+│   │   ├── newstest2014.en (Input file)
+│   │   ├── newstest2014.de (Reference file, this is optional)
+│   │   └── vocab.txt
+│   └── graph
+│       └── pretrained_model.pb
+```
+4. Install [Docker](https://docs.docker.com/install/) since the tutorial runs in a Docker container.
+
+### Run inference
+
+1. Pull the relevant Intel-optimized TensorFlow Docker image.
+   [Click here](https://software.intel.com/en-us/articles/intel-optimization-for-tensorflow-installation-guide) to find all the available Docker images.
+```bash
+docker pull docker.io/intel/intel-optimized-tensorflow:2.1.0
+```
+2. cd to the inference script directory in the local IntelAI repo:
+```bash
+cd ~/models/benchmarks
+```
+3. Run the Python script ```launch_benchmark.py``` with the pretrained model.
+The ```launch_benchmark.py``` script can be treated as an entry point to conveniently perform out-of-box, high-performance
+inference on pretrained models for popular topologies.
+The script will automatically set the recommended run-time options for supported topologies,
+but if you choose to set your own options, refer to the full list of available flags and a detailed
+explanation of the ```launch_benchmark.py``` script [here](/docs/general/tensorflow/LaunchBenchmark.md).
+ This step will automatically launch a new container on every run and terminate it when the run completes. Go to [Step 4](#step_4) to run the script interactively in the container.
+
+3.1. *Online inference* (using `--socket-id 0` and `--batch-size 1`)
+
+If you wish to calculate the [BLEU](https://en.wikipedia.org/wiki/BLEU) metric to measure machine-translation quality, pass the reference file via the `reference` flag.
+The `newstest2014.en` file must have only one sentence per line.
+
+
+console in:
+```bash
+python launch_benchmark.py \
+    --model-name transformer_lt_official \
+    --precision fp32 \
+    --mode inference \
+    --framework tensorflow \
+    --batch-size 1 \
+    --socket-id 0 \
+    --docker-image intel/intel-optimized-tensorflow:2.1.0 \
+    --in-graph ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \
+    --data-location ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/data \
+    -- file=newstest2014.en \
+    vocab_file=vocab.txt \
+    file_out=translate.txt \
+    reference=newstest2014.de
+```
+
+The translated German text will be in the file `translate.txt` located at `~/models/benchmarks/common/tensorflow/logs`.
+
+3.2. *Batch inference* (using `--socket-id 0` and `--batch-size 64`)
+
+```bash
+python launch_benchmark.py \
+    --model-name transformer_lt_official \
+    --precision fp32 \
+    --mode inference \
+    --framework tensorflow \
+    --batch-size 64 \
+    --socket-id 0 \
+    --docker-image intel/intel-optimized-tensorflow:2.1.0 \
+    --in-graph ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \
+    --data-location ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/data \
+    -- file=newstest2014.en \
+    vocab_file=vocab.txt \
+    file_out=translate.txt \
+    reference=newstest2014.de
+```
+console out:
+```
+Graph parsed in ..... s
+import_graph_def took .....s
+tokenizer took ..... s
+Translating 3003 sentences from English to German.
+Total inferencing time:....
+Throughput:.... sentences/second
+Total number of sentences translated:3003
+I0419 22:50:49.856748 140013257643776 compute_bleu.py:106] Case-insensitive results: 27.510020
+I0419 22:50:51.203501 140013257643776 compute_bleu.py:110] Case-sensitive results: 26.964748
+Ran inference with batch size 64
+Log location outside container: /~/models/benchmarks/common/tensorflow/logs/benchmark_transformer_lt_official_inference_fp32_20190419_224047.log
+```
+
+The logs are captured in a directory outside of the container.
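+
+Optionally, if you want a quick, independent sanity check of the translation quality outside the benchmarking scripts, you can score the generated `translate.txt` against the reference file yourself. The short sketch below is not part of the Model Zoo scripts: it assumes the `sacrebleu` package is installed (`pip install sacrebleu`), that `translate.txt` has been copied from the log directory next to `newstest2014.de`, and that both files are line-aligned with one sentence per line. Because tokenization differs, the scores may not exactly match the `compute_bleu.py` numbers shown in the log above.
+
+```python
+# Hypothetical sanity check; not part of the Model Zoo run scripts.
+import sacrebleu
+
+with open("translate.txt", encoding="utf-8") as f:
+    hypotheses = [line.strip() for line in f]
+with open("newstest2014.de", encoding="utf-8") as f:
+    references = [line.strip() for line in f]
+
+# Corpus-level BLEU, case-sensitive and case-insensitive.
+print("Case-sensitive BLEU:   %.2f" % sacrebleu.corpus_bleu(hypotheses, [references]).score)
+print("Case-insensitive BLEU: %.2f" % sacrebleu.corpus_bleu(hypotheses, [references], lowercase=True).score)
+```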
+
+4. If you want to run ```launch_benchmark.py``` interactively from within the docker container, add the ```--debug``` flag. This will launch a docker container based on the ```--docker-image```,
+perform the necessary installs, run the ```launch_benchmark.py``` script, and leave the container process running. As an example, this step demonstrates online inference (--batch-size 1), but you can implement the same strategy for batch inference (--batch-size 64).
+
+console in:
+```bash
+python launch_benchmark.py \
+    --model-name transformer_lt_official \
+    --precision fp32 \
+    --mode inference \
+    --framework tensorflow \
+    --batch-size 1 \
+    --socket-id 0 \
+    --docker-image intel/intel-optimized-tensorflow:2.1.0 \
+    --in-graph ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \
+    --data-location ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/data \
+    --debug \
+    -- file=newstest2014.en \
+    vocab_file=vocab.txt \
+    file_out=translate.txt \
+    reference=newstest2014.de
+
+```
+console out:
+```bash
+ lscpu_path_cmd = command -v lscpu
+ lscpu located here: b'/usr/bin/lscpu'
+ root@a78677f56d69:/workspace/benchmarks/common/tensorflow#
+```
+
+To rerun the benchmarking script, execute the ```start.sh``` bash script from your existing directory with the available flags, which in turn will run ```launch_benchmark.py```. For example, to rerun with a different batch size (batch size = 64), set ```BATCH_SIZE```,
+and to skip reinstalling packages, pass ```True``` to ```NOINSTALL```.
+
+```bash
+ chmod +x ./start.sh
+```
+```bash
+ NOINSTALL=True BATCH_SIZE=64 ./start.sh
+```
+
+All other flags will default to the values passed to the first ```launch_benchmark.py``` run that started the container. [See here](/docs/general/tensorflow/LaunchBenchmark.md) to get the full list of flags.
+
+
diff --git a/docs/object_detection/tensorflow/Tutorial.md b/docs/object_detection/tensorflow/Tutorial.md
deleted file mode 100755
index 75687c3d6..000000000
--- a/docs/object_detection/tensorflow/Tutorial.md
+++ /dev/null
@@ -1,290 +0,0 @@
-# Object Detection with SSD-VGG16
-
-
-## Goal
-This tutorial will introduce CPU performance considerations of the deep learning SSD-VGG16 model for object detection and how to use Intel® Optimizations for TensorFlow to improve inference time on CPUs.
-This tutorial will also provide code examples to use Intel Model Zoo's pretrained SSD-VGG16 model on the COCO dataset that can be copy/pasted for a quick off-the-ground implementation on real data.
-
-## Background
-
-Object detection is breaking into a wide range of industrial applications with some of the top uses cases including computer vision, surveillance, automated vehicle system, etc. One of the widely used topologies used in this space is SSD-VGG16 for its popularity to speed-up real-time inference. Unlike Faster R-CNN, SSD-VGG16 eliminates the need of region-proposal-network to predict the boundary boxes but uses feature maps instead. The modeling process is split into 2 parts.
-
- 1. Feature extraction where the base network is a collection on VGG16 convolution layers and the output from this layer is fed into the detection phase.
- 2. Detection where the entire network is a sequence of CNNs progressively decreasing in size extracting features and reducing the feature maps. Each added feature layer in the CNNs produces a fixed set of detection predictions with a fixed-size collection of bounding boxes and scores for the presence of class instances in those boxes, followed by a non-maximum suppression step to produce the final detections.
- -## Recommended Settings -In addition to TensorFlow optimizations that use the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN) to utilize instruction sets appropriately, the runtime settings also significantly contribute to improved performance. -Tuning these options to optimize CPU workloads is vital to optimize the performance of TensorFlow on Intel processors. -Below are the set of run-time options tested empirically and recommended by Intel for two Intel® Xeon scalable processors on an optimized SSD-VGG16 pretrained model. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Run-time optionsRecommendations for Intel® Xeon Scalable Processors
28-core 1st gen28-core 2nd gen
HyperthreadingEnabled. Turn on in BIOS. Requires a restart.
intra_op_parallelism_threads1421
inter_op_parallelism_threads211
data_num_inter_threads221
data_num_intra_threads728
NUMA Controls--cpunodebind=0 --membind=0
KMP_AFFINITYKMP_AFFINITY=granularity=fine,verbose,compact,1,0
KMP_BLOCKTIME1
OMP_NUM_THREADS28
- -Note 1: Refer to this [link](https://software.intel.com/en-us/articles/maximize-tensorflow-performance-on-cpu-considerations-and-recommendations-for-inference) to learn more about the run time options. - - -Below is a code snippet you can incorporate into your existing TensorFlow application to set the best settings. The values shown are for a 2nd gen Xeon scalable processor. -Note that these recommended settings are hardware and dataset-specific (COCO dataset). These settings are provided to give users a good starting point to tune model run-time settings and may vary based on the hardware choice. -You can either set them in the CLI or the Python script. Note that inter and intra_op_parallelism_threads settings can only be set -in the Python script. - -```bash -export OMP_NUM_THREADS=11 -export KMP_AFFINITY="granularity=fine,verbose,compact,1,0" -export KMP_BLOCKTIME=1 -export KMP_SETTINGS=1 -``` -(or) -``` -import os -os.environ["KMP_BLOCKTIME"] = "1" -os.environ["KMP_SETTINGS"] = "1" -os.environ["KMP_AFFINITY"]= "granularity=fine,verbose,compact,1,0" -os.environ["OMP_NUM_THREADS"]= 11 -config = tf.ConfigProto() -config.intra_op_parallelism_threads = 21 -config.inter_op_parallelism_threads = 11 -inference_sess = tf.Session(config=config) -``` -The data config settings are used to parallelize the part of the graph concerned with image processing (the data layer). -``` -data_config = tf.ConfigProto() -data_config.inter_op_parallelism_threads = 21 -data_config.intra_op_parallelism_threads = 28 -data_sess = tf.Session(config=data_config) -``` - -## Hands-on Tutorial -This section shows how to measure inference performance on Intel's Model Zoo pretrained model (or your pretrained model) by setting the above-discussed run time flags. -The setting values shown in this tutorial are for a 2nd gen Xeon scalable processor. -### FP32 inference - -### Initial Setup - - - -1. Clone the original model repo and checkout the appropriate commit. For demonstration purpose, this entire tutorial will be implemented on the home directory, modify this location as required. - -``` -cd ~ -mkdir object_detection -cd object_detection -mkdir ssd_vgg16 -cd ssd_vgg16 -``` - -``` -git clone https://github.com/HiKapok/SSD.TensorFlow.git -cd SSD.TensorFlow -git checkout 2d8b0cb9b2e70281bf9dce438ff17ffa5e59075c -``` - -2. Clone IntelAI models and download into your home directory or pull the latest version. -```bash -cd ~ -git clone https://github.com/IntelAI/models.git -``` -(or) - -``` -cd ~/models -git pull -``` - -3. Skip to step 4 if you already have a pretrained model, or download the pretrained model from Intel Model Zoo. Find more info in the [README](/benchmarks/object_detection/tensorflow/ssd_vgg16#fp32-inference-instructions) doc. - -``` -cd ~/object_detection/ssd_vgg16 -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/ssdvgg16_fp32_pretrained_model.pb -``` -4. Skip to step 5 if you already have a dataset with annotations in TFRecords format or follow the below instructions to download and convert COCO dataset with annotations to TFRecords. 
-Note that to compute accuracy, the TFRecord's filename pattern must start with `"val-*"` - -Download validation dataset: - -``` -wget http://images.cocodataset.org/zips/val2017.zip -unzip val2017.zip -``` -Download annotations for COCO dataset: - -``` -wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -unzip annotations_trainval2017.zip -``` -Generate TFRecords by copying the `generate_coco_records.py` script available in IntelAI `models` repo into original model repo: - -``` -cp ~/models/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py ~/object_detection/ssd_vgg16/SSD.TensorFlow/dataset -``` - -Create an output directory to save the generated TFRecords: - -``` -mkdir ~/object_detection/ssd_vgg16/data_tfrecords -``` - -Some dependencies are required to run the script such as python3, TensorFlow and tqdm. -You can use the following install commands to install the requirements: - -``` -sudo apt-get install python3-venv python3-dev -pip3 install --upgrade pip -python3 -m venv venv -source venv/bin/activate -pip3 install intel-tensorflow tqdm -``` -Run the dataset conversion script to generate TFRecords: - -``` -cd ~/object_detection/ssd_vgg16/SSD.TensorFlow/dataset -python generate_coco_records.py \ ---image_path ~/object_detection/ssd_vgg16/val2017/ \ ---annotations_file ~/object_detection/ssd_vgg16/annotations/instances_val2017.json \ ---output_prefix val \ ---output_path ~/object_detection/ssd_vgg16/data_tfrecords -``` -The generated TFrecords can be found at the `--output_path`. - -``` -$ ls ~/object_detection/ssd_vgg16/data_tfrecords - -val-00000-of-00005 val-00001-of-00005 val-00002-of-00005 val-00003-of-00005 val-00004-of-00005 - -``` - -5. Install [Docker](https://docs.docker.com/v17.09/engine/installation/) since the tutorial runs in a Docker container. - -### Run online inference - -1. Pull the relevant Intel-optimized TensorFlow Docker image. -```bash -docker pull gcr.io/deeplearning-platform-release/tf-cpu.1-15 -``` -2. cd to the inference script directory in local IntelAI models repo. -```bash -cd ~/models/benchmarks -``` -3. Run the Python script ``` launch_benchmark.py``` with the pretrained model. -The ```launch_benchmark.py``` script can be treated as an entry point to conveniently perform out-of-box high performance -inference on pretrained models for popular topologies. -The script will automatically set the recommended run-time options for supported topologies, -but if you choose to set your own options, refer to the full list of available flags and a detailed -explanation of ```launch_benchmark.py``` [here](/docs/general/tensorflow/LaunchBenchmark.md). - This step will automatically launch a new container on every run and terminate. Go to [Step 4](#step_4) to interactively run the script on the container. - - -Console in: - -```bash -python launch_benchmark.py \ - --model-name ssd_vgg16 \ - --mode inference \ - --precision fp32 \ - --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir ~/object_detection/ssd_vgg16/SSD.TensorFlow \ - --data-location ~/object_detection/ssd_vgg16/data_tfrecords \ - --in-graph ~/object_detection/ssd_vgg16/ssdvgg16_fp32_pretrained_model.pb \ - --batch-size 1 \ - --socket-id 0 \ - --num-inter-threads 11 \ - --num-intra-threads 21 \ - --data-num-inter-threads 21 \ - --data-num-intra-threads 28 \ - -- warmup-steps=100 steps=500 - -``` - -The logs are captured in a directory outside of the container. - -4. 
If you want to run ```launch_benchmark.py``` interactively from within the docker container, add flag ```--debug```. This launches a docker container based on the ```--docker_image```, -performs necessary installs, runs the ```launch_benchmark.py``` script, and does not terminate the container process. - -Console in: -```bash -python launch_benchmark.py \ - --model-name ssd_vgg16 \ - --mode inference \ - --precision fp32 \ - --framework tensorflow \ - --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-15 \ - --model-source-dir ~/object_detection/ssd_vgg16/SSD.TensorFlow \ - --data-location ~/object_detection/ssd_vgg16/data_tfrecords \ - --in-graph ~/object_detection/ssd_vgg16/ssdvgg16_fp32_pretrained_model.pb \ - --batch-size 1 \ - --socket-id 0 \ - --num-inter-threads 11 \ - --num-intra-threads 21 \ - --data-num-inter-threads 21 \ - --data-num-intra-threads 28 \ - --debug \ - -- warmup-steps=100 steps=500 -``` -Console out: -```bash - lscpu_path_cmd = command -v lscpu - lscpu located here: b'/usr/bin/lscpu' - root@a78677f56d69:/workspace/benchmarks/common/tensorflow# -``` - -To rerun the benchmarking script, execute the ```start.sh``` bash script from your existing directory with the available flags, which in turn will run ```launch_benchmark.py```. For example, to rerun the script and compute accuracy, run with `ACCURACY_ONLY` flag set to True. To skip the run from reinstalling packages pass ```True``` to ```NOINSTALL```. - -```bash - chmod +x ./start.sh -``` -```bash - NOINSTALL=True ACCURACY_ONLY=True ./start.sh -``` - -All other flags will default to values passed in the first ```launch_benchmark.py``` that starts the container. [See here](/docs/general/tensorflow/LaunchBenchmark.md) to get the full list of flags. - - diff --git a/docs/recommendation/quantization/Tutorial.md b/docs/recommendation/quantization/Tutorial.md deleted file mode 100644 index 46ad73252..000000000 --- a/docs/recommendation/quantization/Tutorial.md +++ /dev/null @@ -1,334 +0,0 @@ -# Wide and Deep model Optimization and Quantization - -Content: -* [Goal](#goal) -* [Prerequisites](#prerequisites) -* [Install and Build TensorFlow Tools](#install-and-build-tensorflow-tools) -* [Floating point 32-bits Model Optimization](#fp32-model-optimization) -* [Floating point 32-bits Model Quantization to 8-bits Precision](#fp32-model-quantization-to-int8-precision) -* [Performance Evaluation](#performance-evaluation) - -## Goal -Post-training model quantization and optimization objective is to: -* Reduce the model size -* Run faster online inference - -This is highly recommended in the case of mobile applications and systems of constrained memory and processing power. -Usually, there will be some loss in accuracy, but it has to be within the [acceptable range](#performance-evaluation). - -More resources: [Post-training quantization for mobile and IOT](https://www.tensorflow.org/lite/performance/post_training_quantization), and -[TensorFlow graph transform tool user guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms). - -## Prerequisites -* The wide and deep saved model graph generated during training - - -## Install and Build TensorFlow Tools - -Build an image which contains transform_graph and summarize_graph tools. 
The -initial build may take a long time, but subsequent builds will be quicker -since layers are cached - ``` - $ git clone https://github.com/IntelAI/tools.git - cd tools/tensorflow_quantization - - docker build \ - --build-arg HTTP_PROXY=${HTTP_PROXY} \ - --build-arg HTTPS_PROXY=${HTTPS_PROXY} \ - --build-arg http_proxy=${http_proxy} \ - --build-arg https_proxy=${https_proxy} \ - -t quantization:latest -f Dockerfile . - ``` -Launch quantization script launch_quantization.py by providing args as below, -this will get user into container environment (/workspace/tensorflow/) with -quantization tools. - - ``` - --docker-image: Docker image tag from above step (quantization:latest) - --pre-trained-model-dir: Path to your pre-trained model directory, which will - be mounted inside container at /workspace/quantization. - - python launch_quantization.py \ - --docker-image quantization:latest \ - --pre-trained-model-dir /home// - ``` -Please provide the output graphs locations relative to /workspace/quantization, so that results are written back to -local machine - -## FP32 Model Optimization -In this section, we assume that a saved model graph generated during training is available. - * The `model graph_def` is used in `step 1` to get the possible **input and output node names** of the graph. - * The input saved model directory generated during training is used in `step 2` to get the **model frozen graph**. - * The `model frozen graph`, **optimized** (based on the graph structure and operations, etc.) in `step 3`. - -We also assume that you are in the TensorFlow root directory (`/workspace/tensorflow/` inside the docker container) to execute the following steps. - -1. Find out the possible input and output node names of the graph - From the TensorFlow/tools root directory, run: - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/summarize_graph \ - --in_graph=/workspace/quantization/graph.pbtxt \ - --print_structure=false >& model_nodes.txt - ``` - - In the `model_nodes.txt` file, look for the input and output nodes names such as: - ``` - Found 1 possible inputs: (name=input, type=float(1), shape=[?,224,224,3]) - Found 1 possible outputs: (name=predict, op=Softmax) - ``` -2. Freeze the graph: - * The `--input_saved_model_dir` is the topology saved model directory generated during training - * The `--output_node_names` are obtained from step 1. - >Note: `--input_graph` can be in either binary `pb` or text `pbtxt` format - ``` - $ python tensorflow/python/tools/freeze_graph.py \ - --input_saved_model_dir=/workspace/tensorflow/model_<>/exports/<> \ - --output_graph= /workspace/quantization/wide_deep_frozen_fp32_graph.pb \ - --output_node_names=head/predictions/probabilities - ``` -3. Optimize the FP32 frozen graph to remove training and unused nodes: - * Set the `--in_graph` to the path of the model frozen graph (from step 2), - * The `--inputs` and `--outputs` are the graph input and output node names (from step 1). - * `--transforms` to be set based on the model graph structure (to remove unused nodes, combine operations, etc). 
- ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/wide_deep_frozen_fp32_graph.pb \ - --out_graph=/workspace/quantization/wide_deep_fp32_graph.pb \ - --inputs='Placeholder,Placeholder_1,Placeholder_2,Placeholder_3,Placeholder_4, \ - Placeholder_5,Placeholder_6,Placeholder_7,Placeholder_8,Placeholder_9,Placeholder_10, \ - Placeholder_11,Placeholder_12,Placeholder_13,Placeholder_14,Placeholder_15,Placeholder_16, \ - Placeholder_17,Placeholder_18,Placeholder_19,Placeholder_20,Placeholder_21,Placeholder_22, \ - Placeholder_23,Placeholder_24,Placeholder_25,Placeholder_26,Placeholder_27,Placeholder_28, \ - Placeholder_29,Placeholder_30,Placeholder_31,Placeholder_32,Placeholder_33,Placeholder_34, \ - Placeholder_35,Placeholder_36,Placeholder_37,Placeholder_38' \ - --outputs='head/predictions/probabilities' \ - --transforms='strip_unused_nodes remove_nodes(op=Identity, op=CheckNumerics) remove_attribute(attribute_name=_class)' - ``` -4. Feature Column optimization of FP32 graph: - - Clone the [IntelAI/models](https://github.com/IntelAI/models) repository and use featurecolumn_graph_optimization.py script by setting `--input-graph` to the path of Fp32 graph obtained from above step and enable flag `wide_and_deep_large_ds` to perform model specific optimizations(fusion of categorical and numeric columns to accept preprocessed and fused data). - ``` - $ git clone https://github.com/IntelAI/models.git - $ cd /home//models - $ python models/recommendation/tensorflow/wide_deep_large_ds/dataset/featurecolumn_graph_optimization.py \ - --input-graph /workspace/quantization/wide_deep_fp32_graph.pb \ - --output-graph /workspace/quantization/optimized_wide_deep_fp32_graph.pb \ - --output-nodes head/predictions/probabilities \ - --wide_and_deep_large_ds True - ``` -5. [Evaluate the model performance](#accuracy-for-fp32-optimized-graph) using -the the optimized graph `optimized_wide_deep_fp32_graph.pb` and check the model accuracy. - -## FP32 Model Quantization to Int8 Precision -In this section, our objective is to quantize the output [FP32 optimized graph](#fp32-model-optimization) of the previous section -to `Int8` precision. -In case you did not do the FP32 model optimization by yourself, please follow the [instructions](/benchmarks//recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-inference-instructions) to download the Intel optimized -Wide and Deep pre-trained model graph. - -The following steps show how to convert the `FP32` model to `Int8` precision to reduce the model size: - -6. Convert the FP32-graph to a dynamic range Int8-graph using the output node names (from step 1) - - ``` - $ python tensorflow/tools/quantization/quantize_graph.py \ - --input=/workspace/quantization/optimized_wide_deep_fp32_graph.pb \ - --output=/workspace/quantization/int8_dynamic_range_wide_deep_graph.pb \ - --output_node_names='import/head/predictions/probabilities' \ - --mode=eightbit \ - --intel_cpu_eightbitize=True \ - --model_name=wide_deep_large_ds - ``` - - [Evaluate the output int8 graph performance](#accuracy-for-int8-optimized-graph) - to check the loss in performance after the model quantization. - -7. Convert from dynamic to static re-quantization range. 
-The following steps are to freeze the re-quantization range: - - * Insert the logging op: - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/int8_dynamic_range_wide_deep_graph.pb \ - --out_graph=/workspace/quantization/logged_int8_dynamic_range_wide_deep.pb \ - --transforms='insert_logging(op=RequantizationRange, show_name=true, message="__requant_min_max:")' - ``` - - * **Generate the `wide_deep_min_max_log.txt` file**, follow [instructions](/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) - to run inference (using `--batch_size=1024`, - `--data-location=/home//dataset_preprocessed_train.tfrecords`refer [instructions](/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md#prepare-dataset) to generate training data, - `--in-graph=/home//logged_int8_dynamic_range_wide_deep.pb`, - `--accuracy-only`), and **store the output log in `wide_deep_min_max_log.txt` file**. - - * The `wide_deep_min_max_log.txt` file is used in the following step. - - * Run the log data replace the - - `RequantizationRangeOp` with constants in the original quantized graph: - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/int8_dynamic_range_wide_deep_graph.pb \ - --out_graph=/workspace/quantization/freezed_range_int8_wide_deep.pb \ - --transforms='freeze_requantization_ranges(min_max_log_file="/workspace/quantization/wide_deep_min_max_log.txt")' - ``` - - [Evaluate the output int8 graph performance](#accuracy-for-int8-optimized-graph) - to check the loss in performance after this step. - -8. Convert from dynamic to static quantization Min range.The following steps are to freeze the Min range: - - * Insert the logging op: - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/freezed_range_int8_wide_deep.pb \ - --out_graph=/workspace/quantization/logged_freezed_range_int8_wide_deep.pb \ - --transforms='insert_logging(op=Min, show_name=true, message="__min:")' - ``` - - * **Generate the `wide_deep_min_log.txt` file**, follow [instructions](/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) - to run inference (using `--batch_size=1024`, - `--data-location=/home//dataset_preprocessed_train.tfrecords`refer [instructions](/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md#prepare-dataset) to generate training data, - `--in-graph=/home//logged_freezed_range_int8_wide_deep.pb`, - `--accuracy-only`), and **store the output log in `wide_deep_min_log.txt` file**. - - * The `wide_deep_min_log.txt` file is used in the following step. - - * Run the log data replace the - - `MinOp` with constants in the original quantized graph: - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/freezed_range_int8_wide_deep.pb \ - --out_graph=/workspace/quantization/freezed_range_int8_wide_deep_freezemin.pb \ - --transforms='freeze_min(min_max_log_file="/workspace/quantization/wide_deep_min_log.txt")' - ``` - - [Evaluate the output int8 graph performance](#accuracy-for-int8-optimized-graph) - to check the loss in performance after this step. - -9. Convert from dynamic to static quantization Max range. 
-The following steps are to freeze the Max range: - - * Insert the logging op: - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/freezed_range_int8_wide_deep.pb \ - --out_graph=/workspace/quantization/logged_freezed_range_int8_wide_deep.pb \ - --transforms='insert_logging(op=Max, show_name=true, message="__max:")' - ``` - - * **Generate the `wide_deep_max_log.txt` file**, follow [instructions](/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) - to run inference (using `--batch_size=1024`, - `--data-location=/home//dataset_preprocessed_train.tfrecords`refer [instructions](/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md#prepare-dataset) to generate training data, - `--in-graph=/home//logged_freezed_range_int8_wide_deep.pb`, - `--accuracy-only`), and **store the output log in `wide_deep_max_log.txt` file**. - - * The `wide_deep_max_log.txt` file is used in the following step. - - * Run the log data replace the - - `MaxOp` with constants in the original quantized graph: - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/freezed_range_int8_wide_deep_freezemin.pb \ - --out_graph=/workspace/quantization/freezed_range_int8_wide_deep_minmaxfreeze.pb \ - --transforms='freeze_max(min_max_log_file="/workspace/quantization/wide_deep_max_log.txt")' - ``` - - [Evaluate the output int8 graph performance](#accuracy-for-int8-optimized-graph) - to check the loss in performance after this step. - -10. Fuse `RequantizeOp` with fused quantized innerproducts, and generate the final -optimized Int8 graph - ``` - $ bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=/workspace/quantization/freezed_range_int8_wide_deep_minmaxfreeze.pb \ - --out_graph=/workspace/quantization/final_int8_wide_deep.pb \ - --outputs='import/head/predictions/probabilities' \ - --transforms='fuse_quantized_matmul_and_requantize strip_unused_nodes' - ``` - Check the final quantized wide and deep model `final_int8_wide_deep.pb` performance in - the [Accuracy for Int8 Optimized Graph](#accuracy-for-int8-optimized-graph) section. - - -## Performance Evaluation - -Validating the model performance is required after each step to verify if the output graph achieves the accuracy target. -* The model accuracy is used as a performance measure. -* The accuracy target is the optimized FP32 model accuracy values. -* The quantized `Int8` graph accuracy should not drop more than ~0.5-1%. - - -This section explains how to run wide & deep inference and calculate the model accuracy using the [Intel Model Zoo](https://github.com/IntelAI/models). - -Clone the [IntelAI/models](https://github.com/IntelAI/models) repository, -and follow the [documented steps](/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md) -to run `Wide and Deep` inference performance for both FP32 and Int8 cases. - -**Note that the script should be run outside of the quantization docker container -and that some inputs to the script are slightly different for `FP32` and `Int8` models (i.e. `--precision` and `--docker-image`).** - - -### Accuracy for FP32 Optimized Graph -Clone the [IntelAI/models](https://github.com/IntelAI/models) repository and follow the steps to run the FP32 -script to calculate `accuracy` and use the optimized FP32 graph in `--in-graph`. 
- ``` - $ git clone https://github.com/IntelAI/models.git - $ cd /home//models/benchmarks - $ python launch_benchmark.py \ - --in-graph /home///wide_deep_fp32_pretrained_model.pb \ - --model-name wide_deep_large_ds \ - --framework tensorflow \ - --precision fp32 \ - --mode inference \ - --accuracy-only \ - --batch-size=1000 \ - --socket-id 0 \ - --data-location /root/user/wide_deep_files/dataset_preprocessed_eval.tfrecords \ - --docker-image docker.io/intelaipg/intel-optimized-tensorflow:latest - ``` -The tail of the log output when the accuracy run completes should look something like this: - ``` - -------------------------------------------------- - Total test records : 2000000 - Batch size is : 512 - Number of batches : 3907 - Throughput is (records/sec) : 314943.875 - Inference duration (seconds) : 6.1878 - Latency (millisecond/batch) : 1.625686 - Classification accuracy (%) : 77.5223 - No of correct predicitons : 1550447 - -------------------------------------------------- - ``` - -### Accuracy for Int8 Optimized Graph - -Clone the [IntelAI/models](https://github.com/IntelAI/models) repository and follow the steps to run the Int8 -script to calculate `accuracy` and use the Int8 graph in `--in-graph`. - ``` - $ git clone https://github.com/IntelAI/models.git - $ cd /home//models/benchmarks - $ python launch_benchmark.py \ - --in-graph /home///final_wide_deep_Int8_graph.pb \ - --model-name wide_deep_large_ds \ - --framework tensorflow \ - --precision int8 \ - --mode inference \ - --accuracy-only \ - --batch-size=1000 \ - --socket-id 0 \ - --data-location /home///dataset_preprocessed_eval.tfrecords \ - --docker-image docker.io/intelaipg/intel-optimized-tensorflow:latest - ``` -The tail of the log output when the accuracy run completes should look something like this: - ``` - -------------------------------------------------- - Total test records : 2000000 - Batch size is : 512 - Number of batches : 3907 - Throughput is (records/sec) : 489653.313 - Inference duration (seconds) : 3.98 - Latency (millisecond/batch) : 1.045638 - Classification accuracy (%) : 77.4816 - No of correct predicitons : 1549632 - -------------------------------------------------- - ``` diff --git a/docs/recommendation/tensorflow/Tutorial.md b/docs/recommendation/tensorflow/Tutorial.md index 7b551064d..00b8a126b 100644 --- a/docs/recommendation/tensorflow/Tutorial.md +++ b/docs/recommendation/tensorflow/Tutorial.md @@ -106,12 +106,12 @@ git clone https://github.com/IntelAI/models.git ``` mkdir ~/wide_deep_files cd ~/wide_deep_files -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/wide_deep_fp32_pretrained_model.pb +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/wide_deep_fp32_pretrained_model.pb ``` Refer to the Wide and Deep [README](/benchmarks/recommendation/tensorflow/wide_deep_large_ds) to get the latest location of the pretrained model. -3. Install [Docker](https://docs.docker.com/v17.09/engine/installation/) since the tutorial runs on a Docker container. +3. Install [Docker](https://docs.docker.com/install/) since the tutorial runs on a Docker container. 4. Data Preparation: You will need approximately 20GB of available disk space to complete this step. @@ -133,14 +133,15 @@ Follow the instructions below to download and prepare the dataset. 
docker run -it --privileged -u root:root \ -w /models \ --volume $PWD:/models \ - docker.io/intelaipg/intel-optimized-tensorflow:latest \ + docker.io/intelaipg/intel-optimized-tensorflow:nightly-latestprs-bdw \ /bin/bash ``` - Preprocess and convert eval dataset to TFRecord format. We will use a script in the Intel Model Zoo repository. This step may take a while to complete. ``` python recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py \ - --csv-datafile eval.csv \ + --inputcsv-datafile eval.csv \ + --calibrationcsv-datafile train.csv \ --outputfile-name preprocessed ``` - Exit the docker container and find the processed dataset `eval_preprocessed.tfrecords` in the location `~/models/models`. @@ -150,7 +151,7 @@ Follow the instructions below to download and prepare the dataset. 1. Pull the relevant Intel Optimizations for TensorFlow Docker image. We'll be running the pretrained model to infer in a Docker container. [Click here](https://software.intel.com/en-us/articles/intel-optimization-for-tensorflow-installation-guide) to find all the available Docker images. ```bash -docker pull docker.io/intelaipg/intel-optimized-tensorflow:latest +docker pull intel/intel-optimized-tensorflow:2.1.0 ``` 2. cd to the inference script directory: ```bash @@ -177,7 +178,7 @@ Set this parameter to a socket id to run the workload on a single socket. --mode inference \ --framework tensorflow \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:latest \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --in-graph ~/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location ~/models/models/eval_preprocessed.tfrecords \ --verbose @@ -194,7 +195,7 @@ Set this parameter to a socket id to run the workload on a single socket. --mode inference \ --framework tensorflow \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:latest \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --in-graph ~/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location ~/models/models/eval_preprocessed.tfrecords \ --verbose @@ -232,7 +233,7 @@ The logs are captured in a directory outside of the container.
--mode inference \ --framework tensorflow \ --accuracy-only \ - --docker-image intelaipg/intel-optimized-tensorflow:latest \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --in-graph ~/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location ~/models/models/eval_preprocessed.tfrecords \ --verbose @@ -262,7 +263,7 @@ perform necessary installs, run the ```launch_benchmark.py``` script, and does n --mode inference \ --framework tensorflow \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:latest \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --in-graph ~/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location ~/models/models/eval_preprocessed.tfrecords \ --debug @@ -287,7 +288,7 @@ To run inference on a large dataset, download the test dataset in `~/wide_deep_f ``` cd ~/wide_deep_files/real_dataset ``` - - Go to this [page](http://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset/) on the Criteo website. +- Go to this [page](http://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset/) on the Criteo website. Agree to the terms of use, enter your name, and submit the form. Then copy the download link for the 4.3GB tar file called `dac.tar.gz` and use it in the `wget` command in the code block below. Untar the file to create three files: 1. readme.txt @@ -308,14 +309,14 @@ Untar the file to create three files: docker run -it --privileged -u root:root \ -w /models \ --volume $PWD:/models \ - docker.io/intelaipg/intel-optimized-tensorflow:latest \ + docker.io/intelaipg/intel-optimized-tensorflow:nightly-latestprs-bdw \ /bin/bash ``` - Preprocess and convert test dataset to TFRecord format. We will use a script in the Intel Model Zoo repository. This step may take a while to complete ``` python recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py \ - --csv-datafile test.csv \ + --inputcsv-datafile test.csv \ --outputfile-name preprocessed ``` - Exit the docker container and find the processed dataset `test_preprocessed.tfrecords` in the location `~/models/models`. @@ -331,7 +332,7 @@ Untar the file to create three files: --mode inference \ --framework tensorflow \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:latest \ + --docker-image intel/intel-optimized-tensorflow:2.1.0 \ --in-graph ~/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location ~/models/models/test_preprocessed.tfrecords \ --verbose diff --git a/models/__init__.py b/models/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/content_creation/tensorflow/draw/inference/fp32/LICENSE b/models/content_creation/tensorflow/draw/inference/fp32/LICENSE deleted file mode 100644 index 9c8f3ea08..000000000 --- a/models/content_creation/tensorflow/draw/inference/fp32/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
\ No newline at end of file diff --git a/models/content_creation/tensorflow/draw/inference/fp32/draw_inf.py b/models/content_creation/tensorflow/draw/inference/fp32/draw_inf.py deleted file mode 100644 index 2ba8b572c..000000000 --- a/models/content_creation/tensorflow/draw/inference/fp32/draw_inf.py +++ /dev/null @@ -1,302 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -"""" -Simple implementation of http://arxiv.org/pdf/1502.04623v2.pdf in TensorFlow - -Example Usage: - python draw.py --data_dir=/tmp/draw --read_attn=True --write_attn=True - -Author: Eric Jang -""" - -import tensorflow as tf -from tensorflow.examples.tutorials import mnist -import numpy as np -import os -import time - -tf.flags.DEFINE_string("dl", "./", "") -tf.flags.DEFINE_string("cp", "./drawmodel.ckpt", "checkpoint file") -tf.flags.DEFINE_boolean("read_attn", True, "enable attention for reader") -tf.flags.DEFINE_boolean("write_attn", True, "enable attention for writer") -tf.flags.DEFINE_integer("bs", 100, "inference batch size") -tf.flags.DEFINE_integer("num_inter_threads", 1, "number of inter_threads") -tf.flags.DEFINE_integer("num_intra_threads", 28, "number of intra_threads") -tf.flags.DEFINE_integer("nb", 200, "number of batches") -tf.flags.DEFINE_integer("nw", 100, "number of warm up steps") -FLAGS = tf.flags.FLAGS - -print("checkpoint: {}".format(FLAGS.cp)) -print("num_inter_threads: {}".format(FLAGS.num_inter_threads)) -print("num_intra_threads: {}".format(FLAGS.num_intra_threads)) -print("dl: {}".format(FLAGS.dl)) -print("bs: {}".format(FLAGS.bs)) -print("nb: {}".format(FLAGS.nb)) -print("nw: {}".format(FLAGS.nw)) - -# MODEL PARAMETERS - -A, B = 28, 28 # image width,height -img_size = B * A # the canvas size -enc_size = 256 # number of hidden units / output size in LSTM -dec_size = 256 -read_n = 5 # read glimpse grid width/height -write_n = 5 # write glimpse grid width/height -read_size = 2 * read_n * read_n if FLAGS.read_attn else 2 * img_size -write_size = write_n * write_n if FLAGS.write_attn else img_size -z_size = 10 # QSampler output size -T = 10 # MNIST generation sequence length -batch_size = FLAGS.bs # training minibatch size -train_iters = 10000 -learning_rate = 1e-3 # learning rate for optimizer -eps = 1e-8 # epsilon for numerical stability - -# BUILD MODEL - -DO_SHARE = None # workaround for variable_scope(reuse=True) - -x = tf.placeholder(tf.float32, shape=(batch_size, img_size)) # input (batch_size * img_size) -e = tf.random_normal((batch_size, z_size), mean=0, stddev=1) # Qsampler noise -lstm_enc = tf.contrib.rnn.LSTMCell(enc_size, state_is_tuple=True) # encoder Op -lstm_dec = tf.contrib.rnn.LSTMCell(dec_size, state_is_tuple=True) # decoder Op - - -def linear(x, output_dim): - """ - affine transformation Wx+b - assumes x.shape = (batch_size, num_features) - """ - w = tf.get_variable("w", [x.get_shape()[1], output_dim]) - b = tf.get_variable("b", [output_dim], initializer=tf.constant_initializer(0.0)) - return 
tf.matmul(x, w) + b - - -def filterbank(gx, gy, sigma2, delta, N): - grid_i = tf.reshape(tf.cast(tf.range(N), tf.float32), [1, -1]) - mu_x = gx + (grid_i - N / 2 - 0.5) * delta # eq 19 - mu_y = gy + (grid_i - N / 2 - 0.5) * delta # eq 20 - a = tf.reshape(tf.cast(tf.range(A), tf.float32), [1, 1, -1]) - b = tf.reshape(tf.cast(tf.range(B), tf.float32), [1, 1, -1]) - mu_x = tf.reshape(mu_x, [-1, N, 1]) - mu_y = tf.reshape(mu_y, [-1, N, 1]) - sigma2 = tf.reshape(sigma2, [-1, 1, 1]) - Fx = tf.exp(-tf.square(a - mu_x) / (2 * sigma2)) - Fy = tf.exp(-tf.square(b - mu_y) / (2 * sigma2)) # batch x N x B - # normalize, sum over A and B dims - Fx = Fx / tf.maximum(tf.reduce_sum(Fx, 2, keep_dims=True), eps) - Fy = Fy / tf.maximum(tf.reduce_sum(Fy, 2, keep_dims=True), eps) - return Fx, Fy - - -def attn_window(scope, h_dec, N): - with tf.variable_scope(scope, reuse=DO_SHARE): - params = linear(h_dec, 5) - # gx_,gy_,log_sigma2,log_delta,log_gamma=tf.split(1,5,params) - gx_, gy_, log_sigma2, log_delta, log_gamma = tf.split(params, 5, 1) - gx = (A + 1) / 2 * (gx_ + 1) - gy = (B + 1) / 2 * (gy_ + 1) - sigma2 = tf.exp(log_sigma2) - delta = (max(A, B) - 1) / (N - 1) * tf.exp(log_delta) # batch x N - return filterbank(gx, gy, sigma2, delta, N) + (tf.exp(log_gamma),) - -# READ - - -def read_no_attn(x, x_hat, h_dec_prev): - return tf.concat([x, x_hat], 1) - - -def read_attn(x, x_hat, h_dec_prev): - Fx, Fy, gamma = attn_window("read", h_dec_prev, read_n) - - def filter_img(img, Fx, Fy, gamma, N): - Fxt = tf.transpose(Fx, perm=[0, 2, 1]) - img = tf.reshape(img, [-1, B, A]) - glimpse = tf.matmul(Fy, tf.matmul(img, Fxt)) - glimpse = tf.reshape(glimpse, [-1, N * N]) - return glimpse * tf.reshape(gamma, [-1, 1]) - x = filter_img(x, Fx, Fy, gamma, read_n) # batch x (read_n*read_n) - x_hat = filter_img(x_hat, Fx, Fy, gamma, read_n) - return tf.concat([x, x_hat], 1) # concat along feature axis - - -read = read_attn if FLAGS.read_attn else read_no_attn - -# ENCODE - - -def encode(state, input): - """ - run LSTM - state = previous encoder state - input = cat(read,h_dec_prev) - returns: (output, new_state) - """ - with tf.variable_scope("encoder", reuse=DO_SHARE): - return lstm_enc(input, state) - -# Q-SAMPLER (VARIATIONAL AUTOENCODER) - - -def sampleQ(h_enc): - """ - Samples Zt ~ normrnd(mu,sigma) via reparameterization trick for normal dist - mu is (batch,z_size) - """ - with tf.variable_scope("mu", reuse=DO_SHARE): - mu = linear(h_enc, z_size) - with tf.variable_scope("sigma", reuse=DO_SHARE): - logsigma = linear(h_enc, z_size) - sigma = tf.exp(logsigma) - return (mu + sigma * e, mu, logsigma, sigma) - -# DECODER - - -def decode(state, input): - with tf.variable_scope("decoder", reuse=DO_SHARE): - return lstm_dec(input, state) - -# WRITER - - -def write_no_attn(h_dec): - with tf.variable_scope("write", reuse=DO_SHARE): - return linear(h_dec, img_size) - - -def write_attn(h_dec): - with tf.variable_scope("writeW", reuse=DO_SHARE): - w = linear(h_dec, write_size) # batch x (write_n*write_n) - N = write_n - w = tf.reshape(w, [batch_size, N, N]) - Fx, Fy, gamma = attn_window("write", h_dec, write_n) - Fyt = tf.transpose(Fy, perm=[0, 2, 1]) - wr = tf.matmul(Fyt, tf.matmul(w, Fx)) - wr = tf.reshape(wr, [batch_size, B * A]) - # gamma=tf.tile(gamma,[1,B*A]) - return wr * tf.reshape(1.0 / gamma, [-1, 1]) - - -write = write_attn if FLAGS.write_attn else write_no_attn - -# STATE VARIABLES - -cs = [0] * T # sequence of canvases -# gaussian params generated by SampleQ. We will need these for computing loss. 
-mus, logsigmas, sigmas = [0] * T, [0] * T, [0] * T -# initial states -h_dec_prev = tf.zeros((batch_size, dec_size)) -enc_state = lstm_enc.zero_state(batch_size, tf.float32) -dec_state = lstm_dec.zero_state(batch_size, tf.float32) - -# DRAW MODEL - -# construct the unrolled computational graph -for t in range(T): - c_prev = tf.zeros((batch_size, img_size)) if t == 0 else cs[t - 1] - x_hat = x - tf.sigmoid(c_prev) # error image - r = read(x, x_hat, h_dec_prev) - h_enc, enc_state = encode(enc_state, tf.concat([r, h_dec_prev], 1)) - z, mus[t], logsigmas[t], sigmas[t] = sampleQ(h_enc) - h_dec, dec_state = decode(dec_state, z) - cs[t] = c_prev + write(h_dec) # store results - h_dec_prev = h_dec - DO_SHARE = True # from now on, share variables - -# LOSS FUNCTION - - -def binary_crossentropy(t, o): - return -(t * tf.log(o + eps) + (1.0 - t) * tf.log(1.0 - o + eps)) - - -# reconstruction term appears to have been collapsed down to a single scalar value (rather than one per item in minibatch) -x_recons = tf.nn.sigmoid(cs[-1]) - -# after computing binary cross entropy, sum across features then take the mean of those sums across minibatches -Lx = tf.reduce_sum(binary_crossentropy(x, x_recons), 1) # reconstruction term -Lx = tf.reduce_mean(Lx) - -kl_terms = [0] * T -for t in range(T): - mu2 = tf.square(mus[t]) - sigma2 = tf.square(sigmas[t]) - logsigma = logsigmas[t] - kl_terms[t] = 0.5 * tf.reduce_sum(mu2 + sigma2 - 2 * logsigma, 1) - .5 # each kl term is (1xminibatch) -KL = tf.add_n(kl_terms) # this is 1xminibatch, corresponding to summing kl_terms from 1:T -Lz = tf.reduce_mean(KL) # average over minibatches - -cost = Lx + Lz - -# OPTIMIZER - -optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5) -grads = optimizer.compute_gradients(cost) -for i, (g, v) in enumerate(grads): - if g is not None: - grads[i] = (tf.clip_by_norm(g, 5), v) # clip gradients -train_op = optimizer.apply_gradients(grads) - -# RUN TRAINING - -data_directory = os.path.join(FLAGS.dl, "mnist") -if not os.path.exists(data_directory): - os.makedirs(data_directory) -train_data = mnist.input_data.read_data_sets(data_directory, one_hot=True).train # binarized (0-1) mnist data - -fetches = [] -fetches.extend([Lx, Lz, train_op]) -Lxs = [0] * train_iters -Lzs = [0] * train_iters - -config = tf.ConfigProto(inter_op_parallelism_threads=FLAGS.num_inter_threads, - intra_op_parallelism_threads=FLAGS.num_intra_threads) -sess = tf.InteractiveSession(config=config) - -saver = tf.train.Saver() # saves variables learned during training -tf.global_variables_initializer().run() -saver.restore(sess, tf.train.latest_checkpoint(FLAGS.cp)) # to restore from model, uncomment this line - -ttime = 0 -for i in range(FLAGS.nb): - xtrain, _ = train_data.next_batch(batch_size) - feed_dict = {x: xtrain} - stime = time.time() - canvases = sess.run(cs, feed_dict) # generate some examples - canvases = np.array(canvases) # T x batch x img_size - etime = time.time() - print("Elapsed Time %f" % (etime - stime)) - if i >= FLAGS.nw: - ttime += etime - stime -print("Batchsize: %d" % (batch_size)) -print("Time spent per BATCH: %.4f ms" % (ttime / (FLAGS.nb - FLAGS.nw) * 1000)) -print("Total samples/sec: %.4f samples/s" % ((FLAGS.nb - FLAGS.nw) * batch_size / ttime)) - -out_file = os.path.join(FLAGS.dl, "draw_data.npy") -np.save(out_file, [canvases, Lxs, Lzs]) - -# Get location outside of the container to show in log message -dataset_on_system = os.environ["DATASET_LOCATION_VOL"] -outside_container_location = os.path.join(dataset_on_system, "draw_data.npy") 
-print("Outputs saved in file: %s" % outside_container_location) - -sess.close() diff --git a/models/face_detection_and_alignment/__init__.py b/models/face_detection_and_alignment/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/face_detection_and_alignment/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/face_detection_and_alignment/tensorflow/__init__.py b/models/face_detection_and_alignment/tensorflow/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/face_detection_and_alignment/tensorflow/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/face_detection_and_alignment/tensorflow/mtcc/__init__.py b/models/face_detection_and_alignment/tensorflow/mtcc/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/face_detection_and_alignment/tensorflow/mtcc/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/face_detection_and_alignment/tensorflow/mtcc/inference/__init__.py b/models/face_detection_and_alignment/tensorflow/mtcc/inference/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/face_detection_and_alignment/tensorflow/mtcc/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/MtcnnDetector.py b/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/MtcnnDetector.py deleted file mode 100755 index 6ea2fa552..000000000 --- a/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/MtcnnDetector.py +++ /dev/null @@ -1,467 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -import cv2 -import time -import numpy as np -import sys -# sys.path.append("../") -from train_models.MTCNN_config import config -from Detection.nms import py_nms - - -class MtcnnDetector(object): - - def __init__(self, - detectors, - min_face_size=25, - stride=2, - threshold=[0.6, 0.7, 0.7], - scale_factor=0.79, - # scale_factor=0.709,#change - slide_window=False): - - self.pnet_detector = detectors[0] - self.rnet_detector = detectors[1] - self.onet_detector = detectors[2] - self.min_face_size = min_face_size - self.stride = stride - self.thresh = threshold - self.scale_factor = scale_factor - self.slide_window = slide_window - - def convert_to_square(self, bbox): - """ - convert bbox to square - Parameters: - ---------- - bbox: numpy array , shape n x 5 - input bbox - Returns: - ------- - square bbox - """ - square_bbox = bbox.copy() - - h = bbox[:, 3] - bbox[:, 1] + 1 - w = bbox[:, 2] - bbox[:, 0] + 1 - max_side = np.maximum(h, w) - square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5 - square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5 - square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1 - square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1 - return square_bbox - - def calibrate_box(self, bbox, reg): - """ - calibrate bboxes - Parameters: - ---------- - bbox: numpy array, shape n x 5 - input bboxes - reg: numpy array, shape n x 4 - bboxes adjustment - Returns: - ------- - bboxes after refinement - """ - - bbox_c = bbox.copy() - w = bbox[:, 2] - bbox[:, 0] + 1 - w = np.expand_dims(w, 1) - h = bbox[:, 3] - bbox[:, 1] + 1 - h = np.expand_dims(h, 1) - reg_m = np.hstack([w, h, w, h]) - aug = reg_m * reg - bbox_c[:, 0:4] = bbox_c[:, 0:4] + aug - return bbox_c - - def generate_bbox(self, cls_map, reg, scale, threshold): - """ - generate bbox from feature cls_map - Parameters: - ---------- - cls_map: numpy array , n x m - detect score for each position - reg: numpy array , n x m x 4 - bbox - scale: float number - scale of this detection - threshold: float number - detect threshold - Returns: - ------- - bbox array - """ - stride = 2 - # stride = 4 - cellsize = 12 - # cellsize = 25 - - 
t_index = np.where(cls_map > threshold) - - # find nothing - if t_index[0].size == 0: - return np.array([]) - # offset - dx1, dy1, dx2, dy2 = [reg[t_index[0], t_index[1], i] for i in range(4)] - - reg = np.array([dx1, dy1, dx2, dy2]) - score = cls_map[t_index[0], t_index[1]] - boundingbox = np.vstack([np.round((stride * t_index[1]) / scale), - np.round((stride * t_index[0]) / scale), - np.round((stride * t_index[1] + cellsize) / scale), - np.round((stride * t_index[0] + cellsize) / scale), - score, - reg]) - - return boundingbox.T - # pre-process images - - def processed_image(self, img, scale): - height, width, channels = img.shape - new_height = int(height * scale) # resized new height - new_width = int(width * scale) # resized new width - new_dim = (new_width, new_height) - img_resized = cv2.resize(img, new_dim, interpolation=cv2.INTER_LINEAR) # resized image - img_resized = (img_resized - 127.5) / 128 - return img_resized - - def pad(self, bboxes, w, h): - """ - pad the the bboxes, alse restrict the size of it - Parameters: - ---------- - bboxes: numpy array, n x 5 - input bboxes - w: float number - width of the input image - h: float number - height of the input image - Returns : - ------ - dy, dx : numpy array, n x 1 - start point of the bbox in target image - edy, edx : numpy array, n x 1 - end point of the bbox in target image - y, x : numpy array, n x 1 - start point of the bbox in original image - ex, ex : numpy array, n x 1 - end point of the bbox in original image - tmph, tmpw: numpy array, n x 1 - height and width of the bbox - """ - tmpw, tmph = bboxes[:, 2] - bboxes[:, 0] + 1, bboxes[:, 3] - bboxes[:, 1] + 1 - num_box = bboxes.shape[0] - - dx, dy = np.zeros((num_box,)), np.zeros((num_box,)) - edx, edy = tmpw.copy() - 1, tmph.copy() - 1 - - x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3] - - tmp_index = np.where(ex > w - 1) - edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index] - ex[tmp_index] = w - 1 - - tmp_index = np.where(ey > h - 1) - edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index] - ey[tmp_index] = h - 1 - - tmp_index = np.where(x < 0) - dx[tmp_index] = 0 - x[tmp_index] - x[tmp_index] = 0 - - tmp_index = np.where(y < 0) - dy[tmp_index] = 0 - y[tmp_index] - y[tmp_index] = 0 - - return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] - return_list = [item.astype(np.int32) for item in return_list] - - return return_list - - def detect_pnet(self, im): - """Get face candidates through pnet - - Parameters: - ---------- - im: numpy array - input image array - - Returns: - ------- - boxes: numpy array - detected boxes before calibration - boxes_c: numpy array - boxes after calibration - """ - h, w, c = im.shape - net_size = 12 - - current_scale = float(net_size) / self.min_face_size # find initial scale - # print("current_scale", net_size, self.min_face_size, current_scale) - im_resized = self.processed_image(im, current_scale) - current_height, current_width, _ = im_resized.shape - # fcn - all_boxes = list() - while min(current_height, current_width) > net_size: - # return the result predicted by pnet - # cls_cls_map : H*w*2 - # reg: H*w*4 - cls_cls_map, reg = self.pnet_detector.predict(im_resized) - # boxes: num*9(x1,y1,x2,y2,score,x1_offset,y1_offset,x2_offset,y2_offset) - boxes = self.generate_bbox(cls_cls_map[:, :, 1], reg, current_scale, self.thresh[0]) - - current_scale *= self.scale_factor - im_resized = self.processed_image(im, current_scale) - current_height, current_width, _ = im_resized.shape - - if boxes.size == 0: - continue - 
keep = py_nms(boxes[:, :5], 0.5, 'Union') - boxes = boxes[keep] - all_boxes.append(boxes) - - if len(all_boxes) == 0: - return None, None, None - - all_boxes = np.vstack(all_boxes) - - # merge the detection from first stage - keep = py_nms(all_boxes[:, 0:5], 0.7, 'Union') - all_boxes = all_boxes[keep] - boxes = all_boxes[:, :5] - - bbw = all_boxes[:, 2] - all_boxes[:, 0] + 1 - bbh = all_boxes[:, 3] - all_boxes[:, 1] + 1 - - # refine the boxes - boxes_c = np.vstack([all_boxes[:, 0] + all_boxes[:, 5] * bbw, - all_boxes[:, 1] + all_boxes[:, 6] * bbh, - all_boxes[:, 2] + all_boxes[:, 7] * bbw, - all_boxes[:, 3] + all_boxes[:, 8] * bbh, - all_boxes[:, 4]]) - boxes_c = boxes_c.T - - return boxes, boxes_c, None - - def detect_rnet(self, im, dets): - """Get face candidates using rnet - - Parameters: - ---------- - im: numpy array - input image array - dets: numpy array - detection results of pnet - - Returns: - ------- - boxes: numpy array - detected boxes before calibration - boxes_c: numpy array - boxes after calibration - """ - h, w, c = im.shape - dets = self.convert_to_square(dets) - dets[:, 0:4] = np.round(dets[:, 0:4]) - - [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h) - num_boxes = dets.shape[0] - cropped_ims = np.zeros((num_boxes, 24, 24, 3), dtype=np.float32) - for i in range(num_boxes): - tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) - tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :] - cropped_ims[i, :, :, :] = (cv2.resize(tmp, (24, 24)) - 127.5) / 128 - # cls_scores : num_data*2 - # reg: num_data*4 - # landmark: num_data*10 - cls_scores, reg, _ = self.rnet_detector.predict(cropped_ims) - cls_scores = cls_scores[:, 1] - keep_inds = np.where(cls_scores > self.thresh[1])[0] - if len(keep_inds) > 0: - boxes = dets[keep_inds] - boxes[:, 4] = cls_scores[keep_inds] - reg = reg[keep_inds] - # landmark = landmark[keep_inds] - else: - return None, None, None - - keep = py_nms(boxes, 0.6) - boxes = boxes[keep] - boxes_c = self.calibrate_box(boxes, reg[keep]) - return boxes, boxes_c, None - - def detect_onet(self, im, dets): - """Get face candidates using onet - - Parameters: - ---------- - im: numpy array - input image array - dets: numpy array - detection results of rnet - - Returns: - ------- - boxes: numpy array - detected boxes before calibration - boxes_c: numpy array - boxes after calibration - """ - h, w, c = im.shape - dets = self.convert_to_square(dets) - dets[:, 0:4] = np.round(dets[:, 0:4]) - [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h) - num_boxes = dets.shape[0] - cropped_ims = np.zeros((num_boxes, 48, 48, 3), dtype=np.float32) - for i in range(num_boxes): - tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) - tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :] - cropped_ims[i, :, :, :] = (cv2.resize(tmp, (48, 48)) - 127.5) / 128 - - cls_scores, reg, landmark = self.onet_detector.predict(cropped_ims) - # prob belongs to face - cls_scores = cls_scores[:, 1] - keep_inds = np.where(cls_scores > self.thresh[2])[0] - if len(keep_inds) > 0: - # pickout filtered box - boxes = dets[keep_inds] - boxes[:, 4] = cls_scores[keep_inds] - reg = reg[keep_inds] - landmark = landmark[keep_inds] - else: - return None, None, None - - # width - w = boxes[:, 2] - boxes[:, 0] + 1 - # height - h = boxes[:, 3] - boxes[:, 1] + 1 - landmark[:, 0::2] = (np.tile(w, (5, 1)) * landmark[:, 0::2].T + np.tile(boxes[:, 0], (5, 1)) - 1).T - landmark[:, 1::2] = (np.tile(h, (5, 1)) * landmark[:, 1::2].T 
+ np.tile(boxes[:, 1], (5, 1)) - 1).T - boxes_c = self.calibrate_box(boxes, reg) - - boxes = boxes[py_nms(boxes, 0.6, "Minimum")] - keep = py_nms(boxes_c, 0.6, "Minimum") - boxes_c = boxes_c[keep] - landmark = landmark[keep] - return boxes, boxes_c, landmark - # use for video - - def detect(self, img): - """Detect face over image - """ - boxes = None - t = time.time() - - # pnet - t1 = 0 - if self.pnet_detector: - boxes, boxes_c, _ = self.detect_pnet(img) - if boxes_c is None: - return np.array([]), np.array([]) - - t1 = time.time() - t - t = time.time() - - # rnet - t2 = 0 - if self.rnet_detector: - boxes, boxes_c, _ = self.detect_rnet(img, boxes_c) - if boxes_c is None: - return np.array([]), np.array([]) - - t2 = time.time() - t - t = time.time() - - # onet - t3 = 0 - if self.onet_detector: - boxes, boxes_c, landmark = self.detect_onet(img, boxes_c) - if boxes_c is None: - return np.array([]), np.array([]) - - t3 = time.time() - t - t = time.time() - print( - "time cost " + '{:.3f}'.format(t1 + t2 + t3) + ' pnet {:.3f} rnet {:.3f} onet {:.3f}'.format(t1, t2, - t3)) - - return boxes_c, landmark - - def detect_face(self, test_data): - all_boxes = [] # save each image's bboxes - landmarks = [] - batch_idx = 0 - sum_time = 0 - # test_data is iter_ - for databatch in test_data: - # databatch(image returned) - if batch_idx % 100 == 0: - print("%d images done" % batch_idx) - im = databatch - # pnet - t1 = 0 - if self.pnet_detector: - t = time.time() - # ignore landmark - boxes, boxes_c, landmark = self.detect_pnet(im) - t1 = time.time() - t - sum_time += t1 - if boxes_c is None: - print("boxes_c is None...") - all_boxes.append(np.array([])) - # pay attention - landmarks.append(np.array([])) - batch_idx += 1 - continue - # rnet - t2 = 0 - if self.rnet_detector: - t = time.time() - # ignore landmark - boxes, boxes_c, landmark = self.detect_rnet(im, boxes_c) - t2 = time.time() - t - sum_time += t2 - if boxes_c is None: - all_boxes.append(np.array([])) - landmarks.append(np.array([])) - batch_idx += 1 - continue - # onet - t3 = 0 - if self.onet_detector: - t = time.time() - boxes, boxes_c, landmark = self.detect_onet(im, boxes_c) - t3 = time.time() - t - sum_time += t3 - if boxes_c is None: - all_boxes.append(np.array([])) - landmarks.append(np.array([])) - batch_idx += 1 - continue - print( - "time cost " + '{:.3f}'.format(sum_time) + ' pnet {:.3f} rnet {:.3f} onet {:.3f}'.format(t1, t2, t3)) - - all_boxes.append(boxes_c) - landmarks.append(landmark) - batch_idx += 1 - # num_of_data*9,num_of_data*10 - return all_boxes, landmarks diff --git a/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/__init__.py b/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# diff --git a/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/detector.py b/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/detector.py deleted file mode 100755 index 428aa2533..000000000 --- a/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/detector.py +++ /dev/null @@ -1,96 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - -import tensorflow as tf -import numpy as np - - -class Detector(object): - # net_factory:rnet or onet - # datasize:24 or 48 - def __init__(self, net_factory, data_size, batch_size, model_path, num_inter_threads=0, num_intra_threads=0): - graph = tf.Graph() - with graph.as_default(): - self.image_op = tf.placeholder(tf.float32, shape=[batch_size, data_size, data_size, 3], name='input_image') - # figure out landmark - self.cls_prob, self.bbox_pred, self.landmark_pred = net_factory(self.image_op, training=False) - self.sess = tf.Session( - config=tf.ConfigProto(allow_soft_placement=True, - inter_op_parallelism_threads=num_inter_threads, - intra_op_parallelism_threads=num_intra_threads, - gpu_options=tf.GPUOptions(allow_growth=True))) - saver = tf.train.Saver() - # check whether the dictionary is valid - model_dict = '/'.join(model_path.split('/')[:-1]) - ckpt = tf.train.get_checkpoint_state(model_dict) - print(model_path) - readstate = ckpt and ckpt.model_checkpoint_path - assert readstate, "the params dictionary is not valid" - print("restore models' param") - saver.restore(self.sess, model_path) - - self.data_size = data_size - self.batch_size = batch_size - # rnet and onet minibatch(test) - - def predict(self, databatch): - # access data - # databatch: N x 3 x data_size x data_size - scores = [] - batch_size = self.batch_size - - minibatch = [] - cur = 0 - # num of all_data - n = databatch.shape[0] - while cur < n: - # split mini-batch - minibatch.append(databatch[cur:min(cur + batch_size, n), :, :, :]) - cur += batch_size - # every batch prediction result - cls_prob_list = [] - bbox_pred_list = [] - landmark_pred_list = [] - for idx, data in enumerate(minibatch): - m = data.shape[0] - real_size = self.batch_size - # the last batch - if m < batch_size: - keep_inds = np.arange(m) - # gap (difference) - gap = self.batch_size - m - while gap >= len(keep_inds): - gap -= len(keep_inds) - keep_inds = np.concatenate((keep_inds, keep_inds)) - if gap != 0: - keep_inds = np.concatenate((keep_inds, keep_inds[:gap])) - data = data[keep_inds] - real_size = m - # cls_prob batch*2 - # bbox_pred batch*4 - cls_prob, bbox_pred, landmark_pred = self.sess.run( - [self.cls_prob, self.bbox_pred, self.landmark_pred], feed_dict={self.image_op: data}) - # num_batch * batch_size *2 - cls_prob_list.append(cls_prob[:real_size]) - # num_batch * batch_size *4 - bbox_pred_list.append(bbox_pred[:real_size]) - # num_batch * batch_size*10 - landmark_pred_list.append(landmark_pred[:real_size]) - # num_of_data*2,num_of_data*4,num_of_data*10 - return 
np.concatenate(cls_prob_list, axis=0), np.concatenate(bbox_pred_list, axis=0), np.concatenate(landmark_pred_list, axis=0) diff --git a/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/fcn_detector.py b/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/fcn_detector.py deleted file mode 100755 index 27b9c4482..000000000 --- a/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/fcn_detector.py +++ /dev/null @@ -1,69 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -import numpy as np -import tensorflow as tf -import sys -# sys.path.append("../") -from train_models.MTCNN_config import config - - -class FcnDetector(object): - # net_factory: which net - # model_path: where the params'file is - def __init__(self, net_factory, model_path, num_inter_threads=0, num_intra_threads=0): - # create a graph - graph = tf.Graph() - with graph.as_default(): - # define tensor and op in graph(-1,1) - self.image_op = tf.placeholder(tf.float32, name='input_image') - self.width_op = tf.placeholder(tf.int32, name='image_width') - self.height_op = tf.placeholder(tf.int32, name='image_height') - image_reshape = tf.reshape(self.image_op, [1, self.height_op, self.width_op, 3]) - # self.cls_prob batch*2 - # self.bbox_pred batch*4 - # construct model here - # self.cls_prob, self.bbox_pred = net_factory(image_reshape, training=False) - # contains landmark - self.cls_prob, self.bbox_pred, _ = net_factory(image_reshape, training=False) - - # allow - self.sess = tf.Session( - config=tf.ConfigProto(allow_soft_placement=True, - inter_op_parallelism_threads=num_inter_threads, - intra_op_parallelism_threads=num_intra_threads, - gpu_options=tf.GPUOptions(allow_growth=True))) - saver = tf.train.Saver() - # check whether the dictionary is valid - model_dict = '/'.join(model_path.split('/')[:-1]) - ckpt = tf.train.get_checkpoint_state(model_dict) - print(model_path) - readstate = ckpt and ckpt.model_checkpoint_path - assert readstate, "the params dictionary is not valid" - print("restore models' param") - saver.restore(self.sess, model_path) - - def predict(self, databatch): - height, width, _ = databatch.shape - # print(height, width) - cls_prob, bbox_pred = self.sess.run([self.cls_prob, self.bbox_pred], - feed_dict={self.image_op: databatch, self.width_op: width, - self.height_op: height}) - return cls_prob, bbox_pred diff --git a/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/one_image_test.py b/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/one_image_test.py deleted file mode 100644 index 9c1f95215..000000000 --- a/models/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/one_image_test.py +++ /dev/null @@ -1,143 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -# coding:utf-8 -import sys - -from MtcnnDetector import MtcnnDetector -from detector import Detector -from fcn_detector import FcnDetector -from train_models.mtcnn_model import P_Net, R_Net, O_Net -from prepare_data.loader import TestLoader -import cv2 -import time -import os -import numpy as np - -import tensorflow as tf - -flags = tf.flags - -# opmization parameters -flags.DEFINE_integer('num_intra_threads', 0, - 'Specifiy the number threads within layers') -flags.DEFINE_integer('num_inter_threads', 0, - 'Specify the number threads between layers') -flags.DEFINE_string('dl', None, 'Location of data.') -flags.DEFINE_string('ckpt', None, - 'Directory where the model was written to.') - -FLAGS = flags.FLAGS - -print(FLAGS.num_inter_threads) -print(FLAGS.num_intra_threads) -print(FLAGS.ckpt) - - -test_mode = "ONet" -thresh = [0.9, 0.6, 0.7] -min_face_size = 24 -stride = 2 -slide_window = False -shuffle = False -detectors = [None, None, None] -prefix = [FLAGS.ckpt + '/PNet_landmark/PNet', FLAGS.ckpt + '/RNet_landmark/RNet', FLAGS.ckpt + '/ONet_landmark/ONet'] - -epoch = [18, 14, 16] -batch_size = [2048, 256, 16] -model_path = ['%s-%s' % (x, y) for x, y in zip(prefix, epoch)] -# load pnet model -if slide_window: - PNet = Detector(P_Net, 12, batch_size[0], model_path[0], FLAGS.num_inter_threads, FLAGS.num_intra_threads) -else: - PNet = FcnDetector(P_Net, model_path[0], FLAGS.num_inter_threads, FLAGS.num_intra_threads) -detectors[0] = PNet - -# load rnet model -if test_mode in ["RNet", "ONet"]: - RNet = Detector(R_Net, 24, batch_size[1], model_path[1], FLAGS.num_inter_threads, FLAGS.num_intra_threads) - detectors[1] = RNet - -# load onet model -if test_mode == "ONet": - ONet = Detector(O_Net, 48, batch_size[2], model_path[2], FLAGS.num_inter_threads, FLAGS.num_intra_threads) - detectors[2] = ONet - -mtcnn_detector = MtcnnDetector(detectors=detectors, min_face_size=min_face_size, - stride=stride, threshold=thresh, slide_window=slide_window) -gt_imdb = [] -# gt_imdb.append("35_Basketball_Basketball_35_515.jpg") -# imdb_ = dict()" -# imdb_['image'] = im_path -# imdb_['label'] = 5 -# path = "lala" -path = FLAGS.dl -# path = "prepare_data/WIDER_train/images/0--Parade" -for item in os.listdir(path): - gt_imdb.append(os.path.join(path, item)) -test_data = TestLoader(gt_imdb) - -start = time.time() -all_boxes, landmarks = mtcnn_detector.detect_face(test_data) -end = time.time() -count = 0 - -accuracy = 0 -for imagepah in gt_imdb: - for bbox in all_boxes[count]: - accuracy = accuracy + bbox[4] - count = count + 1 -accuracy = accuracy / count -print("Accuracy: %.2f" % (accuracy)) - -latency = (end - start) / count * 1000 -tpt = count / (end - start) -print("Total images: %d" % count) -print("Latency is: %.2f, Throughput is: %.2f" % (latency, tpt)) - -""" -count = 0 -for imagepath in gt_imdb: - print imagepath - image = cv2.imread(imagepath) - for bbox in all_boxes[count]: - cv2.putText(image,str(np.round(bbox[4],2)),(int(bbox[0]),int(bbox[1])),cv2.FONT_HERSHEY_TRIPLEX,1,color=(255,0,255)) - cv2.rectangle(image, (int(bbox[0]),int(bbox[1])),(int(bbox[2]),int(bbox[3])),(0,0,255)) - - for 
landmark in landmarks[count]: - for i in range(len(landmark)/2): - cv2.circle(image, (int(landmark[2*i]),int(int(landmark[2*i+1]))), 3, (0,0,255)) - - count = count + 1 - cv2.imwrite("result_landmark/%d.png" %(count),image) - #cv2.imshow("lala",image) - #cv2.waitKey(0) -""" -''' -for data in test_data: - print type(data) - for bbox in all_boxes[0]: - print bbox - print (int(bbox[0]),int(bbox[1])) - cv2.rectangle(data, (int(bbox[0]),int(bbox[1])),(int(bbox[2]),int(bbox[3])),(0,0,255)) - #print data - cv2.imshow("lala",data) - cv2.waitKey(0) -''' diff --git a/models/image_recognition/__init__.py b/models/image_recognition/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/image_recognition/__init__.py +++ b/models/image_recognition/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/image_recognition/tensorflow/__init__.py b/models/image_recognition/tensorflow/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/image_recognition/tensorflow/__init__.py +++ b/models/image_recognition/tensorflow/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py index cc06b1145..446fa053c 100644 --- a/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py @@ -30,110 +30,114 @@ NUM_TEST_IMAGES = 50000 - def load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() - - import os - file_ext = os.path.splitext(model_file)[1] + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') - return graph + import os + file_ext = os.path.splitext(model_file)[1] + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--data_location", default=None, - help="full path to the validation data") - parser.add_argument("--input_height", default=224, - type=int, help="input height") - parser.add_argument("--input_width", default=224, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="densenet169/predictions/Reshape_1", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - args = parser.parse_args() + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + 
parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="densenet169/predictions/Reshape_1", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() - if args.input_graph: - model_file = args.input_graph - else: - sys.exit("Please provide a graph file.") - input_height = args.input_height - input_width = args.input_width - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - data_location = args.data_location + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + input_height = args.input_height + input_width = args.input_width + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + + data_graph = tf.Graph() ### + with data_graph.as_default(): ### dataset = dataset.ImagenetData(data_location) preprocessor = image_preprocessing.ImagePreprocessor( input_height, input_width, batch_size, - 1, # device count - tf.float32, # data_type for input fed to the graph - train=False, # doing inference + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference resize_method='crop') images, labels = preprocessor.minibatch(dataset, subset='validation') - graph = load_graph(model_file) - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") - rewrite_options = rewriter_config_pb2.RewriterConfig( - layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - config.graph_options.rewrite_options.remapping = ( - rewriter_config_pb2.RewriterConfig.OFF) + graph = load_graph(model_file) + input_tensor = graph.get_tensor_by_name(input_layer + ":0") + output_tensor = graph.get_tensor_by_name(output_layer + ":0") + + rewrite_options = rewriter_config_pb2.RewriterConfig( + layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + config.graph_options.rewrite_options.remapping = ( + rewriter_config_pb2.RewriterConfig.OFF) - total_accuracy1, total_accuracy5 = (0.0, 0.0) - num_processed_images = 0 - num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ - - num_processed_images - top1 = 0 - with tf.Session(config=config) as sess: - sess_graph = tf.Session(graph=graph, config=config) + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ + - num_processed_images + top1 = 0 + with tf.compat.v1.Session(graph=data_graph) as sess: ### + sess_graph = tf.compat.v1.Session(graph=graph, config=config) - while num_remaining_images >= batch_size: - # Reads and preprocess 
data - # import pdb - # pdb.set_trace() - np_images, np_labels = sess.run([images[0], labels[0]]) - np_labels -= 1 - # print(np_labels.shape) - num_processed_images += batch_size - num_remaining_images -= batch_size - start_time = time.time() - # Compute inference on the preprocessed data - predictions1 = sess_graph.run(output_tensor, - {input_tensor: np_images}) - elapsed_time = time.time() - start_time - if(batch_size != 1): - predictions1 = sess.run(tf.squeeze(predictions1)) - else: - predictions1 = sess.run(tf.reshape(predictions1, [1, 1000])) - predictions2 = tf.argmax(predictions1, axis=1) - predictions = sess.run(predictions2) - top1 += batch_size - (np.count_nonzero(predictions - np_labels)) - print("Iteration time: %0.4f ms" % elapsed_time) - print(top1 / num_processed_images) + while num_remaining_images >= batch_size: + # Reads and preprocess data + #import pdb + #pdb.set_trace() + np_images, np_labels = sess.run([images[0], labels[0]]) + np_labels -= 1 + #print(np_labels.shape) + num_processed_images += batch_size + num_remaining_images -= batch_size + start_time = time.time() + # Compute inference on the preprocessed data + predictions1 = sess_graph.run(output_tensor, + {input_tensor: np_images}) + elapsed_time = time.time() - start_time + if(batch_size !=1): + predictions1 = sess.run(tf.squeeze(predictions1)) + else : + predictions1 = sess.run(tf.reshape(predictions1,[1,1000])) + predictions2 = tf.argmax(input=predictions1, axis=1) + predictions = sess.run(predictions2) + top1 += batch_size - (np.count_nonzero(predictions - np_labels)) + print("Iteration time: %0.4f ms" % elapsed_time) + print(top1/num_processed_images) diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/benchmark.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/benchmark.py index 891c33e35..f984fdecb 100644 --- a/models/image_recognition/tensorflow/densenet169/inference/fp32/benchmark.py +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/benchmark.py @@ -45,119 +45,126 @@ from google.protobuf import text_format import tensorflow as tf - def load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() - - import os - file_ext = os.path.splitext(model_file)[1] + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') + import os + file_ext = os.path.splitext(model_file)[1] - return graph + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--input_height", default=224, - type=int, help="input height") - parser.add_argument("--input_width", default=224, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="densenet169/predictions/Reshape_1", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) 
- parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - parser.add_argument("-gpu", "--gpu", - default=-1, - type=int, help="Run on gpu, other wise cpu", - required=False) - - parser.add_argument("--warmup_steps", type=int, default=40, - help="number of warmup steps") - parser.add_argument("--steps", type=int, default=100, help="number of steps") - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph - else: - sys.exit("Please provide a graph file.") - input_height = args.input_height - input_width = args.input_width - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - warmup_steps = args.warmup_steps - steps = args.steps - print(steps) - assert steps > 10, "Benchmark steps should be at least 10." - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="densenet169/predictions/Reshape_1", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + parser.add_argument("-gpu", "--gpu", + default = -1, + type=int, help="Run on gpu, other wise cpu", + required=False) + + parser.add_argument("--warmup_steps", type=int, default=40, + help="number of warmup steps") + parser.add_argument("--steps", type=int, default=100, help="number of steps") + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + input_height = args.input_height + input_width = args.input_width + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + warmup_steps = args.warmup_steps + steps = args.steps + print(steps) + assert steps > 10, "Benchmark steps should be at least 10." 
+ num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + + data_graph = tf.Graph() ## + with data_graph.as_default():## input_shape = [batch_size, input_height, input_width, 3] - images = tf.truncated_normal( - input_shape, - dtype=tf.float32, - stddev=10, - name='synthetic_images') - - image_data = None - graph = load_graph(model_file) - - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") - - rewrite_options = rewriter_config_pb2.RewriterConfig( - layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) - config = tf.ConfigProto() - if (args.gpu < 0): - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - config.graph_options.rewrite_options.remapping = ( - rewriter_config_pb2.RewriterConfig.OFF) - # os.environ["OMP_NUM_THREADS"] = "14" - with tf.Session(config=config) as sess: - image_data = sess.run(images) - - with tf.Session(graph=graph, config=config) as sess: - sys.stdout.flush() - print("[Running warmup steps...]") - for t in range(warmup_steps): - start_time = time.time() - sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - if((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, batch_size / elapsed_time)) - avg = 0 - print("[Running benchmark steps...]") - total_time = 0 - total_images = 0 - for t in range(steps): - start_time = time.time() - results = sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - avg += elapsed_time - if((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, batch_size * (t + 1) / avg)) - print(" Latency: {0} ms" - "".format(avg * 1000. 
/ (t + 1))) + images = tf.random.truncated_normal( + input_shape, + dtype=tf.float32, + stddev=10, + name='synthetic_images') + + #image_data = None + + graph = load_graph(model_file) + + input_tensor = graph.get_tensor_by_name(input_layer + ":0"); + output_tensor = graph.get_tensor_by_name(output_layer + ":0"); + + rewrite_options = rewriter_config_pb2.RewriterConfig( + layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) + config = tf.compat.v1.ConfigProto() + if (args.gpu < 0): + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + config.graph_options.rewrite_options.remapping = ( + rewriter_config_pb2.RewriterConfig.OFF) + #os.environ["OMP_NUM_THREADS"] = "14" + #with tf.compat.v1.Session(config=config) as sess: + # image_data = sess.run(images) + + data_config = tf.compat.v1.ConfigProto()### + data_config.inter_op_parallelism_threads = num_inter_threads ### + data_config.intra_op_parallelism_threads = num_intra_threads ### + + data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config) ### + with tf.compat.v1.Session(graph=graph, config=config) as sess: + sys.stdout.flush() + print("[Running warmup steps...]") + image_data = data_sess.run(images) ### + for t in range(warmup_steps): + start_time = time.time() + sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + if((t+1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t+1, batch_size/elapsed_time)) + avg = 0 + print("[Running benchmark steps...]") + total_time = 0; + total_images = 0; + for t in range(steps): + start_time = time.time() + results = sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + avg += elapsed_time + if((t+1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t+1, batch_size*(t+1)/avg)); + print(" Latency: {0} ms" + "".format(avg*1000. 
/(t+1))) diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/cnn_util.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/cnn_util.py index e2a42ee76..a9a4e2ca3 100644 --- a/models/image_recognition/tensorflow/densenet169/inference/fp32/cnn_util.py +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/cnn_util.py @@ -39,11 +39,12 @@ def tensorflow_version_tuple(): - v = tf.__version__ - major, minor, patch = v.split('.') - return (int(major), int(minor), patch) + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) def tensorflow_version(): - vt = tensorflow_version_tuple() - return vt[0] * 1000 + vt[1] + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] + diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/dataset.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/dataset.py index 129de043e..910223fc4 100644 --- a/models/image_recognition/tensorflow/densenet169/inference/fp32/dataset.py +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/dataset.py @@ -43,61 +43,61 @@ class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, data_dir=None): - self.name = name - if data_dir is None: - raise ValueError('Data directory not specified') - self.data_dir = data_dir + def __init__(self, name, data_dir=None): + self.name = name + if data_dir is None: + raise ValueError('Data directory not specified') + self.data_dir = data_dir - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.TFRecordReader() - @abstractmethod - def num_classes(self): - pass + @abstractmethod + def num_classes(self): + pass - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name class FlowersData(Dataset): - def __init__(self, data_dir=None): - super(FlowersData, self).__init__('Flowers', data_dir) + def __init__(self, data_dir=None): + super(FlowersData, self).__init__('Flowers', data_dir) - def num_classes(self): - return 5 + def num_classes(self): + return 5 - def num_examples_per_epoch(self, subset): - if subset == 'train': - return 3170 - elif subset == 'validation': - return 500 - else: - raise ValueError('Invalid data subset "%s"' % subset) + def num_examples_per_epoch(self, subset): + if subset == 'train': + return 3170 + elif subset == 'validation': + return 500 + else: + raise ValueError('Invalid data subset "%s"' % subset) class ImagenetData(Dataset): - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('ImageNet', data_dir) + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('ImageNet', data_dir) - def num_classes(self): - return 1000 + def num_classes(self): + return 1000 - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return 1281167 - elif subset == 'validation': - return 50000 - else: - raise ValueError('Invalid data subset "%s"' % subset) + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return 1281167 + elif subset == 'validation': + return 50000 + else: + raise 
ValueError('Invalid data subset "%s"' % subset) diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/densenet_preprocessing.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/densenet_preprocessing.py index 6c42d36bb..1ac269ae9 100644 --- a/models/image_recognition/tensorflow/densenet169/inference/fp32/densenet_preprocessing.py +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/densenet_preprocessing.py @@ -54,7 +54,7 @@ import tensorflow as tf -slim = tf.contrib.slim +#slim = tf.contrib.slim ### _R_MEAN = 123.68 _G_MEAN = 116.78 @@ -67,242 +67,242 @@ def _crop(image, offset_height, offset_width, crop_height, crop_width): - """Crops the given image using the provided offsets and sizes. + """Crops the given image using the provided offsets and sizes. - Note that the method doesn't assume we know the input image size but it does - assume we know the input image rank. + Note that the method doesn't assume we know the input image size but it does + assume we know the input image rank. - Args: - image: an image of shape [height, width, channels]. - offset_height: a scalar tensor indicating the height offset. - offset_width: a scalar tensor indicating the width offset. - crop_height: the height of the cropped image. - crop_width: the width of the cropped image. + Args: + image: an image of shape [height, width, channels]. + offset_height: a scalar tensor indicating the height offset. + offset_width: a scalar tensor indicating the width offset. + crop_height: the height of the cropped image. + crop_width: the width of the cropped image. - Returns: - the cropped (and resized) image. + Returns: + the cropped (and resized) image. - Raises: - InvalidArgumentError: if the rank is not 3 or if the image dimensions are - less than the crop size. - """ - original_shape = tf.shape(image) + Raises: + InvalidArgumentError: if the rank is not 3 or if the image dimensions are + less than the crop size. + """ + original_shape = tf.shape(input=image) - rank_assertion = tf.Assert( - tf.equal(tf.rank(image), 3), - ['Rank of image must be equal to 3.']) - with tf.control_dependencies([rank_assertion]): - cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]]) + rank_assertion = tf.Assert( + tf.equal(tf.rank(image), 3), + ['Rank of image must be equal to 3.']) + with tf.control_dependencies([rank_assertion]): + cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]]) - size_assertion = tf.Assert( - tf.logical_and( - tf.greater_equal(original_shape[0], crop_height), - tf.greater_equal(original_shape[1], crop_width)), - ['Crop size greater than the image size.']) + size_assertion = tf.Assert( + tf.logical_and( + tf.greater_equal(original_shape[0], crop_height), + tf.greater_equal(original_shape[1], crop_width)), + ['Crop size greater than the image size.']) - offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) + offsets = tf.cast(tf.stack([offset_height, offset_width, 0]), dtype=tf.int32) - # Use tf.slice instead of crop_to_bounding box as it accepts tensors to - # define the crop size. - with tf.control_dependencies([size_assertion]): - image = tf.slice(image, offsets, cropped_shape) - return tf.reshape(image, cropped_shape) + # Use tf.slice instead of crop_to_bounding box as it accepts tensors to + # define the crop size. 
+ with tf.control_dependencies([size_assertion]): + image = tf.slice(image, offsets, cropped_shape) + return tf.reshape(image, cropped_shape) def _random_crop(image_list, crop_height, crop_width): - """Crops the given list of images. - - The function applies the same crop to each image in the list. This can be - effectively applied when there are multiple image inputs of the same - dimension such as: - - image, depths, normals = _random_crop([image, depths, normals], 120, 150) - - Args: - image_list: a list of image tensors of the same dimension but possibly - varying channel. - crop_height: the new height. - crop_width: the new width. - - Returns: - the image_list with cropped images. - - Raises: - ValueError: if there are multiple image inputs provided with different size - or the images are smaller than the crop dimensions. - """ - if not image_list: - raise ValueError('Empty image_list.') - - # Compute the rank assertions. - rank_assertions = [] - for i in range(len(image_list)): - image_rank = tf.rank(image_list[i]) - rank_assert = tf.Assert( - tf.equal(image_rank, 3), - ['Wrong rank for tensor %s [expected] [actual]', - image_list[i].name, 3, image_rank]) - rank_assertions.append(rank_assert) - - with tf.control_dependencies([rank_assertions[0]]): - image_shape = tf.shape(image_list[0]) - image_height = image_shape[0] - image_width = image_shape[1] - crop_size_assert = tf.Assert( - tf.logical_and( - tf.greater_equal(image_height, crop_height), - tf.greater_equal(image_width, crop_width)), - ['Crop size greater than the image size.']) - - asserts = [rank_assertions[0], crop_size_assert] - - for i in range(1, len(image_list)): - image = image_list[i] - asserts.append(rank_assertions[i]) - with tf.control_dependencies([rank_assertions[i]]): - shape = tf.shape(image) - height = shape[0] - width = shape[1] - - height_assert = tf.Assert( - tf.equal(height, image_height), - ['Wrong height for tensor %s [expected][actual]', - image.name, height, image_height]) - width_assert = tf.Assert( - tf.equal(width, image_width), - ['Wrong width for tensor %s [expected][actual]', - image.name, width, image_width]) - asserts.extend([height_assert, width_assert]) - - # Create a random bounding box. - # - # Use tf.random_uniform and not numpy.random.rand as doing the former would - # generate random numbers at graph eval time, unlike the latter which - # generates random numbers at graph definition time. - with tf.control_dependencies(asserts): - max_offset_height = tf.reshape(image_height - crop_height + 1, []) - with tf.control_dependencies(asserts): - max_offset_width = tf.reshape(image_width - crop_width + 1, []) - offset_height = tf.random_uniform( - [], maxval=max_offset_height, dtype=tf.int32) - offset_width = tf.random_uniform( - [], maxval=max_offset_width, dtype=tf.int32) - - return [_crop(image, offset_height, offset_width, - crop_height, crop_width) for image in image_list] + """Crops the given list of images. + + The function applies the same crop to each image in the list. This can be + effectively applied when there are multiple image inputs of the same + dimension such as: + + image, depths, normals = _random_crop([image, depths, normals], 120, 150) + + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the new height. + crop_width: the new width. + + Returns: + the image_list with cropped images. 
+ + Raises: + ValueError: if there are multiple image inputs provided with different size + or the images are smaller than the crop dimensions. + """ + if not image_list: + raise ValueError('Empty image_list.') + + # Compute the rank assertions. + rank_assertions = [] + for i in range(len(image_list)): + image_rank = tf.rank(image_list[i]) + rank_assert = tf.Assert( + tf.equal(image_rank, 3), + ['Wrong rank for tensor %s [expected] [actual]', + image_list[i].name, 3, image_rank]) + rank_assertions.append(rank_assert) + + with tf.control_dependencies([rank_assertions[0]]): + image_shape = tf.shape(input=image_list[0]) + image_height = image_shape[0] + image_width = image_shape[1] + crop_size_assert = tf.Assert( + tf.logical_and( + tf.greater_equal(image_height, crop_height), + tf.greater_equal(image_width, crop_width)), + ['Crop size greater than the image size.']) + + asserts = [rank_assertions[0], crop_size_assert] + + for i in range(1, len(image_list)): + image = image_list[i] + asserts.append(rank_assertions[i]) + with tf.control_dependencies([rank_assertions[i]]): + shape = tf.shape(input=image) + height = shape[0] + width = shape[1] + + height_assert = tf.Assert( + tf.equal(height, image_height), + ['Wrong height for tensor %s [expected][actual]', + image.name, height, image_height]) + width_assert = tf.Assert( + tf.equal(width, image_width), + ['Wrong width for tensor %s [expected][actual]', + image.name, width, image_width]) + asserts.extend([height_assert, width_assert]) + + # Create a random bounding box. + # + # Use tf.random_uniform and not numpy.random.rand as doing the former would + # generate random numbers at graph eval time, unlike the latter which + # generates random numbers at graph definition time. + with tf.control_dependencies(asserts): + max_offset_height = tf.reshape(image_height - crop_height + 1, []) + with tf.control_dependencies(asserts): + max_offset_width = tf.reshape(image_width - crop_width + 1, []) + offset_height = tf.random.uniform( + [], maxval=max_offset_height, dtype=tf.int32) + offset_width = tf.random.uniform( + [], maxval=max_offset_width, dtype=tf.int32) + + return [_crop(image, offset_height, offset_width, + crop_height, crop_width) for image in image_list] def _central_crop(image_list, crop_height, crop_width): - """Performs central crops of the given image list. + """Performs central crops of the given image list. - Args: - image_list: a list of image tensors of the same dimension but possibly - varying channel. - crop_height: the height of the image following the crop. - crop_width: the width of the image following the crop. + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the height of the image following the crop. + crop_width: the width of the image following the crop. - Returns: - the list of cropped images. - """ - outputs = [] - for image in image_list: - image_height = tf.shape(image)[0] - image_width = tf.shape(image)[1] + Returns: + the list of cropped images. 
+ """ + outputs = [] + for image in image_list: + image_height = tf.shape(input=image)[0] + image_width = tf.shape(input=image)[1] - offset_height = (image_height - crop_height) / 2 - offset_width = (image_width - crop_width) / 2 + offset_height = (image_height - crop_height) / 2 + offset_width = (image_width - crop_width) / 2 - outputs.append(_crop(image, offset_height, offset_width, - crop_height, crop_width)) - return outputs + outputs.append(_crop(image, offset_height, offset_width, + crop_height, crop_width)) + return outputs def _mean_image_subtraction(image, means): - """Subtracts the given means from each image channel. + """Subtracts the given means from each image channel. - For example: - means = [123.68, 116.779, 103.939] - image = _mean_image_subtraction(image, means) + For example: + means = [123.68, 116.779, 103.939] + image = _mean_image_subtraction(image, means) - Note that the rank of `image` must be known. + Note that the rank of `image` must be known. - Args: - image: a tensor of size [height, width, C]. - means: a C-vector of values to subtract from each channel. + Args: + image: a tensor of size [height, width, C]. + means: a C-vector of values to subtract from each channel. - Returns: - the centered image. + Returns: + the centered image. - Raises: - ValueError: If the rank of `image` is unknown, if `image` has a rank other - than three or if the number of channels in `image` doesn't match the - number of values in `means`. - """ - if image.get_shape().ndims != 3: - raise ValueError('Input must be of size [height, width, C>0]') - num_channels = image.get_shape().as_list()[-1] - if len(means) != num_channels: - raise ValueError('len(means) must match the number of channels') + Raises: + ValueError: If the rank of `image` is unknown, if `image` has a rank other + than three or if the number of channels in `image` doesn't match the + number of values in `means`. + """ + if image.get_shape().ndims != 3: + raise ValueError('Input must be of size [height, width, C>0]') + num_channels = image.get_shape().as_list()[-1] + if len(means) != num_channels: + raise ValueError('len(means) must match the number of channels') - channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) - for i in range(num_channels): - channels[i] -= means[i] - return tf.concat(axis=2, values=channels) + channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) + for i in range(num_channels): + channels[i] -= means[i] + return tf.concat(axis=2, values=channels) def _smallest_size_at_least(height, width, smallest_side): - """Computes new shape with the smallest side equal to `smallest_side`. + """Computes new shape with the smallest side equal to `smallest_side`. - Computes new shape with the smallest side equal to `smallest_side` while - preserving the original aspect ratio. + Computes new shape with the smallest side equal to `smallest_side` while + preserving the original aspect ratio. - Args: - height: an int32 scalar tensor indicating the current height. - width: an int32 scalar tensor indicating the current width. - smallest_side: A python integer or scalar `Tensor` indicating the size of - the smallest side after resize. + Args: + height: an int32 scalar tensor indicating the current height. + width: an int32 scalar tensor indicating the current width. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. - Returns: - new_height: an int32 scalar tensor indicating the new height. 
- new_width: and int32 scalar tensor indicating the new width. - """ - smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) + Returns: + new_height: an int32 scalar tensor indicating the new height. + new_width: and int32 scalar tensor indicating the new width. + """ + smallest_side = tf.convert_to_tensor(value=smallest_side, dtype=tf.int32) - height = tf.to_float(height) - width = tf.to_float(width) - smallest_side = tf.to_float(smallest_side) + height = tf.cast(height, dtype=tf.float32) + width = tf.cast(width, dtype=tf.float32) + smallest_side = tf.cast(smallest_side, dtype=tf.float32) - scale = tf.cond(tf.greater(height, width), - lambda: smallest_side / width, - lambda: smallest_side / height) - new_height = tf.to_int32(height * scale) - new_width = tf.to_int32(width * scale) - return new_height, new_width + scale = tf.cond(pred=tf.greater(height, width), + true_fn=lambda: smallest_side / width, + false_fn=lambda: smallest_side / height) + new_height = tf.cast(height * scale, dtype=tf.int32) + new_width = tf.cast(width * scale, dtype=tf.int32) + return new_height, new_width def _aspect_preserving_resize(image, smallest_side): - """Resize images preserving the original aspect ratio. - - Args: - image: A 3-D image `Tensor`. - smallest_side: A python integer or scalar `Tensor` indicating the size of - the smallest side after resize. - - Returns: - resized_image: A 3-D tensor containing the resized image. - """ - smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) - - shape = tf.shape(image) - height = shape[0] - width = shape[1] - new_height, new_width = _smallest_size_at_least(height, width, smallest_side) - image = tf.expand_dims(image, 0) - resized_image = tf.image.resize_bilinear(image, [new_height, new_width], - align_corners=False) - resized_image = tf.squeeze(resized_image) - resized_image.set_shape([None, None, 3]) - return resized_image + """Resize images preserving the original aspect ratio. + + Args: + image: A 3-D image `Tensor`. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. + + Returns: + resized_image: A 3-D tensor containing the resized image. + """ + smallest_side = tf.convert_to_tensor(value=smallest_side, dtype=tf.int32) + + shape = tf.shape(input=image) + height = shape[0] + width = shape[1] + new_height, new_width = _smallest_size_at_least(height, width, smallest_side) + image = tf.expand_dims(image, 0) + resized_image = tf.image.resize(image, [new_height, new_width], + method=tf.image.ResizeMethod.BILINEAR) + resized_image = tf.squeeze(resized_image) + resized_image.set_shape([None, None, 3]) + return resized_image def preprocess_for_train(image, @@ -310,82 +310,82 @@ def preprocess_for_train(image, output_width, resize_side_min=_RESIZE_SIDE_MIN, resize_side_max=_RESIZE_SIDE_MAX): - """Preprocesses the given image for training. + """Preprocesses the given image for training. - Note that the actual resizing scale is sampled from - [`resize_size_min`, `resize_size_max`]. + Note that the actual resizing scale is sampled from + [`resize_size_min`, `resize_size_max`]. - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - resize_side_min: The lower bound for the smallest side of the image for - aspect-preserving resizing. - resize_side_max: The upper bound for the smallest side of the image for - aspect-preserving resizing. 
+ Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. - Returns: - A preprocessed image. - """ - resize_side = tf.random_uniform( - [], minval=resize_side_min, maxval=resize_side_max + 1, dtype=tf.int32) + Returns: + A preprocessed image. + """ + resize_side = tf.random.uniform( + [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32) - image = _aspect_preserving_resize(image, resize_side) - image = _random_crop([image], output_height, output_width)[0] - image.set_shape([output_height, output_width, 3]) - image = tf.to_float(image) - image = tf.image.random_flip_left_right(image) + image = _aspect_preserving_resize(image, resize_side) + image = _random_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.cast(image, dtype=tf.float32) + image = tf.image.random_flip_left_right(image) - image = _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) - return image * _SCALE_FACTOR + image = _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + return image * _SCALE_FACTOR def preprocess_for_eval(image, output_height, output_width, resize_side): - """Preprocesses the given image for evaluation. + """Preprocesses the given image for evaluation. - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - resize_side: The smallest side of the image for aspect-preserving resizing. + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + resize_side: The smallest side of the image for aspect-preserving resizing. - Returns: - A preprocessed image. - """ - image = _aspect_preserving_resize(image, resize_side) - image = _central_crop([image], output_height, output_width)[0] - image.set_shape([output_height, output_width, 3]) - image = tf.to_float(image) + Returns: + A preprocessed image. + """ + image = _aspect_preserving_resize(image, resize_side) + image = _central_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.cast(image, dtype=tf.float32) - image = _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) - return image * _SCALE_FACTOR + image = _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + return image * _SCALE_FACTOR def preprocess_image(image, output_height, output_width, is_training=False, resize_side_min=_RESIZE_SIDE_MIN, resize_side_max=_RESIZE_SIDE_MAX): - """Preprocesses the given image. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - is_training: `True` if we're preprocessing the image for training and - `False` otherwise. - resize_side_min: The lower bound for the smallest side of the image for - aspect-preserving resizing. If `is_training` is `False`, then this value - is used for rescaling. 
- resize_side_max: The upper bound for the smallest side of the image for - aspect-preserving resizing. If `is_training` is `False`, this value is - ignored. Otherwise, the resize side is sampled from - [resize_size_min, resize_size_max]. - - Returns: - A preprocessed image. - """ - if is_training: - return preprocess_for_train(image, output_height, output_width, - resize_side_min, resize_side_max) - else: - return preprocess_for_eval(image, output_height, output_width, - resize_side_min) + """Preprocesses the given image. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + is_training: `True` if we're preprocessing the image for training and + `False` otherwise. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, then this value + is used for rescaling. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, this value is + ignored. Otherwise, the resize side is sampled from + [resize_size_min, resize_size_max]. + + Returns: + A preprocessed image. + """ + if is_training: + return preprocess_for_train(image, output_height, output_width, + resize_side_min, resize_side_max) + else: + return preprocess_for_eval(image, output_height, output_width, + resize_side_min) diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/image_preprocessing.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/image_preprocessing.py index 228b6a8d3..0dc46651c 100644 --- a/models/image_recognition/tensorflow/densenet169/inference/fp32/image_preprocessing.py +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/image_preprocessing.py @@ -42,381 +42,378 @@ from tensorflow.python.ops import data_flow_ops import cnn_util - def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - - The output of the build_image_data.py image preprocessing script is a dataset - containing serialized Example protocol buffers. Each Example proto contains - the following fields: - - image/height: 462 - image/width: 581 - image/colorspace: 'RGB' - image/channels: 3 - image/class/label: 615 - image/class/synset: 'n03623198' - image/class/text: 'knee pad' - image/object/bbox/xmin: 0.1 - image/object/bbox/xmax: 0.9 - image/object/bbox/ymin: 0.2 - image/object/bbox/ymax: 0.6 - image/object/bbox/label: 615 - image/format: 'JPEG' - image/filename: 'ILSVRC2012_val_00041207.JPEG' - image/encoded: - - Args: - example_serialized: scalar Tensor tf.string containing a serialized - Example protocol buffer. - - Returns: - image_buffer: Tensor tf.string containing the contents of a JPEG file. - label: Tensor tf.int32 containing the label. - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. - text: Tensor tf.string containing the human-readable label. - """ - # Dense features in Example proto. 
- feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. - feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) - - # Note that we impose an ordering of (y, x) just to make life difficult. - bbox = tf.concat([ymin, xmin, ymax, xmax], 0) - - # Force the variable number of bounding boxes into the shape - # [1, num_boxes, coords]. - bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) - - return features['image/encoded'], label, bbox, features['image/class/text'] + """Parses an Example proto containing a training example of an image. + + The output of the build_image_data.py image preprocessing script is a dataset + containing serialized Example protocol buffers. Each Example proto contains + the following fields: + + image/height: 462 + image/width: 581 + image/colorspace: 'RGB' + image/channels: 3 + image/class/label: 615 + image/class/synset: 'n03623198' + image/class/text: 'knee pad' + image/object/bbox/xmin: 0.1 + image/object/bbox/xmax: 0.9 + image/object/bbox/ymin: 0.2 + image/object/bbox/ymax: 0.6 + image/object/bbox/label: 615 + image/format: 'JPEG' + image/filename: 'ILSVRC2012_val_00041207.JPEG' + image/encoded: + + Args: + example_serialized: scalar Tensor tf.string containing a serialized + Example protocol buffer. + + Returns: + image_buffer: Tensor tf.string containing the contents of a JPEG file. + label: Tensor tf.int32 containing the label. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + text: Tensor tf.string containing the human-readable label. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. 
+ feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(a=bbox, perm=[0, 2, 1]) + + return features['image/encoded'], label, bbox, features['image/class/text'] def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): - """Decode a JPEG string into one 3-D float image Tensor. - - Args: - image_buffer: scalar string Tensor. - scope: Optional scope for op_scope. - Returns: - 3-D float Tensor with values ranging from [0, 1). - """ - # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): - # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): - with tf.name_scope(scope or 'decode_jpeg'): - # Decode the string as an RGB JPEG. - # Note that the resulting image contains an unknown height and width - # that is set dynamically by decode_jpeg. In other words, the height - # and width of image is unknown at compile-time. - image = tf.image.decode_jpeg(image_buffer, channels=3, - fancy_upscaling=False, - dct_method='INTEGER_FAST') - - # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') - - return image + """Decode a JPEG string into one 3-D float image Tensor. + + Args: + image_buffer: scalar string Tensor. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor with values ranging from [0, 1). + """ + # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): + # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): + with tf.compat.v1.name_scope(scope or 'decode_jpeg'): + # Decode the string as an RGB JPEG. + # Note that the resulting image contains an unknown height and width + # that is set dynamically by decode_jpeg. In other words, the height + # and width of image is unknown at compile-time. 
+ image = tf.image.decode_jpeg(image_buffer, channels=3, + fancy_upscaling=False, + dct_method='INTEGER_FAST') + + # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') + + return image def eval_image(image, height, width, bbox, thread_id, resize): - """Get the image for model evaluation.""" - with tf.name_scope('eval_image'): - if not thread_id: - tf.summary.image( - 'original_image', tf.expand_dims(image, 0)) - - if resize == 'crop': - # Note: This is much slower than crop_to_bounding_box - # It seems that the redundant pad step has huge overhead - # distorted_image = tf.image.resize_image_with_crop_or_pad(image, - # height, width) - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, tf.convert_to_tensor( - [256, 256 * shape[1] / shape[0]], dtype=tf.int32)), - lambda: tf.image.resize_images(image, tf.convert_to_tensor([256 * shape[0] / shape[1], 256], dtype=tf.int32))) - shape = tf.shape(image) - - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - # y0=tf.random_uniform([],minval=0,maxval=(shape[0] - height + 1), dtype=tf.int32) - # x0=tf.random_uniform([],minval=0,maxval=(shape[1] - width + 1), dtype=tf.int32) - # distorted_image = tf.slice(image, [y0,x0,0], [height,width,3]) - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, - width) - else: - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=0.5, - aspect_ratio_range=[0.90, 1.10], - area_range=[0.10, 1.0], - max_attempts=100, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, _ = sample_distorted_bounding_box - # Crop the image to the specified bounding box. - distorted_image = tf.slice(image, bbox_begin, bbox_size) - resize_method = { - 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, - 'bilinear': tf.image.ResizeMethod.BILINEAR, - 'bicubic': tf.image.ResizeMethod.BICUBIC, - 'area': tf.image.ResizeMethod.AREA - }[resize] - # This resizing operation may distort the images because the aspect - # ratio is not respected. 
- if cnn_util.tensorflow_version() >= 11: - distorted_image = tf.image.resize_images( - distorted_image, [height, width], - resize_method, - align_corners=False) - else: - distorted_image = tf.image.resize_images( - distorted_image, height, width, resize_method, align_corners=False) - distorted_image.set_shape([height, width, 3]) - if not thread_id: - tf.summary.image( - 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) - image = distorted_image - return image + """Get the image for model evaluation.""" + with tf.compat.v1.name_scope('eval_image'): + if not thread_id: + tf.compat.v1.summary.image( + 'original_image', tf.expand_dims(image, 0)) + + if resize == 'crop': + # Note: This is much slower than crop_to_bounding_box + # It seems that the redundant pad step has huge overhead + # distorted_image = tf.image.resize_image_with_crop_or_pad(image, + # height, width) + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, tf.convert_to_tensor(value=[256, 256*shape[1]/shape[0]], dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, tf.convert_to_tensor(value=[256*shape[0]/shape[1], 256], dtype=tf.int32))) + shape = tf.shape(input=image) + + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + #y0=tf.random_uniform([],minval=0,maxval=(shape[0] - height + 1), dtype=tf.int32) + #x0=tf.random_uniform([],minval=0,maxval=(shape[1] - width + 1), dtype=tf.int32) + ## distorted_image = tf.slice(image, [y0,x0,0], [height,width,3]) + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, + width) + else: + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + image_size=tf.shape(input=image), + bounding_boxes=bbox, + min_object_covered=0.5, + aspect_ratio_range=[0.90, 1.10], + area_range=[0.10, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, _ = sample_distorted_bounding_box + # Crop the image to the specified bounding box. + distorted_image = tf.slice(image, bbox_begin, bbox_size) + resize_method = { + 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, + 'bilinear': tf.image.ResizeMethod.BILINEAR, + 'bicubic': tf.image.ResizeMethod.BICUBIC, + 'area': tf.image.ResizeMethod.AREA + }[resize] + # This resizing operation may distort the images because the aspect + # ratio is not respected. + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize( + distorted_image, [height, width], + resize_method) + else: + distorted_image = tf.image.resize( + distorted_image, height, width, resize_method) + distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.compat.v1.summary.image( + 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) + image = distorted_image + return image def distort_image(image, height, width, bbox, thread_id=0, scope=None): - """Distort one image for training a network. - - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not effect the label. - - Args: - image: 3-D float Tensor of image - height: integer - width: integer - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. - thread_id: integer indicating the preprocessing thread. - scope: Optional scope for op_scope. - Returns: - 3-D float Tensor of distorted image used for training. 
- """ - # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): - # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): - with tf.name_scope(scope or 'distort_image'): - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - - # After this point, all image pixels reside in [0,1) - # until the very end, when they're rescaled to (-1, 1). The various - # adjust_* ops all require this range for dtype float. - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - - # Display the bounding box in the first thread only. - if not thread_id: - image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), - bbox) - tf.summary.image( - 'image_with_bounding_boxes', image_with_box) - - # A large fraction of image datasets contain a human-annotated bounding - # box delineating the region of the image containing the object of interest. - # We choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an allowed - # range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image. - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=0.1, - aspect_ratio_range=[0.99, 1.01], - area_range=[0.05, 1.0], - max_attempts=100, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - if not thread_id: - image_with_distorted_box = tf.image.draw_bounding_boxes( - tf.expand_dims(image, 0), distort_bbox) - tf.summary.image( - 'images_with_distorted_bounding_box', - image_with_distorted_box) - - # Crop the image to the specified bounding box. - distorted_image = tf.slice(image, bbox_begin, bbox_size) - - # This resizing operation may distort the images because the aspect - # ratio is not respected. We select a resize method in a round robin - # fashion based on the thread number. - # Note that ResizeMethod contains 4 enumerated resizing methods. - resize_method = thread_id % 4 - if cnn_util.tensorflow_version() >= 11: - distorted_image = tf.image.resize_images( - distorted_image, [height, width], resize_method, align_corners=False) - else: - distorted_image = tf.image.resize_images( - distorted_image, height, width, resize_method, align_corners=False) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. - distorted_image.set_shape([height, width, 3]) - if not thread_id: - tf.summary.image( - 'cropped_resized_image', - tf.expand_dims(distorted_image, 0)) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - - # Randomly distort the colors. - distorted_image = distort_color(distorted_image, thread_id) - - # Note: This ensures the scaling matches the output of eval_image - distorted_image *= 256 - - if not thread_id: - tf.summary.image( - 'final_distorted_image', - tf.expand_dims(distorted_image, 0)) - return distorted_image + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. 
+ + Args: + image: 3-D float Tensor of image + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + thread_id: integer indicating the preprocessing thread. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor of distorted image used for training. + """ + # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): + # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + with tf.compat.v1.name_scope(scope or 'distort_image'): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # After this point, all image pixels reside in [0,1) + # until the very end, when they're rescaled to (-1, 1). The various + # adjust_* ops all require this range for dtype float. + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + # Display the bounding box in the first thread only. + if not thread_id: + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + tf.compat.v1.summary.image( + 'image_with_bounding_boxes', image_with_box) + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an allowed + # range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + image_size=tf.shape(input=image), + bounding_boxes=bbox, + min_object_covered=0.1, + aspect_ratio_range=[0.99, 1.01], + area_range=[0.05, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + if not thread_id: + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distort_bbox) + tf.compat.v1.summary.image( + 'images_with_distorted_bounding_box', + image_with_distorted_box) + + # Crop the image to the specified bounding box. + distorted_image = tf.slice(image, bbox_begin, bbox_size) + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + resize_method = thread_id % 4 + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize( + distorted_image, [height, width], resize_method) + else: + distorted_image = tf.image.resize( + distorted_image, height, width, resize_method) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. + distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.compat.v1.summary.image( + 'cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + + # Randomly distort the colors. 
+ distorted_image = distort_color(distorted_image, thread_id) + + # Note: This ensures the scaling matches the output of eval_image + distorted_image *= 256 + + if not thread_id: + tf.compat.v1.summary.image( + 'final_distorted_image', + tf.expand_dims(distorted_image, 0)) + return distorted_image def distort_color(image, thread_id=0, scope=None): - """Distort the color of the image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather then adding that level of complication, we select a distinct ordering - of color ops for each preprocessing thread. - - Args: - image: Tensor containing single image. - thread_id: preprocessing thread ID. - scope: Optional scope for op_scope. - Returns: - color-distorted image - """ - # with tf.op_scope([image], scope, 'distort_color'): - # with tf.name_scope(scope, 'distort_color', [image]): - with tf.name_scope(scope or 'distort_color'): - color_ordering = thread_id % 2 - - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - - # The random_* ops do not necessarily clamp. - image = tf.clip_by_value(image, 0.0, 1.0) - return image + """Distort the color of the image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: Tensor containing single image. + thread_id: preprocessing thread ID. + scope: Optional scope for op_scope. + Returns: + color-distorted image + """ + # with tf.op_scope([image], scope, 'distort_color'): + # with tf.name_scope(scope, 'distort_color', [image]): + with tf.compat.v1.name_scope(scope or 'distort_color'): + color_ordering = thread_id % 2 + + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + + # The random_* ops do not necessarily clamp. 
+ image = tf.clip_by_value(image, 0.0, 1.0) + return image class ImagePreprocessor(object): - """Preprocessor for input images.""" - - def __init__(self, - height, - width, - batch_size, - device_count, - dtype=tf.float32, - train=True, - distortions=None, - resize_method=None): - self.height = height - self.width = width - self.batch_size = batch_size - self.device_count = device_count - self.dtype = dtype - self.train = train - self.resize_method = resize_method - if distortions is None: - distortions = False - self.distortions = distortions - if self.batch_size % self.device_count != 0: - raise ValueError( - ('batch_size must be a multiple of device_count: ' - 'batch_size %d, device_count: %d') % - (self.batch_size, self.device_count)) - self.batch_size_per_device = self.batch_size // self.device_count - - def preprocess(self, image_buffer, bbox, thread_id): - """Preprocessing image_buffer using thread_id.""" - # Note: Width and height of image is known only at runtime. - image = tf.image.decode_jpeg(image_buffer, channels=3, - dct_method='INTEGER_FAST') - if self.train and self.distortions: - image = distort_image(image, self.height, self.width, bbox, thread_id) - else: - # image = eval_image(image, self.height, self.width, bbox, thread_id, - # self.resize_method) - image = densenet_preprocessing.preprocess_image(image, 224, 224, False) - # Note: image is now float32 [height,width,3] with range [0, 255] - - # image = tf.cast(image, tf.uint8) # HACK TESTING - - return image - - def minibatch(self, dataset, subset): - with tf.name_scope('batch_processing'): - images = [[] for i in range(self.device_count)] - labels = [[] for i in range(self.device_count)] - record_input = data_flow_ops.RecordInput( - file_pattern=dataset.tf_record_pattern(subset), - seed=randint(0, 9000), - parallelism=64, - buffer_size=10000, - batch_size=self.batch_size, - name='record_input') - records = record_input.get_yield_op() - records = tf.split(records, self.batch_size, 0) - records = [tf.reshape(record, []) for record in records] - for i in xrange(self.batch_size): - value = records[i] - image_buffer, label_index, bbox, _ = parse_example_proto(value) - image = self.preprocess(image_buffer, bbox, i % 4) - - device_index = i % self.device_count - images[device_index].append(image) - labels[device_index].append(label_index) - label_index_batch = [None] * self.device_count - for device_index in xrange(self.device_count): - images[device_index] = tf.parallel_stack(images[device_index]) - label_index_batch[device_index] = tf.concat(labels[device_index], 0) - - # dynamic_pad=True) # HACK TESTING dynamic_pad=True - images[device_index] = tf.cast(images[device_index], self.dtype) - depth = 3 - images[device_index] = tf.reshape( - images[device_index], - shape=[self.batch_size_per_device, self.height, self.width, depth]) - label_index_batch[device_index] = tf.reshape( - label_index_batch[device_index], [self.batch_size_per_device]) - # Display the training images in the visualizer. 
- # tf.summary.image('images', images) - - return images, label_index_batch + """Preprocessor for input images.""" + + def __init__(self, + height, + width, + batch_size, + device_count, + dtype=tf.float32, + train=True, + distortions=None, + resize_method=None): + self.height = height + self.width = width + self.batch_size = batch_size + self.device_count = device_count + self.dtype = dtype + self.train = train + self.resize_method = resize_method + if distortions is None: + distortions = False + self.distortions = distortions + if self.batch_size % self.device_count != 0: + raise ValueError( + ('batch_size must be a multiple of device_count: ' + 'batch_size %d, device_count: %d') % + (self.batch_size, self.device_count)) + self.batch_size_per_device = self.batch_size // self.device_count + + def preprocess(self, image_buffer, bbox, thread_id): + """Preprocessing image_buffer using thread_id.""" + # Note: Width and height of image is known only at runtime. + image = tf.image.decode_jpeg(image_buffer, channels=3, + dct_method='INTEGER_FAST') + if self.train and self.distortions: + image = distort_image(image, self.height, self.width, bbox, thread_id) + else: + #image = eval_image(image, self.height, self.width, bbox, thread_id, + # self.resize_method) + image = densenet_preprocessing.preprocess_image(image,224,224,False) + # Note: image is now float32 [height,width,3] with range [0, 255] + + # image = tf.cast(image, tf.uint8) # HACK TESTING + + return image + + def minibatch(self, dataset, subset): + with tf.compat.v1.name_scope('batch_processing'): + images = [[] for i in range(self.device_count)] + labels = [[] for i in range(self.device_count)] + record_input = data_flow_ops.RecordInput( + file_pattern=dataset.tf_record_pattern(subset), + seed=randint(0, 9000), + parallelism=64, + buffer_size=10000, + batch_size=self.batch_size, + name='record_input') + records = record_input.get_yield_op() + records = tf.split(records, self.batch_size, 0) + records = [tf.reshape(record, []) for record in records] + for i in xrange(self.batch_size): + value = records[i] + image_buffer, label_index, bbox, _ = parse_example_proto(value) + image = self.preprocess(image_buffer, bbox, i % 4) + + device_index = i % self.device_count + images[device_index].append(image) + labels[device_index].append(label_index) + label_index_batch = [None] * self.device_count + for device_index in xrange(self.device_count): + images[device_index] = tf.parallel_stack(images[device_index]) + label_index_batch[device_index] = tf.concat(labels[device_index], 0) + + # dynamic_pad=True) # HACK TESTING dynamic_pad=True + images[device_index] = tf.cast(images[device_index], self.dtype) + depth = 3 + images[device_index] = tf.reshape( + images[device_index], + shape=[self.batch_size_per_device, self.height, self.width, depth]) + label_index_batch[device_index] = tf.reshape( + label_index_batch[device_index], [self.batch_size_per_device]) + # Display the training images in the visualizer. 
+ # tf.summary.image('images', images) + + return images, label_index_batch diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/cnn_util.py b/models/image_recognition/tensorflow/inception_resnet_v2/cnn_util.py index cd1f0d4d1..b5639df6f 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/cnn_util.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/cnn_util.py @@ -40,11 +40,12 @@ def tensorflow_version_tuple(): - v = tf.__version__ - major, minor, patch = v.split('.') - return (int(major), int(minor), patch) + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) def tensorflow_version(): - vt = tensorflow_version_tuple() - return vt[0] * 1000 + vt[1] + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] + diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/dataset_factory.py b/models/image_recognition/tensorflow/inception_resnet_v2/dataset_factory.py index 637b55ed4..d91f55283 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/dataset_factory.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/dataset_factory.py @@ -47,26 +47,26 @@ def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None): - """Given a dataset name and a split_name returns a Dataset. + """Given a dataset name and a split_name returns a Dataset. - Args: - name: String, the name of the dataset. - split_name: A train/test split name. - dataset_dir: The directory where the dataset files are stored. - file_pattern: The file pattern to use for matching the dataset source files. - reader: The subclass of tf.ReaderBase. If left as `None`, then the default - reader defined by each dataset is used. + Args: + name: String, the name of the dataset. + split_name: A train/test split name. + dataset_dir: The directory where the dataset files are stored. + file_pattern: The file pattern to use for matching the dataset source files. + reader: The subclass of tf.ReaderBase. If left as `None`, then the default + reader defined by each dataset is used. - Returns: - A `Dataset` class. + Returns: + A `Dataset` class. - Raises: - ValueError: If the dataset `name` is unknown. - """ - if name not in datasets_map: - raise ValueError('Name of dataset unknown %s' % name) - return datasets_map[name].get_split( - split_name, - dataset_dir, - file_pattern, - reader) + Raises: + ValueError: If the dataset `name` is unknown. + """ + if name not in datasets_map: + raise ValueError('Name of dataset unknown %s' % name) + return datasets_map[name].get_split( + split_name, + dataset_dir, + file_pattern, + reader) diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/dataset_utils.py b/models/image_recognition/tensorflow/inception_resnet_v2/dataset_utils.py index 41791b6c6..fa579b233 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/dataset_utils.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/dataset_utils.py @@ -49,123 +49,123 @@ def int64_feature(values): - """Returns a TF-Feature of int64s. + """Returns a TF-Feature of int64s. - Args: - values: A scalar or list of values. + Args: + values: A scalar or list of values. - Returns: - A TF-Feature. - """ - if not isinstance(values, (tuple, list)): - values = [values] - return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) + Returns: + A TF-Feature. 
+ """ + if not isinstance(values, (tuple, list)): + values = [values] + return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) def bytes_feature(values): - """Returns a TF-Feature of bytes. + """Returns a TF-Feature of bytes. - Args: - values: A string. + Args: + values: A string. - Returns: - A TF-Feature. - """ - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) + Returns: + A TF-Feature. + """ + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) def float_feature(values): - """Returns a TF-Feature of floats. + """Returns a TF-Feature of floats. - Args: - values: A scalar of list of values. + Args: + values: A scalar of list of values. - Returns: - A TF-Feature. - """ - if not isinstance(values, (tuple, list)): - values = [values] - return tf.train.Feature(float_list=tf.train.FloatList(value=values)) + Returns: + A TF-Feature. + """ + if not isinstance(values, (tuple, list)): + values = [values] + return tf.train.Feature(float_list=tf.train.FloatList(value=values)) def image_to_tfexample(image_data, image_format, height, width, class_id): - return tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': bytes_feature(image_data), - 'image/format': bytes_feature(image_format), - 'image/class/label': int64_feature(class_id), - 'image/height': int64_feature(height), - 'image/width': int64_feature(width), - })) + return tf.train.Example(features=tf.train.Features(feature={ + 'image/encoded': bytes_feature(image_data), + 'image/format': bytes_feature(image_format), + 'image/class/label': int64_feature(class_id), + 'image/height': int64_feature(height), + 'image/width': int64_feature(width), + })) def download_and_uncompress_tarball(tarball_url, dataset_dir): - """Downloads the `tarball_url` and uncompresses it locally. - - Args: - tarball_url: The URL of a tarball file. - dataset_dir: The directory where the temporary files are stored. - """ - filename = tarball_url.split('/')[-1] - filepath = os.path.join(dataset_dir, filename) - - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %s %.1f%%' % ( - filename, float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress) - print() - statinfo = os.stat(filepath) - print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') - tarfile.open(filepath, 'r:gz').extractall(dataset_dir) + """Downloads the `tarball_url` and uncompresses it locally. + + Args: + tarball_url: The URL of a tarball file. + dataset_dir: The directory where the temporary files are stored. + """ + filename = tarball_url.split('/')[-1] + filepath = os.path.join(dataset_dir, filename) + + def _progress(count, block_size, total_size): + sys.stdout.write('\r>> Downloading %s %.1f%%' % ( + filename, float(count * block_size) / float(total_size) * 100.0)) + sys.stdout.flush() + filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress) + print() + statinfo = os.stat(filepath) + print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') + tarfile.open(filepath, 'r:gz').extractall(dataset_dir) def write_label_file(labels_to_class_names, dataset_dir, filename=LABELS_FILENAME): - """Writes a file with the list of class names. + """Writes a file with the list of class names. - Args: - labels_to_class_names: A map of (integer) labels to class names. - dataset_dir: The directory in which the labels file should be written. 
- filename: The filename where the class names are written. - """ - labels_filename = os.path.join(dataset_dir, filename) - with tf.gfile.Open(labels_filename, 'w') as f: - for label in labels_to_class_names: - class_name = labels_to_class_names[label] - f.write('%d:%s\n' % (label, class_name)) + Args: + labels_to_class_names: A map of (integer) labels to class names. + dataset_dir: The directory in which the labels file should be written. + filename: The filename where the class names are written. + """ + labels_filename = os.path.join(dataset_dir, filename) + with tf.gfile.Open(labels_filename, 'w') as f: + for label in labels_to_class_names: + class_name = labels_to_class_names[label] + f.write('%d:%s\n' % (label, class_name)) def has_labels(dataset_dir, filename=LABELS_FILENAME): - """Specifies whether or not the dataset directory contains a label map file. + """Specifies whether or not the dataset directory contains a label map file. - Args: - dataset_dir: The directory in which the labels file is found. - filename: The filename where the class names are written. + Args: + dataset_dir: The directory in which the labels file is found. + filename: The filename where the class names are written. - Returns: - `True` if the labels file exists and `False` otherwise. - """ - return tf.gfile.Exists(os.path.join(dataset_dir, filename)) + Returns: + `True` if the labels file exists and `False` otherwise. + """ + return tf.gfile.Exists(os.path.join(dataset_dir, filename)) def read_label_file(dataset_dir, filename=LABELS_FILENAME): - """Reads the labels file and returns a mapping from ID to class name. - - Args: - dataset_dir: The directory in which the labels file is found. - filename: The filename where the class names are written. - - Returns: - A map from a label (integer) to class name. - """ - labels_filename = os.path.join(dataset_dir, filename) - with tf.gfile.Open(labels_filename, 'rb') as f: - lines = f.read().decode() - lines = lines.split('\n') - lines = filter(None, lines) - - labels_to_class_names = {} - for line in lines: - index = line.index(':') - labels_to_class_names[int(line[:index])] = line[index + 1:] - return labels_to_class_names + """Reads the labels file and returns a mapping from ID to class name. + + Args: + dataset_dir: The directory in which the labels file is found. + filename: The filename where the class names are written. + + Returns: + A map from a label (integer) to class name. 
+ """ + labels_filename = os.path.join(dataset_dir, filename) + with tf.gfile.Open(labels_filename, 'rb') as f: + lines = f.read().decode() + lines = lines.split('\n') + lines = filter(None, lines) + + labels_to_class_names = {} + for line in lines: + index = line.index(':') + labels_to_class_names[int(line[:index])] = line[index+1:] + return labels_to_class_names diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/datasets.py b/models/image_recognition/tensorflow/inception_resnet_v2/datasets.py index d144f9212..d69a696c7 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/datasets.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/datasets.py @@ -48,94 +48,94 @@ def create_dataset(data_dir, data_name): - """Create a Dataset instance based on data_dir and data_name.""" + """Create a Dataset instance based on data_dir and data_name.""" - supported_datasets = { - 'imagenet': ImagenetData, - } + supported_datasets = { + 'imagenet': ImagenetData, + } - if not data_dir and not data_name: - # When using synthetic data, use synthetic imagenet images by default. - data_name = 'imagenet' + if not data_dir and not data_name: + # When using synthetic data, use synthetic imagenet images by default. + data_name = 'imagenet' - if data_name is None: - for supported_name in supported_datasets: - if supported_name in data_dir: - data_name = supported_name - break + if data_name is None: + for supported_name in supported_datasets: + if supported_name in data_dir: + data_name = supported_name + break - if data_name is None: - raise ValueError('Could not identify name of dataset. ' - 'Please specify with --data_name option.') + if data_name is None: + raise ValueError('Could not identify name of dataset. ' + 'Please specify with --data_name option.') - if data_name not in supported_datasets: - raise ValueError('Unknown dataset. Must be one of %s', ', '.join( - [key for key in sorted(supported_datasets.keys())])) + if data_name not in supported_datasets: + raise ValueError('Unknown dataset. 
Must be one of %s', ', '.join( + [key for key in sorted(supported_datasets.keys())])) - return supported_datasets[data_name](data_dir) + return supported_datasets[data_name](data_dir) class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, height=None, width=None, depth=None, data_dir=None, - queue_runner_required=False, num_classes=1000): - self.name = name - self.height = height - self.width = width - self.depth = depth or 3 + def __init__(self, name, height=None, width=None, depth=None, data_dir=None, + queue_runner_required=False, num_classes=1000): + self.name = name + self.height = height + self.width = width + self.depth = depth or 3 - self.data_dir = data_dir - self._queue_runner_required = queue_runner_required - self._num_classes = num_classes + self.data_dir = data_dir + self._queue_runner_required = queue_runner_required + self._num_classes = num_classes - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.TFRecordReader() - @property - def num_classes(self): - return self._num_classes + @property + def num_classes(self): + return self._num_classes - @num_classes.setter - def num_classes(self, val): - self._num_classes = val + @num_classes.setter + def num_classes(self, val): + self._num_classes = val - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name - def get_image_preprocessor(self): - return None + def get_image_preprocessor(self): + return None - def queue_runner_required(self): - return self._queue_runner_required + def queue_runner_required(self): + return self._queue_runner_required - def use_synthetic_gpu_images(self): - return not self.data_dir + def use_synthetic_gpu_images(self): + return not self.data_dir class ImagenetData(Dataset): - """Configuration for Imagenet dataset.""" - - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) - - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - else: - raise ValueError('Invalid data subset "%s"' % subset) - - def get_image_preprocessor(self): - if self.use_synthetic_gpu_images(): - return preprocessing.SyntheticImagePreprocessor - else: - return preprocessing.RecordInputImagePreprocessor + """Configuration for Imagenet dataset.""" + + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) + + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + else: + raise ValueError('Invalid data subset "%s"' % subset) + + def get_image_preprocessor(self): + if self.use_synthetic_gpu_images(): + return preprocessing.SyntheticImagePreprocessor + else: + return preprocessing.RecordInputImagePreprocessor diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_accuracy.py b/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_accuracy.py index 
fad6007f5..3b430d5c5 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_accuracy.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_accuracy.py @@ -53,116 +53,116 @@ def load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() + graph = tf.Graph() + graph_def = tf.GraphDef() - import os - file_ext = os.path.splitext(model_file)[1] + import os + file_ext = os.path.splitext(model_file)[1] - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') - return graph + return graph if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--data_location", default=None, - help="full path to the validation data") - parser.add_argument("--input_height", default=None, - type=int, help="input height") - parser.add_argument("--input_width", default=None, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="predict", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph - else: - sys.exit("Please provide a graph file.") - if args.input_height: - input_height = args.input_height - else: - input_height = 224 - if args.input_width: - input_width = args.input_width - else: - input_width = 224 - - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - data_location = args.data_location - dataset = datasets.ImagenetData(data_location) - preprocessor = dataset.get_image_preprocessor()( - input_height, input_width, batch_size, - 1, # device count - tf.float32, # data_type for input fed to the graph - train=False, # doing inference - resize_method='bilinear') - - images, labels = preprocessor.minibatch(dataset, subset='validation', - use_datasets=True, cache_data=False) - graph = load_graph(model_file) - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") - - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - - total_accuracy1, total_accuracy5 = (0.0, 0.0) - num_processed_images = 0 - num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ - - num_processed_images - - with tf.Session() as sess: - sess_graph = tf.Session(graph=graph, config=config) - - while num_remaining_images >= batch_size: - # Reads and preprocess data - np_images, np_labels = sess.run([images[0], labels[0]]) - num_processed_images += batch_size - num_remaining_images -= 
batch_size - start_time = time.time() - # Compute inference on the preprocessed data - predictions = sess_graph.run(output_tensor, - {input_tensor: np_images}) - elapsed_time = time.time() - start_time - accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) - np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - print("Iteration time: %0.4f ms" % elapsed_time) - print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=None, + type=int, help="input height") + parser.add_argument("--input_width", default=None, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="predict", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 224 + if args.input_width: + input_width = args.input_width + else: + input_width = 224 + + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + dataset = datasets.ImagenetData(data_location) + preprocessor = dataset.get_image_preprocessor()( + input_height, input_width, batch_size, + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference + resize_method='bilinear') + + images, labels = preprocessor.minibatch(dataset, subset='validation', + use_datasets=True, cache_data=False) + graph = load_graph(model_file) + input_tensor = graph.get_tensor_by_name(input_layer + ":0") + output_tensor = graph.get_tensor_by_name(output_layer + ":0") + + config = tf.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ + - num_processed_images + + with tf.Session() as sess: + sess_graph = tf.Session(graph=graph, config=config) + + while num_remaining_images >= batch_size: + # Reads and preprocess data + np_images, np_labels = sess.run([images[0], labels[0]]) + num_processed_images += batch_size + num_remaining_images -= batch_size + start_time = time.time() + # Compute inference on the preprocessed data + predictions = sess_graph.run(output_tensor, + {input_tensor: np_images}) 
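# [Editor's note] The lines that follow compute running top-1 / top-5 accuracy with
# tf.nn.in_top_k. A minimal, self-contained sketch of the same idea, assuming
# TensorFlow 1.x; the toy logits and labels are illustrative only, not from this patch.
import numpy as np
import tensorflow as tf

toy_logits = np.array([[0.1, 0.7, 0.2],
                       [0.3, 0.5, 0.2]], dtype=np.float32)  # batch of 2, 3 classes
toy_labels = np.array([1, 0], dtype=np.int32)               # ground-truth class ids

# in_top_k returns one boolean per sample; summing the casts gives the correct count.
toy_top1 = tf.reduce_sum(tf.cast(tf.nn.in_top_k(toy_logits, toy_labels, 1), tf.float32))
toy_top2 = tf.reduce_sum(tf.cast(tf.nn.in_top_k(toy_logits, toy_labels, 2), tf.float32))
with tf.Session() as toy_sess:
    print(toy_sess.run([toy_top1, toy_top2]))  # -> [1.0, 2.0]
# Accuracy is then the accumulated correct count divided by num_processed_images.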
+ elapsed_time = time.time() - start_time + accuracy1 = tf.reduce_sum( + tf.cast(tf.nn.in_top_k(tf.constant(predictions), + tf.constant(np_labels), 1), tf.float32)) + + accuracy5 = tf.reduce_sum( + tf.cast(tf.nn.in_top_k(tf.constant(predictions), + tf.constant(np_labels), 5), tf.float32)) + np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1 / num_processed_images, + total_accuracy5 / num_processed_images)) diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_benchmark.py b/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_benchmark.py index 1487b78d2..2fb6bd228 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_benchmark.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_benchmark.py @@ -41,82 +41,83 @@ class EvalClassifierBenchmark: - """Evaluate image classifier with int8 TensorFlow graph""" - - def __init__(self): - - arg_parser = ArgumentParser(description='Parse args') - - arg_parser.add_argument('-b', "--batch-size", - help="Specify the batch size. If this " - "parameter is not specified or is -1, the " - "largest ideal batch size for the model will " - "be used.", - dest="batch_size", type=int, default=-1) - - arg_parser.add_argument('-e', "--inter-op-parallelism-threads", - help='The number of inter-thread.', - dest='num_inter_threads', type=int, default=0) - - arg_parser.add_argument('-a', "--intra-op-parallelism-threads", - help='The number of intra-thread.', - dest='num_intra_threads', type=int, default=0) - - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - - self.args = arg_parser.parse_args() - - def run(self): - """run benchmark with optimized graph""" - - with tf.Graph().as_default() as graph: - - config = tf.ConfigProto() - config.allow_soft_placement = True - config.intra_op_parallelism_threads = self.args.num_intra_threads - config.inter_op_parallelism_threads = self.args.num_inter_threads - - with tf.Session(config=config) as sess: - # import the quantized graph - with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: - graph_def = tf.GraphDef() - input_graph_content = input_file.read() - graph_def.ParseFromString(input_graph_content) - - sess.graph.as_default() - tf.import_graph_def(graph_def, name='') - - # Definite input and output Tensors for detection_graph - image = graph.get_tensor_by_name('input:0') - predict = graph.get_tensor_by_name('InceptionResnetV2/Logits/Predictions:0') - tf.global_variables_initializer() - - i = 0 - num_iteration = 40 - warm_up_iteration = 10 - total_time = 0 - for _ in range(num_iteration): - i += 1 - image_np = np.random.rand(self.args.batch_size, 299, 299, 3).astype(np.uint8) - start_time = time.time() - (predicts) = sess.run([predict], feed_dict={image: image_np}) - time_consume = time.time() - start_time - print('Iteration %d: %.3f sec' % (i, time_consume)) - if i > warm_up_iteration: - total_time += time_consume - - time_average = total_time / (num_iteration - warm_up_iteration) - print('Average time: %.3f sec' % (time_average)) - - print('Batch size = %d' % self.args.batch_size) - if (self.args.batch_size == 1): - print('Latency: %.3f ms' % 
(time_average * 1000)) - # print throughput for both batch size 1 and 128 - print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) + """Evaluate image classifier with int8 TensorFlow graph""" + + def __init__(self): + + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--inter-op-parallelism-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--intra-op-parallelism-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + + self.args = arg_parser.parse_args() + + + def run(self): + """run benchmark with optimized graph""" + + with tf.Graph().as_default() as graph: + + config = tf.ConfigProto() + config.allow_soft_placement = True + config.intra_op_parallelism_threads = self.args.num_intra_threads + config.inter_op_parallelism_threads = self.args.num_inter_threads + + with tf.Session(config=config) as sess: + # import the quantized graph + with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: + graph_def = tf.GraphDef() + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + + sess.graph.as_default() + tf.import_graph_def(graph_def, name='') + + # Definite input and output Tensors for detection_graph + image = graph.get_tensor_by_name('input:0') + predict = graph.get_tensor_by_name('InceptionResnetV2/Logits/Predictions:0') + tf.global_variables_initializer() + + i = 0 + num_iteration = 40 + warm_up_iteration = 10 + total_time = 0 + for _ in range(num_iteration): + i += 1 + image_np = np.random.rand(self.args.batch_size, 299, 299, 3).astype(np.uint8) + start_time = time.time() + (predicts) = sess.run([predict], feed_dict={image: image_np}) + time_consume = time.time() - start_time + print('Iteration %d: %.3f sec' % (i, time_consume)) + if i > warm_up_iteration: + total_time += time_consume + + time_average = total_time / (num_iteration - warm_up_iteration) + print('Average time: %.3f sec' % (time_average)) + + print('Batch size = %d' % self.args.batch_size) + if (self.args.batch_size == 1): + print('Latency: %.3f ms' % (time_average * 1000)) + # print throughput for both batch size 1 and 128 + print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) if __name__ == "__main__": - eval_benchmark = EvalClassifierBenchmark() - eval_benchmark.run() + eval_benchmark = EvalClassifierBenchmark() + eval_benchmark.run() diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/imagenet.py b/models/image_recognition/tensorflow/inception_resnet_v2/imagenet.py index 4266acf64..8ddfb783d 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/imagenet.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/imagenet.py @@ -81,138 +81,138 @@ def create_readable_names_for_imagenet_labels(): - """Create a dict mapping label id to human readable string. - - Returns: - labels_to_names: dictionary where keys are integers from to 1000 - and values are human-readable names. 
- - We retrieve a synset file, which contains a list of valid synset labels used - by ILSVRC competition. There is one synset one per line, eg. - # n01440764 - # n01443537 - We also retrieve a synset_to_human_file, which contains a mapping from synsets - to human-readable names for every synset in Imagenet. These are stored in a - tsv format, as follows: - # n02119247 black fox - # n02119359 silver fox - We assign each synset (in alphabetical order) an integer, starting from 1 - (since 0 is reserved for the background class). - - Code is based on - https://github.com/tensorflow/models/blob/master/research/inception/inception/data/build_imagenet_data.py#L463 - """ - - # pylint: disable=g-line-too-long - # base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/' - # synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url) - # synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url) - - # filename, _ = urllib.request.urlretrieve(synset_url) - current_dir = os.path.dirname(os.path.realpath(__file__)) - filename = current_dir + "/imagenet_lsvrc_2015_synsets.txt" - synset_list = [s.strip() for s in open(filename).readlines()] - num_synsets_in_ilsvrc = len(synset_list) - assert num_synsets_in_ilsvrc == 1000 - - # filename, _ = urllib.request.urlretrieve(synset_to_human_url) - filename = current_dir + "/imagenet_metadata.txt" - synset_to_human_list = open(filename).readlines() - num_synsets_in_all_imagenet = len(synset_to_human_list) - assert num_synsets_in_all_imagenet == 21842 - - synset_to_human = {} - for s in synset_to_human_list: - parts = s.strip().split('\t') - assert len(parts) == 2 - synset = parts[0] - human = parts[1] - synset_to_human[synset] = human - - label_index = 1 - labels_to_names = {0: 'background'} - for synset in synset_list: - name = synset_to_human[synset] - labels_to_names[label_index] = name - label_index += 1 - - return labels_to_names + """Create a dict mapping label id to human readable string. + + Returns: + labels_to_names: dictionary where keys are integers from to 1000 + and values are human-readable names. + + We retrieve a synset file, which contains a list of valid synset labels used + by ILSVRC competition. There is one synset one per line, eg. + # n01440764 + # n01443537 + We also retrieve a synset_to_human_file, which contains a mapping from synsets + to human-readable names for every synset in Imagenet. These are stored in a + tsv format, as follows: + # n02119247 black fox + # n02119359 silver fox + We assign each synset (in alphabetical order) an integer, starting from 1 + (since 0 is reserved for the background class). 
+ + Code is based on + https://github.com/tensorflow/models/blob/master/research/inception/inception/data/build_imagenet_data.py#L463 + """ + + # pylint: disable=g-line-too-long + # base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/' + # synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url) + # synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url) + + #filename, _ = urllib.request.urlretrieve(synset_url) + current_dir = os.path.dirname(os.path.realpath(__file__)) + filename = current_dir + "/imagenet_lsvrc_2015_synsets.txt" + synset_list = [s.strip() for s in open(filename).readlines()] + num_synsets_in_ilsvrc = len(synset_list) + assert num_synsets_in_ilsvrc == 1000 + + #filename, _ = urllib.request.urlretrieve(synset_to_human_url) + filename = current_dir + "/imagenet_metadata.txt" + synset_to_human_list = open(filename).readlines() + num_synsets_in_all_imagenet = len(synset_to_human_list) + assert num_synsets_in_all_imagenet == 21842 + + synset_to_human = {} + for s in synset_to_human_list: + parts = s.strip().split('\t') + assert len(parts) == 2 + synset = parts[0] + human = parts[1] + synset_to_human[synset] = human + + label_index = 1 + labels_to_names = {0: 'background'} + for synset in synset_list: + name = synset_to_human[synset] + labels_to_names[label_index] = name + label_index += 1 + + return labels_to_names def get_split(split_name, dataset_dir, file_pattern=None, reader=None): - """Gets a dataset tuple with instructions for reading ImageNet. - - Args: - split_name: A train/test split name. - dataset_dir: The base directory of the dataset sources. - file_pattern: The file pattern to use when matching the dataset sources. - It is assumed that the pattern contains a '%s' string so that the split - name can be inserted. - reader: The TensorFlow reader type. - - Returns: - A `Dataset` namedtuple. - - Raises: - ValueError: if `split_name` is not a valid train/test split. - """ - if split_name not in _SPLITS_TO_SIZES: - raise ValueError('split name %s was not recognized.' % split_name) - - if not file_pattern: - file_pattern = _FILE_PATTERN - file_pattern = os.path.join(dataset_dir, file_pattern % split_name) - - # Allowing None in the signature so that dataset_factory can use the default. 
- if reader is None: - reader = tf.TFRecordReader - - keys_to_features = { - 'image/encoded': tf.FixedLenFeature( - (), tf.string, default_value=''), - 'image/format': tf.FixedLenFeature( - (), tf.string, default_value='jpeg'), - 'image/class/label': tf.FixedLenFeature( - [], dtype=tf.int64, default_value=-1), - 'image/class/text': tf.FixedLenFeature( - [], dtype=tf.string, default_value=''), - 'image/object/bbox/xmin': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/bbox/ymin': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/bbox/xmax': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/bbox/ymax': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/class/label': tf.VarLenFeature( - dtype=tf.int64), - } - - items_to_handlers = { - 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), - 'label': slim.tfexample_decoder.Tensor('image/class/label'), - 'label_text': slim.tfexample_decoder.Tensor('image/class/text'), - 'object/bbox': slim.tfexample_decoder.BoundingBox( - ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), - 'object/label': slim.tfexample_decoder.Tensor( - 'image/object/class/label'), - } - - decoder = slim.tfexample_decoder.TFExampleDecoder( - keys_to_features, items_to_handlers) - - labels_to_names = None - if dataset_utils.has_labels(dataset_dir): - labels_to_names = dataset_utils.read_label_file(dataset_dir) - else: - labels_to_names = create_readable_names_for_imagenet_labels() - # dataset_utils.write_label_file(labels_to_names, dataset_dir) - - return slim.dataset.Dataset( - data_sources=file_pattern, - reader=reader, - decoder=decoder, - num_samples=_SPLITS_TO_SIZES[split_name], - items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, - num_classes=_NUM_CLASSES, - labels_to_names=labels_to_names) + """Gets a dataset tuple with instructions for reading ImageNet. + + Args: + split_name: A train/test split name. + dataset_dir: The base directory of the dataset sources. + file_pattern: The file pattern to use when matching the dataset sources. + It is assumed that the pattern contains a '%s' string so that the split + name can be inserted. + reader: The TensorFlow reader type. + + Returns: + A `Dataset` namedtuple. + + Raises: + ValueError: if `split_name` is not a valid train/test split. + """ + if split_name not in _SPLITS_TO_SIZES: + raise ValueError('split name %s was not recognized.' % split_name) + + if not file_pattern: + file_pattern = _FILE_PATTERN + file_pattern = os.path.join(dataset_dir, file_pattern % split_name) + + # Allowing None in the signature so that dataset_factory can use the default. 
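# [Editor's note] Sketch of how the slim decoder assembled below is typically driven,
# assuming TF 1.x with tf.contrib.slim available (as this file already requires).
# 'toy_serialized' stands in for one record read from the TFRecord files; all toy_*
# names are illustrative only, not part of this patch.
import tensorflow as tf
toy_slim = tf.contrib.slim

toy_serialized = tf.placeholder(tf.string, shape=[])
toy_keys_to_features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
    'image/class/label': tf.FixedLenFeature([], tf.int64, default_value=-1),
}
toy_items_to_handlers = {
    'image': toy_slim.tfexample_decoder.Image('image/encoded', 'image/format'),
    'label': toy_slim.tfexample_decoder.Tensor('image/class/label'),
}
toy_decoder = toy_slim.tfexample_decoder.TFExampleDecoder(
    toy_keys_to_features, toy_items_to_handlers)
# decode() returns the requested items as tensors: a decoded image and its label.
toy_image, toy_label = toy_decoder.decode(toy_serialized, items=['image', 'label'])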
+ if reader is None: + reader = tf.TFRecordReader + + keys_to_features = { + 'image/encoded': tf.FixedLenFeature( + (), tf.string, default_value=''), + 'image/format': tf.FixedLenFeature( + (), tf.string, default_value='jpeg'), + 'image/class/label': tf.FixedLenFeature( + [], dtype=tf.int64, default_value=-1), + 'image/class/text': tf.FixedLenFeature( + [], dtype=tf.string, default_value=''), + 'image/object/bbox/xmin': tf.VarLenFeature( + dtype=tf.float32), + 'image/object/bbox/ymin': tf.VarLenFeature( + dtype=tf.float32), + 'image/object/bbox/xmax': tf.VarLenFeature( + dtype=tf.float32), + 'image/object/bbox/ymax': tf.VarLenFeature( + dtype=tf.float32), + 'image/object/class/label': tf.VarLenFeature( + dtype=tf.int64), + } + + items_to_handlers = { + 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), + 'label': slim.tfexample_decoder.Tensor('image/class/label'), + 'label_text': slim.tfexample_decoder.Tensor('image/class/text'), + 'object/bbox': slim.tfexample_decoder.BoundingBox( + ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), + 'object/label': slim.tfexample_decoder.Tensor( + 'image/object/class/label'), + } + + decoder = slim.tfexample_decoder.TFExampleDecoder( + keys_to_features, items_to_handlers) + + labels_to_names = None + if dataset_utils.has_labels(dataset_dir): + labels_to_names = dataset_utils.read_label_file(dataset_dir) + else: + labels_to_names = create_readable_names_for_imagenet_labels() + #dataset_utils.write_label_file(labels_to_names, dataset_dir) + + return slim.dataset.Dataset( + data_sources=file_pattern, + reader=reader, + decoder=decoder, + num_samples=_SPLITS_TO_SIZES[split_name], + items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, + num_classes=_NUM_CLASSES, + labels_to_names=labels_to_names) diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/inception_preprocessing.py b/models/image_recognition/tensorflow/inception_resnet_v2/inception_preprocessing.py index 58c47b872..230df3f16 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/inception_preprocessing.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/inception_preprocessing.py @@ -45,76 +45,76 @@ def apply_with_random_selector(x, func, num_cases): - """Computes func(x, sel), with sel sampled from [0...num_cases-1]. + """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - Args: - x: input Tensor. - func: Python function to apply. - num_cases: Python int32, number of cases to sample sel from. + Args: + x: input Tensor. + func: Python function to apply. + num_cases: Python int32, number of cases to sample sel from. - Returns: - The result of func(x, sel), where func receives the value of the - selector as a python integer, but sel is sampled dynamically. - """ - sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) - # Pass the real x only to one of the func calls. - return control_flow_ops.merge([ - func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) - for case in range(num_cases)])[0] + Returns: + The result of func(x, sel), where func receives the value of the + selector as a python integer, but sel is sampled dynamically. + """ + sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) + # Pass the real x only to one of the func calls. + return control_flow_ops.merge([ + func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) + for case in range(num_cases)])[0] def distort_color(image, color_ordering=0, fast_mode=True, scope=None): - """Distort the color of a Tensor image. 
- - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather then adding that level of complication, we select a distinct ordering - of color ops for each preprocessing thread. - - Args: - image: 3-D Tensor containing single image in [0, 1]. - color_ordering: Python int, a type of distortion (valid values: 0-3). - fast_mode: Avoids slower ops (random_hue and random_contrast) - scope: Optional scope for name_scope. - Returns: - 3-D Tensor color-distorted image on range [0, 1] - Raises: - ValueError: if color_ordering not in [0, 3] - """ - with tf.name_scope(scope, 'distort_color', [image]): - if fast_mode: - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - else: - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - elif color_ordering == 2: - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - elif color_ordering == 3: - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - else: - raise ValueError('color_ordering must be in [0, 3]') - - # The random_* ops do not necessarily clamp. - return tf.clip_by_value(image, 0.0, 1.0) + """Distort the color of a Tensor image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: 3-D Tensor containing single image in [0, 1]. + color_ordering: Python int, a type of distortion (valid values: 0-3). + fast_mode: Avoids slower ops (random_hue and random_contrast) + scope: Optional scope for name_scope. + Returns: + 3-D Tensor color-distorted image on range [0, 1] + Raises: + ValueError: if color_ordering not in [0, 3] + """ + with tf.name_scope(scope, 'distort_color', [image]): + if fast_mode: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) 
+ image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + elif color_ordering == 2: + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + elif color_ordering == 3: + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + raise ValueError('color_ordering must be in [0, 3]') + + # The random_* ops do not necessarily clamp. + return tf.clip_by_value(image, 0.0, 1.0) def distorted_bounding_box_crop(image, @@ -124,183 +124,183 @@ def distorted_bounding_box_crop(image, area_range=(0.05, 1.0), max_attempts=100, scope=None): - """Generates cropped_image using a one of the bboxes randomly distorted. - - See `tf.image.sample_distorted_bounding_box` for more documentation. - - Args: - image: 3-D Tensor of image (it will be converted to floats in [0, 1]). - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole - image. - min_object_covered: An optional `float`. Defaults to `0.1`. The cropped - area of the image must contain at least this fraction of any bounding box - supplied. - aspect_ratio_range: An optional list of `floats`. The cropped area of the - image must have an aspect ratio = width / height within this range. - area_range: An optional list of `floats`. The cropped area of the image - must contain a fraction of the supplied image within in this range. - max_attempts: An optional `int`. Number of attempts at generating a cropped - region of the image of the specified constraints. After `max_attempts` - failures, return the entire image. - scope: Optional scope for name_scope. - Returns: - A tuple, a 3-D Tensor cropped_image and the distorted bbox - """ - with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - - # A large fraction of image datasets contain a human-annotated bounding - # box delineating the region of the image containing the object of interest. - # We choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an - # allowed range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image. 
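# [Editor's note] Minimal illustration of the sampling step performed just below,
# assuming TF 1.x; the toy image shape and bbox are illustrative only.
# sample_distorted_bounding_box draws a random crop window that keeps at least
# min_object_covered of the supplied box, within the aspect-ratio and area ranges.
import tensorflow as tf

toy_image = tf.zeros([200, 300, 3], dtype=tf.float32)               # H x W x C
toy_bbox = tf.constant([[[0.0, 0.0, 1.0, 1.0]]], dtype=tf.float32)  # whole image, [1, 1, 4]
toy_begin, toy_size, toy_box = tf.image.sample_distorted_bounding_box(
    tf.shape(toy_image), bounding_boxes=toy_bbox,
    min_object_covered=0.1, aspect_ratio_range=(0.75, 1.33),
    area_range=(0.05, 1.0), max_attempts=100,
    use_image_if_no_bounding_boxes=True)
toy_crop = tf.slice(toy_image, toy_begin, toy_size)  # random crop meeting the constraints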
- sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=max_attempts, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - - # Crop the image to the specified bounding box. - cropped_image = tf.slice(image, bbox_begin, bbox_size) - return cropped_image, distort_bbox + """Generates cropped_image using a one of the bboxes randomly distorted. + + See `tf.image.sample_distorted_bounding_box` for more documentation. + + Args: + image: 3-D Tensor of image (it will be converted to floats in [0, 1]). + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole + image. + min_object_covered: An optional `float`. Defaults to `0.1`. The cropped + area of the image must contain at least this fraction of any bounding box + supplied. + aspect_ratio_range: An optional list of `floats`. The cropped area of the + image must have an aspect ratio = width / height within this range. + area_range: An optional list of `floats`. The cropped area of the image + must contain a fraction of the supplied image within in this range. + max_attempts: An optional `int`. Number of attempts at generating a cropped + region of the image of the specified constraints. After `max_attempts` + failures, return the entire image. + scope: Optional scope for name_scope. + Returns: + A tuple, a 3-D Tensor cropped_image and the distorted bbox + """ + with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an + # allowed range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + tf.shape(image), + bounding_boxes=bbox, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + + # Crop the image to the specified bounding box. + cropped_image = tf.slice(image, bbox_begin, bbox_size) + return cropped_image, distort_bbox def preprocess_for_train(image, height, width, bbox, fast_mode=True, scope=None, add_image_summaries=True): - """Distort one image for training a network. - - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not effect the label. - - Additionally it would create image_summaries to display the different - transformations applied to the image. - - Args: - image: 3-D Tensor of image. 
If dtype is tf.float32 then the range should be - [0, 1], otherwise it would converted to tf.float32 assuming that the range - is [0, MAX], where MAX is largest positive representable number for - int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). - height: integer - width: integer - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. - fast_mode: Optional boolean, if True avoids slower transformations (i.e. - bi-cubic resizing, random_hue or random_contrast). - scope: Optional scope for name_scope. - add_image_summaries: Enable image summaries. - Returns: - 3-D float Tensor of distorted image used for training with range [-1, 1]. - """ - with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): - if bbox is None: - bbox = tf.constant([0.0, 0.0, 1.0, 1.0], - dtype=tf.float32, - shape=[1, 1, 4]) - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), - bbox) - if add_image_summaries: - tf.summary.image('image_with_bounding_boxes', image_with_box) - - distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. - distorted_image.set_shape([None, None, 3]) - image_with_distorted_box = tf.image.draw_bounding_boxes( - tf.expand_dims(image, 0), distorted_bbox) - if add_image_summaries: - tf.summary.image('images_with_distorted_bounding_box', - image_with_distorted_box) - - # This resizing operation may distort the images because the aspect - # ratio is not respected. We select a resize method in a round robin - # fashion based on the thread number. - # Note that ResizeMethod contains 4 enumerated resizing methods. - - # We select only 1 case for fast_mode bilinear. - num_resize_cases = 1 if fast_mode else 4 - distorted_image = apply_with_random_selector( - distorted_image, - lambda x, method: tf.image.resize_images(x, [height, width], method), - num_cases=num_resize_cases) - - if add_image_summaries: - tf.summary.image('cropped_resized_image', - tf.expand_dims(distorted_image, 0)) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - - # Randomly distort the colors. There are 1 or 4 ways to do it. - num_distort_cases = 1 if fast_mode else 4 - distorted_image = apply_with_random_selector( - distorted_image, - lambda x, ordering: distort_color(x, ordering, fast_mode), - num_cases=num_distort_cases) - - if add_image_summaries: - tf.summary.image('final_distorted_image', - tf.expand_dims(distorted_image, 0)) - distorted_image = tf.subtract(distorted_image, 0.5) - distorted_image = tf.multiply(distorted_image, 2.0) - return distorted_image + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. + + Additionally it would create image_summaries to display the different + transformations applied to the image. + + Args: + image: 3-D Tensor of image. 
If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + fast_mode: Optional boolean, if True avoids slower transformations (i.e. + bi-cubic resizing, random_hue or random_contrast). + scope: Optional scope for name_scope. + add_image_summaries: Enable image summaries. + Returns: + 3-D float Tensor of distorted image used for training with range [-1, 1]. + """ + with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + if bbox is None: + bbox = tf.constant([0.0, 0.0, 1.0, 1.0], + dtype=tf.float32, + shape=[1, 1, 4]) + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + if add_image_summaries: + tf.summary.image('image_with_bounding_boxes', image_with_box) + + distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. + distorted_image.set_shape([None, None, 3]) + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distorted_bbox) + if add_image_summaries: + tf.summary.image('images_with_distorted_bounding_box', + image_with_distorted_box) + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + + # We select only 1 case for fast_mode bilinear. + num_resize_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, method: tf.image.resize_images(x, [height, width], method), + num_cases=num_resize_cases) + + if add_image_summaries: + tf.summary.image('cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + + # Randomly distort the colors. There are 1 or 4 ways to do it. + num_distort_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, ordering: distort_color(x, ordering, fast_mode), + num_cases=num_distort_cases) + + if add_image_summaries: + tf.summary.image('final_distorted_image', + tf.expand_dims(distorted_image, 0)) + distorted_image = tf.subtract(distorted_image, 0.5) + distorted_image = tf.multiply(distorted_image, 2.0) + return distorted_image def preprocess_for_eval(image, height, width, central_fraction=0.875, scope=None): - """Prepare one image for evaluation. - - If height and width are specified it would output an image with that size by - applying resize_bilinear. - - If central_fraction is specified it would crop the central fraction of the - input image. - - Args: - image: 3-D Tensor of image. 
If dtype is tf.float32 then the range should be - [0, 1], otherwise it would converted to tf.float32 assuming that the range - is [0, MAX], where MAX is largest positive representable number for - int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). - height: integer - width: integer - central_fraction: Optional Float, fraction of the image to crop. - scope: Optional scope for name_scope. - Returns: - 3-D float Tensor of prepared image. - """ - with tf.name_scope(scope, 'eval_image', [image, height, width]): - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. - image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image + """Prepare one image for evaluation. + + If height and width are specified it would output an image with that size by + applying resize_bilinear. + + If central_fraction is specified it would crop the central fraction of the + input image. + + Args: + image: 3-D Tensor of image. If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + central_fraction: Optional Float, fraction of the image to crop. + scope: Optional scope for name_scope. + Returns: + 3-D float Tensor of prepared image. + """ + with tf.name_scope(scope, 'eval_image', [image, height, width]): + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize_bilinear(image, [height, width], + align_corners=False) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image def preprocess_image(image, height, width, @@ -308,32 +308,32 @@ def preprocess_image(image, height, width, bbox=None, fast_mode=True, add_image_summaries=True): - """Pre-process one image for training or evaluation. - - Args: - image: 3-D Tensor [height, width, channels] with the image. If dtype is - tf.float32 then the range should be [0, 1], otherwise it would converted - to tf.float32 assuming that the range is [0, MAX], where MAX is largest - positive representable number for int(8/16/32) data type (see - `tf.image.convert_image_dtype` for details). - height: integer, image expected height. - width: integer, image expected width. - is_training: Boolean. If true it would transform an image for train, - otherwise it would transform it for evaluation. - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. 
- fast_mode: Optional boolean, if True avoids slower transformations. - add_image_summaries: Enable image summaries. - - Returns: - 3-D float Tensor containing an appropriately scaled image - - Raises: - ValueError: if user does not provide bounding box - """ - if is_training: - return preprocess_for_train(image, height, width, bbox, fast_mode, - add_image_summaries=add_image_summaries) - else: - return preprocess_for_eval(image, height, width) + """Pre-process one image for training or evaluation. + + Args: + image: 3-D Tensor [height, width, channels] with the image. If dtype is + tf.float32 then the range should be [0, 1], otherwise it would converted + to tf.float32 assuming that the range is [0, MAX], where MAX is largest + positive representable number for int(8/16/32) data type (see + `tf.image.convert_image_dtype` for details). + height: integer, image expected height. + width: integer, image expected width. + is_training: Boolean. If true it would transform an image for train, + otherwise it would transform it for evaluation. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + fast_mode: Optional boolean, if True avoids slower transformations. + add_image_summaries: Enable image summaries. + + Returns: + 3-D float Tensor containing an appropriately scaled image + + Raises: + ValueError: if user does not provide bounding box + """ + if is_training: + return preprocess_for_train(image, height, width, bbox, fast_mode, + add_image_summaries=add_image_summaries) + else: + return preprocess_for_eval(image, height, width) diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/inception_resnet_v2.py b/models/image_recognition/tensorflow/inception_resnet_v2/inception_resnet_v2.py index efebf9446..535433f38 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/inception_resnet_v2.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/inception_resnet_v2.py @@ -52,82 +52,82 @@ def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 35x35 resnet block.""" - with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') - mixed = tf.concat( - axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - scaled_up = up * scale - if activation_fn == tf.nn.relu6: - # Use clip_by_value to simulate bandpass activation. 
- scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0) - - net += scaled_up - if activation_fn: - net = activation_fn(net) - return net + """Builds the 35x35 resnet block.""" + with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') + tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') + mixed = tf.concat( + axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2]) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, scope='Conv2d_1x1') + scaled_up = up * scale + if activation_fn == tf.nn.relu6: + # Use clip_by_value to simulate bandpass activation. + scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0) + + net += scaled_up + if activation_fn: + net = activation_fn(net) + return net def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 17x17 resnet block.""" - with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], - scope='Conv2d_0b_1x7') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], - scope='Conv2d_0c_7x1') - mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - - scaled_up = up * scale - if activation_fn == tf.nn.relu6: - # Use clip_by_value to simulate bandpass activation. - scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0) - - net += scaled_up - if activation_fn: - net = activation_fn(net) - return net + """Builds the 17x17 resnet block.""" + with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], + scope='Conv2d_0b_1x7') + tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], + scope='Conv2d_0c_7x1') + mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, scope='Conv2d_1x1') + + scaled_up = up * scale + if activation_fn == tf.nn.relu6: + # Use clip_by_value to simulate bandpass activation. 
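# [Editor's note] Tiny numeric sketch of the scale-then-clip pattern used by these
# residual blocks, assuming TF 1.x; the values are illustrative only. The branch
# output is damped by 'scale' before being added to the shortcut, and relu6-style
# "bandpass" behaviour is emulated by clipping to [-6, 6].
import tensorflow as tf

toy_up = tf.constant([-80.0, -2.0, 3.0, 90.0])
toy_scaled = toy_up * 0.10                             # scale=0.10, as used for block17
toy_clipped = tf.clip_by_value(toy_scaled, -6.0, 6.0)  # only the extremes change
with tf.Session() as toy_sess:
    print(toy_sess.run([toy_scaled, toy_clipped]))
    # -> [-8.0, -0.2, 0.3, 9.0], [-6.0, -0.2, 0.3, 6.0]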
+ scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0) + + net += scaled_up + if activation_fn: + net = activation_fn(net) + return net def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 8x8 resnet block.""" - with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3], - scope='Conv2d_0b_1x3') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1], - scope='Conv2d_0c_3x1') - mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - - scaled_up = up * scale - if activation_fn == tf.nn.relu6: - # Use clip_by_value to simulate bandpass activation. - scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0) - - net += scaled_up - if activation_fn: - net = activation_fn(net) - return net + """Builds the 8x8 resnet block.""" + with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3], + scope='Conv2d_0b_1x3') + tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1], + scope='Conv2d_0c_3x1') + mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) + up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, + activation_fn=None, scope='Conv2d_1x1') + + scaled_up = up * scale + if activation_fn == tf.nn.relu6: + # Use clip_by_value to simulate bandpass activation. + scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0) + + net += scaled_up + if activation_fn: + net = activation_fn(net) + return net def inception_resnet_v2_base(inputs, @@ -136,191 +136,179 @@ def inception_resnet_v2_base(inputs, align_feature_maps=False, scope=None, activation_fn=tf.nn.relu): - """Inception model from http://arxiv.org/abs/1602.07261. - - Constructs an Inception Resnet v2 network from inputs to the given final - endpoint. This method can construct the network up to the final inception - block Conv2d_7b_1x1. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - final_endpoint: specifies the endpoint to construct the network up to. It - can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', - 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', - 'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1'] - output_stride: A scalar that specifies the requested ratio of input to - output spatial resolution. Only supports 8 and 16. - align_feature_maps: When true, changes all the VALID paddings in the network - to SAME padding so that the feature maps are aligned. - scope: Optional variable_scope. - activation_fn: Activation function for block scopes. - - Returns: - tensor_out: output tensor corresponding to the final_endpoint. - end_points: a set of activations for external use, for example summaries or - losses. - - Raises: - ValueError: if final_endpoint is not set to one of the predefined values, - or if the output_stride is not 8 or 16, or if the output_stride is 8 and - we request an end point after 'PreAuxLogits'. 
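# [Editor's note] Sketch of the final_endpoint / end_points contract the docstring
# above describes: build the trunk up to an intermediate endpoint and read earlier
# activations from end_points. Assumes TF 1.x and that this file is importable as
# 'inception_resnet_v2'; names and the input shape are illustrative only.
import tensorflow as tf
from inception_resnet_v2 import inception_resnet_v2_base

toy_inputs = tf.placeholder(tf.float32, [None, 299, 299, 3])
toy_net, toy_end_points = inception_resnet_v2_base(toy_inputs, final_endpoint='Mixed_6a')
# toy_net is the 'Mixed_6a' activation; earlier tensors such as
# toy_end_points['Mixed_5b'] remain available for auxiliary heads or feature extraction.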
- """ - if output_stride != 8 and output_stride != 16: - raise ValueError('output_stride must be 8 or 16.') - - padding = 'SAME' if align_feature_maps else 'VALID' - - end_points = {} - - def add_and_check_final(name, net): - end_points[name] = net - return name == final_endpoint - - with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]): - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - # 149 x 149 x 32 - net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding, - scope='Conv2d_1a_3x3') - if add_and_check_final('Conv2d_1a_3x3', net): - return net, end_points - - # 147 x 147 x 32 - net = slim.conv2d(net, 32, 3, padding=padding, - scope='Conv2d_2a_3x3') - if add_and_check_final('Conv2d_2a_3x3', net): - return net, end_points - # 147 x 147 x 64 - net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') - if add_and_check_final('Conv2d_2b_3x3', net): - return net, end_points - # 73 x 73 x 64 - net = slim.max_pool2d(net, 3, stride=2, padding=padding, - scope='MaxPool_3a_3x3') - if add_and_check_final('MaxPool_3a_3x3', net): - return net, end_points - # 73 x 73 x 80 - net = slim.conv2d(net, 80, 1, padding=padding, - scope='Conv2d_3b_1x1') - if add_and_check_final('Conv2d_3b_1x1', net): - return net, end_points - # 71 x 71 x 192 - net = slim.conv2d(net, 192, 3, padding=padding, - scope='Conv2d_4a_3x3') - if add_and_check_final('Conv2d_4a_3x3', net): - return net, end_points - # 35 x 35 x 192 - net = slim.max_pool2d(net, 3, stride=2, padding=padding, - scope='MaxPool_5a_3x3') - if add_and_check_final('MaxPool_5a_3x3', net): - return net, end_points - - # 35 x 35 x 320 - with tf.variable_scope('Mixed_5b'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, - scope='Conv2d_0b_5x5') - with tf.variable_scope('Branch_2'): - tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', - scope='AvgPool_0a_3x3') - tower_pool_1 = slim.conv2d(tower_pool, 64, 1, - scope='Conv2d_0b_1x1') - net = tf.concat( - [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3) - - if add_and_check_final('Mixed_5b', net): - return net, end_points - # TODO(alemi): Register intermediate endpoints - net = slim.repeat(net, 10, block35, scale=0.17, - activation_fn=activation_fn) - - # 17 x 17 x 1088 if output_stride == 8, - # 33 x 33 x 1088 if output_stride == 16 - use_atrous = output_stride == 8 - - with tf.variable_scope('Mixed_6a'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 384, 3, stride=1 if use_atrous else 2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, - scope='Conv2d_0b_3x3') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, - stride=1 if use_atrous else 2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_pool = slim.max_pool2d(net, 3, stride=1 if use_atrous else 2, - padding=padding, - scope='MaxPool_1a_3x3') - net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) - - if 
add_and_check_final('Mixed_6a', net): - return net, end_points - - # TODO(alemi): register intermediate endpoints - with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1): - net = slim.repeat(net, 20, block17, scale=0.10, - activation_fn=activation_fn) - if add_and_check_final('PreAuxLogits', net): - return net, end_points - - if output_stride == 8: - # TODO(gpapan): Properly support output_stride for the rest of the net. - raise ValueError('output_stride==8 is only supported up to the ' - 'PreAuxlogits end_point for now.') - - # 8 x 8 x 2080 - with tf.variable_scope('Mixed_7a'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.max_pool2d(net, 3, stride=2, - padding=padding, - scope='MaxPool_1a_3x3') - net = tf.concat( - [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) - - if add_and_check_final('Mixed_7a', net): - return net, end_points - - # TODO(alemi): register intermediate endpoints - net = slim.repeat( - net, 9, block8, scale=0.20, activation_fn=activation_fn) - net = block8(net, activation_fn=None) - - # 8 x 8 x 1536 - net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') - if add_and_check_final('Conv2d_7b_1x1', net): - return net, end_points - - raise ValueError('final_endpoint (%s) not recognized', final_endpoint) + """Inception model from http://arxiv.org/abs/1602.07261. + + Constructs an Inception Resnet v2 network from inputs to the given final + endpoint. This method can construct the network up to the final inception + block Conv2d_7b_1x1. + + Args: + inputs: a tensor of size [batch_size, height, width, channels]. + final_endpoint: specifies the endpoint to construct the network up to. It + can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', + 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', + 'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1'] + output_stride: A scalar that specifies the requested ratio of input to + output spatial resolution. Only supports 8 and 16. + align_feature_maps: When true, changes all the VALID paddings in the network + to SAME padding so that the feature maps are aligned. + scope: Optional variable_scope. + activation_fn: Activation function for block scopes. + + Returns: + tensor_out: output tensor corresponding to the final_endpoint. + end_points: a set of activations for external use, for example summaries or + losses. + + Raises: + ValueError: if final_endpoint is not set to one of the predefined values, + or if the output_stride is not 8 or 16, or if the output_stride is 8 and + we request an end point after 'PreAuxLogits'. 
+ """ + if output_stride != 8 and output_stride != 16: + raise ValueError('output_stride must be 8 or 16.') + + padding = 'SAME' if align_feature_maps else 'VALID' + + end_points = {} + + def add_and_check_final(name, net): + end_points[name] = net + return name == final_endpoint + + with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]): + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + # 149 x 149 x 32 + net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding, + scope='Conv2d_1a_3x3') + if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points + + # 147 x 147 x 32 + net = slim.conv2d(net, 32, 3, padding=padding, + scope='Conv2d_2a_3x3') + if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points + # 147 x 147 x 64 + net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') + if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points + # 73 x 73 x 64 + net = slim.max_pool2d(net, 3, stride=2, padding=padding, + scope='MaxPool_3a_3x3') + if add_and_check_final('MaxPool_3a_3x3', net): return net, end_points + # 73 x 73 x 80 + net = slim.conv2d(net, 80, 1, padding=padding, + scope='Conv2d_3b_1x1') + if add_and_check_final('Conv2d_3b_1x1', net): return net, end_points + # 71 x 71 x 192 + net = slim.conv2d(net, 192, 3, padding=padding, + scope='Conv2d_4a_3x3') + if add_and_check_final('Conv2d_4a_3x3', net): return net, end_points + # 35 x 35 x 192 + net = slim.max_pool2d(net, 3, stride=2, padding=padding, + scope='MaxPool_5a_3x3') + if add_and_check_final('MaxPool_5a_3x3', net): return net, end_points + + # 35 x 35 x 320 + with tf.variable_scope('Mixed_5b'): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, + scope='Conv2d_0b_5x5') + with tf.variable_scope('Branch_2'): + tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, + scope='Conv2d_0b_3x3') + tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, + scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', + scope='AvgPool_0a_3x3') + tower_pool_1 = slim.conv2d(tower_pool, 64, 1, + scope='Conv2d_0b_1x1') + net = tf.concat( + [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3) + + if add_and_check_final('Mixed_5b', net): return net, end_points + # TODO(alemi): Register intermediate endpoints + net = slim.repeat(net, 10, block35, scale=0.17, + activation_fn=activation_fn) + + # 17 x 17 x 1088 if output_stride == 8, + # 33 x 33 x 1088 if output_stride == 16 + use_atrous = output_stride == 8 + + with tf.variable_scope('Mixed_6a'): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 384, 3, stride=1 if use_atrous else 2, + padding=padding, + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, + scope='Conv2d_0b_3x3') + tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, + stride=1 if use_atrous else 2, + padding=padding, + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + tower_pool = slim.max_pool2d(net, 3, stride=1 if use_atrous else 2, + padding=padding, + scope='MaxPool_1a_3x3') + net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) + + if 
add_and_check_final('Mixed_6a', net): return net, end_points + + # TODO(alemi): register intermediate endpoints + with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1): + net = slim.repeat(net, 20, block17, scale=0.10, + activation_fn=activation_fn) + if add_and_check_final('PreAuxLogits', net): return net, end_points + + if output_stride == 8: + # TODO(gpapan): Properly support output_stride for the rest of the net. + raise ValueError('output_stride==8 is only supported up to the ' + 'PreAuxlogits end_point for now.') + + # 8 x 8 x 2080 + with tf.variable_scope('Mixed_7a'): + with tf.variable_scope('Branch_0'): + tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, + padding=padding, + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, + padding=padding, + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') + tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, + scope='Conv2d_0b_3x3') + tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, + padding=padding, + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_3'): + tower_pool = slim.max_pool2d(net, 3, stride=2, + padding=padding, + scope='MaxPool_1a_3x3') + net = tf.concat( + [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) + + if add_and_check_final('Mixed_7a', net): return net, end_points + + # TODO(alemi): register intermediate endpoints + net = slim.repeat( + net, 9, block8, scale=0.20, activation_fn=activation_fn) + net = block8(net, activation_fn=None) + + # 8 x 8 x 1536 + net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') + if add_and_check_final('Conv2d_7b_1x1', net): return net, end_points + + raise ValueError('final_endpoint (%s) not recognized', final_endpoint) def inception_resnet_v2(inputs, num_classes=1001, is_training=True, @@ -329,77 +317,75 @@ def inception_resnet_v2(inputs, num_classes=1001, is_training=True, scope='InceptionResnetV2', create_aux_logits=True, activation_fn=tf.nn.relu): - """Creates the Inception Resnet V2 model. - - Args: - inputs: a 4-D tensor of size [batch_size, height, width, 3]. - Dimension batch_size may be undefined. If create_aux_logits is false, - also height and width may be undefined. - num_classes: number of predicted classes. If 0 or None, the logits layer - is omitted and the input features to the logits layer (before dropout) - are returned instead. - is_training: whether is training or not. - dropout_keep_prob: float, the fraction to keep before final layer. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - create_aux_logits: Whether to include the auxilliary logits. - activation_fn: Activation function for conv2d. - - Returns: - net: the output of the logits layer (if num_classes is a non-zero integer), - or the non-dropped-out input to the logits layer (if num_classes is 0 or - None). - end_points: the set of end_points from the inception model. 
- """ - end_points = {} - - with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], - reuse=reuse) as scope: - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - - net, end_points = inception_resnet_v2_base(inputs, scope=scope, - activation_fn=activation_fn) - - if create_aux_logits and num_classes: - with tf.variable_scope('AuxLogits'): - aux = end_points['PreAuxLogits'] - aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID', - scope='Conv2d_1a_3x3') - aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') - aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], - padding='VALID', scope='Conv2d_2a_5x5') - aux = slim.flatten(aux) - aux = slim.fully_connected(aux, num_classes, activation_fn=None, - scope='Logits') - end_points['AuxLogits'] = aux - - with tf.variable_scope('Logits'): - # TODO(sguada,arnoegw): Consider adding a parameter global_pool which - # can be set to False to disable pooling here (as in resnet_*()). - kernel_size = net.get_shape()[1:3] - if kernel_size.is_fully_defined(): - net = slim.avg_pool2d(net, kernel_size, padding='VALID', - scope='AvgPool_1a_8x8') - else: - net = tf.reduce_mean( - net, [1, 2], keep_dims=True, name='global_pool') - end_points['global_pool'] = net - if not num_classes: - return net, end_points - net = slim.flatten(net) - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='Dropout') - end_points['PreLogitsFlatten'] = net - logits = slim.fully_connected(net, num_classes, activation_fn=None, - scope='Logits') - end_points['Logits'] = logits - end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') - - return logits, end_points - - + """Creates the Inception Resnet V2 model. + + Args: + inputs: a 4-D tensor of size [batch_size, height, width, 3]. + Dimension batch_size may be undefined. If create_aux_logits is false, + also height and width may be undefined. + num_classes: number of predicted classes. If 0 or None, the logits layer + is omitted and the input features to the logits layer (before dropout) + are returned instead. + is_training: whether is training or not. + dropout_keep_prob: float, the fraction to keep before final layer. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + create_aux_logits: Whether to include the auxilliary logits. + activation_fn: Activation function for conv2d. + + Returns: + net: the output of the logits layer (if num_classes is a non-zero integer), + or the non-dropped-out input to the logits layer (if num_classes is 0 or + None). + end_points: the set of end_points from the inception model. 
+ """ + end_points = {} + + with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], + reuse=reuse) as scope: + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + + net, end_points = inception_resnet_v2_base(inputs, scope=scope, + activation_fn=activation_fn) + + if create_aux_logits and num_classes: + with tf.variable_scope('AuxLogits'): + aux = end_points['PreAuxLogits'] + aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID', + scope='Conv2d_1a_3x3') + aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') + aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], + padding='VALID', scope='Conv2d_2a_5x5') + aux = slim.flatten(aux) + aux = slim.fully_connected(aux, num_classes, activation_fn=None, + scope='Logits') + end_points['AuxLogits'] = aux + + with tf.variable_scope('Logits'): + # TODO(sguada,arnoegw): Consider adding a parameter global_pool which + # can be set to False to disable pooling here (as in resnet_*()). + kernel_size = net.get_shape()[1:3] + if kernel_size.is_fully_defined(): + net = slim.avg_pool2d(net, kernel_size, padding='VALID', + scope='AvgPool_1a_8x8') + else: + net = tf.reduce_mean( + net, [1, 2], keep_dims=True, name='global_pool') + end_points['global_pool'] = net + if not num_classes: + return net, end_points + net = slim.flatten(net) + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, + scope='Dropout') + end_points['PreLogitsFlatten'] = net + logits = slim.fully_connected(net, num_classes, activation_fn=None, + scope='Logits') + end_points['Logits'] = logits + end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') + + return logits, end_points inception_resnet_v2.default_image_size = 299 @@ -407,29 +393,29 @@ def inception_resnet_v2_arg_scope(weight_decay=0.00004, batch_norm_decay=0.9997, batch_norm_epsilon=0.001, activation_fn=tf.nn.relu): - """Returns the scope with the default parameters for inception_resnet_v2. - - Args: - weight_decay: the weight decay for weights variables. - batch_norm_decay: decay for the moving average of batch_norm momentums. - batch_norm_epsilon: small float added to variance to avoid dividing by zero. - activation_fn: Activation function for conv2d. - - Returns: - a arg_scope with the parameters needed for inception_resnet_v2. - """ - # Set weight_decay for weights in conv2d and fully_connected layers. - with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - biases_regularizer=slim.l2_regularizer(weight_decay)): - - batch_norm_params = { - 'decay': batch_norm_decay, - 'epsilon': batch_norm_epsilon, - 'fused': None, # Use fused batch norm if possible. - } - # Set activation_fn and parameters for batch_norm. - with slim.arg_scope([slim.conv2d], activation_fn=activation_fn, - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params) as scope: - return scope + """Returns the scope with the default parameters for inception_resnet_v2. + + Args: + weight_decay: the weight decay for weights variables. + batch_norm_decay: decay for the moving average of batch_norm momentums. + batch_norm_epsilon: small float added to variance to avoid dividing by zero. + activation_fn: Activation function for conv2d. + + Returns: + a arg_scope with the parameters needed for inception_resnet_v2. + """ + # Set weight_decay for weights in conv2d and fully_connected layers. 
+ with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_regularizer=slim.l2_regularizer(weight_decay), + biases_regularizer=slim.l2_regularizer(weight_decay)): + + batch_norm_params = { + 'decay': batch_norm_decay, + 'epsilon': batch_norm_epsilon, + 'fused': None, # Use fused batch norm if possible. + } + # Set activation_fn and parameters for batch_norm. + with slim.arg_scope([slim.conv2d], activation_fn=activation_fn, + normalizer_fn=slim.batch_norm, + normalizer_params=batch_norm_params) as scope: + return scope diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/nets_factory.py b/models/image_recognition/tensorflow/inception_resnet_v2/nets_factory.py index 4f074ad5f..de8d0b39c 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/nets_factory.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/nets_factory.py @@ -48,53 +48,53 @@ networks_map = { 'inception_resnet_v2': inception_resnet_v2.inception_resnet_v2, -} + } arg_scopes_map = {'inception_resnet_v2': inception_resnet_v2.inception_resnet_v2_arg_scope, - } + } def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): - """Returns a network_fn such as `logits, end_points = network_fn(images)`. + """Returns a network_fn such as `logits, end_points = network_fn(images)`. - Args: - name: The name of the network. - num_classes: The number of classes to use for classification. If 0 or None, - the logits layer is omitted and its input features are returned instead. - weight_decay: The l2 coefficient for the model weights. - is_training: `True` if the model is being used for training and `False` - otherwise. + Args: + name: The name of the network. + num_classes: The number of classes to use for classification. If 0 or None, + the logits layer is omitted and its input features are returned instead. + weight_decay: The l2 coefficient for the model weights. + is_training: `True` if the model is being used for training and `False` + otherwise. - Returns: - network_fn: A function that applies the model to a batch of images. It has - the following signature: - net, end_points = network_fn(images) - The `images` input is a tensor of shape [batch_size, height, width, 3] - with height = width = network_fn.default_image_size. (The permissibility - and treatment of other sizes depends on the network_fn.) - The returned `end_points` are a dictionary of intermediate activations. - The returned `net` is the topmost layer, depending on `num_classes`: - If `num_classes` was a non-zero integer, `net` is a logits tensor - of shape [batch_size, num_classes]. - If `num_classes` was 0 or `None`, `net` is a tensor with the input - to the logits layer of shape [batch_size, 1, 1, num_features] or - [batch_size, num_features]. Dropout has not been applied to this - (even if the network's original classification does); it remains for - the caller to do this or not. + Returns: + network_fn: A function that applies the model to a batch of images. It has + the following signature: + net, end_points = network_fn(images) + The `images` input is a tensor of shape [batch_size, height, width, 3] + with height = width = network_fn.default_image_size. (The permissibility + and treatment of other sizes depends on the network_fn.) + The returned `end_points` are a dictionary of intermediate activations. + The returned `net` is the topmost layer, depending on `num_classes`: + If `num_classes` was a non-zero integer, `net` is a logits tensor + of shape [batch_size, num_classes]. 
+ If `num_classes` was 0 or `None`, `net` is a tensor with the input + to the logits layer of shape [batch_size, 1, 1, num_features] or + [batch_size, num_features]. Dropout has not been applied to this + (even if the network's original classification does); it remains for + the caller to do this or not. - Raises: - ValueError: If network `name` is not recognized. - """ - if name not in networks_map: - raise ValueError('Name of network unknown %s' % name) - func = networks_map[name] - @functools.wraps(func) - def network_fn(images, **kwargs): - arg_scope = arg_scopes_map[name](weight_decay=weight_decay) - with slim.arg_scope(arg_scope): - return func(images, num_classes, is_training=is_training, **kwargs) - if hasattr(func, 'default_image_size'): - network_fn.default_image_size = func.default_image_size + Raises: + ValueError: If network `name` is not recognized. + """ + if name not in networks_map: + raise ValueError('Name of network unknown %s' % name) + func = networks_map[name] + @functools.wraps(func) + def network_fn(images, **kwargs): + arg_scope = arg_scopes_map[name](weight_decay=weight_decay) + with slim.arg_scope(arg_scope): + return func(images, num_classes, is_training=is_training, **kwargs) + if hasattr(func, 'default_image_size'): + network_fn.default_image_size = func.default_image_size - return network_fn + return network_fn diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/preprocessing.py b/models/image_recognition/tensorflow/inception_resnet_v2/preprocessing.py index 2cb5c46ee..08e22dc61 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/preprocessing.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/preprocessing.py @@ -51,263 +51,263 @@ def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - - The output of the build_image_data.py image preprocessing script is a dataset - containing serialized Example protocol buffers. Each Example proto contains - the following fields: - - image/height: 462 - image/width: 581 - image/colorspace: 'RGB' - image/channels: 3 - image/class/label: 615 - image/class/synset: 'n03623198' - image/class/text: 'knee pad' - image/object/bbox/xmin: 0.1 - image/object/bbox/xmax: 0.9 - image/object/bbox/ymin: 0.2 - image/object/bbox/ymax: 0.6 - image/object/bbox/label: 615 - image/format: 'JPEG' - image/filename: 'ILSVRC2012_val_00041207.JPEG' - image/encoded: - - Args: - example_serialized: scalar Tensor tf.string containing a serialized - Example protocol buffer. - - Returns: - image_buffer: Tensor tf.string containing the contents of a JPEG file. - label: Tensor tf.int32 containing the label. - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. - text: Tensor tf.string containing the human-readable label. - """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. 
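Stepping back to get_network_fn in nets_factory.py above, a hedged sketch of how the factory is typically consumed; it assumes TensorFlow 1.x and that nets_factory is importable from this model directory:

import tensorflow as tf
from nets_factory import get_network_fn  # import path is an assumption

network_fn = get_network_fn('inception_resnet_v2', num_classes=1001,
                            is_training=False)

# default_image_size is copied from the wrapped model function (299 here).
size = network_fn.default_image_size
images = tf.placeholder(tf.float32, [None, size, size, 3])
logits, end_points = network_fn(images)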
- feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) - - # Note that we impose an ordering of (y, x) just to make life difficult. - bbox = tf.concat([ymin, xmin, ymax, xmax], 0) - - # Force the variable number of bounding boxes into the shape - # [1, num_boxes, coords]. - bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) - - return features['image/encoded'], label, bbox, features['image/class/text'] + """Parses an Example proto containing a training example of an image. + + The output of the build_image_data.py image preprocessing script is a dataset + containing serialized Example protocol buffers. Each Example proto contains + the following fields: + + image/height: 462 + image/width: 581 + image/colorspace: 'RGB' + image/channels: 3 + image/class/label: 615 + image/class/synset: 'n03623198' + image/class/text: 'knee pad' + image/object/bbox/xmin: 0.1 + image/object/bbox/xmax: 0.9 + image/object/bbox/ymin: 0.2 + image/object/bbox/ymax: 0.6 + image/object/bbox/label: 615 + image/format: 'JPEG' + image/filename: 'ILSVRC2012_val_00041207.JPEG' + image/encoded: + + Args: + example_serialized: scalar Tensor tf.string containing a serialized + Example protocol buffer. + + Returns: + image_buffer: Tensor tf.string containing the contents of a JPEG file. + label: Tensor tf.int32 containing the label. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + text: Tensor tf.string containing the human-readable label. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.parse_single_example(example_serialized, feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. 
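A short sketch of feeding parse_example_proto from a TFRecord shard with tf.data under TensorFlow 1.x; the file path is purely illustrative:

import tensorflow as tf

dataset = tf.data.TFRecordDataset(['/path/to/validation-00000-of-00128'])
dataset = dataset.map(parse_example_proto)
image_buffer, label, bbox, text = dataset.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    jpeg_bytes, label_value = sess.run([image_buffer, label])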
+ bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + return features['image/encoded'], label, bbox, features['image/class/text'] def get_image_resize_method(resize_method, batch_position=0): - """Get tensorflow resize method. + """Get tensorflow resize method. - If resize_method is 'round_robin', return different methods based on batch - position in a round-robin fashion. NOTE: If the batch size is not a multiple - of the number of methods, then the distribution of methods will not be - uniform. + If resize_method is 'round_robin', return different methods based on batch + position in a round-robin fashion. NOTE: If the batch size is not a multiple + of the number of methods, then the distribution of methods will not be + uniform. - Args: - resize_method: (string) nearest, bilinear, bicubic, area, or round_robin. - batch_position: position of the image in a batch. NOTE: this argument can - be an integer or a tensor - Returns: - one of resize type defined in tf.image.ResizeMethod. - """ - resize_methods_map = { - 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, - 'bilinear': tf.image.ResizeMethod.BILINEAR, - 'bicubic': tf.image.ResizeMethod.BICUBIC, - 'area': tf.image.ResizeMethod.AREA - } + Args: + resize_method: (string) nearest, bilinear, bicubic, area, or round_robin. + batch_position: position of the image in a batch. NOTE: this argument can + be an integer or a tensor + Returns: + one of resize type defined in tf.image.ResizeMethod. + """ + resize_methods_map = { + 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, + 'bilinear': tf.image.ResizeMethod.BILINEAR, + 'bicubic': tf.image.ResizeMethod.BICUBIC, + 'area': tf.image.ResizeMethod.AREA + } - if resize_method != 'round_robin': - return resize_methods_map[resize_method] + if resize_method != 'round_robin': + return resize_methods_map[resize_method] - # return a resize method based on batch position in a round-robin fashion. - resize_methods = resize_methods_map.values() + # return a resize method based on batch position in a round-robin fashion. + resize_methods = resize_methods_map.values() - def lookup(index): - return resize_methods[index] + def lookup(index): + return resize_methods[index] - def resize_method_0(): - return utils.smart_cond(batch_position % len(resize_methods) == 0, - lambda: lookup(0), resize_method_1) + def resize_method_0(): + return utils.smart_cond(batch_position % len(resize_methods) == 0, + lambda: lookup(0), resize_method_1) - def resize_method_1(): - return utils.smart_cond(batch_position % len(resize_methods) == 1, - lambda: lookup(1), resize_method_2) + def resize_method_1(): + return utils.smart_cond(batch_position % len(resize_methods) == 1, + lambda: lookup(1), resize_method_2) - def resize_method_2(): - return utils.smart_cond(batch_position % len(resize_methods) == 2, - lambda: lookup(2), lambda: lookup(3)) + def resize_method_2(): + return utils.smart_cond(batch_position % len(resize_methods) == 2, + lambda: lookup(2), lambda: lookup(3)) - # NOTE(jsimsa): Unfortunately, we cannot use a single recursive function here - # because TF would not be able to construct a finite graph. + # NOTE(jsimsa): Unfortunately, we cannot use a single recursive function here + # because TF would not be able to construct a finite graph. - return resize_method_0() + return resize_method_0() def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): - """Decode a JPEG string into one 3-D float image Tensor. - - Args: - image_buffer: scalar string Tensor. 
- scope: Optional scope for op_scope. - Returns: - 3-D float Tensor with values ranging from [0, 1). - """ - # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): - # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): - with tf.name_scope(scope or 'decode_jpeg'): - # Decode the string as an RGB JPEG. - # Note that the resulting image contains an unknown height and width - # that is set dynamically by decode_jpeg. In other words, the height - # and width of image is unknown at compile-time. - image = tf.image.decode_jpeg(image_buffer, channels=3) # , - # fancy_upscaling=False, - # dct_method='INTEGER_FAST') - - # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - - return image + """Decode a JPEG string into one 3-D float image Tensor. + + Args: + image_buffer: scalar string Tensor. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor with values ranging from [0, 1). + """ + # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): + # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): + with tf.name_scope(scope or 'decode_jpeg'): + # Decode the string as an RGB JPEG. + # Note that the resulting image contains an unknown height and width + # that is set dynamically by decode_jpeg. In other words, the height + # and width of image is unknown at compile-time. + image = tf.image.decode_jpeg(image_buffer, channels=3) # , + # fancy_upscaling=False, + # dct_method='INTEGER_FAST') + + # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + return image def preprocess_for_eval(image, height, width, central_fraction=0.875, scope=None): - """Prepare one image for evaluation. - - If height and width are specified it would output an image with that size by - applying resize_bilinear. - - If central_fraction is specified it would crop the central fraction of the - input image. - - Args: - image: 3-D Tensor of image. If dtype is tf.float32 then the range should be - [0, 1], otherwise it would converted to tf.float32 assuming that the range - is [0, MAX], where MAX is largest positive representable number for - int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). - height: integer - width: integer - central_fraction: Optional Float, fraction of the image to crop. - scope: Optional scope for name_scope. - Returns: - 3-D float Tensor of prepared image. - """ - with tf.name_scope(scope, 'eval_image', [image, height, width]): - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. - image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image + """Prepare one image for evaluation. + + If height and width are specified it would output an image with that size by + applying resize_bilinear. + + If central_fraction is specified it would crop the central fraction of the + input image. + + Args: + image: 3-D Tensor of image. 
If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + central_fraction: Optional Float, fraction of the image to crop. + scope: Optional scope for name_scope. + Returns: + 3-D float Tensor of prepared image. + """ + with tf.name_scope(scope, 'eval_image', [image, height, width]): + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize_bilinear(image, [height, width], + align_corners=False) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image def apply_with_random_selector(x, func, num_cases): - """Computes func(x, sel), with sel sampled from [0...num_cases-1]. + """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - Args: - x: input Tensor. - func: Python function to apply. - num_cases: Python int32, number of cases to sample sel from. + Args: + x: input Tensor. + func: Python function to apply. + num_cases: Python int32, number of cases to sample sel from. - Returns: - The result of func(x, sel), where func receives the value of the - selector as a python integer, but sel is sampled dynamically. - """ - sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) - # Pass the real x only to one of the func calls. - return control_flow_ops.merge([ - func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) - for case in range(num_cases)])[0] + Returns: + The result of func(x, sel), where func receives the value of the + selector as a python integer, but sel is sampled dynamically. + """ + sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) + # Pass the real x only to one of the func calls. + return control_flow_ops.merge([ + func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) + for case in range(num_cases)])[0] def distort_color(image, color_ordering=0, fast_mode=True, scope=None): - """Distort the color of a Tensor image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather then adding that level of complication, we select a distinct ordering - of color ops for each preprocessing thread. - - Args: - image: 3-D Tensor containing single image in [0, 1]. - color_ordering: Python int, a type of distortion (valid values: 0-3). - fast_mode: Avoids slower ops (random_hue and random_contrast) - scope: Optional scope for name_scope. - Returns: - 3-D Tensor color-distorted image on range [0, 1] - Raises: - ValueError: if color_ordering not in [0, 3] - """ - with tf.name_scope(scope, 'distort_color', [image]): - if fast_mode: - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) 
- else: - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - elif color_ordering == 2: - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - elif color_ordering == 3: - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - else: - raise ValueError('color_ordering must be in [0, 3]') - - # The random_* ops do not necessarily clamp. - return tf.clip_by_value(image, 0.0, 1.0) + """Distort the color of a Tensor image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: 3-D Tensor containing single image in [0, 1]. + color_ordering: Python int, a type of distortion (valid values: 0-3). + fast_mode: Avoids slower ops (random_hue and random_contrast) + scope: Optional scope for name_scope. + Returns: + 3-D Tensor color-distorted image on range [0, 1] + Raises: + ValueError: if color_ordering not in [0, 3] + """ + with tf.name_scope(scope, 'distort_color', [image]): + if fast_mode: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + elif color_ordering == 2: + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + elif color_ordering == 3: + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + raise ValueError('color_ordering must be in [0, 3]') + + # The random_* ops do not necessarily clamp. 
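apply_with_random_selector above evaluates func for every case but routes the real tensor only through the branch matching the sampled selector; a rough sketch using the resize variant that preprocess_for_train relies on further down (the stand-in image and target size are assumptions):

import tensorflow as tf

image = tf.random_uniform([480, 640, 3])  # stand-in decoded image in [0, 1]

# Randomly pick one of the four tf.image.ResizeMethod values per call.
resized = apply_with_random_selector(
    image,
    lambda x, method: tf.image.resize_images(x, [299, 299], method),
    num_cases=4)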
+ return tf.clip_by_value(image, 0.0, 1.0) def distorted_bounding_box_crop(image, @@ -317,54 +317,54 @@ def distorted_bounding_box_crop(image, area_range=(0.05, 1.0), max_attempts=100, scope=None): - """Generates cropped_image using a one of the bboxes randomly distorted. - - See `tf.image.sample_distorted_bounding_box` for more documentation. - - Args: - image: 3-D Tensor of image (it will be converted to floats in [0, 1]). - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole - image. - min_object_covered: An optional `float`. Defaults to `0.1`. The cropped - area of the image must contain at least this fraction of any bounding box - supplied. - aspect_ratio_range: An optional list of `floats`. The cropped area of the - image must have an aspect ratio = width / height within this range. - area_range: An optional list of `floats`. The cropped area of the image - must contain a fraction of the supplied image within in this range. - max_attempts: An optional `int`. Number of attempts at generating a cropped - region of the image of the specified constraints. After `max_attempts` - failures, return the entire image. - scope: Optional scope for name_scope. - Returns: - A tuple, a 3-D Tensor cropped_image and the distorted bbox - """ - with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - - # A large fraction of image datasets contain a human-annotated bounding - # box delineating the region of the image containing the object of interest. - # We choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an - # allowed range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image. - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=max_attempts, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - - # Crop the image to the specified bounding box. - cropped_image = tf.slice(image, bbox_begin, bbox_size) - return cropped_image, distort_bbox + """Generates cropped_image using a one of the bboxes randomly distorted. + + See `tf.image.sample_distorted_bounding_box` for more documentation. + + Args: + image: 3-D Tensor of image (it will be converted to floats in [0, 1]). + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole + image. + min_object_covered: An optional `float`. Defaults to `0.1`. The cropped + area of the image must contain at least this fraction of any bounding box + supplied. + aspect_ratio_range: An optional list of `floats`. The cropped area of the + image must have an aspect ratio = width / height within this range. + area_range: An optional list of `floats`. The cropped area of the image + must contain a fraction of the supplied image within in this range. + max_attempts: An optional `int`. 
Number of attempts at generating a cropped + region of the image of the specified constraints. After `max_attempts` + failures, return the entire image. + scope: Optional scope for name_scope. + Returns: + A tuple, a 3-D Tensor cropped_image and the distorted bbox + """ + with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an + # allowed range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + tf.shape(image), + bounding_boxes=bbox, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + + # Crop the image to the specified bounding box. + cropped_image = tf.slice(image, bbox_begin, bbox_size) + return cropped_image, distort_bbox def preprocess_for_train(image, height, width, bbox, @@ -372,258 +372,258 @@ def preprocess_for_train(image, height, width, bbox, fast_mode=True, scope=None, add_image_summaries=True): - """Distort one image for training a network. - - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not effect the label. - - Args: - image: 3-D Tensor of image. If dtype is tf.float32 then the range should be - [0, 1], otherwise it would converted to tf.float32 assuming that the range - is [0, MAX], where MAX is largest positive representable number for - int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). - height: integer - width: integer - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. - batch_position: position of the image in a batch, which affects how images - are distorted and resized. NOTE: this argument can be an integer or a - tensor - scope: Optional scope for op_scope. - add_image_summaries: Enable image summaries. - Returns: - 3-D float Tensor of distorted image used for training with range [-1, 1]. - """ - - with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): - if bbox is None: - bbox = tf.constant([0.0, 0.0, 1.0, 1.0], - dtype=tf.float32, - shape=[1, 1, 4]) - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), - bbox) - if add_image_summaries: - tf.summary.image('image_with_bounding_boxes', image_with_box) - - distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. 
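distorted_bounding_box_crop is a thin wrapper around tf.image.sample_distorted_bounding_box; a small sketch, assuming a random stand-in image and a whole-image bounding box:

import tensorflow as tf

image = tf.random_uniform([480, 640, 3])  # stand-in image in [0, 1]
whole_image_bbox = tf.constant([0.0, 0.0, 1.0, 1.0],
                               dtype=tf.float32, shape=[1, 1, 4])

cropped, crop_bbox = distorted_bounding_box_crop(image, whole_image_bbox)
# The dynamic slice drops the static shape, so callers restore the channel dim.
cropped.set_shape([None, None, 3])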
- distorted_image.set_shape([None, None, 3]) - image_with_distorted_box = tf.image.draw_bounding_boxes( - tf.expand_dims(image, 0), distorted_bbox) - if add_image_summaries: - tf.summary.image('images_with_distorted_bounding_box', - image_with_distorted_box) - - # This resizing operation may distort the images because the aspect - # ratio is not respected. We select a resize method in a round robin - # fashion based on the thread number. - # Note that ResizeMethod contains 4 enumerated resizing methods. - - # We select only 1 case for fast_mode bilinear. - num_resize_cases = 1 if fast_mode else 4 - distorted_image = apply_with_random_selector( - distorted_image, - lambda x, method: tf.image.resize_images(x, [height, width], method), - num_cases=num_resize_cases) - - if add_image_summaries: - tf.summary.image('cropped_resized_image', - tf.expand_dims(distorted_image, 0)) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - # Randomly distort the colors. There are 1 or 4 ways to do it. - num_distort_cases = 1 if fast_mode else 4 - distorted_image = apply_with_random_selector( - distorted_image, - lambda x, ordering: distort_color(x, ordering, fast_mode), - num_cases=num_distort_cases) - - if add_image_summaries: - tf.summary.image('final_distorted_image', - tf.expand_dims(distorted_image, 0)) - distorted_image = tf.subtract(distorted_image, 0.5) - distorted_image = tf.multiply(distorted_image, 2.0) - return distorted_image + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. + + Args: + image: 3-D Tensor of image. If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + batch_position: position of the image in a batch, which affects how images + are distorted and resized. NOTE: this argument can be an integer or a + tensor + scope: Optional scope for op_scope. + add_image_summaries: Enable image summaries. + Returns: + 3-D float Tensor of distorted image used for training with range [-1, 1]. + """ + + with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + if bbox is None: + bbox = tf.constant([0.0, 0.0, 1.0, 1.0], + dtype=tf.float32, + shape=[1, 1, 4]) + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + if add_image_summaries: + tf.summary.image('image_with_bounding_boxes', image_with_box) + + distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. 
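An end-to-end sketch for the training-path preprocessing; the JPEG path is a placeholder, and the fifth positional argument mirrors the batch_position passed by RecordInputImagePreprocessor.image_preprocess below:

import tensorflow as tf

raw = tf.read_file('/path/to/train_image.jpg')  # placeholder path
image = tf.image.decode_jpeg(raw, channels=3)

# bbox=None falls back to a whole-image box; output is 299x299x3 in [-1, 1].
distorted = preprocess_for_train(image, 299, 299, None, 0)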
+ distorted_image.set_shape([None, None, 3]) + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distorted_bbox) + if add_image_summaries: + tf.summary.image('images_with_distorted_bounding_box', + image_with_distorted_box) + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + + # We select only 1 case for fast_mode bilinear. + num_resize_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, method: tf.image.resize_images(x, [height, width], method), + num_cases=num_resize_cases) + + if add_image_summaries: + tf.summary.image('cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + # Randomly distort the colors. There are 1 or 4 ways to do it. + num_distort_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, ordering: distort_color(x, ordering, fast_mode), + num_cases=num_distort_cases) + + if add_image_summaries: + tf.summary.image('final_distorted_image', + tf.expand_dims(distorted_image, 0)) + distorted_image = tf.subtract(distorted_image, 0.5) + distorted_image = tf.multiply(distorted_image, 2.0) + return distorted_image def distort_color(image, batch_position=0, distort_color_in_yiq=False, scope=None): - """Distort the color of the image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather then adding that level of complication, we select a distinct ordering - of color ops based on the position of the image in a batch. - - Args: - image: float32 Tensor containing single image. Tensor values should be in - range [0, 1]. - batch_position: the position of the image in a batch. NOTE: this argument - can be an integer or a tensor - distort_color_in_yiq: distort color of input images in YIQ space. - scope: Optional scope for op_scope. - Returns: - color-distorted image - """ - with tf.name_scope(scope or 'distort_color'): - def distort_fn_0(image=image): - """Variant 0 of distort function.""" - image = tf.image.random_brightness(image, max_delta=32. / 255.) - # if distort_color_in_yiq: - # image = distort_image_ops.random_hsv_in_yiq( - # image, lower_saturation=0.5, upper_saturation=1.5, - # max_delta_hue=0.2 * math.pi) - # else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - return image - - def distort_fn_1(image=image): - """Variant 1 of distort function.""" - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - # if distort_color_in_yiq: - # image = distort_image_ops.random_hsv_in_yiq( - # image, lower_saturation=0.5, upper_saturation=1.5, - # max_delta_hue=0.2 * math.pi) - # else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - return image - - image = utils.smart_cond(batch_position % 2 == 0, distort_fn_0, - distort_fn_1) - # The random_* ops do not necessarily clamp. 
- image = tf.clip_by_value(image, 0.0, 1.0) - return image + """Distort the color of the image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather than adding that level of complication, we select a distinct ordering + of color ops based on the position of the image in a batch. + + Args: + image: float32 Tensor containing single image. Tensor values should be in + range [0, 1]. + batch_position: the position of the image in a batch. NOTE: this argument + can be an integer or a tensor + distort_color_in_yiq: distort color of input images in YIQ space. + scope: Optional scope for op_scope. + Returns: + color-distorted image + """ + with tf.name_scope(scope or 'distort_color'): + def distort_fn_0(image=image): + """Variant 0 of distort function.""" + image = tf.image.random_brightness(image, max_delta=32. / 255.) + # if distort_color_in_yiq: + # image = distort_image_ops.random_hsv_in_yiq( + # image, lower_saturation=0.5, upper_saturation=1.5, + # max_delta_hue=0.2 * math.pi) + # else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + return image + + def distort_fn_1(image=image): + """Variant 1 of distort function.""" + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + # if distort_color_in_yiq: + # image = distort_image_ops.random_hsv_in_yiq( + # image, lower_saturation=0.5, upper_saturation=1.5, + # max_delta_hue=0.2 * math.pi) + # else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + return image + + image = utils.smart_cond(batch_position % 2 == 0, distort_fn_0, + distort_fn_1) + # The random_* ops do not necessarily clamp.
+ image = tf.clip_by_value(image, 0.0, 1.0) + return image class RecordInputImagePreprocessor(object): - """Preprocessor for images with RecordInput format.""" - - def __init__(self, - height, - width, - batch_size, - num_splits, - dtype, - train, - distortions=False, - resize_method="bilinear", - shift_ratio=0, - summary_verbosity=1, - distort_color_in_yiq=False, - fuse_decode_and_crop=False): - self.height = height - self.width = width - self.batch_size = batch_size - self.num_splits = num_splits - self.dtype = dtype - self.train = train - self.resize_method = resize_method - self.shift_ratio = shift_ratio - self.distortions = distortions - self.distort_color_in_yiq = distort_color_in_yiq - self.fuse_decode_and_crop = fuse_decode_and_crop - if self.batch_size % self.num_splits != 0: - raise ValueError( - ('batch_size must be a multiple of num_splits: ' - 'batch_size %d, num_splits: %d') % - (self.batch_size, self.num_splits)) - self.batch_size_per_split = self.batch_size // self.num_splits - self.summary_verbosity = summary_verbosity - - def image_preprocess(self, image_buffer, bbox, batch_position): - """Preprocessing image_buffer as a function of its batch position.""" - if self.train: - image_buffer = tf.image.decode_jpeg( - image_buffer, channels=3, dct_method='INTEGER_FAST') - image = preprocess_for_train(image_buffer, self.height, self.width, bbox, - batch_position) - else: - image = tf.image.decode_jpeg( - image_buffer, channels=3, dct_method='INTEGER_FAST') - image = preprocess_for_eval(image, self.height, self.width) - return image - - def parse_and_preprocess(self, value, batch_position): - image_buffer, label_index, bbox, _ = parse_example_proto(value) - image = self.image_preprocess(image_buffer, bbox, batch_position) - return (label_index, image) - - def minibatch(self, dataset, subset, use_datasets, cache_data, - shift_ratio=-1): - if shift_ratio < 0: - shift_ratio = self.shift_ratio - with tf.name_scope('batch_processing'): - # Build final results per split. 
- images = [[] for _ in range(self.num_splits)] - labels = [[] for _ in range(self.num_splits)] - if use_datasets: - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - ds = ds.apply( - interleave_ops.parallel_interleave( - tf.data.TFRecordDataset, cycle_length=10)) - if cache_data: - ds = ds.take(1).cache().repeat() - counter = tf.data.Dataset.range(self.batch_size) - counter = counter.repeat() - ds = tf.data.Dataset.zip((ds, counter)) - ds = ds.prefetch(buffer_size=self.batch_size) - ds = ds.shuffle(buffer_size=10000) - ds = ds.repeat() - ds = ds.apply( - batching.map_and_batch( - map_func=self.parse_and_preprocess, - batch_size=self.batch_size_per_split, - num_parallel_batches=self.num_splits)) - ds = ds.prefetch(buffer_size=self.num_splits) - ds_iterator = ds.make_one_shot_iterator() - for d in xrange(self.num_splits): - labels[d], images[d] = ds_iterator.get_next() - - else: - record_input = data_flow_ops.RecordInput( - file_pattern=dataset.tf_record_pattern(subset), - seed=301, - parallelism=64, - buffer_size=10000, - batch_size=self.batch_size, - shift_ratio=shift_ratio, - name='record_input') - records = record_input.get_yield_op() - records = tf.split(records, self.batch_size, 0) - records = [tf.reshape(record, []) for record in records] - for idx in xrange(self.batch_size): - value = records[idx] - (label, image) = self.parse_and_preprocess(value, idx) - split_index = idx % self.num_splits - labels[split_index].append(label) - images[split_index].append(image) - - for split_index in xrange(self.num_splits): - if not use_datasets: - images[split_index] = tf.parallel_stack(images[split_index]) - labels[split_index] = tf.concat(labels[split_index], 0) - images[split_index] = tf.cast(images[split_index], self.dtype) - depth = 3 - images[split_index] = tf.reshape( - images[split_index], - shape=[self.batch_size_per_split, self.height, self.width, depth]) - labels[split_index] = tf.reshape(labels[split_index], - [self.batch_size_per_split]) - return images, labels + """Preprocessor for images with RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + num_splits, + dtype, + train, + distortions=False, + resize_method="bilinear", + shift_ratio=0, + summary_verbosity=1, + distort_color_in_yiq=False, + fuse_decode_and_crop=False): + self.height = height + self.width = width + self.batch_size = batch_size + self.num_splits = num_splits + self.dtype = dtype + self.train = train + self.resize_method = resize_method + self.shift_ratio = shift_ratio + self.distortions = distortions + self.distort_color_in_yiq = distort_color_in_yiq + self.fuse_decode_and_crop = fuse_decode_and_crop + if self.batch_size % self.num_splits != 0: + raise ValueError( + ('batch_size must be a multiple of num_splits: ' + 'batch_size %d, num_splits: %d') % + (self.batch_size, self.num_splits)) + self.batch_size_per_split = self.batch_size // self.num_splits + self.summary_verbosity = summary_verbosity + + def image_preprocess(self, image_buffer, bbox, batch_position): + """Preprocessing image_buffer as a function of its batch position.""" + if self.train: + image_buffer = tf.image.decode_jpeg( + image_buffer, channels=3, dct_method='INTEGER_FAST') + image = preprocess_for_train(image_buffer, self.height, self.width, bbox, + batch_position) + else: + image = tf.image.decode_jpeg( + image_buffer, 
channels=3, dct_method='INTEGER_FAST') + image = preprocess_for_eval(image, self.height, self.width) + return image + + def parse_and_preprocess(self, value, batch_position): + image_buffer, label_index, bbox, _ = parse_example_proto(value) + image = self.image_preprocess(image_buffer, bbox, batch_position) + return (label_index, image) + + def minibatch(self, dataset, subset, use_datasets, cache_data, + shift_ratio=-1): + if shift_ratio < 0: + shift_ratio = self.shift_ratio + with tf.name_scope('batch_processing'): + # Build final results per split. + images = [[] for _ in range(self.num_splits)] + labels = [[] for _ in range(self.num_splits)] + if use_datasets: + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + ds = ds.apply( + interleave_ops.parallel_interleave( + tf.data.TFRecordDataset, cycle_length=10)) + if cache_data: + ds = ds.take(1).cache().repeat() + counter = tf.data.Dataset.range(self.batch_size) + counter = counter.repeat() + ds = tf.data.Dataset.zip((ds, counter)) + ds = ds.prefetch(buffer_size=self.batch_size) + ds = ds.shuffle(buffer_size=10000) + ds = ds.repeat() + ds = ds.apply( + batching.map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size_per_split, + num_parallel_batches=self.num_splits)) + ds = ds.prefetch(buffer_size=self.num_splits) + ds_iterator = ds.make_one_shot_iterator() + for d in xrange(self.num_splits): + labels[d], images[d] = ds_iterator.get_next() + + else: + record_input = data_flow_ops.RecordInput( + file_pattern=dataset.tf_record_pattern(subset), + seed=301, + parallelism=64, + buffer_size=10000, + batch_size=self.batch_size, + shift_ratio=shift_ratio, + name='record_input') + records = record_input.get_yield_op() + records = tf.split(records, self.batch_size, 0) + records = [tf.reshape(record, []) for record in records] + for idx in xrange(self.batch_size): + value = records[idx] + (label, image) = self.parse_and_preprocess(value, idx) + split_index = idx % self.num_splits + labels[split_index].append(label) + images[split_index].append(image) + + for split_index in xrange(self.num_splits): + if not use_datasets: + images[split_index] = tf.parallel_stack(images[split_index]) + labels[split_index] = tf.concat(labels[split_index], 0) + images[split_index] = tf.cast(images[split_index], self.dtype) + depth = 3 + images[split_index] = tf.reshape( + images[split_index], + shape=[self.batch_size_per_split, self.height, self.width, depth]) + labels[split_index] = tf.reshape(labels[split_index], + [self.batch_size_per_split]) + return images, labels diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/preprocessing_factory.py b/models/image_recognition/tensorflow/inception_resnet_v2/preprocessing_factory.py index a31785fe4..a28ce6397 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/preprocessing_factory.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/preprocessing_factory.py @@ -47,30 +47,30 @@ def get_preprocessing(name, is_training=False): - """Returns preprocessing_fn(image, height, width, **kwargs). + """Returns preprocessing_fn(image, height, width, **kwargs). - Args: - name: The name of the preprocessing function. - is_training: `True` if the model is being used for training and `False` - otherwise. + Args: + name: The name of the preprocessing function. 
+ is_training: `True` if the model is being used for training and `False` + otherwise. - Returns: - preprocessing_fn: A function that preprocessing a single image (pre-batch). - It has the following signature: - image = preprocessing_fn(image, output_height, output_width, ...). + Returns: + preprocessing_fn: A function that preprocessing a single image (pre-batch). + It has the following signature: + image = preprocessing_fn(image, output_height, output_width, ...). - Raises: - ValueError: If Preprocessing `name` is not recognized. - """ - preprocessing_fn_map = { - 'inception_resnet_v2': inception_preprocessing, - } + Raises: + ValueError: If Preprocessing `name` is not recognized. + """ + preprocessing_fn_map = { + 'inception_resnet_v2': inception_preprocessing, + } - if name not in preprocessing_fn_map: - raise ValueError('Preprocessing name [%s] was not recognized' % name) + if name not in preprocessing_fn_map: + raise ValueError('Preprocessing name [%s] was not recognized' % name) - def preprocessing_fn(image, output_height, output_width, **kwargs): - return preprocessing_fn_map[name].preprocess_image( - image, output_height, output_width, is_training=is_training, **kwargs) + def preprocessing_fn(image, output_height, output_width, **kwargs): + return preprocessing_fn_map[name].preprocess_image( + image, output_height, output_width, is_training=is_training, **kwargs) - return preprocessing_fn + return preprocessing_fn diff --git a/models/image_recognition/tensorflow/inceptionv3/fp32/datasets.py b/models/image_recognition/tensorflow/inceptionv3/fp32/datasets.py index 354221adf..1039fa8ee 100644 --- a/models/image_recognition/tensorflow/inceptionv3/fp32/datasets.py +++ b/models/image_recognition/tensorflow/inceptionv3/fp32/datasets.py @@ -43,65 +43,65 @@ IMAGENET_NUM_TRAIN_IMAGES = 1281167 IMAGENET_NUM_VAL_IMAGES = 50000 - class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, height=None, width=None, depth=None, data_dir=None, - queue_runner_required=False, num_classes=1000): - self.name = name - self.height = height - self.width = width - self.depth = depth or 3 + def __init__(self, name, height=None, width=None, depth=None, data_dir=None, + queue_runner_required=False, num_classes=1000): + self.name = name + self.height = height + self.width = width + self.depth = depth or 3 - self.data_dir = data_dir - self._queue_runner_required = queue_runner_required - self._num_classes = num_classes + self.data_dir = data_dir + self._queue_runner_required = queue_runner_required + self._num_classes = num_classes - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.compat.v1.TFRecordReader() - @property - def num_classes(self): - return self._num_classes + @property + def num_classes(self): + return self._num_classes - @num_classes.setter - def num_classes(self, val): - self._num_classes = val + @num_classes.setter + def num_classes(self, val): + self._num_classes = val - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name - def get_image_preprocessor(self): - return None + def get_image_preprocessor(self): + return None 
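# ---------------------------------------------------------------------------
# Illustrative aside (not part of the patch): the Dataset base class above and
# the ImagenetData subclass that follows are consumed through
# get_image_preprocessor(), which on ImagenetData returns the
# RecordInputImagePreprocessor defined in the fp32 preprocessing.py. A minimal
# sketch of how the validation input pipeline is wired together, mirroring the
# fp32 eval script diffed next in this patch; "/path/to/tf_records", the batch
# size, and the core count are placeholder values.
import tensorflow as tf
import datasets  # the module this hunk modifies

# These scripts build a TF1-style graph and run it in a compat.v1 Session.
tf.compat.v1.disable_eager_execution()

dataset = datasets.ImagenetData(data_dir="/path/to/tf_records")
preprocessor = dataset.get_image_preprocessor()(
    299, 299, batch_size=32,
    num_cores=28,
    resize_method='bilinear')
images, labels = preprocessor.minibatch(dataset, subset='validation')

with tf.compat.v1.Session() as sess:
    # One batch of decoded, resized images and their labels.
    np_images, np_labels = sess.run([images, labels])
# ---------------------------------------------------------------------------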
- def queue_runner_required(self): - return self._queue_runner_required + def queue_runner_required(self): + return self._queue_runner_required - def use_synthetic_gpu_images(self): - return not self.data_dir + def use_synthetic_gpu_images(self): + return not self.data_dir class ImagenetData(Dataset): - """Configuration for Imagenet dataset.""" + """Configuration for Imagenet dataset.""" + + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + else: + raise ValueError('Invalid data subset "%s"' % subset) - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - else: - raise ValueError('Invalid data subset "%s"' % subset) + def get_image_preprocessor(self): + return preprocessing.RecordInputImagePreprocessor - def get_image_preprocessor(self): - return preprocessing.RecordInputImagePreprocessor diff --git a/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py b/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py index a5fd0cc6c..65e0423f7 100644 --- a/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py +++ b/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py @@ -22,210 +22,206 @@ from argparse import ArgumentParser import tensorflow as tf - -try: - import tensorflow.tools.graph_transforms as graph_transforms -except: - import tensorflow_core.tools.graph_transforms as graph_transforms +from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference +from tensorflow.python.framework import dtypes import datasets INPUTS = 'input' OUTPUTS = 'predict' -OPTIMIZATION = 'strip_unused_nodes remove_nodes(op=Identity, op=CheckNumerics) fold_constants(ignore_errors=true) fold_batch_norms fold_old_batch_norms' INCEPTION_V3_IMAGE_SIZE = 299 class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph""" - - def __init__(self): - - arg_parser = ArgumentParser(description='Parse args') - - arg_parser.add_argument('-b', "--batch-size", - help="Specify the batch size. If this " - "parameter is not specified or is -1, the " - "largest ideal batch size for the model will " - "be used.", - dest="batch_size", type=int, default=-1) - - arg_parser.add_argument('-e', "--num-inter-threads", - help='The number of inter-thread.', - dest='num_inter_threads', type=int, default=0) - - arg_parser.add_argument('-a', "--num-intra-threads", - help='The number of intra-thread.', - dest='num_intra_threads', type=int, default=0) - - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - - arg_parser.add_argument('-d', "--data-location", - help='Specify the location of the data. 
' - 'If this parameter is not specified, ' - 'the benchmark will use random/dummy data.', - dest="data_location", default=None) - - arg_parser.add_argument('-r', "--accuracy-only", - help='For accuracy measurement only.', - dest='accuracy_only', action='store_true') - - arg_parser.add_argument("--warmup-steps", type=int, default=10, - help="number of warmup steps") - arg_parser.add_argument("--steps", type=int, default=50, - help="number of steps") - - arg_parser.add_argument( - '--data-num-inter-threads', dest='data_num_inter_threads', - help='number threads across operators', - type=int, default=16) - arg_parser.add_argument( - '--data-num-intra-threads', dest='data_num_intra_threads', - help='number threads for data layer operator', - type=int, default=14) - arg_parser.add_argument( - '--num-cores', dest='num_cores', - help='number of cores', - type=int, default=28) - - self.args = arg_parser.parse_args() - - # validate the arguments specific for InceptionV3 - self.validate_args() - - def run(self): - """run benchmark with optimized graph""" - - print("Run inference") - - data_config = tf.ConfigProto() - data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads - data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads - data_config.use_per_session_threads = 1 - - infer_config = tf.ConfigProto() - infer_config.intra_op_parallelism_threads = self.args.num_intra_threads - infer_config.inter_op_parallelism_threads = self.args.num_inter_threads - infer_config.use_per_session_threads = 1 - - data_graph = tf.Graph() - with data_graph.as_default(): - if (self.args.data_location): - print("Inference with real data.") - dataset = datasets.ImagenetData(self.args.data_location) - preprocessor = dataset.get_image_preprocessor()( - INCEPTION_V3_IMAGE_SIZE, INCEPTION_V3_IMAGE_SIZE, self.args.batch_size, - num_cores=self.args.num_cores, - resize_method='bilinear') - images, labels = preprocessor.minibatch(dataset, subset='validation') - else: - print("Inference with dummy data.") - input_shape = [self.args.batch_size, INCEPTION_V3_IMAGE_SIZE, INCEPTION_V3_IMAGE_SIZE, 3] - images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') - - infer_graph = tf.Graph() - with infer_graph.as_default(): - graph_def = tf.GraphDef() - with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: - input_graph_content = input_file.read() - graph_def.ParseFromString(input_graph_content) - - output_graph = graph_transforms.TransformGraph(graph_def, - [INPUTS], [OUTPUTS], [OPTIMIZATION]) - tf.import_graph_def(output_graph, name='') - - # Definite input and output Tensors for detection_graph - input_tensor = infer_graph.get_tensor_by_name('input:0') - output_tensor = infer_graph.get_tensor_by_name('predict:0') - - data_sess = tf.Session(graph=data_graph, config=data_config) - infer_sess = tf.Session(graph=infer_graph, config=infer_config) - - num_processed_images = 0 - num_remaining_images = datasets.IMAGENET_NUM_VAL_IMAGES - - if (not self.args.accuracy_only): - iteration = 0 - warm_up_iteration = self.args.warmup_steps - total_run = self.args.steps - total_time = 0 - - while num_remaining_images >= self.args.batch_size and iteration < total_run: - iteration += 1 - - data_load_start = time.time() - image_np = data_sess.run(images) - data_load_time = time.time() - data_load_start - - num_processed_images += self.args.batch_size - num_remaining_images -= self.args.batch_size - - start_time = time.time() - infer_sess.run([output_tensor], 
feed_dict={input_tensor: image_np}) - time_consume = time.time() - start_time - - # only add data loading time for real data, not for dummy data - if self.args.data_location: - time_consume += data_load_time - - print('Iteration %d: %.6f sec' % (iteration, time_consume)) - if iteration > warm_up_iteration: - total_time += time_consume - - time_average = total_time / (iteration - warm_up_iteration) - print('Average time: %.6f sec' % (time_average)) - - print('Batch size = %d' % self.args.batch_size) - if (self.args.batch_size == 1): - print('Latency: %.3f ms' % (time_average * 1000)) - - print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) - - else: # accuracy check - total_accuracy1, total_accuracy5 = (0.0, 0.0) - - while num_remaining_images >= self.args.batch_size: - # Reads and preprocess data - np_images, np_labels = data_sess.run([images, labels]) - num_processed_images += self.args.batch_size - num_remaining_images -= self.args.batch_size - - start_time = time.time() - # Compute inference on the preprocessed data - predictions = infer_sess.run(output_tensor, - {input_tensor: np_images}) - elapsed_time = time.time() - start_time - - with tf.Graph().as_default() as accu_graph: - accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) - with tf.Session() as accu_sess: - np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5]) - - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - - print("Iteration time: %0.4f ms" % elapsed_time) - print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) - - def validate_args(self): - """validate the arguments""" + """Evaluate image classifier with optimized TensorFlow graph""" + + def __init__(self): + + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--num-inter-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--num-intra-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. 
' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + + arg_parser.add_argument("--warmup-steps", type=int, default=10, + help="number of warmup steps") + arg_parser.add_argument("--steps", type=int, default=50, + help="number of steps") + + arg_parser.add_argument( + '--data-num-inter-threads', dest='data_num_inter_threads', + help='number threads across operators', + type=int, default=16) + arg_parser.add_argument( + '--data-num-intra-threads', dest='data_num_intra_threads', + help='number threads for data layer operator', + type=int, default=14) + arg_parser.add_argument( + '--num-cores', dest='num_cores', + help='number of cores', + type=int, default=28) + + self.args = arg_parser.parse_args() + + # validate the arguments specific for InceptionV3 + self.validate_args() + + def run(self): + """run benchmark with optimized graph""" + + print("Run inference") + + data_config = tf.compat.v1.ConfigProto() + data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads + data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.compat.v1.ConfigProto() + infer_config.intra_op_parallelism_threads = self.args.num_intra_threads + infer_config.inter_op_parallelism_threads = self.args.num_inter_threads + infer_config.use_per_session_threads = 1 + + data_graph = tf.Graph() + with data_graph.as_default(): + if (self.args.data_location): + print("Inference with real data.") + dataset = datasets.ImagenetData(self.args.data_location) + preprocessor = dataset.get_image_preprocessor()( + INCEPTION_V3_IMAGE_SIZE, INCEPTION_V3_IMAGE_SIZE, self.args.batch_size, + num_cores=self.args.num_cores, + resize_method='bilinear') + images, labels = preprocessor.minibatch(dataset, subset='validation') + else: + print("Inference with dummy data.") + input_shape = [self.args.batch_size, INCEPTION_V3_IMAGE_SIZE, INCEPTION_V3_IMAGE_SIZE, 3] + images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') + + infer_graph = tf.Graph() + with infer_graph.as_default(): + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + + output_graph = optimize_for_inference(graph_def, [INPUTS], + [OUTPUTS], dtypes.float32.as_datatype_enum, False) + tf.import_graph_def(output_graph, name='') + + # Definite input and output Tensors for detection_graph + input_tensor = infer_graph.get_tensor_by_name('input:0') + output_tensor = infer_graph.get_tensor_by_name('predict:0') + + data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config) + infer_sess = tf.compat.v1.Session(graph=infer_graph, config=infer_config) + + num_processed_images = 0 + num_remaining_images = datasets.IMAGENET_NUM_VAL_IMAGES + + if (not self.args.accuracy_only): + iteration = 0 + warm_up_iteration = self.args.warmup_steps + total_run = self.args.steps + total_time = 0 + + while num_remaining_images >= self.args.batch_size and iteration < total_run: + iteration += 1 + + data_load_start = time.time() + image_np = data_sess.run(images) + data_load_time = time.time() - data_load_start + + num_processed_images += self.args.batch_size + num_remaining_images -= 
self.args.batch_size + + start_time = time.time() + infer_sess.run([output_tensor], feed_dict={input_tensor: image_np}) + time_consume = time.time() - start_time + + # only add data loading time for real data, not for dummy data + if self.args.data_location: + time_consume += data_load_time + + print('Iteration %d: %.6f sec' % (iteration, time_consume)) + if iteration > warm_up_iteration: + total_time += time_consume + + time_average = total_time / (iteration - warm_up_iteration) + print('Average time: %.6f sec' % (time_average)) + + print('Batch size = %d' % self.args.batch_size) + if (self.args.batch_size == 1): + print('Latency: %.3f ms' % (time_average * 1000)) + + print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) + + else: # accuracy check + total_accuracy1, total_accuracy5 = (0.0, 0.0) + + while num_remaining_images >= self.args.batch_size: + # Reads and preprocess data + np_images, np_labels = data_sess.run([images, labels]) + num_processed_images += self.args.batch_size + num_remaining_images -= self.args.batch_size + + start_time = time.time() + # Compute inference on the preprocessed data + predictions = infer_sess.run(output_tensor, + {input_tensor: np_images}) + elapsed_time = time.time() - start_time + + with tf.Graph().as_default() as accu_graph: + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) + with tf.compat.v1.Session() as accu_sess: + np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5]) + + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + + print("Iteration time: %0.4f ms" % elapsed_time) + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1 / num_processed_images, + total_accuracy5 / num_processed_images)) + + def validate_args(self): + """validate the arguments""" - if not self.args.data_location: - if self.args.accuracy_only: - raise ValueError("You must use real data for accuracy measurement.") + if not self.args.data_location: + if self.args.accuracy_only: + raise ValueError("You must use real data for accuracy measurement.") if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/models/image_recognition/tensorflow/inceptionv3/fp32/preprocessing.py b/models/image_recognition/tensorflow/inceptionv3/fp32/preprocessing.py index 31cb9be3a..baacf3820 100644 --- a/models/image_recognition/tensorflow/inceptionv3/fp32/preprocessing.py +++ b/models/image_recognition/tensorflow/inceptionv3/fp32/preprocessing.py @@ -44,129 +44,129 @@ def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1) - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. 
- feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - return features['image/encoded'], label + """Parses an Example proto containing a training example of an image. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1) + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + return features['image/encoded'], label def eval_image(image, height, width, resize_method, central_fraction=0.875, scope=None): - with tf.name_scope('eval_image'): - if resize_method == 'crop': - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256, 256 * shape[1] / shape[0]], - dtype=tf.int32)), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256 * shape[0] / shape[1], 256], - dtype=tf.int32))) - shape = tf.shape(image) - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) - distorted_image.set_shape([height, width, 3]) - return distorted_image - else: # bilinear - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. - image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image + with tf.compat.v1.name_scope('eval_image'): + if resize_method == 'crop': + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256, 256 * shape[1] / shape[0]], + dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256 * shape[0] / shape[1], 256], + dtype=tf.int32))) + shape = tf.shape(input=image) + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) + distorted_image.set_shape([height, width, 3]) + return distorted_image + else: # bilinear + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. 
+ image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [height, width], + method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image class RecordInputImagePreprocessor(object): - """Preprocessor for images with RecordInput format.""" - - def __init__(self, - height, - width, - batch_size, - num_cores, - resize_method="bilinear"): - - self.height = height - self.width = width - self.batch_size = batch_size - self.num_cores = num_cores - self.resize_method = resize_method - - def parse_and_preprocess(self, value): - # parse - image_buffer, label_index = parse_example_proto(value) - # preprocess - image = tf.image.decode_jpeg( - image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') - image = eval_image(image, self.height, self.width, self.resize_method) - - return (image, label_index) - - def minibatch(self, dataset, subset, cache_data=False): - - with tf.name_scope('batch_processing'): - - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - - ds = ds.apply( - parallel_interleave( - tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, - sloppy=True, - buffer_output_elements=10000, prefetch_input_elements=10000)) - - if cache_data: - ds = ds.take(1).cache().repeat() - - ds = ds.prefetch(buffer_size=10000) - - # num of parallel batches not greater than 56 - max_num_parallel_batches = min(56, 2 * self.num_cores) - ds = ds.apply( - map_and_batch( - map_func=self.parse_and_preprocess, - batch_size=self.batch_size, - num_parallel_batches=max_num_parallel_batches, - num_parallel_calls=None)) - - ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE) # this number can be tuned - - ds_iterator = ds.make_one_shot_iterator() - images, labels = ds_iterator.get_next() - # reshape - labels = tf.reshape(labels, [self.batch_size]) - - return images, labels + """Preprocessor for images with RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + num_cores, + resize_method="bilinear"): + + self.height = height + self.width = width + self.batch_size = batch_size + self.num_cores = num_cores + self.resize_method = resize_method + + def parse_and_preprocess(self, value): + # parse + image_buffer, label_index = parse_example_proto(value) + # preprocess + image = tf.image.decode_jpeg( + image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + image = eval_image(image, self.height, self.width, self.resize_method) + + return (image, label_index) + + def minibatch(self, dataset, subset, cache_data=False): + + with tf.compat.v1.name_scope('batch_processing'): + + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + + if cache_data: + ds = ds.take(1).cache().repeat() + + ds = ds.prefetch(buffer_size=10000) + + # num of parallel batches not greater than 56 + max_num_parallel_batches = min(56, 2 * self.num_cores) + ds = ds.apply( + map_and_batch( 
+ map_func=self.parse_and_preprocess, + batch_size=self.batch_size, + num_parallel_batches=max_num_parallel_batches, + num_parallel_calls=None)) + + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) + images, labels = ds_iterator.get_next() + # reshape + labels = tf.reshape(labels, [self.batch_size]) + + return images, labels diff --git a/models/image_recognition/tensorflow/inceptionv3/int8/accuracy.py b/models/image_recognition/tensorflow/inceptionv3/int8/accuracy.py index 1be67fe7a..c22314c7d 100644 --- a/models/image_recognition/tensorflow/inceptionv3/int8/accuracy.py +++ b/models/image_recognition/tensorflow/inceptionv3/int8/accuracy.py @@ -30,114 +30,117 @@ NUM_TEST_IMAGES = 50000 - def load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() - - import os - file_ext = os.path.splitext(model_file)[1] + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') + import os + file_ext = os.path.splitext(model_file)[1] - return graph + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--data_location", default=None, - help="full path to the validation data") - parser.add_argument("--input_height", default=299, - type=int, help="input height") - parser.add_argument("--input_width", default=299, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="predict", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph - else: - sys.exit("Please provide a graph file.") - if args.input_height: - input_height = args.input_height - else: - input_height = 299 - if args.input_width: - input_width = args.input_width - else: - input_width = 299 - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - data_location = args.data_location + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=299, + type=int, help="input height") + parser.add_argument("--input_width", default=299, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + 
parser.add_argument("--output_layer", default="predict", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 299 + if args.input_width: + input_width = args.input_width + else: + input_width = 299 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + + data_graph = tf.Graph() + with data_graph.as_default(): dataset = datasets.ImagenetData(data_location) preprocessor = dataset.get_image_preprocessor()( input_height, input_width, batch_size, - 1, # device count - tf.float32, # data_type for input fed to the graph - train=False, # doing inference + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference resize_method='bilinear') images, labels = preprocessor.minibatch(dataset, subset='validation', - use_datasets=True, cache_data=False) - graph = load_graph(model_file) - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") - - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - - total_accuracy1, total_accuracy5 = (0.0, 0.0) - num_processed_images = 0 - num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ - - num_processed_images - with tf.Session() as sess: - sess_graph = tf.Session(graph=graph, config=config) - while num_remaining_images >= batch_size: - # Reads and preprocess data - np_images, np_labels = sess.run([images[0], labels[0]]) - num_processed_images += batch_size - num_remaining_images -= batch_size - start_time = time.time() - # Compute inference on the preprocessed data - predictions = sess_graph.run(output_tensor, - {input_tensor: np_images}) - elapsed_time = time.time() - start_time - accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) - np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - print("Iteration time: %0.4f ms" % elapsed_time) - print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) + use_datasets=True, cache_data=False) + + infer_graph = load_graph(model_file) + + input_tensor = infer_graph.get_tensor_by_name(input_layer + ":0") + output_tensor = infer_graph.get_tensor_by_name(output_layer + ":0") + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ + - num_processed_images + with tf.compat.v1.Session(graph=data_graph) as sess: + sess_graph = tf.compat.v1.Session(graph=infer_graph, config=config) + while num_remaining_images >= batch_size: + # Reads and preprocess data + np_images, np_labels = sess.run([images[0], labels[0]]) + num_processed_images += batch_size + num_remaining_images -= batch_size + start_time = time.time() + # Compute inference on the preprocessed data + predictions = sess_graph.run(output_tensor, + {input_tensor: np_images}) + elapsed_time = time.time() - start_time + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) + np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1/num_processed_images, + total_accuracy5/num_processed_images)) diff --git a/models/image_recognition/tensorflow/inceptionv3/int8/benchmark.py b/models/image_recognition/tensorflow/inceptionv3/int8/benchmark.py index 4eb39b4e1..bce8bf39f 100644 --- a/models/image_recognition/tensorflow/inceptionv3/int8/benchmark.py +++ b/models/image_recognition/tensorflow/inceptionv3/int8/benchmark.py @@ -44,150 +44,151 @@ import tensorflow as tf if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--input_height", default=299, - type=int, help="input height") - parser.add_argument("--input_width", default=299, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--data_location", default=None, - help="dataset location") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="predict", - help="name of output layer") - parser.add_argument("--num_cores", default=28, - type=int, help="number of physical cores") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - parser.add_argument( - '--data_num_inter_threads', - help='number threads across data layer operators', - type=int, default=16) - parser.add_argument( - '--data_num_intra_threads', - help='number threads for an data layer operator', - type=int, default=14) - 
parser.add_argument("--warmup_steps", type=int, default=10, - help="number of warmup steps") - parser.add_argument("--steps", type=int, default=50, help="number of steps") - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--input_height", default=299, + type=int, help="input height") + parser.add_argument("--input_width", default=299, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--data_location", default=None, + help="dataset location") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="predict", + help="name of output layer") + parser.add_argument("--num_cores", default=28, + type=int, help="number of physical cores") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + parser.add_argument( + '--data_num_inter_threads', + help='number threads across data layer operators', + type=int, default=16) + parser.add_argument( + '--data_num_intra_threads', + help='number threads for an data layer operator', + type=int, default=14) + parser.add_argument("--warmup_steps", type=int, default=10, + help="number of warmup steps") + parser.add_argument("--steps", type=int, default=50, help="number of steps") + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 299 + if args.input_width: + input_width = args.input_width + else: + input_width = 299 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + warmup_steps = args.warmup_steps + steps = args.steps + assert steps > 10, "Benchmark steps should be at least 10." 
+ num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + + data_config = tf.compat.v1.ConfigProto() + data_config.intra_op_parallelism_threads = args.data_num_intra_threads + data_config.inter_op_parallelism_threads = args.data_num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.compat.v1.ConfigProto() + infer_config.intra_op_parallelism_threads = num_intra_threads + infer_config.inter_op_parallelism_threads = num_inter_threads + infer_config.use_per_session_threads = 1 + + data_graph = tf.Graph() + with data_graph.as_default(): + if args.data_location: + print("inference with real data") + # get the images from dataset + dataset = datasets.ImagenetData(args.data_location) + preprocessor = dataset.get_image_preprocessor(benchmark=True)( + input_height, input_width, batch_size, + num_cores=args.num_cores, + resize_method='bilinear') + images = preprocessor.minibatch(dataset, subset='validation') else: - sys.exit("Please provide a graph file.") - if args.input_height: - input_height = args.input_height - else: - input_height = 299 - if args.input_width: - input_width = args.input_width - else: - input_width = 299 - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - warmup_steps = args.warmup_steps - steps = args.steps - assert steps > 10, "Benchmark steps should be at least 10." - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - - data_config = tf.ConfigProto() - data_config.intra_op_parallelism_threads = args.data_num_intra_threads - data_config.inter_op_parallelism_threads = args.data_num_inter_threads - data_config.use_per_session_threads = 1 - - infer_config = tf.ConfigProto() - infer_config.intra_op_parallelism_threads = num_intra_threads - infer_config.inter_op_parallelism_threads = num_inter_threads - infer_config.use_per_session_threads = 1 - - data_graph = tf.Graph() - with data_graph.as_default(): - if args.data_location: - print("inference with real data") - # get the images from dataset - dataset = datasets.ImagenetData(args.data_location) - preprocessor = dataset.get_image_preprocessor(benchmark=True)( - input_height, input_width, batch_size, - num_cores=args.num_cores, - resize_method='bilinear') - images = preprocessor.minibatch(dataset, subset='validation') - else: - # synthetic images - print("inference with dummy data") - input_shape = [batch_size, input_height, input_width, 3] - images = tf.random.uniform( - input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') - - infer_graph = tf.Graph() - with infer_graph.as_default(): - graph_def = tf.GraphDef() - with open(model_file, "rb") as f: - graph_def.ParseFromString(f.read()) - tf.import_graph_def(graph_def, name='') - - input_tensor = infer_graph.get_tensor_by_name(input_layer + ":0") - output_tensor = infer_graph.get_tensor_by_name(output_layer + ":0") - tf.global_variables_initializer() - - data_sess = tf.Session(graph=data_graph, config=data_config) - infer_sess = tf.Session(graph=infer_graph, config=infer_config) - - print("[Running warmup steps...]") - for t in range(warmup_steps): - data_start_time = time.time() - image_data = data_sess.run(images) - data_load_time = time.time() - data_start_time - - start_time = time.time() - infer_sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - - # only count the data loading and processing time for real data - if args.data_location: - elapsed_time += data_load_time - - if 
((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, batch_size / elapsed_time)) - - print("[Running benchmark steps...]") - total_time = 0 - total_images = 0 - - for t in range(steps): - try: - data_start_time = time.time() - image_data = data_sess.run(images) - data_load_time = time.time() - data_start_time - - start_time = time.time() - infer_sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - - # only count the data loading and processing time for real data - if args.data_location: - elapsed_time += data_load_time - - total_time += elapsed_time - total_images += batch_size - if ((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, batch_size / elapsed_time)) - except tf.errors.OutOfRangeError: - print("Running out of images from dataset.") - break - - print("Average throughput for batch size {0}: {1} images/sec".format(batch_size, total_images / total_time)) + # synthetic images + print("inference with dummy data") + input_shape = [batch_size, input_height, input_width, 3] + images = tf.random.uniform( + input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') + + infer_graph = tf.Graph() + with infer_graph.as_default(): + graph_def = tf.compat.v1.GraphDef() + with open(model_file, "rb") as f: + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + + input_tensor = infer_graph.get_tensor_by_name(input_layer + ":0") + output_tensor = infer_graph.get_tensor_by_name(output_layer + ":0") + tf.compat.v1.global_variables_initializer() + + data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config) + infer_sess = tf.compat.v1.Session(graph=infer_graph, config=infer_config) + + print("[Running warmup steps...]") + for t in range(warmup_steps): + data_start_time = time.time() + image_data = data_sess.run(images) + data_load_time = time.time() - data_start_time + + start_time = time.time() + infer_sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + + # only count the data loading and processing time for real data + if args.data_location: + elapsed_time += data_load_time + + if ((t + 1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t + 1, batch_size / elapsed_time)) + + print("[Running benchmark steps...]") + total_time = 0 + total_images = 0 + + for t in range(steps): + try: + data_start_time = time.time() + image_data = data_sess.run(images) + data_load_time = time.time() - data_start_time + + start_time = time.time() + infer_sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + + # only count the data loading and processing time for real data + if args.data_location: + elapsed_time += data_load_time + + total_time += elapsed_time + total_images += batch_size + if ((t + 1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t + 1, batch_size / elapsed_time)) + except tf.errors.OutOfRangeError: + print("Running out of images from dataset.") + break + + print("Average throughput for batch size {0}: {1} images/sec".format(batch_size, total_images / total_time)) + diff --git a/models/image_recognition/tensorflow/inceptionv3/int8/calibration.py b/models/image_recognition/tensorflow/inceptionv3/int8/calibration.py index 7edd39f7c..ad7fdee40 100644 --- a/models/image_recognition/tensorflow/inceptionv3/int8/calibration.py +++ b/models/image_recognition/tensorflow/inceptionv3/int8/calibration.py @@ -30,111 +30,109 @@ NUM_TEST_IMAGES = 50000 - def 
load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() - - import os - file_ext = os.path.splitext(model_file)[1] - - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() - return graph - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--data_location", default=None, - help="full path to the validation data") - parser.add_argument("--input_height", default=299, - type=int, help="input height") - parser.add_argument("--input_width", default=299, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="predict", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - args = parser.parse_args() + import os + file_ext = os.path.splitext(model_file)[1] - if args.input_graph: - model_file = args.input_graph + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) else: - sys.exit("Please provide a graph file.") - if args.input_height: - input_height = args.input_height - else: - input_height = 224 - if args.input_width: - input_width = args.input_width - else: - input_width = 224 - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - data_location = args.data_location - dataset = datasets.ImagenetData(data_location) - preprocessor = dataset.get_image_preprocessor()( - input_height, input_width, batch_size, - 1, # device count - tf.float32, # data_type for input fed to the graph - train=False, # doing inference - resize_method='bilinear') - - images, labels = preprocessor.minibatch(dataset, subset='train', - use_datasets=True, cache_data=False) - graph = load_graph(model_file) - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads + return graph - total_accuracy1, total_accuracy5 = (0.0, 0.0) - num_processed_images = 0 - num_remaining_images = dataset.num_examples_per_epoch(subset='train') \ - - num_processed_images - with tf.Session() as sess: - sess_graph = tf.Session(graph=graph, config=config) - while (num_remaining_images >= batch_size) and (num_processed_images < 10000): - # Reads and preprocess data - np_images, np_labels = sess.run([images[0], labels[0]]) - num_processed_images += batch_size - num_remaining_images -= batch_size - # Compute inference on the preprocessed data - predictions = sess_graph.run(output_tensor, - {input_tensor: np_images}) - accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - 
tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) - np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=299, + type=int, help="input height") + parser.add_argument("--input_width", default=299, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="predict", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 224 + if args.input_width: + input_width = args.input_width + else: + input_width = 224 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + dataset = datasets.ImagenetData(data_location) + preprocessor = dataset.get_image_preprocessor()( + input_height, input_width, batch_size, + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference + resize_method='bilinear') + + images, labels = preprocessor.minibatch(dataset, subset='train', + use_datasets=True, cache_data=False) + graph = load_graph(model_file) + input_tensor = graph.get_tensor_by_name(input_layer + ":0") + output_tensor = graph.get_tensor_by_name(output_layer + ":0") + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset='train') \ + - num_processed_images + with tf.compat.v1.Session() as sess: + sess_graph = tf.compat.v1.Session(graph=graph, config=config) + while (num_remaining_images >= batch_size) and (num_processed_images < 10000): + # Reads and preprocess data + np_images, np_labels = sess.run([images[0], labels[0]]) + num_processed_images += batch_size + num_remaining_images -= batch_size + # Compute inference on the preprocessed data + predictions = sess_graph.run(output_tensor, + {input_tensor: np_images}) + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + 
targets=tf.constant(np_labels), k=5), tf.float32)) + np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1/num_processed_images, + total_accuracy5/num_processed_images)) diff --git a/models/image_recognition/tensorflow/inceptionv3/int8/cnn_util.py b/models/image_recognition/tensorflow/inceptionv3/int8/cnn_util.py index 1b60e7175..e74df9a41 100644 --- a/models/image_recognition/tensorflow/inceptionv3/int8/cnn_util.py +++ b/models/image_recognition/tensorflow/inceptionv3/int8/cnn_util.py @@ -38,11 +38,11 @@ def tensorflow_version_tuple(): - v = tf.__version__ - major, minor, patch = v.split('.') - return (int(major), int(minor), patch) + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) def tensorflow_version(): - vt = tensorflow_version_tuple() - return vt[0] * 1000 + vt[1] + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] diff --git a/models/image_recognition/tensorflow/inceptionv3/int8/datasets.py b/models/image_recognition/tensorflow/inceptionv3/int8/datasets.py index d60fa7383..b06e196ce 100644 --- a/models/image_recognition/tensorflow/inceptionv3/int8/datasets.py +++ b/models/image_recognition/tensorflow/inceptionv3/int8/datasets.py @@ -41,72 +41,71 @@ IMAGENET_NUM_TRAIN_IMAGES = 1281167 IMAGENET_NUM_VAL_IMAGES = 50000 - class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, height=None, width=None, depth=None, data_dir=None, - queue_runner_required=False, num_classes=1000): - self.name = name - self.height = height - self.width = width - self.depth = depth or 3 + def __init__(self, name, height=None, width=None, depth=None, data_dir=None, + queue_runner_required=False, num_classes=1000): + self.name = name + self.height = height + self.width = width + self.depth = depth or 3 - self.data_dir = data_dir - self._queue_runner_required = queue_runner_required - self._num_classes = num_classes + self.data_dir = data_dir + self._queue_runner_required = queue_runner_required + self._num_classes = num_classes - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.compat.v1.TFRecordReader() - @property - def num_classes(self): - return self._num_classes + @property + def num_classes(self): + return self._num_classes - @num_classes.setter - def num_classes(self, val): - self._num_classes = val + @num_classes.setter + def num_classes(self, val): + self._num_classes = val - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name - def get_image_preprocessor(self): - return None + def get_image_preprocessor(self): + return None - def queue_runner_required(self): - return self._queue_runner_required + def queue_runner_required(self): + return self._queue_runner_required - def use_synthetic_gpu_images(self): - return not self.data_dir + def use_synthetic_gpu_images(self): + return not self.data_dir class ImagenetData(Dataset): - """Configuration for Imagenet dataset.""" - - 
def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) - - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - elif subset == 'calibrate' or subset == 'calibration': - return 100 - else: - raise ValueError('Invalid data subset "%s"' % subset) - - def get_image_preprocessor(self, benchmark=False): - if benchmark: - import preprocessing_benchmark - return preprocessing_benchmark.RecordInputImagePreprocessor - else: - import preprocessing - return preprocessing.RecordInputImagePreprocessor + """Configuration for Imagenet dataset.""" + + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) + + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + elif subset == 'calibrate' or subset == 'calibration': + return 100 + else: + raise ValueError('Invalid data subset "%s"' % subset) + + def get_image_preprocessor(self, benchmark=False): + if benchmark: + import preprocessing_benchmark + return preprocessing_benchmark.RecordInputImagePreprocessor + else: + import preprocessing + return preprocessing.RecordInputImagePreprocessor diff --git a/models/image_recognition/tensorflow/inceptionv3/int8/preprocessing.py b/models/image_recognition/tensorflow/inceptionv3/int8/preprocessing.py index 454903f9d..6db0001ba 100644 --- a/models/image_recognition/tensorflow/inceptionv3/int8/preprocessing.py +++ b/models/image_recognition/tensorflow/inceptionv3/int8/preprocessing.py @@ -41,9 +41,6 @@ from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf -from tensorflow.contrib.data.python.ops import batching -from tensorflow.contrib.data.python.ops import interleave_ops -from tensorflow.contrib.image.python.ops import distort_image_ops from tensorflow.python.layers import utils from tensorflow.python.ops import data_flow_ops from tensorflow.python.platform import gfile @@ -54,11 +51,11 @@ def parse_example_proto(example_serialized): """Parses an Example proto containing a training example of an image. - + The output of the build_image_data.py image preprocessing script is a dataset containing serialized Example protocol buffers. Each Example proto contains the following fields: - + image/height: 462 image/width: 581 image/colorspace: 'RGB' @@ -74,11 +71,11 @@ def parse_example_proto(example_serialized): image/format: 'JPEG' image/filename: 'ILSVRC2012_val_00041207.JPEG' image/encoded: - + Args: example_serialized: scalar Tensor tf.string containing a serialized Example protocol buffer. - + Returns: image_buffer: Tensor tf.string containing the contents of a JPEG file. label: Tensor tf.int32 containing the label. @@ -89,14 +86,14 @@ def parse_example_proto(example_serialized): """ # Dense features in Example proto. 
feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, + 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) # Sparse features in Example proto. feature_map.update( {k: sparse_float32 for k in ['image/object/bbox/xmin', @@ -104,7 +101,7 @@ def parse_example_proto(example_serialized): 'image/object/bbox/xmax', 'image/object/bbox/ymax']}) - features = tf.parse_single_example(example_serialized, feature_map) + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) label = tf.cast(features['image/class/label'], dtype=tf.int32) xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) @@ -118,19 +115,19 @@ def parse_example_proto(example_serialized): # Force the variable number of bounding boxes into the shape # [1, num_boxes, coords]. bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) + bbox = tf.transpose(a=bbox, perm=[0, 2, 1]) return features['image/encoded'], label, bbox, features['image/class/text'] def get_image_resize_method(resize_method, batch_position=0): """Get tensorflow resize method. - + If resize_method is 'round_robin', return different methods based on batch position in a round-robin fashion. NOTE: If the batch size is not a multiple of the number of methods, then the distribution of methods will not be uniform. - + Args: resize_method: (string) nearest, bilinear, bicubic, area, or round_robin. batch_position: position of the image in a batch. NOTE: this argument can @@ -174,7 +171,7 @@ def resize_method_2(): def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): """Decode a JPEG string into one 3-D float image Tensor. - + Args: image_buffer: scalar string Tensor. scope: Optional scope for op_scope. @@ -183,7 +180,7 @@ def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): """ # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): - with tf.name_scope(scope or 'decode_jpeg'): + with tf.compat.v1.name_scope(scope or 'decode_jpeg'): # Decode the string as an RGB JPEG. # Note that the resulting image contains an unknown height and width # that is set dynamically by decode_jpeg. In other words, the height @@ -201,13 +198,13 @@ def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): def preprocess_for_eval(image, height, width, central_fraction=0.875, scope=None): """Prepare one image for evaluation. - + If height and width are specified it would output an image with that size by applying resize_bilinear. - + If central_fraction is specified it would crop the central fraction of the input image. - + Args: image: 3-D Tensor of image. If dtype is tf.float32 then the range should be [0, 1], otherwise it would converted to tf.float32 assuming that the range @@ -220,7 +217,7 @@ def preprocess_for_eval(image, height, width, Returns: 3-D float Tensor of prepared image. 
""" - with tf.name_scope(scope, 'eval_image', [image, height, width]): + with tf.compat.v1.name_scope(scope, 'eval_image', [image, height, width]): if image.dtype != tf.float32: image = tf.image.convert_image_dtype(image, dtype=tf.float32) # Crop the central region of the image with an area containing 87.5% of @@ -232,8 +229,8 @@ def preprocess_for_eval(image, height, width, if height and width: # Resize the image to the specified height and width. image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) + image = tf.image.resize(image, [height, width], + method=tf.image.ResizeMethod.BILINEAR) image = tf.squeeze(image, [0]) image = tf.subtract(image, 0.5) image = tf.multiply(image, 2.0) @@ -242,17 +239,17 @@ def preprocess_for_eval(image, height, width, def apply_with_random_selector(x, func, num_cases): """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - + Args: x: input Tensor. func: Python function to apply. num_cases: Python int32, number of cases to sample sel from. - + Returns: The result of func(x, sel), where func receives the value of the selector as a python integer, but sel is sampled dynamically. """ - sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) + sel = tf.random.uniform([], maxval=num_cases, dtype=tf.int32) # Pass the real x only to one of the func calls. return control_flow_ops.merge([ func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) @@ -261,12 +258,12 @@ def apply_with_random_selector(x, func, num_cases): def distort_color(image, color_ordering=0, fast_mode=True, scope=None): """Distort the color of a Tensor image. - + Each color distortion is non-commutative and thus ordering of the color ops matters. Ideally we would randomly permute the ordering of the color ops. Rather then adding that level of complication, we select a distinct ordering of color ops for each preprocessing thread. - + Args: image: 3-D Tensor containing single image in [0, 1]. color_ordering: Python int, a type of distortion (valid values: 0-3). @@ -277,7 +274,7 @@ def distort_color(image, color_ordering=0, fast_mode=True, scope=None): Raises: ValueError: if color_ordering not in [0, 3] """ - with tf.name_scope(scope, 'distort_color', [image]): + with tf.compat.v1.name_scope(scope, 'distort_color', [image]): if fast_mode: if color_ordering == 0: image = tf.image.random_brightness(image, max_delta=32. / 255.) @@ -321,9 +318,9 @@ def distorted_bounding_box_crop(image, max_attempts=100, scope=None): """Generates cropped_image using a one of the bboxes randomly distorted. - + See `tf.image.sample_distorted_bounding_box` for more documentation. - + Args: image: 3-D Tensor of image (it will be converted to floats in [0, 1]). bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] @@ -344,7 +341,7 @@ def distorted_bounding_box_crop(image, Returns: A tuple, a 3-D Tensor cropped_image and the distorted bbox """ - with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): + with tf.compat.v1.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): # Each bounding box has shape [1, num_boxes, box coords] and # the coordinates are ordered [ymin, xmin, ymax, xmax]. @@ -356,7 +353,7 @@ def distorted_bounding_box_crop(image, # bounding box. If no box is supplied, then we assume the bounding box is # the entire image. 
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), + image_size=tf.shape(input=image), bounding_boxes=bbox, min_object_covered=min_object_covered, aspect_ratio_range=aspect_ratio_range, @@ -376,11 +373,11 @@ def preprocess_for_train(image, height, width, bbox, scope=None, add_image_summaries=True): """Distort one image for training a network. - + Distorting images provides a useful technique for augmenting the data set during training in order to make the network invariant to aspects of the image that do not effect the label. - + Args: image: 3-D Tensor of image. If dtype is tf.float32 then the range should be [0, 1], otherwise it would converted to tf.float32 assuming that the range @@ -400,7 +397,7 @@ def preprocess_for_train(image, height, width, bbox, 3-D float Tensor of distorted image used for training with range [-1, 1]. """ - with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + with tf.compat.v1.name_scope(scope, 'distort_image', [image, height, width, bbox]): if bbox is None: bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, @@ -412,7 +409,7 @@ def preprocess_for_train(image, height, width, bbox, image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), bbox) if add_image_summaries: - tf.summary.image('image_with_bounding_boxes', image_with_box) + tf.compat.v1.summary.image('image_with_bounding_boxes', image_with_box) distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) @@ -422,7 +419,7 @@ def preprocess_for_train(image, height, width, bbox, image_with_distorted_box = tf.image.draw_bounding_boxes( tf.expand_dims(image, 0), distorted_bbox) if add_image_summaries: - tf.summary.image('images_with_distorted_bounding_box', + tf.compat.v1.summary.image('images_with_distorted_bounding_box', image_with_distorted_box) # This resizing operation may distort the images because the aspect @@ -434,12 +431,12 @@ def preprocess_for_train(image, height, width, bbox, num_resize_cases = 1 if fast_mode else 4 distorted_image = apply_with_random_selector( distorted_image, - lambda x, method: tf.image.resize_images(x, [height, width], + lambda x, method: tf.image.resize(x, [height, width], method), num_cases=num_resize_cases) if add_image_summaries: - tf.summary.image('cropped_resized_image', + tf.compat.v1.summary.image('cropped_resized_image', tf.expand_dims(distorted_image, 0)) # Randomly flip the image horizontally. @@ -452,7 +449,7 @@ def preprocess_for_train(image, height, width, bbox, num_cases=num_distort_cases) if add_image_summaries: - tf.summary.image('final_distorted_image', + tf.compat.v1.summary.image('final_distorted_image', tf.expand_dims(distorted_image, 0)) distorted_image = tf.subtract(distorted_image, 0.5) distorted_image = tf.multiply(distorted_image, 2.0) @@ -462,12 +459,12 @@ def preprocess_for_train(image, height, width, bbox, def distort_color(image, batch_position=0, distort_color_in_yiq=False, scope=None): """Distort the color of the image. - + Each color distortion is non-commutative and thus ordering of the color ops matters. Ideally we would randomly permute the ordering of the color ops. Rather then adding that level of complication, we select a distinct ordering of color ops based on the position of the image in a batch. - + Args: image: float32 Tensor containing single image. Tensor values should be in range [0, 1]. 
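For context, the hunks in this file converge on the TF2-style image endpoints (tf.image.resize with an explicit method, tf.compat.v1.name_scope) while keeping the original evaluation path: convert to float32, central-crop, bilinear-resize, rescale to [-1, 1]. A minimal, self-contained sanity-check sketch of that path follows; it runs eagerly under TensorFlow 2.x, and the dummy input shape and the 299x299 / 0.875 constants simply mirror the defaults used in these scripts, they are not part of the patch itself.

    import tensorflow as tf

    # Dummy uint8 tensor standing in for a decoded JPEG image.
    image = tf.zeros([462, 581, 3], dtype=tf.uint8)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)   # floats in [0, 1]
    image = tf.image.central_crop(image, central_fraction=0.875)    # keep central 87.5%
    image = tf.expand_dims(image, 0)                                # add batch dimension
    image = tf.image.resize(image, [299, 299],
                            method=tf.image.ResizeMethod.BILINEAR)  # bilinear resize
    image = tf.squeeze(image, [0])                                  # back to [299, 299, 3]
    image = tf.multiply(tf.subtract(image, 0.5), 2.0)               # rescale to [-1, 1]
    print(image.shape)                                              # (299, 299, 3)
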
@@ -478,7 +475,7 @@ def distort_color(image, batch_position=0, distort_color_in_yiq=False, Returns: color-distorted image """ - with tf.name_scope(scope or 'distort_color'): + with tf.compat.v1.name_scope(scope or 'distort_color'): def distort_fn_0(image=image): """Variant 0 of distort function.""" image = tf.image.random_brightness(image, max_delta=32. / 255.) @@ -570,7 +567,7 @@ def minibatch(self, dataset, subset, use_datasets, cache_data, shift_ratio=-1): if shift_ratio < 0: shift_ratio = self.shift_ratio - with tf.name_scope('batch_processing'): + with tf.compat.v1.name_scope('batch_processing'): # Build final results per split. images = [[] for _ in range(self.num_splits)] labels = [[] for _ in range(self.num_splits)] @@ -583,7 +580,7 @@ def minibatch(self, dataset, subset, use_datasets, cache_data, .format(glob_pattern)) ds = tf.data.TFRecordDataset.list_files(file_names) ds = ds.apply( - interleave_ops.parallel_interleave( + tf.data.experimental.parallel_interleave( tf.data.TFRecordDataset, cycle_length=10)) if cache_data: ds = ds.take(1).cache().repeat() @@ -594,12 +591,12 @@ def minibatch(self, dataset, subset, use_datasets, cache_data, ds = ds.shuffle(buffer_size=10000) ds = ds.repeat() ds = ds.apply( - batching.map_and_batch( + tf.compat.v1.data.experimental.map_and_batch( map_func=self.parse_and_preprocess, batch_size=self.batch_size_per_split, num_parallel_batches=self.num_splits)) ds = ds.prefetch(buffer_size=self.num_splits) - ds_iterator = ds.make_one_shot_iterator() + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) for d in xrange(self.num_splits): labels[d], images[d] = ds_iterator.get_next() diff --git a/models/image_recognition/tensorflow/inceptionv3/int8/preprocessing_benchmark.py b/models/image_recognition/tensorflow/inceptionv3/int8/preprocessing_benchmark.py index 1cfe036df..84a52998c 100644 --- a/models/image_recognition/tensorflow/inceptionv3/int8/preprocessing_benchmark.py +++ b/models/image_recognition/tensorflow/inceptionv3/int8/preprocessing_benchmark.py @@ -43,126 +43,126 @@ def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. - feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - return features['image/encoded'], label + """Parses an Example proto containing a training example of an image. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. 
+ feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + return features['image/encoded'], label def eval_image(image, height, width, resize_method, central_fraction=0.875, scope=None): - with tf.name_scope('eval_image'): - if resize_method == 'crop': - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256, 256 * shape[1] / shape[0]], - dtype=tf.int32)), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256 * shape[0] / shape[1], 256], - dtype=tf.int32))) - shape = tf.shape(image) - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) - distorted_image.set_shape([height, width, 3]) - return distorted_image - else: # bilinear - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. - image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image + with tf.compat.v1.name_scope('eval_image'): + if resize_method == 'crop': + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256, 256 * shape[1] / shape[0]], + dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256 * shape[0] / shape[1], 256], + dtype=tf.int32))) + shape = tf.shape(input=image) + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) + distorted_image.set_shape([height, width, 3]) + return distorted_image + else: # bilinear + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. 
+ image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [height, width], + method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image class RecordInputImagePreprocessor(object): - """Preprocessor for images with RecordInput format.""" - - def __init__(self, - height, - width, - batch_size, - num_cores, - resize_method): - - self.height = height - self.width = width - self.batch_size = batch_size - self.num_cores = num_cores - self.resize_method = resize_method - - def parse_and_preprocess(self, value): - # parse - image_buffer, label_index = parse_example_proto(value) - # preprocess - image = tf.image.decode_jpeg( - image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') - image = eval_image(image, self.height, self.width, self.resize_method) - - return (image, label_index) - - def minibatch(self, dataset, subset, cache_data=False): - - with tf.name_scope('batch_processing'): - - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - - ds = ds.apply( - parallel_interleave( - tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, - sloppy=True, - buffer_output_elements=10000, prefetch_input_elements=10000)) - - if cache_data: - ds = ds.take(1).cache().repeat() - - ds = ds.prefetch(buffer_size=10000) - - # num of parallel batches not greater than 56 - max_num_parallel_batches = min(56, 2 * self.num_cores) - ds = ds.apply( - map_and_batch( - map_func=self.parse_and_preprocess, - batch_size=self.batch_size, - num_parallel_batches=max_num_parallel_batches, - num_parallel_calls=None)) # this number should be tuned - - ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE) # this number can be tuned - - ds_iterator = ds.make_one_shot_iterator() - images, _ = ds_iterator.get_next() - - return images + """Preprocessor for images with RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + num_cores, + resize_method): + + self.height = height + self.width = width + self.batch_size = batch_size + self.num_cores = num_cores + self.resize_method = resize_method + + def parse_and_preprocess(self, value): + # parse + image_buffer, label_index = parse_example_proto(value) + # preprocess + image = tf.image.decode_jpeg( + image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + image = eval_image(image, self.height, self.width, self.resize_method) + + return (image, label_index) + + def minibatch(self, dataset, subset, cache_data=False): + + with tf.compat.v1.name_scope('batch_processing'): + + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + + if cache_data: + ds = ds.take(1).cache().repeat() + + ds = ds.prefetch(buffer_size=10000) + + # num of parallel batches not greater than 56 + max_num_parallel_batches = min(56, 2*self.num_cores) + ds = ds.apply( + map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size, 
+ num_parallel_batches=max_num_parallel_batches, + num_parallel_calls=None)) # this number should be tuned + + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) + images, _ = ds_iterator.get_next() + + return images diff --git a/models/image_recognition/tensorflow/inceptionv4/inference/accuracy.py b/models/image_recognition/tensorflow/inceptionv4/inference/accuracy.py index 3ca9aff46..eacb42212 100644 --- a/models/image_recognition/tensorflow/inceptionv4/inference/accuracy.py +++ b/models/image_recognition/tensorflow/inceptionv4/inference/accuracy.py @@ -52,7 +52,7 @@ def load_graph(model_file): graph = tf.Graph() - graph_def = tf.GraphDef() + graph_def = tf.compat.v1.GraphDef() import os file_ext = os.path.splitext(model_file)[1] @@ -113,32 +113,35 @@ def load_graph(model_file): num_inter_threads = args.num_inter_threads num_intra_threads = args.num_intra_threads data_location = args.data_location - dataset = datasets.ImagenetData(data_location) - preprocessor = dataset.get_image_preprocessor()( - input_height, input_width, batch_size, - 1, # device count - tf.float32, # data_type for input fed to the graph - train=False, # doing inference - resize_method='bilinear') - - images, labels = preprocessor.minibatch(dataset, subset='validation', + + + data_graph = tf.Graph() ### + with data_graph.as_default(): ### + dataset = datasets.ImagenetData(data_location) + preprocessor = dataset.get_image_preprocessor()( + input_height, input_width, batch_size, + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference + resize_method='bilinear') + images, labels = preprocessor.minibatch(dataset, subset='validation', use_datasets=True, cache_data=False) graph = load_graph(model_file) input_tensor = graph.get_tensor_by_name(input_layer + ":0") output_tensor = graph.get_tensor_by_name(output_layer + ":0") - config = tf.ConfigProto() + config = tf.compat.v1.ConfigProto() config.inter_op_parallelism_threads = num_inter_threads config.intra_op_parallelism_threads = num_intra_threads total_accuracy1, total_accuracy5 = (0.0, 0.0) num_processed_images = 0 num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ - - num_processed_images + - num_processed_images - with tf.Session() as sess: - sess_graph = tf.Session(graph=graph, config=config) + with tf.compat.v1.Session(graph=data_graph) as sess: ### + sess_graph = tf.compat.v1.Session(graph=graph, config=config) while num_remaining_images >= batch_size: # Reads and preprocess data np_images, np_labels = sess.run([images[0], labels[0]]) @@ -150,18 +153,18 @@ def load_graph(model_file): {input_tensor: np_images}) elapsed_time = time.time() - start_time accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 print("Iteration time: %0.4f ms" % elapsed_time) print( - "Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" + "Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % ( - num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) + num_processed_images, total_accuracy1 / num_processed_images, + total_accuracy5 / num_processed_images)) diff --git a/models/image_recognition/tensorflow/inceptionv4/inference/benchmark.py b/models/image_recognition/tensorflow/inceptionv4/inference/benchmark.py index d53add848..3c51662a4 100644 --- a/models/image_recognition/tensorflow/inceptionv4/inference/benchmark.py +++ b/models/image_recognition/tensorflow/inceptionv4/inference/benchmark.py @@ -45,116 +45,124 @@ from google.protobuf import text_format import tensorflow as tf - def load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() - - import os - file_ext = os.path.splitext(model_file)[1] + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') + import os + file_ext = os.path.splitext(model_file)[1] - return graph + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--input_height", default=None, - type=int, help="input height") - parser.add_argument("--input_width", default=None, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="InceptionV4/Logits/Predictions", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - parser.add_argument("--warmup_steps", type=int, default=10, - help="number of warmup steps") - parser.add_argument("--steps", type=int, default=50, help="number of steps") - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph - else: - sys.exit("Please provide a graph file.") - if args.input_height: - input_height = args.input_height - else: - input_height = 299 - if args.input_width: - input_width = args.input_width - else: - input_width = 299 - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - warmup_steps = args.warmup_steps - steps = args.steps - assert steps > 10, "Benchmark steps should be at least 10." 
- num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--input_height", default=None, + type=int, help="input height") + parser.add_argument("--input_width", default=None, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="InceptionV4/Logits/Predictions", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + parser.add_argument("--warmup_steps", type=int, default=10, + help="number of warmup steps") + parser.add_argument("--steps", type=int, default=50, help="number of steps") + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 299 + if args.input_width: + input_width = args.input_width + else: + input_width = 299 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + warmup_steps = args.warmup_steps + steps = args.steps + assert steps > 10, "Benchmark steps should be at least 10." + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + + data_graph = tf.Graph() ## + with data_graph.as_default():## input_shape = [batch_size, input_height, input_width, 3] - images = tf.truncated_normal( - input_shape, - dtype=tf.float32, - stddev=10, - name='synthetic_images') - - image_data = None - with tf.Session() as sess: - image_data = sess.run(images) - - graph = load_graph(model_file) - - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") - - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - - with tf.Session(graph=graph, config=config) as sess: - sys.stdout.flush() - print("[Running warmup steps...]") - for t in range(warmup_steps): - start_time = time.time() - sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - if((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, batch_size / elapsed_time)) - - print("[Running benchmark steps...]") - total_time = 0 - total_images = 0 - for t in range(steps): - start_time = time.time() - results = sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - if((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, batch_size / elapsed_time)) - total_time += elapsed_time - total_images += batch_size - average_time = total_time / total_images - if batch_size == 1: - print('Latency: %.3f ms' % (average_time * 1000)) + images = tf.random.truncated_normal( + input_shape, + dtype=tf.float32, + stddev=10, + name='synthetic_images') + + #image_data = None + #with tf.compat.v1.Session() as sess: + # image_data = sess.run(images) + + graph = load_graph(model_file) + + input_tensor = graph.get_tensor_by_name(input_layer + ":0"); + output_tensor = 
graph.get_tensor_by_name(output_layer + ":0"); + tf.compat.v1.global_variables_initializer()### + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + data_config = tf.compat.v1.ConfigProto()### + data_config.inter_op_parallelism_threads = num_inter_threads ### + data_config.intra_op_parallelism_threads = num_intra_threads ### + + data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config) ### + + with tf.compat.v1.Session(graph=graph, config=config) as sess: + sys.stdout.flush() + print("[Running warmup steps...]") + image_data = data_sess.run(images) ### + for t in range(warmup_steps): + start_time = time.time() + sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + if((t+1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t+1, batch_size/elapsed_time)) + + print("[Running benchmark steps...]") + total_time = 0; + total_images = 0; + for t in range(steps): + start_time = time.time() + results = sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + if((t+1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t+1, batch_size/elapsed_time)); + total_time += elapsed_time + total_images += batch_size + average_time = total_time / total_images + if batch_size == 1: + print('Latency: %.3f ms' % (average_time * 1000)) diff --git a/models/image_recognition/tensorflow/inceptionv4/inference/cnn_util.py b/models/image_recognition/tensorflow/inceptionv4/inference/cnn_util.py index 1b60e7175..e74df9a41 100644 --- a/models/image_recognition/tensorflow/inceptionv4/inference/cnn_util.py +++ b/models/image_recognition/tensorflow/inceptionv4/inference/cnn_util.py @@ -38,11 +38,11 @@ def tensorflow_version_tuple(): - v = tf.__version__ - major, minor, patch = v.split('.') - return (int(major), int(minor), patch) + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) def tensorflow_version(): - vt = tensorflow_version_tuple() - return vt[0] * 1000 + vt[1] + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] diff --git a/models/image_recognition/tensorflow/inceptionv4/inference/datasets.py b/models/image_recognition/tensorflow/inceptionv4/inference/datasets.py index 22d3603de..93148f888 100644 --- a/models/image_recognition/tensorflow/inceptionv4/inference/datasets.py +++ b/models/image_recognition/tensorflow/inceptionv4/inference/datasets.py @@ -52,143 +52,143 @@ def create_dataset(data_dir, data_name): - """Create a Dataset instance based on data_dir and data_name.""" - supported_datasets = { - 'imagenet': ImagenetData, - 'cifar10': Cifar10Data, - } - if not data_dir and not data_name: - # When using synthetic data, use synthetic imagenet images by default. - data_name = 'imagenet' + """Create a Dataset instance based on data_dir and data_name.""" + supported_datasets = { + 'imagenet': ImagenetData, + 'cifar10': Cifar10Data, + } + if not data_dir and not data_name: + # When using synthetic data, use synthetic imagenet images by default. + data_name = 'imagenet' - if data_name is None: - for supported_name in supported_datasets: - if supported_name in data_dir: - data_name = supported_name - break + if data_name is None: + for supported_name in supported_datasets: + if supported_name in data_dir: + data_name = supported_name + break - if data_name is None: - raise ValueError('Could not identify name of dataset. 
' - 'Please specify with --data_name option.') + if data_name is None: + raise ValueError('Could not identify name of dataset. ' + 'Please specify with --data_name option.') - if data_name not in supported_datasets: - raise ValueError('Unknown dataset. Must be one of %s', ', '.join( - [key for key in sorted(supported_datasets.keys())])) + if data_name not in supported_datasets: + raise ValueError('Unknown dataset. Must be one of %s', ', '.join( + [key for key in sorted(supported_datasets.keys())])) - return supported_datasets[data_name](data_dir) + return supported_datasets[data_name](data_dir) class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, height=None, width=None, depth=None, data_dir=None, - queue_runner_required=False, num_classes=1000): - self.name = name - self.height = height - self.width = width - self.depth = depth or 3 + def __init__(self, name, height=None, width=None, depth=None, data_dir=None, + queue_runner_required=False, num_classes=1000): + self.name = name + self.height = height + self.width = width + self.depth = depth or 3 - self.data_dir = data_dir - self._queue_runner_required = queue_runner_required - self._num_classes = num_classes + self.data_dir = data_dir + self._queue_runner_required = queue_runner_required + self._num_classes = num_classes - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.compat.v1.TFRecordReader() - @property - def num_classes(self): - return self._num_classes + @property + def num_classes(self): + return self._num_classes - @num_classes.setter - def num_classes(self, val): - self._num_classes = val + @num_classes.setter + def num_classes(self, val): + self._num_classes = val - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name - def get_image_preprocessor(self): - return None + def get_image_preprocessor(self): + return None - def queue_runner_required(self): - return self._queue_runner_required + def queue_runner_required(self): + return self._queue_runner_required - def use_synthetic_gpu_images(self): - return not self.data_dir + def use_synthetic_gpu_images(self): + return not self.data_dir class ImagenetData(Dataset): - """Configuration for Imagenet dataset.""" + """Configuration for Imagenet dataset.""" - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - else: - raise ValueError('Invalid data subset "%s"' % subset) + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + else: + raise ValueError('Invalid data subset "%s"' % subset) - def get_image_preprocessor(self): - if self.use_synthetic_gpu_images(): - return preprocessing.SyntheticImagePreprocessor - else: - return 
preprocessing.RecordInputImagePreprocessor + def get_image_preprocessor(self): + if self.use_synthetic_gpu_images(): + return preprocessing.SyntheticImagePreprocessor + else: + return preprocessing.RecordInputImagePreprocessor class Cifar10Data(Dataset): - """Configuration for cifar 10 dataset. - - It will mount all the input images to memory. - """ - - def __init__(self, data_dir=None): - super(Cifar10Data, self).__init__('cifar10', 32, 32, data_dir=data_dir, - queue_runner_required=True, - num_classes=10) - - def read_data_files(self, subset='train'): - """Reads from data file and returns images and labels in a numpy array.""" - assert self.data_dir, ('Cannot call `read_data_files` when using synthetic ' - 'data') - if subset == 'train': - filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i) - for i in xrange(1, 6)] - elif subset == 'validation': - filenames = [os.path.join(self.data_dir, 'test_batch')] - else: - raise ValueError('Invalid data subset "%s"' % subset) - - inputs = [] - for filename in filenames: - with gfile.Open(filename, 'r') as f: - inputs.append(cPickle.load(f)) - # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the - # input format. - all_images = np.concatenate( - [each_input['data'] for each_input in inputs]).astype(np.float32) - all_labels = np.concatenate( - [each_input['labels'] for each_input in inputs]) - return all_images, all_labels - - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return 50000 - elif subset == 'validation': - return 10000 - else: - raise ValueError('Invalid data subset "%s"' % subset) - - def get_image_preprocessor(self): - if self.use_synthetic_gpu_images(): - return preprocessing.SyntheticImagePreprocessor - else: - return preprocessing.Cifar10ImagePreprocessor + """Configuration for cifar 10 dataset. + + It will mount all the input images to memory. + """ + + def __init__(self, data_dir=None): + super(Cifar10Data, self).__init__('cifar10', 32, 32, data_dir=data_dir, + queue_runner_required=True, + num_classes=10) + + def read_data_files(self, subset='train'): + """Reads from data file and returns images and labels in a numpy array.""" + assert self.data_dir, ('Cannot call `read_data_files` when using synthetic ' + 'data') + if subset == 'train': + filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i) + for i in xrange(1, 6)] + elif subset == 'validation': + filenames = [os.path.join(self.data_dir, 'test_batch')] + else: + raise ValueError('Invalid data subset "%s"' % subset) + + inputs = [] + for filename in filenames: + with gfile.Open(filename, 'r') as f: + inputs.append(cPickle.load(f)) + # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the + # input format. 
+ all_images = np.concatenate( + [each_input['data'] for each_input in inputs]).astype(np.float32) + all_labels = np.concatenate( + [each_input['labels'] for each_input in inputs]) + return all_images, all_labels + + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return 50000 + elif subset == 'validation': + return 10000 + else: + raise ValueError('Invalid data subset "%s"' % subset) + + def get_image_preprocessor(self): + if self.use_synthetic_gpu_images(): + return preprocessing.SyntheticImagePreprocessor + else: + return preprocessing.Cifar10ImagePreprocessor diff --git a/models/image_recognition/tensorflow/inceptionv4/inference/preprocessing.py b/models/image_recognition/tensorflow/inceptionv4/inference/preprocessing.py index 454903f9d..d398f1030 100644 --- a/models/image_recognition/tensorflow/inceptionv4/inference/preprocessing.py +++ b/models/image_recognition/tensorflow/inceptionv4/inference/preprocessing.py @@ -41,9 +41,13 @@ from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf -from tensorflow.contrib.data.python.ops import batching -from tensorflow.contrib.data.python.ops import interleave_ops -from tensorflow.contrib.image.python.ops import distort_image_ops +#from tensorflow.contrib.data.python.ops import batching +#from tensorflow.contrib.data.python.ops import interleave_ops +#from tensorflow.contrib.image.python.ops import distort_image_ops + +from tensorflow.python.data.experimental import parallel_interleave### +from tensorflow.python.data.experimental import map_and_batch### + from tensorflow.python.layers import utils from tensorflow.python.ops import data_flow_ops from tensorflow.python.platform import gfile @@ -54,11 +58,11 @@ def parse_example_proto(example_serialized): """Parses an Example proto containing a training example of an image. - + The output of the build_image_data.py image preprocessing script is a dataset containing serialized Example protocol buffers. Each Example proto contains the following fields: - + image/height: 462 image/width: 581 image/colorspace: 'RGB' @@ -74,11 +78,11 @@ def parse_example_proto(example_serialized): image/format: 'JPEG' image/filename: 'ILSVRC2012_val_00041207.JPEG' image/encoded: - + Args: example_serialized: scalar Tensor tf.string containing a serialized Example protocol buffer. - + Returns: image_buffer: Tensor tf.string containing the contents of a JPEG file. label: Tensor tf.int32 containing the label. @@ -89,14 +93,14 @@ def parse_example_proto(example_serialized): """ # Dense features in Example proto. feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, + 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) # Sparse features in Example proto. 
feature_map.update( {k: sparse_float32 for k in ['image/object/bbox/xmin', @@ -104,7 +108,7 @@ def parse_example_proto(example_serialized): 'image/object/bbox/xmax', 'image/object/bbox/ymax']}) - features = tf.parse_single_example(example_serialized, feature_map) + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) label = tf.cast(features['image/class/label'], dtype=tf.int32) xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) @@ -118,19 +122,19 @@ def parse_example_proto(example_serialized): # Force the variable number of bounding boxes into the shape # [1, num_boxes, coords]. bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) + bbox = tf.transpose(a=bbox, perm=[0, 2, 1]) return features['image/encoded'], label, bbox, features['image/class/text'] def get_image_resize_method(resize_method, batch_position=0): """Get tensorflow resize method. - + If resize_method is 'round_robin', return different methods based on batch position in a round-robin fashion. NOTE: If the batch size is not a multiple of the number of methods, then the distribution of methods will not be uniform. - + Args: resize_method: (string) nearest, bilinear, bicubic, area, or round_robin. batch_position: position of the image in a batch. NOTE: this argument can @@ -174,7 +178,7 @@ def resize_method_2(): def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): """Decode a JPEG string into one 3-D float image Tensor. - + Args: image_buffer: scalar string Tensor. scope: Optional scope for op_scope. @@ -183,7 +187,7 @@ def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): """ # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): - with tf.name_scope(scope or 'decode_jpeg'): + with tf.compat.v1.name_scope(scope or 'decode_jpeg'): # Decode the string as an RGB JPEG. # Note that the resulting image contains an unknown height and width # that is set dynamically by decode_jpeg. In other words, the height @@ -201,13 +205,13 @@ def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): def preprocess_for_eval(image, height, width, central_fraction=0.875, scope=None): """Prepare one image for evaluation. - + If height and width are specified it would output an image with that size by applying resize_bilinear. - + If central_fraction is specified it would crop the central fraction of the input image. - + Args: image: 3-D Tensor of image. If dtype is tf.float32 then the range should be [0, 1], otherwise it would converted to tf.float32 assuming that the range @@ -220,7 +224,7 @@ def preprocess_for_eval(image, height, width, Returns: 3-D float Tensor of prepared image. """ - with tf.name_scope(scope, 'eval_image', [image, height, width]): + with tf.compat.v1.name_scope(scope, 'eval_image', [image, height, width]): if image.dtype != tf.float32: image = tf.image.convert_image_dtype(image, dtype=tf.float32) # Crop the central region of the image with an area containing 87.5% of @@ -232,8 +236,8 @@ def preprocess_for_eval(image, height, width, if height and width: # Resize the image to the specified height and width. 
image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) + image = tf.image.resize(image, [height, width], + method=tf.image.ResizeMethod.BILINEAR) image = tf.squeeze(image, [0]) image = tf.subtract(image, 0.5) image = tf.multiply(image, 2.0) @@ -242,17 +246,17 @@ def preprocess_for_eval(image, height, width, def apply_with_random_selector(x, func, num_cases): """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - + Args: x: input Tensor. func: Python function to apply. num_cases: Python int32, number of cases to sample sel from. - + Returns: The result of func(x, sel), where func receives the value of the selector as a python integer, but sel is sampled dynamically. """ - sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) + sel = tf.random.uniform([], maxval=num_cases, dtype=tf.int32) # Pass the real x only to one of the func calls. return control_flow_ops.merge([ func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) @@ -261,12 +265,12 @@ def apply_with_random_selector(x, func, num_cases): def distort_color(image, color_ordering=0, fast_mode=True, scope=None): """Distort the color of a Tensor image. - + Each color distortion is non-commutative and thus ordering of the color ops matters. Ideally we would randomly permute the ordering of the color ops. Rather then adding that level of complication, we select a distinct ordering of color ops for each preprocessing thread. - + Args: image: 3-D Tensor containing single image in [0, 1]. color_ordering: Python int, a type of distortion (valid values: 0-3). @@ -277,7 +281,7 @@ def distort_color(image, color_ordering=0, fast_mode=True, scope=None): Raises: ValueError: if color_ordering not in [0, 3] """ - with tf.name_scope(scope, 'distort_color', [image]): + with tf.compat.v1.name_scope(scope, 'distort_color', [image]): if fast_mode: if color_ordering == 0: image = tf.image.random_brightness(image, max_delta=32. / 255.) @@ -321,9 +325,9 @@ def distorted_bounding_box_crop(image, max_attempts=100, scope=None): """Generates cropped_image using a one of the bboxes randomly distorted. - + See `tf.image.sample_distorted_bounding_box` for more documentation. - + Args: image: 3-D Tensor of image (it will be converted to floats in [0, 1]). bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] @@ -344,7 +348,7 @@ def distorted_bounding_box_crop(image, Returns: A tuple, a 3-D Tensor cropped_image and the distorted bbox """ - with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): + with tf.compat.v1.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): # Each bounding box has shape [1, num_boxes, box coords] and # the coordinates are ordered [ymin, xmin, ymax, xmax]. @@ -356,7 +360,7 @@ def distorted_bounding_box_crop(image, # bounding box. If no box is supplied, then we assume the bounding box is # the entire image. sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), + image_size=tf.shape(input=image), bounding_boxes=bbox, min_object_covered=min_object_covered, aspect_ratio_range=aspect_ratio_range, @@ -376,11 +380,11 @@ def preprocess_for_train(image, height, width, bbox, scope=None, add_image_summaries=True): """Distort one image for training a network. - + Distorting images provides a useful technique for augmenting the data set during training in order to make the network invariant to aspects of the image that do not effect the label. - + Args: image: 3-D Tensor of image. 
If dtype is tf.float32 then the range should be [0, 1], otherwise it would converted to tf.float32 assuming that the range @@ -400,7 +404,7 @@ def preprocess_for_train(image, height, width, bbox, 3-D float Tensor of distorted image used for training with range [-1, 1]. """ - with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + with tf.compat.v1.name_scope(scope, 'distort_image', [image, height, width, bbox]): if bbox is None: bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, @@ -412,7 +416,7 @@ def preprocess_for_train(image, height, width, bbox, image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), bbox) if add_image_summaries: - tf.summary.image('image_with_bounding_boxes', image_with_box) + tf.compat.v1.summary.image('image_with_bounding_boxes', image_with_box) distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) @@ -422,7 +426,7 @@ def preprocess_for_train(image, height, width, bbox, image_with_distorted_box = tf.image.draw_bounding_boxes( tf.expand_dims(image, 0), distorted_bbox) if add_image_summaries: - tf.summary.image('images_with_distorted_bounding_box', + tf.compat.v1.summary.image('images_with_distorted_bounding_box', image_with_distorted_box) # This resizing operation may distort the images because the aspect @@ -434,12 +438,12 @@ def preprocess_for_train(image, height, width, bbox, num_resize_cases = 1 if fast_mode else 4 distorted_image = apply_with_random_selector( distorted_image, - lambda x, method: tf.image.resize_images(x, [height, width], + lambda x, method: tf.image.resize(x, [height, width], method), num_cases=num_resize_cases) if add_image_summaries: - tf.summary.image('cropped_resized_image', + tf.compat.v1.summary.image('cropped_resized_image', tf.expand_dims(distorted_image, 0)) # Randomly flip the image horizontally. @@ -452,7 +456,7 @@ def preprocess_for_train(image, height, width, bbox, num_cases=num_distort_cases) if add_image_summaries: - tf.summary.image('final_distorted_image', + tf.compat.v1.summary.image('final_distorted_image', tf.expand_dims(distorted_image, 0)) distorted_image = tf.subtract(distorted_image, 0.5) distorted_image = tf.multiply(distorted_image, 2.0) @@ -462,12 +466,12 @@ def preprocess_for_train(image, height, width, bbox, def distort_color(image, batch_position=0, distort_color_in_yiq=False, scope=None): """Distort the color of the image. - + Each color distortion is non-commutative and thus ordering of the color ops matters. Ideally we would randomly permute the ordering of the color ops. Rather then adding that level of complication, we select a distinct ordering of color ops based on the position of the image in a batch. - + Args: image: float32 Tensor containing single image. Tensor values should be in range [0, 1]. @@ -478,7 +482,7 @@ def distort_color(image, batch_position=0, distort_color_in_yiq=False, Returns: color-distorted image """ - with tf.name_scope(scope or 'distort_color'): + with tf.compat.v1.name_scope(scope or 'distort_color'): def distort_fn_0(image=image): """Variant 0 of distort function.""" image = tf.image.random_brightness(image, max_delta=32. / 255.) @@ -570,7 +574,7 @@ def minibatch(self, dataset, subset, use_datasets, cache_data, shift_ratio=-1): if shift_ratio < 0: shift_ratio = self.shift_ratio - with tf.name_scope('batch_processing'): + with tf.compat.v1.name_scope('batch_processing'): # Build final results per split. 
images = [[] for _ in range(self.num_splits)] labels = [[] for _ in range(self.num_splits)] @@ -583,7 +587,8 @@ def minibatch(self, dataset, subset, use_datasets, cache_data, .format(glob_pattern)) ds = tf.data.TFRecordDataset.list_files(file_names) ds = ds.apply( - interleave_ops.parallel_interleave( + #interleave_ops.parallel_interleave( + parallel_interleave( # tf.data.TFRecordDataset, cycle_length=10)) if cache_data: ds = ds.take(1).cache().repeat() @@ -594,12 +599,13 @@ def minibatch(self, dataset, subset, use_datasets, cache_data, ds = ds.shuffle(buffer_size=10000) ds = ds.repeat() ds = ds.apply( - batching.map_and_batch( + #batching.map_and_batch( + map_and_batch( ### map_func=self.parse_and_preprocess, batch_size=self.batch_size_per_split, num_parallel_batches=self.num_splits)) ds = ds.prefetch(buffer_size=self.num_splits) - ds_iterator = ds.make_one_shot_iterator() + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) for d in xrange(self.num_splits): labels[d], images[d] = ds_iterator.get_next() diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy.py index a688730c6..25fb80020 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy.py @@ -52,7 +52,7 @@ def load_graph(model_file): graph = tf.Graph() - graph_def = tf.GraphDef() + graph_def = tf.compat.v1.GraphDef() import os file_ext = os.path.splitext(model_file)[1] @@ -121,23 +121,23 @@ def load_graph(model_file): train=False, # doing inference resize_method='bilinear') - images, labels = preprocessor.minibatch(dataset, subset='validation', - use_datasets=True, - cache_data=False) + with tf.compat.v1.get_default_graph().as_default(): + images, labels = preprocessor.minibatch(dataset, subset='validation', + use_datasets=True, cache_data=False) graph = load_graph(model_file) input_tensor = graph.get_tensor_by_name(input_layer + ":0") output_tensor = graph.get_tensor_by_name(output_layer + ":0") - config = tf.ConfigProto() + config = tf.compat.v1.ConfigProto() config.inter_op_parallelism_threads = num_inter_threads config.intra_op_parallelism_threads = num_intra_threads total_accuracy1, total_accuracy5 = (0.0, 0.0) num_processed_images = 0 num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ - - num_processed_images - with tf.Session() as sess: - sess_graph = tf.Session(graph=graph, config=config) + - num_processed_images + with tf.compat.v1.Session() as sess: + sess_graph = tf.compat.v1.Session(graph=graph, config=config) while num_remaining_images >= batch_size: # Reads and preprocess data np_images, np_labels = sess.run([images[0], labels[0]]) @@ -149,18 +149,18 @@ def load_graph(model_file): {input_tensor: np_images}) elapsed_time = time.time() - start_time accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 
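The accuracy.py changes above follow one recurring pattern in this patch: symbols that left the top-level tf namespace in TensorFlow 2.x (GraphDef, ConfigProto, Session) are reached through tf.compat.v1 instead. A minimal sketch of that pattern, assuming TF 2.x and a frozen binary .pb graph (the file name and thread counts are placeholders):

    import tensorflow as tf

    def load_frozen_graph(model_file):
        graph = tf.Graph()
        graph_def = tf.compat.v1.GraphDef()      # was tf.GraphDef in TF 1.x
        with open(model_file, 'rb') as f:
            graph_def.ParseFromString(f.read())  # binary .pb only in this sketch
        with graph.as_default():
            tf.import_graph_def(graph_def, name='')
        return graph

    config = tf.compat.v1.ConfigProto()          # was tf.ConfigProto
    config.inter_op_parallelism_threads = 1
    config.intra_op_parallelism_threads = 4
    # sess = tf.compat.v1.Session(graph=load_frozen_graph('model.pb'), config=config)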
print("Iteration time: %0.4f ms" % elapsed_time) print( - "Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" + "Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % ( - num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) + num_processed_images, total_accuracy1 / num_processed_images, + total_accuracy5 / num_processed_images)) diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy_datasets.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy_datasets.py index 40b9dceac..7f4c9dd0e 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy_datasets.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy_datasets.py @@ -52,143 +52,143 @@ def create_dataset(data_dir, data_name): - """Create a Dataset instance based on data_dir and data_name.""" - supported_datasets = { - 'imagenet': ImagenetData, - 'cifar10': Cifar10Data, - } - if not data_dir and not data_name: - # When using synthetic data, use synthetic imagenet images by default. - data_name = 'imagenet' + """Create a Dataset instance based on data_dir and data_name.""" + supported_datasets = { + 'imagenet': ImagenetData, + 'cifar10': Cifar10Data, + } + if not data_dir and not data_name: + # When using synthetic data, use synthetic imagenet images by default. + data_name = 'imagenet' - if data_name is None: - for supported_name in supported_datasets: - if supported_name in data_dir: - data_name = supported_name - break + if data_name is None: + for supported_name in supported_datasets: + if supported_name in data_dir: + data_name = supported_name + break - if data_name is None: - raise ValueError('Could not identify name of dataset. ' - 'Please specify with --data_name option.') + if data_name is None: + raise ValueError('Could not identify name of dataset. ' + 'Please specify with --data_name option.') - if data_name not in supported_datasets: - raise ValueError('Unknown dataset. Must be one of %s', ', '.join( - [key for key in sorted(supported_datasets.keys())])) + if data_name not in supported_datasets: + raise ValueError('Unknown dataset. 
Must be one of %s', ', '.join( + [key for key in sorted(supported_datasets.keys())])) - return supported_datasets[data_name](data_dir) + return supported_datasets[data_name](data_dir) class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, height=None, width=None, depth=None, data_dir=None, - queue_runner_required=False, num_classes=1000): - self.name = name - self.height = height - self.width = width - self.depth = depth or 3 + def __init__(self, name, height=None, width=None, depth=None, data_dir=None, + queue_runner_required=False, num_classes=1000): + self.name = name + self.height = height + self.width = width + self.depth = depth or 3 - self.data_dir = data_dir - self._queue_runner_required = queue_runner_required - self._num_classes = num_classes + self.data_dir = data_dir + self._queue_runner_required = queue_runner_required + self._num_classes = num_classes - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.TFRecordReader() - @property - def num_classes(self): - return self._num_classes + @property + def num_classes(self): + return self._num_classes - @num_classes.setter - def num_classes(self, val): - self._num_classes = val + @num_classes.setter + def num_classes(self, val): + self._num_classes = val - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name - def get_image_preprocessor(self): - return None + def get_image_preprocessor(self): + return None - def queue_runner_required(self): - return self._queue_runner_required + def queue_runner_required(self): + return self._queue_runner_required - def use_synthetic_gpu_images(self): - return not self.data_dir + def use_synthetic_gpu_images(self): + return not self.data_dir class ImagenetData(Dataset): - """Configuration for Imagenet dataset.""" + """Configuration for Imagenet dataset.""" - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - else: - raise ValueError('Invalid data subset "%s"' % subset) + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + else: + raise ValueError('Invalid data subset "%s"' % subset) - def get_image_preprocessor(self): - if self.use_synthetic_gpu_images(): - return preprocessing.SyntheticImagePreprocessor - else: - return preprocessing.RecordInputImagePreprocessor + def get_image_preprocessor(self): + if self.use_synthetic_gpu_images(): + return preprocessing.SyntheticImagePreprocessor + else: + return preprocessing.RecordInputImagePreprocessor class Cifar10Data(Dataset): - """Configuration for cifar 10 dataset. - - It will mount all the input images to memory. 
- """ - - def __init__(self, data_dir=None): - super(Cifar10Data, self).__init__('cifar10', 32, 32, data_dir=data_dir, - queue_runner_required=True, - num_classes=10) - - def read_data_files(self, subset='train'): - """Reads from data file and returns images and labels in a numpy array.""" - assert self.data_dir, ('Cannot call `read_data_files` when using synthetic ' - 'data') - if subset == 'train': - filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i) - for i in xrange(1, 6)] - elif subset == 'validation': - filenames = [os.path.join(self.data_dir, 'test_batch')] - else: - raise ValueError('Invalid data subset "%s"' % subset) - - inputs = [] - for filename in filenames: - with gfile.Open(filename, 'r') as f: - inputs.append(cPickle.load(f)) - # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the - # input format. - all_images = np.concatenate( - [each_input['data'] for each_input in inputs]).astype(np.float32) - all_labels = np.concatenate( - [each_input['labels'] for each_input in inputs]) - return all_images, all_labels - - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return 50000 - elif subset == 'validation': - return 10000 - else: - raise ValueError('Invalid data subset "%s"' % subset) - - def get_image_preprocessor(self): - if self.use_synthetic_gpu_images(): - return preprocessing.SyntheticImagePreprocessor - else: - return preprocessing.Cifar10ImagePreprocessor + """Configuration for cifar 10 dataset. + + It will mount all the input images to memory. + """ + + def __init__(self, data_dir=None): + super(Cifar10Data, self).__init__('cifar10', 32, 32, data_dir=data_dir, + queue_runner_required=True, + num_classes=10) + + def read_data_files(self, subset='train'): + """Reads from data file and returns images and labels in a numpy array.""" + assert self.data_dir, ('Cannot call `read_data_files` when using synthetic ' + 'data') + if subset == 'train': + filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i) + for i in xrange(1, 6)] + elif subset == 'validation': + filenames = [os.path.join(self.data_dir, 'test_batch')] + else: + raise ValueError('Invalid data subset "%s"' % subset) + + inputs = [] + for filename in filenames: + with gfile.Open(filename, 'r') as f: + inputs.append(cPickle.load(f)) + # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the + # input format. 
+ all_images = np.concatenate( + [each_input['data'] for each_input in inputs]).astype(np.float32) + all_labels = np.concatenate( + [each_input['labels'] for each_input in inputs]) + return all_images, all_labels + + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return 50000 + elif subset == 'validation': + return 10000 + else: + raise ValueError('Invalid data subset "%s"' % subset) + + def get_image_preprocessor(self): + if self.use_synthetic_gpu_images(): + return preprocessing.SyntheticImagePreprocessor + else: + return preprocessing.Cifar10ImagePreprocessor diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy_preprocessing.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy_preprocessing.py index d7f5e75e1..25c3e9452 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy_preprocessing.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy_preprocessing.py @@ -41,9 +41,8 @@ from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf -from tensorflow.contrib.data.python.ops import batching -from tensorflow.contrib.data.python.ops import interleave_ops -from tensorflow.contrib.image.python.ops import distort_image_ops +from tensorflow.python.data.experimental.ops import batching +from tensorflow.python.data.experimental.ops import interleave_ops from tensorflow.python.layers import utils from tensorflow.python.ops import data_flow_ops from tensorflow.python.platform import gfile @@ -54,11 +53,11 @@ def parse_example_proto(example_serialized): """Parses an Example proto containing a training example of an image. - + The output of the build_image_data.py image preprocessing script is a dataset containing serialized Example protocol buffers. Each Example proto contains the following fields: - + image/height: 462 image/width: 581 image/colorspace: 'RGB' @@ -74,11 +73,11 @@ def parse_example_proto(example_serialized): image/format: 'JPEG' image/filename: 'ILSVRC2012_val_00041207.JPEG' image/encoded: - + Args: example_serialized: scalar Tensor tf.string containing a serialized Example protocol buffer. - + Returns: image_buffer: Tensor tf.string containing the contents of a JPEG file. label: Tensor tf.int32 containing the label. @@ -89,14 +88,14 @@ def parse_example_proto(example_serialized): """ # Dense features in Example proto. feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, + 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) # Sparse features in Example proto. 
feature_map.update( {k: sparse_float32 for k in ['image/object/bbox/xmin', @@ -104,7 +103,7 @@ def parse_example_proto(example_serialized): 'image/object/bbox/xmax', 'image/object/bbox/ymax']}) - features = tf.parse_single_example(example_serialized, feature_map) + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) label = tf.cast(features['image/class/label'], dtype=tf.int32) xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) @@ -118,19 +117,19 @@ def parse_example_proto(example_serialized): # Force the variable number of bounding boxes into the shape # [1, num_boxes, coords]. bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) + bbox = tf.transpose(a=bbox, perm=[0, 2, 1]) return features['image/encoded'], label, bbox, features['image/class/text'] def get_image_resize_method(resize_method, batch_position=0): """Get tensorflow resize method. - + If resize_method is 'round_robin', return different methods based on batch position in a round-robin fashion. NOTE: If the batch size is not a multiple of the number of methods, then the distribution of methods will not be uniform. - + Args: resize_method: (string) nearest, bilinear, bicubic, area, or round_robin. batch_position: position of the image in a batch. NOTE: this argument can @@ -174,7 +173,7 @@ def resize_method_2(): def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): """Decode a JPEG string into one 3-D float image Tensor. - + Args: image_buffer: scalar string Tensor. scope: Optional scope for op_scope. @@ -183,7 +182,7 @@ def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): """ # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): - with tf.name_scope(scope or 'decode_jpeg'): + with tf.compat.v1.name_scope(scope or 'decode_jpeg'): # Decode the string as an RGB JPEG. # Note that the resulting image contains an unknown height and width # that is set dynamically by decode_jpeg. In other words, the height @@ -201,13 +200,13 @@ def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): def preprocess_for_eval(image, height, width, central_fraction=0.875, scope=None): """Prepare one image for evaluation. - + If height and width are specified it would output an image with that size by applying resize_bilinear. - + If central_fraction is specified it would crop the central fraction of the input image. - + Args: image: 3-D Tensor of image. If dtype is tf.float32 then the range should be [0, 1], otherwise it would converted to tf.float32 assuming that the range @@ -220,7 +219,7 @@ def preprocess_for_eval(image, height, width, Returns: 3-D float Tensor of prepared image. """ - with tf.name_scope(scope, 'eval_image', [image, height, width]): + with tf.compat.v1.name_scope(scope, 'eval_image', [image, height, width]): if image.dtype != tf.float32: image = tf.image.convert_image_dtype(image, dtype=tf.float32) # Crop the central region of the image with an area containing 87.5% of @@ -232,8 +231,8 @@ def preprocess_for_eval(image, height, width, if height and width: # Resize the image to the specified height and width. 
image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) + image = tf.compat.v1.image.resize_bilinear(image, [height, width], + align_corners=False) image = tf.squeeze(image, [0]) image = tf.subtract(image, 0.5) image = tf.multiply(image, 2.0) @@ -242,17 +241,17 @@ def preprocess_for_eval(image, height, width, def apply_with_random_selector(x, func, num_cases): """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - + Args: x: input Tensor. func: Python function to apply. num_cases: Python int32, number of cases to sample sel from. - + Returns: The result of func(x, sel), where func receives the value of the selector as a python integer, but sel is sampled dynamically. """ - sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) + sel = tf.random.uniform([], maxval=num_cases, dtype=tf.int32) # Pass the real x only to one of the func calls. return control_flow_ops.merge([ func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) @@ -261,12 +260,12 @@ def apply_with_random_selector(x, func, num_cases): def distort_color(image, color_ordering=0, fast_mode=True, scope=None): """Distort the color of a Tensor image. - + Each color distortion is non-commutative and thus ordering of the color ops matters. Ideally we would randomly permute the ordering of the color ops. Rather then adding that level of complication, we select a distinct ordering of color ops for each preprocessing thread. - + Args: image: 3-D Tensor containing single image in [0, 1]. color_ordering: Python int, a type of distortion (valid values: 0-3). @@ -277,7 +276,7 @@ def distort_color(image, color_ordering=0, fast_mode=True, scope=None): Raises: ValueError: if color_ordering not in [0, 3] """ - with tf.name_scope(scope, 'distort_color', [image]): + with tf.compat.v1.name_scope(scope, 'distort_color', [image]): if fast_mode: if color_ordering == 0: image = tf.image.random_brightness(image, max_delta=32. / 255.) @@ -321,9 +320,9 @@ def distorted_bounding_box_crop(image, max_attempts=100, scope=None): """Generates cropped_image using a one of the bboxes randomly distorted. - + See `tf.image.sample_distorted_bounding_box` for more documentation. - + Args: image: 3-D Tensor of image (it will be converted to floats in [0, 1]). bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] @@ -344,7 +343,7 @@ def distorted_bounding_box_crop(image, Returns: A tuple, a 3-D Tensor cropped_image and the distorted bbox """ - with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): + with tf.compat.v1.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): # Each bounding box has shape [1, num_boxes, box coords] and # the coordinates are ordered [ymin, xmin, ymax, xmax]. @@ -356,7 +355,7 @@ def distorted_bounding_box_crop(image, # bounding box. If no box is supplied, then we assume the bounding box is # the entire image. sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), + image_size=tf.shape(input=image), bounding_boxes=bbox, min_object_covered=min_object_covered, aspect_ratio_range=aspect_ratio_range, @@ -376,11 +375,11 @@ def preprocess_for_train(image, height, width, bbox, scope=None, add_image_summaries=True): """Distort one image for training a network. - + Distorting images provides a useful technique for augmenting the data set during training in order to make the network invariant to aspects of the image that do not effect the label. - + Args: image: 3-D Tensor of image. 
If dtype is tf.float32 then the range should be [0, 1], otherwise it would converted to tf.float32 assuming that the range @@ -400,7 +399,7 @@ def preprocess_for_train(image, height, width, bbox, 3-D float Tensor of distorted image used for training with range [-1, 1]. """ - with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + with tf.compat.v1.name_scope(scope, 'distort_image', [image, height, width, bbox]): if bbox is None: bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, @@ -412,7 +411,7 @@ def preprocess_for_train(image, height, width, bbox, image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), bbox) if add_image_summaries: - tf.summary.image('image_with_bounding_boxes', image_with_box) + tf.compat.v1.summary.image('image_with_bounding_boxes', image_with_box) distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) @@ -422,7 +421,7 @@ def preprocess_for_train(image, height, width, bbox, image_with_distorted_box = tf.image.draw_bounding_boxes( tf.expand_dims(image, 0), distorted_bbox) if add_image_summaries: - tf.summary.image('images_with_distorted_bounding_box', + tf.compat.v1.summary.image('images_with_distorted_bounding_box', image_with_distorted_box) # This resizing operation may distort the images because the aspect @@ -434,12 +433,12 @@ def preprocess_for_train(image, height, width, bbox, num_resize_cases = 1 if fast_mode else 4 distorted_image = apply_with_random_selector( distorted_image, - lambda x, method: tf.image.resize_images(x, [height, width], + lambda x, method: tf.image.resize(x, [height, width], method), num_cases=num_resize_cases) if add_image_summaries: - tf.summary.image('cropped_resized_image', + tf.compat.v1.summary.image('cropped_resized_image', tf.expand_dims(distorted_image, 0)) # Randomly flip the image horizontally. @@ -452,7 +451,7 @@ def preprocess_for_train(image, height, width, bbox, num_cases=num_distort_cases) if add_image_summaries: - tf.summary.image('final_distorted_image', + tf.compat.v1.summary.image('final_distorted_image', tf.expand_dims(distorted_image, 0)) distorted_image = tf.subtract(distorted_image, 0.5) distorted_image = tf.multiply(distorted_image, 2.0) @@ -462,12 +461,12 @@ def preprocess_for_train(image, height, width, bbox, def distort_color(image, batch_position=0, distort_color_in_yiq=False, scope=None): """Distort the color of the image. - + Each color distortion is non-commutative and thus ordering of the color ops matters. Ideally we would randomly permute the ordering of the color ops. Rather then adding that level of complication, we select a distinct ordering of color ops based on the position of the image in a batch. - + Args: image: float32 Tensor containing single image. Tensor values should be in range [0, 1]. @@ -478,7 +477,7 @@ def distort_color(image, batch_position=0, distort_color_in_yiq=False, Returns: color-distorted image """ - with tf.name_scope(scope or 'distort_color'): + with tf.compat.v1.name_scope(scope or 'distort_color'): def distort_fn_0(image=image): """Variant 0 of distort function.""" image = tf.image.random_brightness(image, max_delta=32. / 255.) 
@@ -547,6 +546,18 @@ def __init__(self, self.batch_size_per_split = self.batch_size // self.num_splits self.summary_verbosity = summary_verbosity + def center_crop(self, img, init_h, init_w): + height, width, _ = img.shape + + left = int((width - init_w) // 2) + right = int((width + init_w) // 2) + top = int((height - init_h) // 2) + bottom = int((height + init_h) // 2) + + img = img[top: bottom, left: right] + + return img + def image_preprocess(self, image_buffer, bbox, batch_position): """Preprocessing image_buffer as a function of its batch position.""" if self.train: @@ -558,7 +569,20 @@ def image_preprocess(self, image_buffer, bbox, batch_position): else: image = tf.image.decode_jpeg( image_buffer, channels=3, dct_method='INTEGER_FAST') - image = preprocess_for_eval(image, self.height, self.width) + + new_height = int(100. * self.height / 87.5) + new_width = int(100. * self.width / 87.5) + + if(self.height > self.width): + w = new_width + h = int(new_height * self.height / self.width) + else: + h = new_height + w = int(new_width * self.width / self.height) + + image = preprocess_for_eval(image, h, w) + image = self.center_crop(image, self.height, self.width) + return image def parse_and_preprocess(self, value, batch_position): @@ -570,7 +594,7 @@ def minibatch(self, dataset, subset, use_datasets, cache_data, shift_ratio=-1): if shift_ratio < 0: shift_ratio = self.shift_ratio - with tf.name_scope('batch_processing'): + with tf.compat.v1.name_scope('batch_processing'): # Build final results per split. images = [[] for _ in range(self.num_splits)] labels = [[] for _ in range(self.num_splits)] @@ -581,7 +605,7 @@ def minibatch(self, dataset, subset, use_datasets, cache_data, raise ValueError( 'Found no files in --data_dir matching: {}' .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) ds = ds.apply( interleave_ops.parallel_interleave( tf.data.TFRecordDataset, cycle_length=10)) @@ -591,7 +615,6 @@ def minibatch(self, dataset, subset, use_datasets, cache_data, counter = counter.repeat() ds = tf.data.Dataset.zip((ds, counter)) ds = ds.prefetch(buffer_size=self.batch_size) - ds = ds.shuffle(buffer_size=10000) ds = ds.repeat() ds = ds.apply( batching.map_and_batch( @@ -599,7 +622,7 @@ def minibatch(self, dataset, subset, use_datasets, cache_data, batch_size=self.batch_size_per_split, num_parallel_batches=self.num_splits)) ds = ds.prefetch(buffer_size=self.num_splits) - ds_iterator = ds.make_one_shot_iterator() + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) for d in xrange(self.num_splits): labels[d], images[d] = ds_iterator.get_next() diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/benchmark.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/benchmark.py new file mode 100644 index 000000000..c21a2904d --- /dev/null +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/benchmark.py @@ -0,0 +1,144 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import os +import time +import numpy as np + +from google.protobuf import text_format +import tensorflow as tf + +def load_graph(model_file): + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + + import os + file_ext = os.path.splitext(model_file)[1] + + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + + return graph + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="MobilenetV1/Predictions/Reshape_1", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + parser.add_argument("--warmup_steps", type=int, default=10, + help="number of warmup steps") + parser.add_argument("--steps", type=int, default=50, help="number of steps") + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + input_height = args.input_height + input_width = args.input_width + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + warmup_steps = args.warmup_steps + steps = args.steps + assert steps > 10, "Benchmark steps should be at least 10." 
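The new benchmark.py accepts the flags defined above; a typical invocation (paths and thread counts are placeholders) would look like python benchmark.py --input_graph /path/to/mobilenetv1_fp32.pb --batch_size 32 --warmup_steps 10 --steps 50 --num_intra_threads 4 --num_inter_threads 1. The images/sec figure printed by the loops below is simply the batch size divided by the wall-clock time of one sess.run over a batch, e.g.:

    # Throughput arithmetic used by the benchmark loop below
    # (elapsed_time is a made-up value for illustration).
    batch_size = 32
    elapsed_time = 0.025                         # seconds per batch
    images_per_sec = batch_size / elapsed_time   # 1280.0 images/sec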
+ num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + + graph = load_graph(model_file) + + input_tensor = graph.get_tensor_by_name(input_layer + ":0"); + output_tensor = graph.get_tensor_by_name(output_layer + ":0"); + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + with tf.compat.v1.Session(graph=graph, config=config) as sess: + input_shape = [batch_size, input_height, input_width, 3] + images = tf.random.truncated_normal( + input_shape, + dtype=tf.float32, + stddev=10, + name='synthetic_images') + image_data = sess.run(images) + + sys.stdout.flush() + print("[Running warmup steps...]") + for t in range(warmup_steps): + start_time = time.time() + sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + if((t+1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t+1, batch_size/elapsed_time)) + + print("[Running benchmark steps...]") + total_time = 0; + total_images = 0; + for t in range(steps): + start_time = time.time() + results = sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + if((t+1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t+1, batch_size/elapsed_time)); diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/cnn_util.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/cnn_util.py index 2ec667aac..c07238697 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/cnn_util.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/cnn_util.py @@ -38,11 +38,11 @@ def tensorflow_version_tuple(): - v = tf.__version__ - major, minor, patch = v.split('.') - return (int(major), int(minor), patch) + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) def tensorflow_version(): - vt = tensorflow_version_tuple() - return vt[0] * 1000 + vt[1] + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/eval_image_classifier.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/eval_image_classifier.py index 301de9390..0dd6e3511 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/eval_image_classifier.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/eval_image_classifier.py @@ -103,7 +103,7 @@ tf.app.flags.DEFINE_integer( 'eval_log_frequency', 10, 'Number of eval steps to run between displaying ' - 'eval metrics.') + 'eval metrics.') tf.app.flags.DEFINE_integer( 'inter_op_parallelism_threads', 1, 'The number of inter-thread.') @@ -114,161 +114,158 @@ FLAGS = tf.app.flags.FLAGS - class _LoggerHook(tf.train.SessionRunHook): - """ Logs loss and runtime.""" - - def begin(self): - self._step = -1 - self._displayed_steps = 0 - self._total_images_per_sec = 0 - - def before_run(self, run_context): - self._step += 1 - self._start_time = time.time() - - def after_run(self, run_context, run_values): - duration = time.time() - self._start_time - if (self._step + 1) % FLAGS.eval_log_frequency == 0: - images_per_sec = FLAGS.batch_size / duration - self._displayed_steps += 1 - self._total_images_per_sec += images_per_sec - - format_str = ('%s: step %d, %.1f images/sec') - print(format_str % (datetime.now(), (self._step + 1), images_per_sec)) - - def end(self, run_context): - 
print('self._total_images_per_sec = %.1f' % self._total_images_per_sec) - print('self._displayed_steps = %d' % self._displayed_steps) - images_per_sec = self._total_images_per_sec / self._displayed_steps - print('Total images/sec = %.1f' % (images_per_sec)) - if FLAGS.batch_size == 1: - latency = 1000 / images_per_sec - print('Latency ms/step = %.1f' % (latency)) - + """ Logs loss and runtime.""" + + def begin(self): + self._step = -1 + self._displayed_steps = 0 + self._total_images_per_sec = 0 + + def before_run(self, run_context): + self._step += 1 + self._start_time = time.time() + + def after_run(self, run_context, run_values): + duration = time.time() - self._start_time + if (self._step + 1) % FLAGS.eval_log_frequency == 0: + images_per_sec = FLAGS.batch_size / duration + self._displayed_steps += 1 + self._total_images_per_sec += images_per_sec + + format_str = ('%s: step %d, %.1f images/sec') + print (format_str % (datetime.now(), (self._step+1), images_per_sec)) + + def end(self, run_context): + print('self._total_images_per_sec = %.1f' % self._total_images_per_sec) + print('self._displayed_steps = %d' % self._displayed_steps) + images_per_sec = self._total_images_per_sec / self._displayed_steps + print('Total images/sec = %.1f' %(images_per_sec)) + if FLAGS.batch_size == 1: + latency = 1000 / images_per_sec + print('Latency ms/step = %.1f' % (latency)) def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - - with tf.Graph().as_default(): - tf_global_step = slim.get_or_create_global_step() - - ###################### - # Select the dataset # - ###################### - if FLAGS.dataset_dir: - print("Inference using real data") - dataset = dataset_factory.get_dataset( - FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) - num_classes = dataset.num_classes - FLAGS.labels_offset - else: - print("Inference using synthetic data") - num_classes = 1000 - - #################### - # Select the model # - #################### - network_fn = nets_factory.get_network_fn( - FLAGS.model_name, - num_classes=num_classes, + tf.logging.set_verbosity(tf.logging.INFO) + + with tf.Graph().as_default(): + tf_global_step = slim.get_or_create_global_step() + + ###################### + # Select the dataset # + ###################### + if FLAGS.dataset_dir: + print("Inference using real data") + dataset = dataset_factory.get_dataset( + FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) + num_classes = dataset.num_classes - FLAGS.labels_offset + else: + print("Inference using synthetic data") + num_classes = 1000 + + #################### + # Select the model # + #################### + network_fn = nets_factory.get_network_fn( + FLAGS.model_name, + num_classes=num_classes, + is_training=False) + + eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size + + if FLAGS.dataset_dir: + ############################################################## + # Create a dataset provider that loads data from the dataset # + ############################################################## + provider = slim.dataset_data_provider.DatasetDataProvider( + dataset, + shuffle=False, + common_queue_capacity=2 * FLAGS.batch_size, + common_queue_min=FLAGS.batch_size) + [image, label] = provider.get(['image', 'label']) + label -= FLAGS.labels_offset + + ##################################### + # Select the preprocessing function # + ##################################### + preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name + image_preprocessing_fn = 
preprocessing_factory.get_preprocessing( + preprocessing_name, is_training=False) - eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size - - if FLAGS.dataset_dir: - ############################################################## - # Create a dataset provider that loads data from the dataset # - ############################################################## - provider = slim.dataset_data_provider.DatasetDataProvider( - dataset, - shuffle=False, - common_queue_capacity=2 * FLAGS.batch_size, - common_queue_min=FLAGS.batch_size) - [image, label] = provider.get(['image', 'label']) - label -= FLAGS.labels_offset - - ##################################### - # Select the preprocessing function # - ##################################### - preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name - image_preprocessing_fn = preprocessing_factory.get_preprocessing( - preprocessing_name, - is_training=False) - - image = image_preprocessing_fn(image, eval_image_size, eval_image_size) - - images, labels = tf.train.batch( - [image, label], - batch_size=FLAGS.batch_size, - num_threads=FLAGS.num_preprocessing_threads, - capacity=5 * FLAGS.batch_size) - else: - # Generate random images and labels with constant 0 when no dataset is used - input_shape = [FLAGS.batch_size, eval_image_size, eval_image_size, 3] - label_shape = [FLAGS.batch_size] - images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') - labels = tf.constant(0, shape=label_shape, dtype=tf.int64) - - #################### - # Define the model # - #################### - logits, _ = network_fn(images) - - if FLAGS.moving_average_decay: - variable_averages = tf.train.ExponentialMovingAverage( - FLAGS.moving_average_decay, tf_global_step) - variables_to_restore = variable_averages.variables_to_restore( - slim.get_model_variables()) - variables_to_restore[tf_global_step.op.name] = tf_global_step - else: - variables_to_restore = slim.get_variables_to_restore() - - predictions = tf.argmax(logits, 1) - # labels = tf.squeeze(labels) - - # Define the metrics: - names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ - 'Accuracy': slim.metrics.streaming_accuracy(predictions, labels), - 'Recall_5': slim.metrics.streaming_recall_at_k( - logits, labels, 5), - }) - - # Print the summaries to screen. - for name, value in names_to_values.items(): - summary_name = 'eval/%s' % name - op = tf.summary.scalar(summary_name, value, collections=[]) - op = tf.Print(op, [value], summary_name) - tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) - - # TODO(sguada) use num_epochs=1 - if FLAGS.max_num_batches: - num_batches = FLAGS.max_num_batches - else: - # This ensures that we make a single pass over all of the data. 
- num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size)) - - num_batches = 100 - - config = tf.ConfigProto(inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads, - intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads) - - if tf.gfile.IsDirectory(FLAGS.checkpoint_path): - checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) - else: - checkpoint_path = FLAGS.checkpoint_path - - tf.logging.info('Evaluating %s' % checkpoint_path) - - slim.evaluation.evaluate_once( - master=FLAGS.master, - checkpoint_path=checkpoint_path, - logdir=FLAGS.eval_dir, - num_evals=num_batches, - eval_op=list(names_to_updates.values()), - variables_to_restore=variables_to_restore, - hooks=[_LoggerHook()], - session_config=config) + image = image_preprocessing_fn(image, eval_image_size, eval_image_size) + + images, labels = tf.train.batch( + [image, label], + batch_size=FLAGS.batch_size, + num_threads=FLAGS.num_preprocessing_threads, + capacity=5 * FLAGS.batch_size) + else: + # Generate random images and labels with constant 0 when no dataset is used + input_shape = [FLAGS.batch_size, eval_image_size, eval_image_size, 3] + label_shape = [FLAGS.batch_size] + images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') + labels = tf.constant(0, shape=label_shape, dtype=tf.int64) + + #################### + # Define the model # + #################### + logits, _ = network_fn(images) + + if FLAGS.moving_average_decay: + variable_averages = tf.train.ExponentialMovingAverage( + FLAGS.moving_average_decay, tf_global_step) + variables_to_restore = variable_averages.variables_to_restore( + slim.get_model_variables()) + variables_to_restore[tf_global_step.op.name] = tf_global_step + else: + variables_to_restore = slim.get_variables_to_restore() + + predictions = tf.argmax(logits, 1) + #labels = tf.squeeze(labels) + + # Define the metrics: + names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ + 'Accuracy': slim.metrics.streaming_accuracy(predictions, labels), + 'Recall_5': slim.metrics.streaming_recall_at_k( + logits, labels, 5), + }) + + # Print the summaries to screen. + for name, value in names_to_values.items(): + summary_name = 'eval/%s' % name + op = tf.summary.scalar(summary_name, value, collections=[]) + op = tf.Print(op, [value], summary_name) + tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) + + # TODO(sguada) use num_epochs=1 + if FLAGS.max_num_batches: + num_batches = FLAGS.max_num_batches + else: + # This ensures that we make a single pass over all of the data. 
+ num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size)) + + num_batches = 100 + + config = tf.ConfigProto(inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads, intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads) + + if tf.gfile.IsDirectory(FLAGS.checkpoint_path): + checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) + else: + checkpoint_path = FLAGS.checkpoint_path + + tf.logging.info('Evaluating %s' % checkpoint_path) + + slim.evaluation.evaluate_once( + master=FLAGS.master, + checkpoint_path=checkpoint_path, + logdir=FLAGS.eval_dir, + num_evals=num_batches, + eval_op=list(names_to_updates.values()), + variables_to_restore=variables_to_restore, + hooks=[_LoggerHook()], + session_config=config) if __name__ == '__main__': - tf.app.run() + tf.app.run() diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/__init__.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/__init__.py index fd96ab7af..5b3325518 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/__init__.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py index 7431904c0..bb3e7b8d5 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py @@ -30,108 +30,106 @@ NUM_TEST_IMAGES = 50000 - def load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() - - import os - file_ext = os.path.splitext(model_file)[1] + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') + import os + file_ext = os.path.splitext(model_file)[1] - return graph + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--data_location", default=None, - help="full path to the validation data") - parser.add_argument("--input_height", default=224, - type=int, help="input height") - parser.add_argument("--input_width", default=224, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="MobilenetV1/Predictions/Reshape_1", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph - else: - sys.exit("Please provide a graph file.") - input_height = args.input_height - input_width = args.input_width - batch_size = args.batch_size - input_layer = args.input_layer - output_layer 
= args.output_layer - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - data_location = args.data_location - dataset = datasets.ImagenetData(data_location) - preprocessor = dataset.get_image_preprocessor()( - input_height, input_width, batch_size, - 1, # device count - tf.float32, # data_type for input fed to the graph - train=False, # doing inference - resize_method='bilinear') - + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="MobilenetV1/Predictions/Reshape_1", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + input_height = args.input_height + input_width = args.input_width + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + dataset = datasets.ImagenetData(data_location) + preprocessor = dataset.get_image_preprocessor()( + input_height, input_width, batch_size, + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference + resize_method='bilinear') + with tf.compat.v1.get_default_graph().as_default(): images, labels = preprocessor.minibatch(dataset, subset='validation', - use_datasets=True, cache_data=False) - graph = load_graph(model_file) - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") - - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - - total_accuracy1, total_accuracy5 = (0.0, 0.0) - num_processed_images = 0 - num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ - - num_processed_images - with tf.Session() as sess: - sess_graph = tf.Session(graph=graph, config=config) - while num_remaining_images >= batch_size: - # Reads and preprocess data - np_images, np_labels = sess.run([images[0], labels[0]]) - num_processed_images += batch_size - num_remaining_images -= batch_size - start_time = time.time() - # Compute inference on the preprocessed data - predictions = sess_graph.run(output_tensor, - {input_tensor: np_images}) - elapsed_time = time.time() - start_time - accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) - np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - 
print("Iteration time: %0.4f ms" % elapsed_time) - print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) + use_datasets=True, cache_data=False) + graph = load_graph(model_file) + input_tensor = graph.get_tensor_by_name(input_layer + ":0") + output_tensor = graph.get_tensor_by_name(output_layer + ":0") + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ + - num_processed_images + with tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph()) as sess: + sess_graph = tf.compat.v1.Session(graph=graph, config=config) + while num_remaining_images >= batch_size: + # Reads and preprocess data + np_images, np_labels = sess.run([images[0], labels[0]]) + num_processed_images += batch_size + num_remaining_images -= batch_size + start_time = time.time() + # Compute inference on the preprocessed data + predictions = sess_graph.run(output_tensor, + {input_tensor: np_images}) + elapsed_time = time.time() - start_time + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) + np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) + print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1/num_processed_images, + total_accuracy5/num_processed_images)) diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py index 929d20c5a..15ce736db 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py @@ -46,102 +46,100 @@ from google.protobuf import text_format import tensorflow as tf - def load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() - - import os - file_ext = os.path.splitext(model_file)[1] + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') + import os + file_ext = os.path.splitext(model_file)[1] - return graph + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--input_height", default=224, - type=int, help="input height") - parser.add_argument("--input_width", default=224, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="MobilenetV1/Predictions/Reshape_1", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - parser.add_argument("--warmup_steps", type=int, default=10, - help="number of warmup steps") - parser.add_argument("--steps", type=int, default=50, help="number of steps") - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph - else: - sys.exit("Please provide a graph file.") - input_height = args.input_height - input_width = args.input_width - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - warmup_steps = args.warmup_steps - steps = args.steps - assert steps > 10, "Benchmark steps should be at least 10." 
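# A standalone NumPy sketch of the running top-1/top-5 bookkeeping that the
# accuracy script above performs with tf.nn.in_top_k; `preds`, `labels` and
# the batch size are illustrative stand-ins, not values from the patch.
import numpy as np

def topk_hits(preds, labels, k):
    # Count rows whose true label is among the k highest-scoring classes.
    topk = np.argsort(-preds, axis=1)[:, :k]
    return float(sum(label in row for label, row in zip(labels, topk)))

total_top1 = total_top5 = 0.0
processed = 0
preds = np.random.rand(32, 1001)            # stand-in for sess_graph.run(...)
labels = np.random.randint(0, 1001, 32)     # stand-in for the decoded labels
total_top1 += topk_hits(preds, labels, 1)
total_top5 += topk_hits(preds, labels, 5)
processed += 32
print("(top1, top5) = (%.4f, %.4f)" % (total_top1 / processed,
                                       total_top5 / processed))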
- num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - - graph = load_graph(model_file) - - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") - - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - - with tf.Session(graph=graph, config=config) as sess: - input_shape = [batch_size, input_height, input_width, 3] - images = tf.truncated_normal( - input_shape, - dtype=tf.float32, - stddev=10, - name='synthetic_images') - image_data = sess.run(images) - - sys.stdout.flush() - print("[Running warmup steps...]") - for t in range(warmup_steps): - start_time = time.time() - sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - if((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, batch_size / elapsed_time), flush=True) - - print("[Running benchmark steps...]") - total_time = 0 - total_images = 0 - for t in range(steps): - start_time = time.time() - results = sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - if((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, batch_size / elapsed_time), flush=True) + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="MobilenetV1/Predictions/Reshape_1", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + parser.add_argument("--warmup_steps", type=int, default=10, + help="number of warmup steps") + parser.add_argument("--steps", type=int, default=50, help="number of steps") + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + input_height = args.input_height + input_width = args.input_width + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + warmup_steps = args.warmup_steps + steps = args.steps + assert steps > 10, "Benchmark steps should be at least 10." 
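# A minimal sketch of what the two thread flags above control once they reach
# the session: inter_op bounds how many independent ops may run concurrently,
# intra_op sizes the thread pool used inside a single op (e.g. a matmul).
# The value 28 is illustrative (e.g. physical cores per socket), not from the patch.
import tensorflow as tf

config = tf.compat.v1.ConfigProto()
config.inter_op_parallelism_threads = 1
config.intra_op_parallelism_threads = 28
sess = tf.compat.v1.Session(config=config)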
+ num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + + graph = load_graph(model_file) + + input_tensor = graph.get_tensor_by_name(input_layer + ":0"); + output_tensor = graph.get_tensor_by_name(output_layer + ":0"); + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + with tf.compat.v1.Session(graph=graph, config=config) as sess: + input_shape = [batch_size, input_height, input_width, 3] + images = tf.random.truncated_normal( + input_shape, + dtype=tf.float32, + stddev=10, + name='synthetic_images') + image_data = sess.run(images) + + sys.stdout.flush() + print("[Running warmup steps...]") + for t in range(warmup_steps): + start_time = time.time() + sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + if((t+1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t+1, batch_size/elapsed_time)) + + print("[Running benchmark steps...]") + total_time = 0; + total_images = 0; + for t in range(steps): + start_time = time.time() + results = sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + if((t+1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t+1, batch_size/elapsed_time)); diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/calibration.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/calibration.py new file mode 100644 index 000000000..5288aca62 --- /dev/null +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/calibration.py @@ -0,0 +1,135 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
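# A standalone sketch of the warmup-then-measure pattern the benchmark script
# above uses to report images/sec; `run_once` is an illustrative stand-in for
# sess.run(output_tensor, {input_tensor: image_data}), not a patch function.
import time

def measure_throughput(run_once, batch_size, warmup_steps=10, steps=50):
    for _ in range(warmup_steps):            # untimed warmup, absorbs one-off costs
        run_once()
    total_time = 0.0
    for _ in range(steps):
        start = time.time()
        run_once()
        total_time += time.time() - start
    return steps * batch_size / total_time   # average images/sec over timed steps

print("%.2f images/sec" % measure_throughput(lambda: time.sleep(0.01),
                                             batch_size=32))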
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import os +import time +import numpy as np + +from google.protobuf import text_format +import tensorflow as tf +import preprocessing +import datasets + +NUM_TEST_IMAGES = 50000 + +def load_graph(model_file): + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + + import os + file_ext = os.path.splitext(model_file)[1] + + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + + return graph + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="MobilenetV1/Predictions/Reshape_1", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + input_height = args.input_height + input_width = args.input_width + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + dataset = datasets.ImagenetData(data_location) + preprocessor = dataset.get_image_preprocessor()( + input_height, input_width, batch_size, + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference + resize_method='bilinear') + with tf.compat.v1.get_default_graph().as_default(): + images, labels = preprocessor.minibatch(dataset, subset='calibration', + use_datasets=True, cache_data=False) + graph = load_graph(model_file) + input_tensor = graph.get_tensor_by_name(input_layer + ":0") + output_tensor = graph.get_tensor_by_name(output_layer + ":0") + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset='calibration') \ + - num_processed_images + with tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph()) as sess: + sess_graph = tf.compat.v1.Session(graph=graph, config=config) + while num_remaining_images >= batch_size: + # Reads and preprocess data + np_images, np_labels = sess.run([images[0], labels[0]]) + num_processed_images += batch_size + num_remaining_images -= batch_size + start_time = time.time() + # Compute inference on the preprocessed data + predictions = 
sess_graph.run(output_tensor, + {input_tensor: np_images}) + elapsed_time = time.time() - start_time + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) + np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1/num_processed_images, + total_accuracy5/num_processed_images)) diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/cnn_util.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/cnn_util.py index e2a42ee76..a9a4e2ca3 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/cnn_util.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/cnn_util.py @@ -39,11 +39,12 @@ def tensorflow_version_tuple(): - v = tf.__version__ - major, minor, patch = v.split('.') - return (int(major), int(minor), patch) + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) def tensorflow_version(): - vt = tensorflow_version_tuple() - return vt[0] * 1000 + vt[1] + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] + diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/datasets.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/datasets.py index 5cd94c1f9..18e3ad094 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/datasets.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/datasets.py @@ -50,146 +50,149 @@ IMAGENET_NUM_TRAIN_IMAGES = 1281167 IMAGENET_NUM_VAL_IMAGES = 50000 +IMAGENET_NUM_CALIB_IMAGES = 500 def create_dataset(data_dir, data_name): - """Create a Dataset instance based on data_dir and data_name.""" - supported_datasets = { - 'imagenet': ImagenetData, - 'cifar10': Cifar10Data, - } - if not data_dir and not data_name: - # When using synthetic data, use synthetic imagenet images by default. - data_name = 'imagenet' + """Create a Dataset instance based on data_dir and data_name.""" + supported_datasets = { + 'imagenet': ImagenetData, + 'cifar10': Cifar10Data, + } + if not data_dir and not data_name: + # When using synthetic data, use synthetic imagenet images by default. + data_name = 'imagenet' - if data_name is None: - for supported_name in supported_datasets: - if supported_name in data_dir: - data_name = supported_name - break + if data_name is None: + for supported_name in supported_datasets: + if supported_name in data_dir: + data_name = supported_name + break - if data_name is None: - raise ValueError('Could not identify name of dataset. ' - 'Please specify with --data_name option.') + if data_name is None: + raise ValueError('Could not identify name of dataset. ' + 'Please specify with --data_name option.') - if data_name not in supported_datasets: - raise ValueError('Unknown dataset. Must be one of %s', ', '.join( - [key for key in sorted(supported_datasets.keys())])) + if data_name not in supported_datasets: + raise ValueError('Unknown dataset. 
Must be one of %s', ', '.join( + [key for key in sorted(supported_datasets.keys())])) - return supported_datasets[data_name](data_dir) + return supported_datasets[data_name](data_dir) class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, height=None, width=None, depth=None, data_dir=None, - queue_runner_required=False, num_classes=1000): - self.name = name - self.height = height - self.width = width - self.depth = depth or 3 + def __init__(self, name, height=None, width=None, depth=None, data_dir=None, + queue_runner_required=False, num_classes=1000): + self.name = name + self.height = height + self.width = width + self.depth = depth or 3 - self.data_dir = data_dir - self._queue_runner_required = queue_runner_required - self._num_classes = num_classes + self.data_dir = data_dir + self._queue_runner_required = queue_runner_required + self._num_classes = num_classes - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.compat.v1.TFRecordReader() - @property - def num_classes(self): - return self._num_classes + @property + def num_classes(self): + return self._num_classes - @num_classes.setter - def num_classes(self, val): - self._num_classes = val + @num_classes.setter + def num_classes(self, val): + self._num_classes = val - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name - def get_image_preprocessor(self): - return None + def get_image_preprocessor(self): + return None - def queue_runner_required(self): - return self._queue_runner_required + def queue_runner_required(self): + return self._queue_runner_required - def use_synthetic_gpu_images(self): - return not self.data_dir + def use_synthetic_gpu_images(self): + return not self.data_dir class ImagenetData(Dataset): - """Configuration for Imagenet dataset.""" + """Configuration for Imagenet dataset.""" - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - else: - raise ValueError('Invalid data subset "%s"' % subset) + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + elif subset == 'calibration': + return IMAGENET_NUM_CALIB_IMAGES + else: + raise ValueError('Invalid data subset "%s"' % subset) - def get_image_preprocessor(self): - if self.use_synthetic_gpu_images(): - return preprocessing.SyntheticImagePreprocessor - else: - return preprocessing.RecordInputImagePreprocessor + def get_image_preprocessor(self): + if self.use_synthetic_gpu_images(): + return preprocessing.SyntheticImagePreprocessor + else: + return preprocessing.RecordInputImagePreprocessor class Cifar10Data(Dataset): - """Configuration for cifar 10 dataset. - - It will mount all the input images to memory. 
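# A small usage sketch for the 'calibration' subset added above: the dataset
# globs TFRecord files named calibration-*-of-* under data_dir and reports the
# new 500-image epoch size. The directory path here is illustrative only.
import datasets

dataset = datasets.ImagenetData('/path/to/imagenet_tfrecords')
print(dataset.num_examples_per_epoch(subset='calibration'))   # 500
print(dataset.tf_record_pattern('calibration'))               # .../calibration-*-of-*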
- """ - - def __init__(self, data_dir=None): - super(Cifar10Data, self).__init__('cifar10', 32, 32, data_dir=data_dir, - queue_runner_required=True, - num_classes=10) - - def read_data_files(self, subset='train'): - """Reads from data file and returns images and labels in a numpy array.""" - assert self.data_dir, ('Cannot call `read_data_files` when using synthetic ' - 'data') - if subset == 'train': - filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i) - for i in xrange(1, 6)] - elif subset == 'validation': - filenames = [os.path.join(self.data_dir, 'test_batch')] - else: - raise ValueError('Invalid data subset "%s"' % subset) - - inputs = [] - for filename in filenames: - with gfile.Open(filename, 'r') as f: - inputs.append(cPickle.load(f)) - # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the - # input format. - all_images = np.concatenate( - [each_input['data'] for each_input in inputs]).astype(np.float32) - all_labels = np.concatenate( - [each_input['labels'] for each_input in inputs]) - return all_images, all_labels - - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return 50000 - elif subset == 'validation': - return 10000 - else: - raise ValueError('Invalid data subset "%s"' % subset) - - def get_image_preprocessor(self): - if self.use_synthetic_gpu_images(): - return preprocessing.SyntheticImagePreprocessor - else: - return preprocessing.Cifar10ImagePreprocessor + """Configuration for cifar 10 dataset. + + It will mount all the input images to memory. + """ + + def __init__(self, data_dir=None): + super(Cifar10Data, self).__init__('cifar10', 32, 32, data_dir=data_dir, + queue_runner_required=True, + num_classes=10) + + def read_data_files(self, subset='train'): + """Reads from data file and returns images and labels in a numpy array.""" + assert self.data_dir, ('Cannot call `read_data_files` when using synthetic ' + 'data') + if subset == 'train': + filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i) + for i in xrange(1, 6)] + elif subset == 'validation': + filenames = [os.path.join(self.data_dir, 'test_batch')] + else: + raise ValueError('Invalid data subset "%s"' % subset) + + inputs = [] + for filename in filenames: + with gfile.Open(filename, 'r') as f: + inputs.append(cPickle.load(f)) + # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the + # input format. 
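# A standalone sketch of loading one CIFAR-10 python batch the way
# read_data_files does; under Python 3 the pickled batches have to be opened
# in binary mode and decoded with encoding='bytes'. The path is illustrative.
import pickle
import numpy as np

with open('/path/to/cifar-10-batches-py/data_batch_1', 'rb') as f:
    batch = pickle.load(f, encoding='bytes')
images = np.asarray(batch[b'data'], dtype=np.float32)     # shape [10000, 3072]
labels = np.asarray(batch[b'labels'])                      # shape [10000]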
+ all_images = np.concatenate( + [each_input['data'] for each_input in inputs]).astype(np.float32) + all_labels = np.concatenate( + [each_input['labels'] for each_input in inputs]) + return all_images, all_labels + + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return 50000 + elif subset == 'validation': + return 10000 + else: + raise ValueError('Invalid data subset "%s"' % subset) + + def get_image_preprocessor(self): + if self.use_synthetic_gpu_images(): + return preprocessing.SyntheticImagePreprocessor + else: + return preprocessing.Cifar10ImagePreprocessor diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/preprocessing.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/preprocessing.py index 12ef37ce7..58f0eb207 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/preprocessing.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/preprocessing.py @@ -42,9 +42,8 @@ from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf -from tensorflow.contrib.data.python.ops import batching -from tensorflow.contrib.data.python.ops import interleave_ops -from tensorflow.contrib.image.python.ops import distort_image_ops +from tensorflow.python.data.experimental.ops import batching +from tensorflow.python.data.experimental.ops import interleave_ops from tensorflow.python.layers import utils from tensorflow.python.ops import data_flow_ops from tensorflow.python.platform import gfile @@ -52,265 +51,264 @@ from tensorflow.python.ops import control_flow_ops - def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - - The output of the build_image_data.py image preprocessing script is a dataset - containing serialized Example protocol buffers. Each Example proto contains - the following fields: - - image/height: 462 - image/width: 581 - image/colorspace: 'RGB' - image/channels: 3 - image/class/label: 615 - image/class/synset: 'n03623198' - image/class/text: 'knee pad' - image/object/bbox/xmin: 0.1 - image/object/bbox/xmax: 0.9 - image/object/bbox/ymin: 0.2 - image/object/bbox/ymax: 0.6 - image/object/bbox/label: 615 - image/format: 'JPEG' - image/filename: 'ILSVRC2012_val_00041207.JPEG' - image/encoded: - - Args: - example_serialized: scalar Tensor tf.string containing a serialized - Example protocol buffer. - - Returns: - image_buffer: Tensor tf.string containing the contents of a JPEG file. - label: Tensor tf.int32 containing the label. - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. - text: Tensor tf.string containing the human-readable label. - """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. 
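# The removed tf.contrib.data imports above have public counterparts under
# tf.data.experimental; a minimal sketch of the same interleave + map_and_batch
# input pipeline using only the public API. The glob pattern and parse_fn are
# illustrative, not taken from the patch.
import tensorflow as tf

def parse_fn(record):
    return tf.io.parse_single_example(
        serialized=record,
        features={'image/encoded': tf.io.FixedLenFeature([], tf.string, '')})

files = tf.data.Dataset.list_files('/data/imagenet/validation-*-of-*')
ds = files.apply(tf.data.experimental.parallel_interleave(
    tf.data.TFRecordDataset, cycle_length=10))
ds = ds.apply(tf.data.experimental.map_and_batch(
    map_func=parse_fn, batch_size=32, num_parallel_batches=4))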
- feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) - - # Note that we impose an ordering of (y, x) just to make life difficult. - bbox = tf.concat([ymin, xmin, ymax, xmax], 0) - - # Force the variable number of bounding boxes into the shape - # [1, num_boxes, coords]. - bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) - - return features['image/encoded'], label, bbox, features['image/class/text'] + """Parses an Example proto containing a training example of an image. + + The output of the build_image_data.py image preprocessing script is a dataset + containing serialized Example protocol buffers. Each Example proto contains + the following fields: + + image/height: 462 + image/width: 581 + image/colorspace: 'RGB' + image/channels: 3 + image/class/label: 615 + image/class/synset: 'n03623198' + image/class/text: 'knee pad' + image/object/bbox/xmin: 0.1 + image/object/bbox/xmax: 0.9 + image/object/bbox/ymin: 0.2 + image/object/bbox/ymax: 0.6 + image/object/bbox/label: 615 + image/format: 'JPEG' + image/filename: 'ILSVRC2012_val_00041207.JPEG' + image/encoded: + + Args: + example_serialized: scalar Tensor tf.string containing a serialized + Example protocol buffer. + + Returns: + image_buffer: Tensor tf.string containing the contents of a JPEG file. + label: Tensor tf.int32 containing the label. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + text: Tensor tf.string containing the human-readable label. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. 
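# A NumPy sketch (illustrative coordinates) of the bbox reshaping performed in
# parse_example_proto: four [1, num_boxes] coordinate rows are concatenated
# and then rearranged into the [1, num_boxes, 4] layout the bounding-box ops expect.
import numpy as np

ymin = np.array([[0.2, 0.1]])
xmin = np.array([[0.1, 0.3]])
ymax = np.array([[0.6, 0.5]])
xmax = np.array([[0.9, 0.7]])
bbox = np.concatenate([ymin, xmin, ymax, xmax], axis=0)   # [4, num_boxes]
bbox = np.expand_dims(bbox, 0)                            # [1, 4, num_boxes]
bbox = np.transpose(bbox, [0, 2, 1])                      # [1, num_boxes, 4]
print(bbox.shape)                                         # (1, 2, 4)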
+ bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(a=bbox, perm=[0, 2, 1]) + + return features['image/encoded'], label, bbox, features['image/class/text'] def get_image_resize_method(resize_method, batch_position=0): - """Get tensorflow resize method. - - If resize_method is 'round_robin', return different methods based on batch - position in a round-robin fashion. NOTE: If the batch size is not a multiple - of the number of methods, then the distribution of methods will not be - uniform. + """Get tensorflow resize method. - Args: - resize_method: (string) nearest, bilinear, bicubic, area, or round_robin. - batch_position: position of the image in a batch. NOTE: this argument can - be an integer or a tensor - Returns: - one of resize type defined in tf.image.ResizeMethod. - """ - resize_methods_map = { - 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, - 'bilinear': tf.image.ResizeMethod.BILINEAR, - 'bicubic': tf.image.ResizeMethod.BICUBIC, - 'area': tf.image.ResizeMethod.AREA - } + If resize_method is 'round_robin', return different methods based on batch + position in a round-robin fashion. NOTE: If the batch size is not a multiple + of the number of methods, then the distribution of methods will not be + uniform. - if resize_method != 'round_robin': - return resize_methods_map[resize_method] + Args: + resize_method: (string) nearest, bilinear, bicubic, area, or round_robin. + batch_position: position of the image in a batch. NOTE: this argument can + be an integer or a tensor + Returns: + one of resize type defined in tf.image.ResizeMethod. + """ + resize_methods_map = { + 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, + 'bilinear': tf.image.ResizeMethod.BILINEAR, + 'bicubic': tf.image.ResizeMethod.BICUBIC, + 'area': tf.image.ResizeMethod.AREA + } - # return a resize method based on batch position in a round-robin fashion. - resize_methods = resize_methods_map.values() + if resize_method != 'round_robin': + return resize_methods_map[resize_method] - def lookup(index): - return resize_methods[index] + # return a resize method based on batch position in a round-robin fashion. + resize_methods = resize_methods_map.values() + def lookup(index): + return resize_methods[index] - def resize_method_0(): - return utils.smart_cond(batch_position % len(resize_methods) == 0, - lambda: lookup(0), resize_method_1) + def resize_method_0(): + return utils.smart_cond(batch_position % len(resize_methods) == 0, + lambda: lookup(0), resize_method_1) - def resize_method_1(): - return utils.smart_cond(batch_position % len(resize_methods) == 1, - lambda: lookup(1), resize_method_2) + def resize_method_1(): + return utils.smart_cond(batch_position % len(resize_methods) == 1, + lambda: lookup(1), resize_method_2) - def resize_method_2(): - return utils.smart_cond(batch_position % len(resize_methods) == 2, - lambda: lookup(2), lambda: lookup(3)) + def resize_method_2(): + return utils.smart_cond(batch_position % len(resize_methods) == 2, + lambda: lookup(2), lambda: lookup(3)) - # NOTE(jsimsa): Unfortunately, we cannot use a single recursive function here - # because TF would not be able to construct a finite graph. + # NOTE(jsimsa): Unfortunately, we cannot use a single recursive function here + # because TF would not be able to construct a finite graph. - return resize_method_0() + return resize_method_0() def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): - """Decode a JPEG string into one 3-D float image Tensor. - - Args: - image_buffer: scalar string Tensor. 
- scope: Optional scope for op_scope. - Returns: - 3-D float Tensor with values ranging from [0, 1). - """ - # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): - # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): - with tf.name_scope(scope or 'decode_jpeg'): - # Decode the string as an RGB JPEG. - # Note that the resulting image contains an unknown height and width - # that is set dynamically by decode_jpeg. In other words, the height - # and width of image is unknown at compile-time. - image = tf.image.decode_jpeg(image_buffer, channels=3) # , - # fancy_upscaling=False, - # dct_method='INTEGER_FAST') - - # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - - return image + """Decode a JPEG string into one 3-D float image Tensor. + + Args: + image_buffer: scalar string Tensor. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor with values ranging from [0, 1). + """ + # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): + # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): + with tf.compat.v1.name_scope(scope or 'decode_jpeg'): + # Decode the string as an RGB JPEG. + # Note that the resulting image contains an unknown height and width + # that is set dynamically by decode_jpeg. In other words, the height + # and width of image is unknown at compile-time. + image = tf.image.decode_jpeg(image_buffer, channels=3) #, + # fancy_upscaling=False, + # dct_method='INTEGER_FAST') + + # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + return image + def preprocess_for_eval(image, height, width, central_fraction=0.875, scope=None): - """Prepare one image for evaluation. - - If height and width are specified it would output an image with that size by - applying resize_bilinear. - - If central_fraction is specified it would crop the central fraction of the - input image. - - Args: - image: 3-D Tensor of image. If dtype is tf.float32 then the range should be - [0, 1], otherwise it would converted to tf.float32 assuming that the range - is [0, MAX], where MAX is largest positive representable number for - int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). - height: integer - width: integer - central_fraction: Optional Float, fraction of the image to crop. - scope: Optional scope for name_scope. - Returns: - 3-D float Tensor of prepared image. - """ - with tf.name_scope(scope, 'eval_image', [image, height, width]): - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. - image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image + """Prepare one image for evaluation. + + If height and width are specified it would output an image with that size by + applying resize_bilinear. + + If central_fraction is specified it would crop the central fraction of the + input image. + + Args: + image: 3-D Tensor of image. 
If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + central_fraction: Optional Float, fraction of the image to crop. + scope: Optional scope for name_scope. + Returns: + 3-D float Tensor of prepared image. + """ + with tf.compat.v1.name_scope(scope, 'eval_image', [image, height, width]): + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.compat.v1.image.resize_bilinear(image, [height, width], align_corners=False) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image + def apply_with_random_selector(x, func, num_cases): - """Computes func(x, sel), with sel sampled from [0...num_cases-1]. + """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - Args: - x: input Tensor. - func: Python function to apply. - num_cases: Python int32, number of cases to sample sel from. + Args: + x: input Tensor. + func: Python function to apply. + num_cases: Python int32, number of cases to sample sel from. - Returns: - The result of func(x, sel), where func receives the value of the - selector as a python integer, but sel is sampled dynamically. - """ - sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) - # Pass the real x only to one of the func calls. - return control_flow_ops.merge([ - func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) - for case in range(num_cases)])[0] + Returns: + The result of func(x, sel), where func receives the value of the + selector as a python integer, but sel is sampled dynamically. + """ + sel = tf.random.uniform([], maxval=num_cases, dtype=tf.int32) + # Pass the real x only to one of the func calls. + return control_flow_ops.merge([ + func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) + for case in range(num_cases)])[0] def distort_color(image, color_ordering=0, fast_mode=True, scope=None): - """Distort the color of a Tensor image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather then adding that level of complication, we select a distinct ordering - of color ops for each preprocessing thread. - - Args: - image: 3-D Tensor containing single image in [0, 1]. - color_ordering: Python int, a type of distortion (valid values: 0-3). - fast_mode: Avoids slower ops (random_hue and random_contrast) - scope: Optional scope for name_scope. - Returns: - 3-D Tensor color-distorted image on range [0, 1] - Raises: - ValueError: if color_ordering not in [0, 3] - """ - with tf.name_scope(scope, 'distort_color', [image]): - if fast_mode: - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) 
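# A compact sketch of the eval-time transform implemented in preprocess_for_eval
# above: central-crop to 87.5%, bilinear resize to the network input size, then
# rescale [0, 1] pixels to [-1, 1]. `jpeg_bytes` is an illustrative placeholder
# for one record's encoded image, not a name from the patch.
import tensorflow as tf

def eval_image(jpeg_bytes, height=224, width=224):
    image = tf.image.decode_jpeg(jpeg_bytes, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)        # [0, 1]
    image = tf.image.central_crop(image, central_fraction=0.875)
    image = tf.expand_dims(image, 0)
    image = tf.compat.v1.image.resize_bilinear(image, [height, width],
                                               align_corners=False)
    image = tf.squeeze(image, [0])
    return (image - 0.5) * 2.0                                     # [-1, 1]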
- else: - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - elif color_ordering == 2: - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - elif color_ordering == 3: - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - else: - raise ValueError('color_ordering must be in [0, 3]') - - # The random_* ops do not necessarily clamp. - return tf.clip_by_value(image, 0.0, 1.0) + """Distort the color of a Tensor image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: 3-D Tensor containing single image in [0, 1]. + color_ordering: Python int, a type of distortion (valid values: 0-3). + fast_mode: Avoids slower ops (random_hue and random_contrast) + scope: Optional scope for name_scope. + Returns: + 3-D Tensor color-distorted image on range [0, 1] + Raises: + ValueError: if color_ordering not in [0, 3] + """ + with tf.compat.v1.name_scope(scope, 'distort_color', [image]): + if fast_mode: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + elif color_ordering == 2: + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + elif color_ordering == 3: + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + raise ValueError('color_ordering must be in [0, 3]') + + # The random_* ops do not necessarily clamp. 
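# The one-of-N dispatch that apply_with_random_selector builds out of
# control_flow_ops.switch/merge can also be expressed with tf.switch_case; a
# minimal sketch (the function name and sizes are illustrative) that picks one
# of four resize methods at random for a single float32 image.
import tensorflow as tf

def random_resize(image, height=224, width=224):
    image = tf.image.convert_image_dtype(image, tf.float32)
    sel = tf.random.uniform([], maxval=4, dtype=tf.int32)
    methods = [tf.image.ResizeMethod.NEAREST_NEIGHBOR,
               tf.image.ResizeMethod.BILINEAR,
               tf.image.ResizeMethod.BICUBIC,
               tf.image.ResizeMethod.AREA]
    return tf.switch_case(sel, [
        (lambda m=m: tf.image.resize(image, [height, width], method=m))
        for m in methods])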
+ return tf.clip_by_value(image, 0.0, 1.0) def distorted_bounding_box_crop(image, @@ -320,314 +318,332 @@ def distorted_bounding_box_crop(image, area_range=(0.05, 1.0), max_attempts=100, scope=None): - """Generates cropped_image using a one of the bboxes randomly distorted. - - See `tf.image.sample_distorted_bounding_box` for more documentation. - - Args: - image: 3-D Tensor of image (it will be converted to floats in [0, 1]). - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole - image. - min_object_covered: An optional `float`. Defaults to `0.1`. The cropped - area of the image must contain at least this fraction of any bounding box - supplied. - aspect_ratio_range: An optional list of `floats`. The cropped area of the - image must have an aspect ratio = width / height within this range. - area_range: An optional list of `floats`. The cropped area of the image - must contain a fraction of the supplied image within in this range. - max_attempts: An optional `int`. Number of attempts at generating a cropped - region of the image of the specified constraints. After `max_attempts` - failures, return the entire image. - scope: Optional scope for name_scope. - Returns: - A tuple, a 3-D Tensor cropped_image and the distorted bbox - """ - with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - - # A large fraction of image datasets contain a human-annotated bounding - # box delineating the region of the image containing the object of interest. - # We choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an - # allowed range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image. - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=max_attempts, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - - # Crop the image to the specified bounding box. - cropped_image = tf.slice(image, bbox_begin, bbox_size) - return cropped_image, distort_bbox - - -def preprocess_for_train(image, height, width, bbox, - batch_position, - fast_mode=True, - scope=None, - add_image_summaries=True): - """Distort one image for training a network. - - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not effect the label. - - Args: - image: 3-D Tensor of image. If dtype is tf.float32 then the range should be - [0, 1], otherwise it would converted to tf.float32 assuming that the range - is [0, MAX], where MAX is largest positive representable number for - int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). - height: integer - width: integer - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. 
- batch_position: position of the image in a batch, which affects how images - are distorted and resized. NOTE: this argument can be an integer or a - tensor - scope: Optional scope for op_scope. - add_image_summaries: Enable image summaries. - Returns: - 3-D float Tensor of distorted image used for training with range [-1, 1]. - """ - - with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): - if bbox is None: - bbox = tf.constant([0.0, 0.0, 1.0, 1.0], - dtype=tf.float32, - shape=[1, 1, 4]) - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), - bbox) - if add_image_summaries: - tf.summary.image('image_with_bounding_boxes', image_with_box) - - distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. - distorted_image.set_shape([None, None, 3]) - image_with_distorted_box = tf.image.draw_bounding_boxes( - tf.expand_dims(image, 0), distorted_bbox) - if add_image_summaries: - tf.summary.image('images_with_distorted_bounding_box', - image_with_distorted_box) - - # This resizing operation may distort the images because the aspect - # ratio is not respected. We select a resize method in a round robin - # fashion based on the thread number. - # Note that ResizeMethod contains 4 enumerated resizing methods. - - # We select only 1 case for fast_mode bilinear. - num_resize_cases = 1 if fast_mode else 4 - distorted_image = apply_with_random_selector( - distorted_image, - lambda x, method: tf.image.resize_images(x, [height, width], method), - num_cases=num_resize_cases) - - if add_image_summaries: - tf.summary.image('cropped_resized_image', - tf.expand_dims(distorted_image, 0)) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - # Randomly distort the colors. There are 1 or 4 ways to do it. - num_distort_cases = 1 if fast_mode else 4 - distorted_image = apply_with_random_selector( - distorted_image, - lambda x, ordering: distort_color(x, ordering, fast_mode), - num_cases=num_distort_cases) - - if add_image_summaries: - tf.summary.image('final_distorted_image', - tf.expand_dims(distorted_image, 0)) - distorted_image = tf.subtract(distorted_image, 0.5) - distorted_image = tf.multiply(distorted_image, 2.0) - return distorted_image + """Generates cropped_image using a one of the bboxes randomly distorted. + + See `tf.image.sample_distorted_bounding_box` for more documentation. + + Args: + image: 3-D Tensor of image (it will be converted to floats in [0, 1]). + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole + image. + min_object_covered: An optional `float`. Defaults to `0.1`. The cropped + area of the image must contain at least this fraction of any bounding box + supplied. + aspect_ratio_range: An optional list of `floats`. The cropped area of the + image must have an aspect ratio = width / height within this range. + area_range: An optional list of `floats`. The cropped area of the image + must contain a fraction of the supplied image within in this range. + max_attempts: An optional `int`. 
Number of attempts at generating a cropped + region of the image of the specified constraints. After `max_attempts` + failures, return the entire image. + scope: Optional scope for name_scope. + Returns: + A tuple, a 3-D Tensor cropped_image and the distorted bbox + """ + with tf.compat.v1.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an + # allowed range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + image_size=tf.shape(input=image), + bounding_boxes=bbox, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + + # Crop the image to the specified bounding box. + cropped_image = tf.slice(image, bbox_begin, bbox_size) + return cropped_image, distort_bbox + + + +def preprocess_for_train(image, height,width, bbox, + batch_position, + fast_mode=True, + scope=None, + add_image_summaries=True): + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. + + Args: + image: 3-D Tensor of image. If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + batch_position: position of the image in a batch, which affects how images + are distorted and resized. NOTE: this argument can be an integer or a + tensor + scope: Optional scope for op_scope. + add_image_summaries: Enable image summaries. + Returns: + 3-D float Tensor of distorted image used for training with range [-1, 1]. + """ + + with tf.compat.v1.name_scope(scope, 'distort_image', [image, height, width, bbox]): + if bbox is None: + bbox = tf.constant([0.0, 0.0, 1.0, 1.0], + dtype=tf.float32, + shape=[1, 1, 4]) + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + if add_image_summaries: + tf.compat.v1.summary.image('image_with_bounding_boxes', image_with_box) + + distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. 
+ distorted_image.set_shape([None, None, 3]) + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distorted_bbox) + if add_image_summaries: + tf.compat.v1.summary.image('images_with_distorted_bounding_box', + image_with_distorted_box) + + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + + # We select only 1 case for fast_mode bilinear. + num_resize_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, method: tf.image.resize(x, [height, width], method), + num_cases=num_resize_cases) + + if add_image_summaries: + tf.compat.v1.summary.image('cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + # Randomly distort the colors. There are 1 or 4 ways to do it. + num_distort_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, ordering: distort_color(x, ordering, fast_mode), + num_cases=num_distort_cases) + + if add_image_summaries: + tf.compat.v1.summary.image('final_distorted_image', + tf.expand_dims(distorted_image, 0)) + distorted_image = tf.subtract(distorted_image, 0.5) + distorted_image = tf.multiply(distorted_image, 2.0) + return distorted_image def distort_color(image, batch_position=0, distort_color_in_yiq=False, scope=None): - """Distort the color of the image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather then adding that level of complication, we select a distinct ordering - of color ops based on the position of the image in a batch. - - Args: - image: float32 Tensor containing single image. Tensor values should be in - range [0, 1]. - batch_position: the position of the image in a batch. NOTE: this argument - can be an integer or a tensor - distort_color_in_yiq: distort color of input images in YIQ space. - scope: Optional scope for op_scope. - Returns: - color-distorted image - """ - with tf.name_scope(scope or 'distort_color'): - - def distort_fn_0(image=image): - """Variant 0 of distort function.""" - image = tf.image.random_brightness(image, max_delta=32. / 255.) - # if distort_color_in_yiq: - # image = distort_image_ops.random_hsv_in_yiq( - # image, lower_saturation=0.5, upper_saturation=1.5, - # max_delta_hue=0.2 * math.pi) - # else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - return image - - def distort_fn_1(image=image): - """Variant 1 of distort function.""" - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - # if distort_color_in_yiq: - # image = distort_image_ops.random_hsv_in_yiq( - # image, lower_saturation=0.5, upper_saturation=1.5, - # max_delta_hue=0.2 * math.pi) - # else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - return image - - image = utils.smart_cond(batch_position % 2 == 0, distort_fn_0, - distort_fn_1) - # The random_* ops do not necessarily clamp. 
- image = tf.clip_by_value(image, 0.0, 1.0) - return image + """Distort the color of the image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops based on the position of the image in a batch. + + Args: + image: float32 Tensor containing single image. Tensor values should be in + range [0, 1]. + batch_position: the position of the image in a batch. NOTE: this argument + can be an integer or a tensor + distort_color_in_yiq: distort color of input images in YIQ space. + scope: Optional scope for op_scope. + Returns: + color-distorted image + """ + with tf.compat.v1.name_scope(scope or 'distort_color'): + + def distort_fn_0(image=image): + """Variant 0 of distort function.""" + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + return image + + def distort_fn_1(image=image): + """Variant 1 of distort function.""" + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + return image + + image = utils.smart_cond(batch_position % 2 == 0, distort_fn_0, + distort_fn_1) + # The random_* ops do not necessarily clamp. + image = tf.clip_by_value(image, 0.0, 1.0) + return image class RecordInputImagePreprocessor(object): - """Preprocessor for images with RecordInput format.""" - - def __init__(self, - height, - width, - batch_size, - num_splits, - dtype, - train, - distortions=False, - resize_method="bilinear", - shift_ratio=0, - summary_verbosity=1, - distort_color_in_yiq=False, - fuse_decode_and_crop=False): - self.height = height - self.width = width - self.batch_size = batch_size - self.num_splits = num_splits - self.dtype = dtype - self.train = train - self.resize_method = resize_method - self.shift_ratio = shift_ratio - self.distortions = distortions - self.distort_color_in_yiq = distort_color_in_yiq - self.fuse_decode_and_crop = fuse_decode_and_crop - if self.batch_size % self.num_splits != 0: - raise ValueError( - ('batch_size must be a multiple of num_splits: ' - 'batch_size %d, num_splits: %d') % - (self.batch_size, self.num_splits)) - self.batch_size_per_split = self.batch_size // self.num_splits - self.summary_verbosity = summary_verbosity - - def image_preprocess(self, image_buffer, bbox, batch_position): - """Preprocessing image_buffer as a function of its batch position.""" - if self.train: - image_buffer = tf.image.decode_jpeg( - image_buffer, channels=3, dct_method='INTEGER_FAST') - image = preprocess_for_train(image_buffer, self.height, self.width, bbox, - batch_position) - else: - image = tf.image.decode_jpeg( - image_buffer, channels=3, dct_method='INTEGER_FAST') - image = preprocess_for_eval(image, self.height, self.width) - return image - - def parse_and_preprocess(self, value, batch_position): - image_buffer, label_index, bbox, _ = parse_example_proto(value) - image = self.image_preprocess(image_buffer, bbox, batch_position) - return (label_index, image) - - def minibatch(self, dataset, subset, use_datasets, cache_data, - shift_ratio=-1): - if shift_ratio < 0: - shift_ratio = self.shift_ratio - with 
tf.name_scope('batch_processing'): - # Build final results per split. - images = [[] for _ in range(self.num_splits)] - labels = [[] for _ in range(self.num_splits)] - if use_datasets: - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - ds = ds.apply( - interleave_ops.parallel_interleave( - tf.data.TFRecordDataset, cycle_length=10)) - if cache_data: - ds = ds.take(1).cache().repeat() - counter = tf.data.Dataset.range(self.batch_size) - counter = counter.repeat() - ds = tf.data.Dataset.zip((ds, counter)) - ds = ds.prefetch(buffer_size=self.batch_size) - ds = ds.shuffle(buffer_size=10000) - ds = ds.repeat() - ds = ds.apply( - batching.map_and_batch( - map_func=self.parse_and_preprocess, - batch_size=self.batch_size_per_split, - num_parallel_batches=self.num_splits)) - ds = ds.prefetch(buffer_size=self.num_splits) - ds_iterator = ds.make_one_shot_iterator() - for d in xrange(self.num_splits): - labels[d], images[d] = ds_iterator.get_next() - - else: - record_input = data_flow_ops.RecordInput( - file_pattern=dataset.tf_record_pattern(subset), - seed=301, - parallelism=64, - buffer_size=10000, - batch_size=self.batch_size, - shift_ratio=shift_ratio, - name='record_input') - records = record_input.get_yield_op() - records = tf.split(records, self.batch_size, 0) - records = [tf.reshape(record, []) for record in records] - for idx in xrange(self.batch_size): - value = records[idx] - (label, image) = self.parse_and_preprocess(value, idx) - split_index = idx % self.num_splits - labels[split_index].append(label) - images[split_index].append(image) - - for split_index in xrange(self.num_splits): - if not use_datasets: - images[split_index] = tf.parallel_stack(images[split_index]) - labels[split_index] = tf.concat(labels[split_index], 0) - images[split_index] = tf.cast(images[split_index], self.dtype) - depth = 3 - images[split_index] = tf.reshape( - images[split_index], - shape=[self.batch_size_per_split, self.height, self.width, depth]) - labels[split_index] = tf.reshape(labels[split_index], - [self.batch_size_per_split]) - return images, labels + """Preprocessor for images with RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + num_splits, + dtype, + train, + distortions=False, + resize_method="bilinear", + shift_ratio=0, + summary_verbosity=1, + distort_color_in_yiq=False, + fuse_decode_and_crop=False): + self.height = height + self.width = width + self.batch_size = batch_size + self.num_splits = num_splits + self.dtype = dtype + self.train = train + self.resize_method = resize_method + self.shift_ratio = shift_ratio + self.distortions = distortions + self.distort_color_in_yiq = distort_color_in_yiq + self.fuse_decode_and_crop = fuse_decode_and_crop + if self.batch_size % self.num_splits != 0: + raise ValueError( + ('batch_size must be a multiple of num_splits: ' + 'batch_size %d, num_splits: %d') % + (self.batch_size, self.num_splits)) + self.batch_size_per_split = self.batch_size // self.num_splits + self.summary_verbosity = summary_verbosity + + def center_crop(self, img, init_h, init_w): + height, width, _ = img.shape + + left = int((width - init_w) // 2) + right = int((width + init_w) // 2) + top = int((height - init_h) // 2) + bottom = int((height + init_h) // 2) + + img = img[top: bottom, left: right] + + return img + + def image_preprocess(self, image_buffer, 
bbox, batch_position): + """Preprocessing image_buffer as a function of its batch position.""" + if self.train: + image_buffer = tf.image.decode_jpeg( + image_buffer, channels=3, dct_method='INTEGER_FAST') + image = preprocess_for_train(image_buffer, self.height, self.width, bbox, + batch_position) + else: + image = tf.image.decode_jpeg( + image_buffer, channels=3, dct_method='INTEGER_FAST') + + new_height = int(100. * self.height / 87.5) + new_width = int(100. * self.width / 87.5) + + if(self.height > self.width): + w = new_width + h = int(new_height * self.height / self.width) + else: + h = new_height + w = int(new_width * self.width / self.height) + + image = preprocess_for_eval(image, h, w) + image = self.center_crop(image, self.height, self.width) + + return image + + def parse_and_preprocess(self, value, batch_position): + image_buffer, label_index, bbox, _ = parse_example_proto(value) + image = self.image_preprocess(image_buffer, bbox, batch_position) + return (label_index, image) + + def minibatch(self, dataset, subset, use_datasets, cache_data, + shift_ratio=-1): + if shift_ratio < 0: + shift_ratio = self.shift_ratio + with tf.compat.v1.name_scope('batch_processing'): + # Build final results per split. + images = [[] for _ in range(self.num_splits)] + labels = [[] for _ in range(self.num_splits)] + if use_datasets: + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply( + interleave_ops.parallel_interleave( + tf.data.TFRecordDataset, cycle_length=10)) + if cache_data: + ds = ds.take(1).cache().repeat() + counter = tf.data.Dataset.range(self.batch_size) + counter = counter.repeat() + ds = tf.data.Dataset.zip((ds, counter)) + ds = ds.prefetch(buffer_size=self.batch_size) + ds = ds.repeat() + ds = ds.apply( + batching.map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size_per_split, + num_parallel_batches=self.num_splits)) + ds = ds.prefetch(buffer_size=self.num_splits) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) + for d in xrange(self.num_splits): + labels[d], images[d] = ds_iterator.get_next() + + else: + record_input = data_flow_ops.RecordInput( + file_pattern=dataset.tf_record_pattern(subset), + seed=301, + parallelism=64, + buffer_size=10000, + batch_size=self.batch_size, + shift_ratio=shift_ratio, + name='record_input') + records = record_input.get_yield_op() + records = tf.split(records, self.batch_size, 0) + records = [tf.reshape(record, []) for record in records] + for idx in xrange(self.batch_size): + value = records[idx] + (label, image) = self.parse_and_preprocess(value, idx) + split_index = idx % self.num_splits + labels[split_index].append(label) + images[split_index].append(image) + + for split_index in xrange(self.num_splits): + if not use_datasets: + images[split_index] = tf.parallel_stack(images[split_index]) + labels[split_index] = tf.concat(labels[split_index], 0) + images[split_index] = tf.cast(images[split_index], self.dtype) + depth = 3 + images[split_index] = tf.reshape( + images[split_index], + shape=[self.batch_size_per_split, self.height, self.width, depth]) + labels[split_index] = tf.reshape(labels[split_index], + [self.batch_size_per_split]) + return images, labels + diff --git a/models/image_recognition/tensorflow/resnet101/inference/__init__.py 
b/models/image_recognition/tensorflow/resnet101/inference/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/image_recognition/tensorflow/resnet101/inference/__init__.py +++ b/models/image_recognition/tensorflow/resnet101/inference/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/image_recognition/tensorflow/resnet101/inference/datasets.py b/models/image_recognition/tensorflow/resnet101/inference/datasets.py index a12d64f53..098dddcb9 100644 --- a/models/image_recognition/tensorflow/resnet101/inference/datasets.py +++ b/models/image_recognition/tensorflow/resnet101/inference/datasets.py @@ -47,49 +47,48 @@ IMAGENET_NUM_VAL_IMAGES = 50000 IMAGENET_NUM_CLASSES = 1000 - class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, data_dir=None): - self.name = name - if data_dir is None: - raise ValueError('Data directory not specified') - self.data_dir = data_dir + def __init__(self, name, data_dir=None): + self.name = name + if data_dir is None: + raise ValueError('Data directory not specified') + self.data_dir = data_dir - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.compat.v1.TFRecordReader() - @abstractmethod - def num_classes(self): - pass + @abstractmethod + def num_classes(self): + pass - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name class ImagenetData(Dataset): - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('ImageNet', data_dir) + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('ImageNet', data_dir) - def num_classes(self): - return IMAGENET_NUM_CLASSES + def num_classes(self): + return IMAGENET_NUM_CLASSES - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - else: - raise ValueError('Invalid data subset "%s"' % subset) + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + else: + raise ValueError('Invalid data subset "%s"' % subset) - def get_image_preprocessor(self): - return preprocessing.RecordInputImagePreprocessor + def get_image_preprocessor(self): + return preprocessing.RecordInputImagePreprocessor diff --git a/models/image_recognition/tensorflow/resnet101/inference/eval_image_classifier_inference.py b/models/image_recognition/tensorflow/resnet101/inference/eval_image_classifier_inference.py index ecac7999e..225c94e63 100644 --- a/models/image_recognition/tensorflow/resnet101/inference/eval_image_classifier_inference.py +++ b/models/image_recognition/tensorflow/resnet101/inference/eval_image_classifier_inference.py @@ -22,217 +22,215 @@ from argparse import ArgumentParser import tensorflow as tf -try: - import tensorflow.tools.graph_transforms as graph_transforms -except: - import tensorflow_core.tools.graph_transforms as graph_transforms +from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference +from tensorflow.python.framework import dtypes import datasets 
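With graph_transforms gone, the script now relies on optimize_for_inference to fold batch norms and strip training-only nodes from the frozen graph. A minimal sketch of that flow, assuming a placeholder file name and node names:

    import tensorflow as tf
    from tensorflow.python.framework import dtypes
    from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference

    graph_def = tf.compat.v1.GraphDef()
    with tf.io.gfile.GFile('frozen_model.pb', 'rb') as f:   # placeholder path
        graph_def.ParseFromString(f.read())

    # Prune and fold the frozen graph between the named input/output nodes,
    # filling the role of the old TransformGraph optimization string.
    optimized = optimize_for_inference(graph_def, ['input'], ['predictions'],
                                       dtypes.float32.as_datatype_enum, False)

    with tf.Graph().as_default():
        tf.import_graph_def(optimized, name='')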
INPUTS = 'input' OUTPUTS = 'resnet_v1_101/predictions/Reshape_1' -OPTIMIZATION = 'strip_unused_nodes remove_nodes(op=Identity, op=CheckNumerics) fold_constants(ignore_errors=true) fold_batch_norms fold_old_batch_norms' RESNET_IMAGE_SIZE = 224 IMAGENET_VALIDATION_IMAGES = 50000 class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph""" - - def __init__(self): - - arg_parser = ArgumentParser(description='Parse args') - - arg_parser.add_argument('-b', "--batch-size", - help="Specify the batch size. If this " - "parameter is not specified or is -1, the " - "largest ideal batch size for the model will " - "be used.", - dest="batch_size", type=int, default=-1) - - arg_parser.add_argument('-e', "--num-inter-threads", - help='The number of inter-thread.', - dest='num_inter_threads', type=int, default=0) - - arg_parser.add_argument('-a', "--num-intra-threads", - help='The number of intra-thread.', - dest='num_intra_threads', type=int, default=0) - - arg_parser.add_argument('-m', "--model-name", - help='Specify the model name to run benchmark for', - dest='model_name') - - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - - arg_parser.add_argument('-d', "--data-location", - help='Specify the location of the data. ' - 'If this parameter is not specified, ' - 'the benchmark will use random/dummy data.', - dest="data_location", default=None) - - arg_parser.add_argument('-r', "--accuracy-only", - help='For accuracy measurement only.', - dest='accuracy_only', action='store_true') - - arg_parser.add_argument("--warmup-steps", type=int, default=10, - help="number of warmup steps") - arg_parser.add_argument("--steps", type=int, default=50, - help="number of steps") - arg_parser.add_argument( - '--data-num-inter-threads', dest='data_num_inter_threads', - help='number threads across operators', - type=int, default=16) - arg_parser.add_argument( - '--data-num-intra-threads', dest='data_num_intra_threads', - help='number threads for data layer operator', - type=int, default=14) - # parse the arguments - self.args = arg_parser.parse_args() - # validate the arguements - self.validate_args() - - def run(self): - """run benchmark with optimized graph""" - - print("Run inference") - - data_config = tf.ConfigProto() - data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads - data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads - data_config.use_per_session_threads = 1 - - infer_config = tf.ConfigProto() - infer_config.intra_op_parallelism_threads = self.args.num_intra_threads - infer_config.inter_op_parallelism_threads = self.args.num_inter_threads - infer_config.use_per_session_threads = 1 - - data_graph = tf.Graph() - with data_graph.as_default(): - if (self.args.data_location): - print("Inference with real data.") - dataset = datasets.ImagenetData(self.args.data_location) - preprocessor = dataset.get_image_preprocessor()( - RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size, - intra_threads=self.args.num_intra_threads, - resize_method='crop') - images, labels = preprocessor.minibatch(dataset, subset='validation') - else: - print("Inference with dummy data.") - input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3] - images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') - - infer_graph = tf.Graph() - with infer_graph.as_default(): - # convert the freezed graph to optimized graph - 
graph_def = tf.GraphDef() - with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: - input_graph_content = input_file.read() - graph_def.ParseFromString(input_graph_content) - - output_graph = graph_transforms.TransformGraph(graph_def, - [INPUTS], [OUTPUTS], [OPTIMIZATION]) - tf.import_graph_def(output_graph, name='') - - # Definite input and output Tensors for detection_graph - input_tensor = infer_graph.get_tensor_by_name('input:0') - # output_tensor = infer_graph.get_tensor_by_name('resnet_v1_101/SpatialSqueeze:0') - output_tensor = infer_graph.get_tensor_by_name('resnet_v1_101/predictions/Reshape_1:0') - - # tf.global_variables_initializer() - data_sess = tf.Session(graph=data_graph, config=data_config) - infer_sess = tf.Session(graph=infer_graph, config=infer_config) - - num_processed_images = 0 - num_remaining_images = IMAGENET_VALIDATION_IMAGES - - if (not self.args.accuracy_only): # performance check - iteration = 0 - warm_up_iteration = self.args.warmup_steps - total_run = self.args.steps - total_time = 0 - # options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - # run_metadata = tf.RunMetadata() - - while num_remaining_images >= self.args.batch_size and iteration < total_run: - iteration += 1 - - # Reads and preprocess data - data_load_start = time.time() - image_np = data_sess.run(images) - data_load_time = time.time() - data_load_start - - num_processed_images += self.args.batch_size - num_remaining_images -= self.args.batch_size - - start_time = time.time() - infer_sess.run([output_tensor], feed_dict={input_tensor: image_np}) - time_consume = time.time() - start_time - - # only add data loading time for real data, not for dummy data - if self.args.data_location: - time_consume += data_load_time - - # trace = timeline.Timeline(step_stats=run_metadata.step_stats) - # with gfile.Open('resnet101_fp32_int8_master', 'w') as trace_file: - # trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) - - print('Iteration %d: %.3f sec' % (iteration, time_consume)) - if iteration > warm_up_iteration: - total_time += time_consume - - time_average = total_time / (iteration - warm_up_iteration) - print('Average time: %.3f sec' % (time_average)) - - print('Batch size = %d' % self.args.batch_size) - if (self.args.batch_size == 1): - print('Latency: %.3f ms' % (time_average * 1000)) - # print throughput for both batch size 1 and 128 - print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) - - else: # accuracy check - total_accuracy1, total_accuracy5 = (0.0, 0.0) - - while num_remaining_images >= self.args.batch_size: - # Reads and preprocess data - np_images, np_labels = data_sess.run([images, labels]) - np_labels -= 1 - num_processed_images += self.args.batch_size - num_remaining_images -= self.args.batch_size - - start_time = time.time() - # Compute inference on the preprocessed data - predictions = infer_sess.run(output_tensor, - {input_tensor: np_images}) - elapsed_time = time.time() - start_time - with tf.Graph().as_default() as accu_graph: - # Putting all code within this make things faster. 
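The accuracy branch that follows accumulates top-1 and top-5 hits batch by batch and divides by the number of processed images at the end. The same bookkeeping in plain NumPy, shown only to clarify the arithmetic (the script itself uses tf.nn.in_top_k):

    import numpy as np

    def topk_hits(logits, labels, k=5):
        # logits: [batch, num_classes]; labels: [batch] of class ids.
        top1 = int(np.sum(np.argmax(logits, axis=1) == labels))
        topk = sum(int(label in row) for row, label in
                   zip(np.argsort(logits, axis=1)[:, -k:], labels))
        return top1, topk

    # Running totals; final accuracy is total / num_processed_images, e.g.
    # total1 += topk_hits(batch_logits, batch_labels)[0]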
- accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) - with tf.Session() as accu_sess: - np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5]) - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - print("Iteration time: %0.4f ms" % elapsed_time) - print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) - - def validate_args(self): - """validate the arguments""" - - if not self.args.data_location: - if self.args.accuracy_only: - raise ValueError("You must use real data for accuracy measurement.") + """Evaluate image classifier with optimized TensorFlow graph""" + + def __init__(self): + + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--num-inter-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--num-intra-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-m', "--model-name", + help='Specify the model name to run benchmark for', + dest='model_name') + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. 
' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + + arg_parser.add_argument("--warmup-steps", type=int, default=10, + help="number of warmup steps") + arg_parser.add_argument("--steps", type=int, default=50, + help="number of steps") + arg_parser.add_argument( + '--data-num-inter-threads', dest='data_num_inter_threads', + help='number threads across operators', + type=int, default=16) + arg_parser.add_argument( + '--data-num-intra-threads', dest='data_num_intra_threads', + help='number threads for data layer operator', + type=int, default=14) + # parse the arguments + self.args = arg_parser.parse_args() + # validate the arguements + self.validate_args() + + def run(self): + """run benchmark with optimized graph""" + + print("Run inference") + + data_config = tf.compat.v1.ConfigProto() + data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads + data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.compat.v1.ConfigProto() + infer_config.intra_op_parallelism_threads = self.args.num_intra_threads + infer_config.inter_op_parallelism_threads = self.args.num_inter_threads + infer_config.use_per_session_threads = 1 + + data_graph = tf.Graph() + with data_graph.as_default(): + if (self.args.data_location): + print("Inference with real data.") + dataset = datasets.ImagenetData(self.args.data_location) + preprocessor = dataset.get_image_preprocessor()( + RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size, + intra_threads=self.args.num_intra_threads, + resize_method='crop') + images, labels = preprocessor.minibatch(dataset, subset='validation') + else: + print("Inference with dummy data.") + input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3] + images = tf.random.uniform(input_shape, 0.0, 255.0,dtype=tf.float32,name='synthetic_images') + + infer_graph = tf.Graph() + with infer_graph.as_default(): + # convert the freezed graph to optimized graph + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + + output_graph = optimize_for_inference(graph_def, [INPUTS], + [OUTPUTS], dtypes.float32.as_datatype_enum, False) + tf.import_graph_def(output_graph, name='') + + # Definite input and output Tensors for detection_graph + input_tensor = infer_graph.get_tensor_by_name('input:0') + #output_tensor = infer_graph.get_tensor_by_name('resnet_v1_101/SpatialSqueeze:0') + output_tensor = infer_graph.get_tensor_by_name('resnet_v1_101/predictions/Reshape_1:0') + + #tf.global_variables_initializer() + data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config) + infer_sess = tf.compat.v1.Session(graph=infer_graph, config=infer_config) + + num_processed_images = 0 + num_remaining_images = IMAGENET_VALIDATION_IMAGES + + if (not self.args.accuracy_only): # performance check + iteration = 0 + warm_up_iteration = self.args.warmup_steps + total_run = self.args.steps + total_time = 0 + #options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) + #run_metadata = tf.RunMetadata() + + while num_remaining_images >= self.args.batch_size and iteration < total_run: + iteration += 1 + + # Reads and preprocess data + 
data_load_start = time.time() + image_np = data_sess.run(images) + data_load_time = time.time() - data_load_start + + num_processed_images += self.args.batch_size + num_remaining_images -= self.args.batch_size + + start_time = time.time() + infer_sess.run([output_tensor], feed_dict={input_tensor: image_np}) + time_consume = time.time() - start_time + + # only add data loading time for real data, not for dummy data + if self.args.data_location: + time_consume += data_load_time + + #trace = timeline.Timeline(step_stats=run_metadata.step_stats) + #with gfile.Open('resnet101_fp32_int8_master', 'w') as trace_file: + # trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) + + print('Iteration %d: %.3f sec' % (iteration, time_consume)) + if iteration > warm_up_iteration: + total_time += time_consume + + time_average = total_time / (iteration - warm_up_iteration) + print('Average time: %.3f sec' % (time_average)) + + print('Batch size = %d' % self.args.batch_size) + if (self.args.batch_size == 1): + print('Latency: %.3f ms' % (time_average * 1000)) + # print throughput for both batch size 1 and 128 + print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) + + else: # accuracy check + total_accuracy1, total_accuracy5 = (0.0, 0.0) + + + while num_remaining_images >= self.args.batch_size: + # Reads and preprocess data + np_images, np_labels = data_sess.run([images, labels]) + np_labels -= 1 + num_processed_images += self.args.batch_size + num_remaining_images -= self.args.batch_size + + start_time = time.time() + # Compute inference on the preprocessed data + predictions = infer_sess.run(output_tensor, + {input_tensor: np_images}) + elapsed_time = time.time() - start_time + with tf.Graph().as_default() as accu_graph: + # Putting all code within this make things faster. + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) + with tf.compat.v1.Session() as accu_sess: + np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5]) + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1 / num_processed_images, + total_accuracy5 / num_processed_images)) + + def validate_args(self): + """validate the arguments""" + + if not self.args.data_location: + if self.args.accuracy_only: + raise ValueError("You must use real data for accuracy measurement.") if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/models/image_recognition/tensorflow/resnet101/inference/preprocessing.py b/models/image_recognition/tensorflow/resnet101/inference/preprocessing.py index 612f88f62..7a81358fa 100644 --- a/models/image_recognition/tensorflow/resnet101/inference/preprocessing.py +++ b/models/image_recognition/tensorflow/resnet101/inference/preprocessing.py @@ -43,137 +43,136 @@ from tensorflow.python.platform import gfile import vgg_preprocessing - def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. 
- """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1) - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. - feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - return features['image/encoded'], label + """Parses an Example proto containing a training example of an image. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1) + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + return features['image/encoded'], label def eval_image(image, height, width, resize_method, central_fraction=0.875, scope=None): - with tf.name_scope('eval_image'): - if resize_method == 'crop': - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256, 256 * shape[1] / shape[0]], - dtype=tf.int32)), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256 * shape[0] / shape[1], 256], - dtype=tf.int32))) - shape = tf.shape(image) - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) - distorted_image.set_shape([height, width, 3]) - return distorted_image - else: # bilinear - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. 
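For the 'crop' path above, a 480x640 input is first resized so its shorter side becomes 256 (roughly 256x341), and a centered window is then cut out: y0 = (256 - 224) // 2 = 16 and x0 = (341 - 224) // 2 = 58 for a 224x224 crop. A minimal aspect-preserving variant of that logic, offered as a sketch rather than the exact implementation:

    import tensorflow as tf

    def resize_then_center_crop(image, height=224, width=224, short_side=256):
        shape = tf.shape(image)
        h = tf.cast(shape[0], tf.float32)
        w = tf.cast(shape[1], tf.float32)
        # Scale so the shorter side equals short_side, keeping the aspect ratio.
        scale = float(short_side) / tf.minimum(h, w)
        new_h = tf.cast(tf.math.round(h * scale), tf.int32)
        new_w = tf.cast(tf.math.round(w * scale), tf.int32)
        image = tf.image.resize(image, [new_h, new_w])
        y0 = (new_h - height) // 2
        x0 = (new_w - width) // 2
        return tf.image.crop_to_bounding_box(image, y0, x0, height, width)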
- image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image + with tf.compat.v1.name_scope('eval_image'): + if resize_method == 'crop': + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256, 256 * shape[1] / shape[0]], + dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256 * shape[0] / shape[1], 256], + dtype=tf.int32))) + shape = tf.shape(input=image) + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) + distorted_image.set_shape([height, width, 3]) + return distorted_image + else: # bilinear + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [height, width], + method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image class RecordInputImagePreprocessor(object): - """Preprocessor for images with RecordInput format.""" - - def __init__(self, - height, - width, - batch_size, - intra_threads, - resize_method="bilinear"): - - self.height = height - self.width = width - self.batch_size = batch_size - self.intra_threads = intra_threads - self.resize_method = resize_method - # parallel number of files and tfrecords - # file_dict = {1: 8, 2: 8, 4: 8, 8: 8, 16: 20, 32: 20, 64: 28} - # record_dict = {1: 150, 2: 150, 4: 150, 8: 150, 16: 10, 32: 10, 64: 5} - # self.num_files = file_dict.get(self.batch_size, 10) # default is 10 - # self.num_records = record_dict.get(self.batch_size, 2) # default is 2 - - def parse_and_preprocess(self, value): - # parse - image_buffer, label_index = parse_example_proto(value) - # preprocess - image = tf.image.decode_jpeg( - image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') - image = vgg_preprocessing.preprocess_image(image, 224, 224, False) - - return (image, label_index) - - def minibatch(self, dataset, subset, cache_data=False): - - with tf.name_scope('batch_processing'): - - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - - # number of parallel open files and tfrecords should be tuned according to - # different batch size - ds = ds.apply( - parallel_interleave( - tf.data.TFRecordDataset, cycle_length=28, block_length=5, - sloppy=True, - buffer_output_elements=10000, prefetch_input_elements=10000)) - - if cache_data: - ds = ds.take(1).cache().repeat() - - ds = ds.prefetch(buffer_size=10000) - # ds = ds.prefetch(buffer_size=self.batch_size) - - ds = ds.apply( - map_and_batch( - map_func=self.parse_and_preprocess, - batch_size=self.batch_size, - num_parallel_batches=56, - num_parallel_calls=None)) # this number should be tuned - - ds = 
ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE) # this number can be tuned - - ds_iterator = ds.make_one_shot_iterator() - images, labels = ds_iterator.get_next() - # reshape - labels = tf.reshape(labels, [self.batch_size]) - - return images, labels + """Preprocessor for images with RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + intra_threads, + resize_method="bilinear"): + + self.height = height + self.width = width + self.batch_size = batch_size + self.intra_threads = intra_threads + self.resize_method = resize_method + # parallel number of files and tfrecords + # file_dict = {1: 8, 2: 8, 4: 8, 8: 8, 16: 20, 32: 20, 64: 28} + # record_dict = {1: 150, 2: 150, 4: 150, 8: 150, 16: 10, 32: 10, 64: 5} + # self.num_files = file_dict.get(self.batch_size, 10) # default is 10 + # self.num_records = record_dict.get(self.batch_size, 2) # default is 2 + + def parse_and_preprocess(self, value): + # parse + image_buffer, label_index = parse_example_proto(value) + # preprocess + image = tf.image.decode_jpeg( + image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + image = vgg_preprocessing.preprocess_image(image,224,224,False) + + return (image, label_index) + + def minibatch(self, dataset, subset, cache_data=False): + + with tf.compat.v1.name_scope('batch_processing'): + + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + + # number of parallel open files and tfrecords should be tuned according to + # different batch size + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=28, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + + if cache_data: + ds = ds.take(1).cache().repeat() + + ds = ds.prefetch(buffer_size=10000) + #ds = ds.prefetch(buffer_size=self.batch_size) + + ds = ds.apply( + map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size, + num_parallel_batches=56, + num_parallel_calls=None)) # this number should be tuned + + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) + images, labels = ds_iterator.get_next() + # reshape + labels = tf.reshape(labels, [self.batch_size]) + + return images, labels diff --git a/models/image_recognition/tensorflow/resnet101/inference/vgg_preprocessing.py b/models/image_recognition/tensorflow/resnet101/inference/vgg_preprocessing.py index b3b4908a1..96957d9e0 100644 --- a/models/image_recognition/tensorflow/resnet101/inference/vgg_preprocessing.py +++ b/models/image_recognition/tensorflow/resnet101/inference/vgg_preprocessing.py @@ -40,8 +40,6 @@ import tensorflow as tf -slim = tf.contrib.slim - _R_MEAN = 123.68 _G_MEAN = 116.78 _B_MEAN = 103.94 @@ -51,244 +49,244 @@ def _crop(image, offset_height, offset_width, crop_height, crop_width): - """Crops the given image using the provided offsets and sizes. + """Crops the given image using the provided offsets and sizes. - Note that the method doesn't assume we know the input image size but it does - assume we know the input image rank. + Note that the method doesn't assume we know the input image size but it does + assume we know the input image rank. - Args: - image: an image of shape [height, width, channels]. 
- offset_height: a scalar tensor indicating the height offset. - offset_width: a scalar tensor indicating the width offset. - crop_height: the height of the cropped image. - crop_width: the width of the cropped image. + Args: + image: an image of shape [height, width, channels]. + offset_height: a scalar tensor indicating the height offset. + offset_width: a scalar tensor indicating the width offset. + crop_height: the height of the cropped image. + crop_width: the width of the cropped image. - Returns: - the cropped (and resized) image. + Returns: + the cropped (and resized) image. - Raises: - InvalidArgumentError: if the rank is not 3 or if the image dimensions are - less than the crop size. - """ - original_shape = tf.shape(image) + Raises: + InvalidArgumentError: if the rank is not 3 or if the image dimensions are + less than the crop size. + """ + original_shape = tf.shape(input=image) - rank_assertion = tf.Assert( - tf.equal(tf.rank(image), 3), - ['Rank of image must be equal to 3.']) - with tf.control_dependencies([rank_assertion]): - cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]]) + rank_assertion = tf.Assert( + tf.equal(tf.rank(image), 3), + ['Rank of image must be equal to 3.']) + with tf.control_dependencies([rank_assertion]): + cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]]) - size_assertion = tf.Assert( - tf.logical_and( - tf.greater_equal(original_shape[0], crop_height), - tf.greater_equal(original_shape[1], crop_width)), - ['Crop size greater than the image size.']) + size_assertion = tf.Assert( + tf.logical_and( + tf.greater_equal(original_shape[0], crop_height), + tf.greater_equal(original_shape[1], crop_width)), + ['Crop size greater than the image size.']) - offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) + offsets = tf.cast(tf.stack([offset_height, offset_width, 0]), dtype=tf.int32) - # Use tf.slice instead of crop_to_bounding box as it accepts tensors to - # define the crop size. - with tf.control_dependencies([size_assertion]): - image = tf.slice(image, offsets, cropped_shape) - return tf.reshape(image, cropped_shape) + # Use tf.slice instead of crop_to_bounding box as it accepts tensors to + # define the crop size. + with tf.control_dependencies([size_assertion]): + image = tf.slice(image, offsets, cropped_shape) + return tf.reshape(image, cropped_shape) def _random_crop(image_list, crop_height, crop_width): - """Crops the given list of images. - - The function applies the same crop to each image in the list. This can be - effectively applied when there are multiple image inputs of the same - dimension such as: - - image, depths, normals = _random_crop([image, depths, normals], 120, 150) - - Args: - image_list: a list of image tensors of the same dimension but possibly - varying channel. - crop_height: the new height. - crop_width: the new width. - - Returns: - the image_list with cropped images. - - Raises: - ValueError: if there are multiple image inputs provided with different size - or the images are smaller than the crop dimensions. - """ - if not image_list: - raise ValueError('Empty image_list.') - - # Compute the rank assertions. 
- rank_assertions = [] - for i in range(len(image_list)): - image_rank = tf.rank(image_list[i]) - rank_assert = tf.Assert( - tf.equal(image_rank, 3), - ['Wrong rank for tensor %s [expected] [actual]', - image_list[i].name, 3, image_rank]) - rank_assertions.append(rank_assert) - - with tf.control_dependencies([rank_assertions[0]]): - image_shape = tf.shape(image_list[0]) - image_height = image_shape[0] - image_width = image_shape[1] - crop_size_assert = tf.Assert( - tf.logical_and( - tf.greater_equal(image_height, crop_height), - tf.greater_equal(image_width, crop_width)), - ['Crop size greater than the image size.']) - - asserts = [rank_assertions[0], crop_size_assert] - - for i in range(1, len(image_list)): - image = image_list[i] - asserts.append(rank_assertions[i]) - with tf.control_dependencies([rank_assertions[i]]): - shape = tf.shape(image) - height = shape[0] - width = shape[1] - - height_assert = tf.Assert( - tf.equal(height, image_height), - ['Wrong height for tensor %s [expected][actual]', - image.name, height, image_height]) - width_assert = tf.Assert( - tf.equal(width, image_width), - ['Wrong width for tensor %s [expected][actual]', - image.name, width, image_width]) - asserts.extend([height_assert, width_assert]) - - # Create a random bounding box. - # - # Use tf.random_uniform and not numpy.random.rand as doing the former would - # generate random numbers at graph eval time, unlike the latter which - # generates random numbers at graph definition time. - with tf.control_dependencies(asserts): - max_offset_height = tf.reshape(image_height - crop_height + 1, []) - with tf.control_dependencies(asserts): - max_offset_width = tf.reshape(image_width - crop_width + 1, []) - offset_height = tf.random_uniform( - [], maxval=max_offset_height, dtype=tf.int32) - offset_width = tf.random_uniform( - [], maxval=max_offset_width, dtype=tf.int32) - - return [_crop(image, offset_height, offset_width, - crop_height, crop_width) for image in image_list] + """Crops the given list of images. + + The function applies the same crop to each image in the list. This can be + effectively applied when there are multiple image inputs of the same + dimension such as: + + image, depths, normals = _random_crop([image, depths, normals], 120, 150) + + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the new height. + crop_width: the new width. + + Returns: + the image_list with cropped images. + + Raises: + ValueError: if there are multiple image inputs provided with different size + or the images are smaller than the crop dimensions. + """ + if not image_list: + raise ValueError('Empty image_list.') + + # Compute the rank assertions. 
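The docstring example above (cropping image, depths and normals together) is the point of _random_crop: a single offset is sampled and reused for every tensor, so aligned inputs stay aligned after cropping. A minimal sketch of that shared-offset idea, using a hypothetical helper name:

    import tensorflow as tf

    def shared_random_crop(tensors, crop_h, crop_w):
        # Sample one offset from the first tensor and apply it to all of them.
        shape = tf.shape(tensors[0])
        off_h = tf.random.uniform([], maxval=shape[0] - crop_h + 1, dtype=tf.int32)
        off_w = tf.random.uniform([], maxval=shape[1] - crop_w + 1, dtype=tf.int32)
        return [t[off_h:off_h + crop_h, off_w:off_w + crop_w, :] for t in tensors]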
+ rank_assertions = [] + for i in range(len(image_list)): + image_rank = tf.rank(image_list[i]) + rank_assert = tf.Assert( + tf.equal(image_rank, 3), + ['Wrong rank for tensor %s [expected] [actual]', + image_list[i].name, 3, image_rank]) + rank_assertions.append(rank_assert) + + with tf.control_dependencies([rank_assertions[0]]): + image_shape = tf.shape(input=image_list[0]) + image_height = image_shape[0] + image_width = image_shape[1] + crop_size_assert = tf.Assert( + tf.logical_and( + tf.greater_equal(image_height, crop_height), + tf.greater_equal(image_width, crop_width)), + ['Crop size greater than the image size.']) + + asserts = [rank_assertions[0], crop_size_assert] + + for i in range(1, len(image_list)): + image = image_list[i] + asserts.append(rank_assertions[i]) + with tf.control_dependencies([rank_assertions[i]]): + shape = tf.shape(input=image) + height = shape[0] + width = shape[1] + + height_assert = tf.Assert( + tf.equal(height, image_height), + ['Wrong height for tensor %s [expected][actual]', + image.name, height, image_height]) + width_assert = tf.Assert( + tf.equal(width, image_width), + ['Wrong width for tensor %s [expected][actual]', + image.name, width, image_width]) + asserts.extend([height_assert, width_assert]) + + # Create a random bounding box. + # + # Use tf.random_uniform and not numpy.random.rand as doing the former would + # generate random numbers at graph eval time, unlike the latter which + # generates random numbers at graph definition time. + with tf.control_dependencies(asserts): + max_offset_height = tf.reshape(image_height - crop_height + 1, []) + with tf.control_dependencies(asserts): + max_offset_width = tf.reshape(image_width - crop_width + 1, []) + offset_height = tf.random.uniform( + [], maxval=max_offset_height, dtype=tf.int32) + offset_width = tf.random.uniform( + [], maxval=max_offset_width, dtype=tf.int32) + + return [_crop(image, offset_height, offset_width, + crop_height, crop_width) for image in image_list] def _central_crop(image_list, crop_height, crop_width): - """Performs central crops of the given image list. + """Performs central crops of the given image list. - Args: - image_list: a list of image tensors of the same dimension but possibly - varying channel. - crop_height: the height of the image following the crop. - crop_width: the width of the image following the crop. + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the height of the image following the crop. + crop_width: the width of the image following the crop. - Returns: - the list of cropped images. - """ - outputs = [] - for image in image_list: - image_height = tf.shape(image)[0] - image_width = tf.shape(image)[1] + Returns: + the list of cropped images. + """ + outputs = [] + for image in image_list: + image_height = tf.shape(input=image)[0] + image_width = tf.shape(input=image)[1] - offset_height = (image_height - crop_height) / 2 - offset_width = (image_width - crop_width) / 2 + offset_height = (image_height - crop_height) / 2 + offset_width = (image_width - crop_width) / 2 - outputs.append(_crop(image, offset_height, offset_width, - crop_height, crop_width)) - return outputs + outputs.append(_crop(image, offset_height, offset_width, + crop_height, crop_width)) + return outputs def _mean_image_subtraction(image, means): - """Subtracts the given means from each image channel. + """Subtracts the given means from each image channel. 
- For example: - means = [123.68, 116.779, 103.939] - image = _mean_image_subtraction(image, means) + For example: + means = [123.68, 116.779, 103.939] + image = _mean_image_subtraction(image, means) - Note that the rank of `image` must be known. + Note that the rank of `image` must be known. - Args: - image: a tensor of size [height, width, C]. - means: a C-vector of values to subtract from each channel. + Args: + image: a tensor of size [height, width, C]. + means: a C-vector of values to subtract from each channel. - Returns: - the centered image. + Returns: + the centered image. - Raises: - ValueError: If the rank of `image` is unknown, if `image` has a rank other - than three or if the number of channels in `image` doesn't match the - number of values in `means`. - """ - if image.get_shape().ndims != 3: - raise ValueError('Input must be of size [height, width, C>0]') - num_channels = image.get_shape().as_list()[-1] - if len(means) != num_channels: - raise ValueError('len(means) must match the number of channels') + Raises: + ValueError: If the rank of `image` is unknown, if `image` has a rank other + than three or if the number of channels in `image` doesn't match the + number of values in `means`. + """ + if image.get_shape().ndims != 3: + raise ValueError('Input must be of size [height, width, C>0]') + num_channels = image.get_shape().as_list()[-1] + if len(means) != num_channels: + raise ValueError('len(means) must match the number of channels') - channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) - for i in range(num_channels): - channels[i] -= means[i] - return tf.concat(axis=2, values=channels) + channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) + for i in range(num_channels): + channels[i] -= means[i] + return tf.concat(axis=2, values=channels) def _smallest_size_at_least(height, width, smallest_side): - """Computes new shape with the smallest side equal to `smallest_side`. + """Computes new shape with the smallest side equal to `smallest_side`. - Computes new shape with the smallest side equal to `smallest_side` while - preserving the original aspect ratio. + Computes new shape with the smallest side equal to `smallest_side` while + preserving the original aspect ratio. - Args: - height: an int32 scalar tensor indicating the current height. - width: an int32 scalar tensor indicating the current width. - smallest_side: A python integer or scalar `Tensor` indicating the size of - the smallest side after resize. + Args: + height: an int32 scalar tensor indicating the current height. + width: an int32 scalar tensor indicating the current width. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. - Returns: - new_height: an int32 scalar tensor indicating the new height. - new_width: and int32 scalar tensor indicating the new width. - """ - smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) + Returns: + new_height: an int32 scalar tensor indicating the new height. + new_width: and int32 scalar tensor indicating the new width. 
+ """ + smallest_side = tf.convert_to_tensor(value=smallest_side, dtype=tf.int32) - height = tf.to_float(height) - width = tf.to_float(width) - smallest_side = tf.to_float(smallest_side) + height = tf.cast(height, dtype=tf.float32) + width = tf.cast(width, dtype=tf.float32) + smallest_side = tf.cast(smallest_side, dtype=tf.float32) - scale = tf.cond(tf.greater(height, width), - lambda: smallest_side / width, - lambda: smallest_side / height) - new_height = tf.to_int32(tf.rint(height * scale)) - new_width = tf.to_int32(tf.rint(width * scale)) - return new_height, new_width + scale = tf.cond(pred=tf.greater(height, width), + true_fn=lambda: smallest_side / width, + false_fn=lambda: smallest_side / height) + new_height = tf.cast(tf.math.rint(height * scale), dtype=tf.int32) + new_width = tf.cast(tf.math.rint(width * scale), dtype=tf.int32) + return new_height, new_width def _aspect_preserving_resize(image, smallest_side): - """Resize images preserving the original aspect ratio. - - Args: - image: A 3-D image `Tensor`. - smallest_side: A python integer or scalar `Tensor` indicating the size of - the smallest side after resize. - - Returns: - resized_image: A 3-D tensor containing the resized image. - """ - smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) - # import pdb - # pdb.set_trace() - shape = tf.shape(image) - - height = shape[0] - width = shape[1] - new_height, new_width = _smallest_size_at_least(height, width, smallest_side) - image = tf.expand_dims(image, 0) - resized_image = tf.image.resize_bilinear(image, [new_height, new_width], - align_corners=False) - resized_image = tf.squeeze(resized_image) - resized_image.set_shape([None, None, 3]) - return resized_image + """Resize images preserving the original aspect ratio. + + Args: + image: A 3-D image `Tensor`. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. + + Returns: + resized_image: A 3-D tensor containing the resized image. + """ + smallest_side = tf.convert_to_tensor(value=smallest_side, dtype=tf.int32) + #import pdb + #pdb.set_trace() + shape = tf.shape(input=image) + + height = shape[0] + width = shape[1] + new_height, new_width = _smallest_size_at_least(height, width, smallest_side) + image = tf.expand_dims(image, 0) + resized_image = tf.image.resize(image, [new_height, new_width], + method=tf.image.ResizeMethod.BILINEAR) + resized_image = tf.squeeze(resized_image) + resized_image.set_shape([None, None, 3]) + return resized_image def preprocess_for_train(image, @@ -296,78 +294,78 @@ def preprocess_for_train(image, output_width, resize_side_min=_RESIZE_SIDE_MIN, resize_side_max=_RESIZE_SIDE_MAX): - """Preprocesses the given image for training. - - Note that the actual resizing scale is sampled from - [`resize_size_min`, `resize_size_max`]. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - resize_side_min: The lower bound for the smallest side of the image for - aspect-preserving resizing. - resize_side_max: The upper bound for the smallest side of the image for - aspect-preserving resizing. - - Returns: - A preprocessed image. 
- """ - resize_side = tf.random_uniform( - [], minval=resize_side_min, maxval=resize_side_max + 1, dtype=tf.int32) - - image = _aspect_preserving_resize(image, resize_side) - image = _random_crop([image], output_height, output_width)[0] - image.set_shape([output_height, output_width, 3]) - image = tf.to_float(image) - image = tf.image.random_flip_left_right(image) - return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + """Preprocesses the given image for training. + + Note that the actual resizing scale is sampled from + [`resize_size_min`, `resize_size_max`]. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. + + Returns: + A preprocessed image. + """ + resize_side = tf.random.uniform( + [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32) + + image = _aspect_preserving_resize(image, resize_side) + image = _random_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.cast(image, dtype=tf.float32) + image = tf.image.random_flip_left_right(image) + return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) def preprocess_for_eval(image, output_height, output_width, resize_side): - """Preprocesses the given image for evaluation. + """Preprocesses the given image for evaluation. - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - resize_side: The smallest side of the image for aspect-preserving resizing. + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + resize_side: The smallest side of the image for aspect-preserving resizing. - Returns: - A preprocessed image. - """ - image = _aspect_preserving_resize(image, resize_side) - image = _central_crop([image], output_height, output_width)[0] - image.set_shape([output_height, output_width, 3]) - image = tf.to_float(image) - return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + Returns: + A preprocessed image. + """ + image = _aspect_preserving_resize(image, resize_side) + image = _central_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.cast(image, dtype=tf.float32) + return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) def preprocess_image(image, output_height, output_width, is_training=False, resize_side_min=_RESIZE_SIDE_MIN, resize_side_max=_RESIZE_SIDE_MAX): - """Preprocesses the given image. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - is_training: `True` if we're preprocessing the image for training and - `False` otherwise. - resize_side_min: The lower bound for the smallest side of the image for - aspect-preserving resizing. If `is_training` is `False`, then this value - is used for rescaling. 
- resize_side_max: The upper bound for the smallest side of the image for - aspect-preserving resizing. If `is_training` is `False`, this value is - ignored. Otherwise, the resize side is sampled from - [resize_size_min, resize_size_max]. - - Returns: - A preprocessed image. - """ - if is_training: - return preprocess_for_train(image, output_height, output_width, - resize_side_min, resize_side_max) - else: - return preprocess_for_eval(image, output_height, output_width, - resize_side_min) + """Preprocesses the given image. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + is_training: `True` if we're preprocessing the image for training and + `False` otherwise. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, then this value + is used for rescaling. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, this value is + ignored. Otherwise, the resize side is sampled from + [resize_size_min, resize_size_max]. + + Returns: + A preprocessed image. + """ + if is_training: + return preprocess_for_train(image, output_height, output_width, + resize_side_min, resize_side_max) + else: + return preprocess_for_eval(image, output_height, output_width, + resize_side_min) diff --git a/models/image_recognition/tensorflow/resnet101/int8/calibration.py b/models/image_recognition/tensorflow/resnet101/int8/calibration.py index 706760998..0808ddccc 100644 --- a/models/image_recognition/tensorflow/resnet101/int8/calibration.py +++ b/models/image_recognition/tensorflow/resnet101/int8/calibration.py @@ -51,126 +51,124 @@ NUM_TEST_IMAGES = 50000 - def load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() - - import os - file_ext = os.path.splitext(model_file)[1] + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') - tf.train.write_graph(graph_def, '/tmp/', 'optimized_graph.pb', as_text=False) + import os + file_ext = os.path.splitext(model_file)[1] - return graph + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + tf.io.write_graph(graph_def, '/tmp/', 'optimized_graph.pb',as_text=False) + return graph if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--data_location", default=None, - help="full path to the validation data") - parser.add_argument("--input_height", default=None, - type=int, help="input height") - parser.add_argument("--input_width", default=None, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="resnet_v1_101/SpatialSqueeze", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - 
parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph - else: - sys.exit("Please provide a graph file.") - if args.input_height: - input_height = args.input_height - else: - input_height = 224 - if args.input_width: - input_width = args.input_width - else: - input_width = 224 - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - data_location = args.data_location - dataset = datasets.ImagenetData(data_location) - preprocessor = preprocessing.ImagePreprocessor( - input_height, input_width, batch_size, - 1, # device count - tf.float32, # data_type for input fed to the graph - train=False, # doing inference - resize_method='crop') - images, labels = preprocessor.minibatch(dataset, subset='train') - graph = load_graph(model_file) - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") - - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - - total_accuracy1, total_accuracy5 = (0.0, 0.0) - num_processed_images = 0 - num_remaining_images = 5000 - top1 = 0 - with tf.Session() as sess: - sess_graph = tf.Session(graph=graph, config=config) - - while num_remaining_images >= batch_size: - # Reads and preprocess data - np_images, np_labels = sess.run([images[0], labels[0]]) - np_labels -= 1 - # print(np_labels.shape) - num_processed_images += batch_size - num_remaining_images -= batch_size - # Compute inference on the preprocessed data - predictions1 = sess_graph.run(output_tensor, - {input_tensor: np_images}) - # predictions = predictions +1 - # print(predictions1) - predictions2 = tf.argmax(predictions1, axis=1) - predictions = sess.run(predictions2) - top1 += batch_size - (np.count_nonzero(predictions - np_labels)) - # print(top1/num_processed_images) - # print(num_processed_images) - # print(predictions) - # accuracy1 = tf.reduce_sum( - # tf.nn.in_top_k(tf.cast(tf.Variable(predictions2), tf.float32), - # tf.cast((tf.constant(np_labels), 1), tf.float32))) - accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions1), - tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions1), - tf.constant(np_labels), 5), tf.float32)) - np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) - - # print(labels) - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=None, + type=int, help="input height") + parser.add_argument("--input_width", default=None, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="resnet_v1_101/SpatialSqueeze", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 224 + if args.input_width: + input_width = args.input_width + else: + input_width = 224 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + dataset = datasets.ImagenetData(data_location) + preprocessor = preprocessing.ImagePreprocessor( + input_height, input_width, batch_size, + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference + resize_method='crop') + images, labels = preprocessor.minibatch(dataset, subset='train') + graph = load_graph(model_file) + input_tensor = graph.get_tensor_by_name(input_layer + ":0") + output_tensor = graph.get_tensor_by_name(output_layer + ":0") + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = 5000 + top1 = 0 + with tf.compat.v1.Session() as sess: + sess_graph = tf.compat.v1.Session(graph=graph, config=config) + + while num_remaining_images >= batch_size: + # Reads and preprocess data + np_images, np_labels = sess.run([images[0], labels[0]]) + np_labels -= 1 + #print(np_labels.shape) + num_processed_images += batch_size + num_remaining_images -= batch_size + # Compute inference on the preprocessed data + predictions1 = sess_graph.run(output_tensor, + {input_tensor: np_images}) + #predictions = predictions +1 + #print(predictions1) + predictions2 = tf.argmax(input=predictions1, axis=1) + predictions = sess.run(predictions2) + top1 += batch_size - (np.count_nonzero(predictions - np_labels)) + #print(top1/num_processed_images) + #print(num_processed_images) + #print(predictions) + #accuracy1 = tf.reduce_sum( + # tf.nn.in_top_k(tf.cast(tf.Variable(predictions2), tf.float32), + # tf.cast((tf.constant(np_labels), 1), tf.float32))) + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions1), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + 
input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions1), + targets=tf.constant(np_labels), k=5), tf.float32)) + np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) + + ##print(labels) + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1/num_processed_images, + total_accuracy5/num_processed_images)) diff --git a/models/image_recognition/tensorflow/resnet101/int8/cnn_util.py b/models/image_recognition/tensorflow/resnet101/int8/cnn_util.py index cd1f0d4d1..b5639df6f 100644 --- a/models/image_recognition/tensorflow/resnet101/int8/cnn_util.py +++ b/models/image_recognition/tensorflow/resnet101/int8/cnn_util.py @@ -40,11 +40,12 @@ def tensorflow_version_tuple(): - v = tf.__version__ - major, minor, patch = v.split('.') - return (int(major), int(minor), patch) + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) def tensorflow_version(): - vt = tensorflow_version_tuple() - return vt[0] * 1000 + vt[1] + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] + diff --git a/models/image_recognition/tensorflow/resnet101/int8/datasets.py b/models/image_recognition/tensorflow/resnet101/int8/datasets.py index cbde33a52..8dd5a8690 100644 --- a/models/image_recognition/tensorflow/resnet101/int8/datasets.py +++ b/models/image_recognition/tensorflow/resnet101/int8/datasets.py @@ -44,61 +44,61 @@ class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, data_dir=None): - self.name = name - if data_dir is None: - raise ValueError('Data directory not specified') - self.data_dir = data_dir + def __init__(self, name, data_dir=None): + self.name = name + if data_dir is None: + raise ValueError('Data directory not specified') + self.data_dir = data_dir - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.compat.v1.TFRecordReader() - @abstractmethod - def num_classes(self): - pass + @abstractmethod + def num_classes(self): + pass - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name class FlowersData(Dataset): - def __init__(self, data_dir=None): - super(FlowersData, self).__init__('Flowers', data_dir) + def __init__(self, data_dir=None): + super(FlowersData, self).__init__('Flowers', data_dir) - def num_classes(self): - return 5 + def num_classes(self): + return 5 - def num_examples_per_epoch(self, subset): - if subset == 'train': - return 3170 - elif subset == 'validation': - return 500 - else: - raise ValueError('Invalid data subset "%s"' % subset) + def num_examples_per_epoch(self, subset): + if subset == 'train': + return 3170 + elif subset == 'validation': + return 500 + else: + raise ValueError('Invalid data subset "%s"' % subset) class ImagenetData(Dataset): - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('ImageNet', data_dir) + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('ImageNet', data_dir) - def num_classes(self): - return 1000 + def num_classes(self): + return 
1000 - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return 1281167 - elif subset == 'validation': - return 50000 - else: - raise ValueError('Invalid data subset "%s"' % subset) + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return 1281167 + elif subset == 'validation': + return 50000 + else: + raise ValueError('Invalid data subset "%s"' % subset) diff --git a/models/image_recognition/tensorflow/resnet101/int8/preprocessing.py b/models/image_recognition/tensorflow/resnet101/int8/preprocessing.py index 78c83653e..1e8e6559a 100644 --- a/models/image_recognition/tensorflow/resnet101/int8/preprocessing.py +++ b/models/image_recognition/tensorflow/resnet101/int8/preprocessing.py @@ -43,381 +43,378 @@ from tensorflow.python.ops import data_flow_ops import cnn_util - def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - - The output of the build_image_data.py image preprocessing script is a dataset - containing serialized Example protocol buffers. Each Example proto contains - the following fields: - - image/height: 462 - image/width: 581 - image/colorspace: 'RGB' - image/channels: 3 - image/class/label: 615 - image/class/synset: 'n03623198' - image/class/text: 'knee pad' - image/object/bbox/xmin: 0.1 - image/object/bbox/xmax: 0.9 - image/object/bbox/ymin: 0.2 - image/object/bbox/ymax: 0.6 - image/object/bbox/label: 615 - image/format: 'JPEG' - image/filename: 'ILSVRC2012_val_00041207.JPEG' - image/encoded: - - Args: - example_serialized: scalar Tensor tf.string containing a serialized - Example protocol buffer. - - Returns: - image_buffer: Tensor tf.string containing the contents of a JPEG file. - label: Tensor tf.int32 containing the label. - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. - text: Tensor tf.string containing the human-readable label. - """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. - feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) - - # Note that we impose an ordering of (y, x) just to make life difficult. - bbox = tf.concat([ymin, xmin, ymax, xmax], 0) - - # Force the variable number of bounding boxes into the shape - # [1, num_boxes, coords]. - bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) - - return features['image/encoded'], label, bbox, features['image/class/text'] + """Parses an Example proto containing a training example of an image. 
+ + The output of the build_image_data.py image preprocessing script is a dataset + containing serialized Example protocol buffers. Each Example proto contains + the following fields: + + image/height: 462 + image/width: 581 + image/colorspace: 'RGB' + image/channels: 3 + image/class/label: 615 + image/class/synset: 'n03623198' + image/class/text: 'knee pad' + image/object/bbox/xmin: 0.1 + image/object/bbox/xmax: 0.9 + image/object/bbox/ymin: 0.2 + image/object/bbox/ymax: 0.6 + image/object/bbox/label: 615 + image/format: 'JPEG' + image/filename: 'ILSVRC2012_val_00041207.JPEG' + image/encoded: + + Args: + example_serialized: scalar Tensor tf.string containing a serialized + Example protocol buffer. + + Returns: + image_buffer: Tensor tf.string containing the contents of a JPEG file. + label: Tensor tf.int32 containing the label. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + text: Tensor tf.string containing the human-readable label. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(a=bbox, perm=[0, 2, 1]) + + return features['image/encoded'], label, bbox, features['image/class/text'] def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): - """Decode a JPEG string into one 3-D float image Tensor. - - Args: - image_buffer: scalar string Tensor. - scope: Optional scope for op_scope. - Returns: - 3-D float Tensor with values ranging from [0, 1). - """ - # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): - # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): - with tf.name_scope(scope or 'decode_jpeg'): - # Decode the string as an RGB JPEG. - # Note that the resulting image contains an unknown height and width - # that is set dynamically by decode_jpeg. In other words, the height - # and width of image is unknown at compile-time. - image = tf.image.decode_jpeg(image_buffer, channels=3, - fancy_upscaling=False, - dct_method='INTEGER_FAST') - - # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') - - return image + """Decode a JPEG string into one 3-D float image Tensor. + + Args: + image_buffer: scalar string Tensor. + scope: Optional scope for op_scope. 
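For reference, the tf.io migration in parse_example_proto above reduces to the feature-spec calls shown in this minimal sketch, which pulls only the JPEG bytes and the class label out of a serialized Example (the feature keys match the build_image_data.py output documented in the docstring; everything else here is illustrative):

import tensorflow as tf

def parse_image_and_label(example_serialized):
    # Minimal dense feature spec: encoded JPEG bytes plus the integer class label.
    feature_map = {
        'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
    }
    features = tf.io.parse_single_example(serialized=example_serialized,
                                          features=feature_map)
    label = tf.cast(features['image/class/label'], dtype=tf.int32)
    return features['image/encoded'], label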
+ Returns: + 3-D float Tensor with values ranging from [0, 1). + """ + # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): + # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): + with tf.compat.v1.name_scope(scope or 'decode_jpeg'): + # Decode the string as an RGB JPEG. + # Note that the resulting image contains an unknown height and width + # that is set dynamically by decode_jpeg. In other words, the height + # and width of image is unknown at compile-time. + image = tf.image.decode_jpeg(image_buffer, channels=3, + fancy_upscaling=False, + dct_method='INTEGER_FAST') + + # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') + + return image def eval_image(image, height, width, bbox, thread_id, resize): - """Get the image for model evaluation.""" - with tf.name_scope('eval_image'): - if not thread_id: - tf.summary.image( - 'original_image', tf.expand_dims(image, 0)) - - if resize == 'crop': - # Note: This is much slower than crop_to_bounding_box - # It seems that the redundant pad step has huge overhead - # distorted_image = tf.image.resize_image_with_crop_or_pad(image, - # height, width) - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, tf.convert_to_tensor( - [256, 256 * shape[1] / shape[0]], dtype=tf.int32)), - lambda: tf.image.resize_images(image, tf.convert_to_tensor([256 * shape[0] / shape[1], 256], dtype=tf.int32))) - shape = tf.shape(image) - - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - # y0=tf.random_uniform([],minval=0,maxval=(shape[0] - height + 1), dtype=tf.int32) - # x0=tf.random_uniform([],minval=0,maxval=(shape[1] - width + 1), dtype=tf.int32) - # distorted_image = tf.slice(image, [y0,x0,0], [height,width,3]) - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, - width) - else: - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=0.5, - aspect_ratio_range=[0.90, 1.10], - area_range=[0.10, 1.0], - max_attempts=100, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, _ = sample_distorted_bounding_box - # Crop the image to the specified bounding box. - distorted_image = tf.slice(image, bbox_begin, bbox_size) - resize_method = { - 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, - 'bilinear': tf.image.ResizeMethod.BILINEAR, - 'bicubic': tf.image.ResizeMethod.BICUBIC, - 'area': tf.image.ResizeMethod.AREA - }[resize] - # This resizing operation may distort the images because the aspect - # ratio is not respected. 
- if cnn_util.tensorflow_version() >= 11: - distorted_image = tf.image.resize_images( - distorted_image, [height, width], - resize_method, - align_corners=False) - else: - distorted_image = tf.image.resize_images( - distorted_image, height, width, resize_method, align_corners=False) - distorted_image.set_shape([height, width, 3]) - if not thread_id: - tf.summary.image( - 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) - image = distorted_image - return image + """Get the image for model evaluation.""" + with tf.compat.v1.name_scope('eval_image'): + if not thread_id: + tf.compat.v1.summary.image( + 'original_image', tf.expand_dims(image, 0)) + + if resize == 'crop': + # Note: This is much slower than crop_to_bounding_box + # It seems that the redundant pad step has huge overhead + # distorted_image = tf.image.resize_image_with_crop_or_pad(image, + # height, width) + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, tf.convert_to_tensor(value=[256, 256*shape[1]/shape[0]], dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, tf.convert_to_tensor(value=[256*shape[0]/shape[1], 256], dtype=tf.int32))) + shape = tf.shape(input=image) + + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + #y0=tf.random_uniform([],minval=0,maxval=(shape[0] - height + 1), dtype=tf.int32) + #x0=tf.random_uniform([],minval=0,maxval=(shape[1] - width + 1), dtype=tf.int32) + ## distorted_image = tf.slice(image, [y0,x0,0], [height,width,3]) + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, + width) + else: + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + image_size=tf.shape(input=image), + bounding_boxes=bbox, + min_object_covered=0.5, + aspect_ratio_range=[0.90, 1.10], + area_range=[0.10, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, _ = sample_distorted_bounding_box + # Crop the image to the specified bounding box. + distorted_image = tf.slice(image, bbox_begin, bbox_size) + resize_method = { + 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, + 'bilinear': tf.image.ResizeMethod.BILINEAR, + 'bicubic': tf.image.ResizeMethod.BICUBIC, + 'area': tf.image.ResizeMethod.AREA + }[resize] + # This resizing operation may distort the images because the aspect + # ratio is not respected. + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize( + distorted_image, [height, width], + resize_method) + else: + distorted_image = tf.image.resize( + distorted_image, height, width, resize_method) + distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.compat.v1.summary.image( + 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) + image = distorted_image + return image def distort_image(image, height, width, bbox, thread_id=0, scope=None): - """Distort one image for training a network. - - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not effect the label. - - Args: - image: 3-D float Tensor of image - height: integer - width: integer - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. - thread_id: integer indicating the preprocessing thread. - scope: Optional scope for op_scope. - Returns: - 3-D float Tensor of distorted image used for training. 
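The 'crop' branch of the rewritten eval_image above resizes the shorter image side to 256 and then takes a centered height-by-width crop. A rough standalone equivalent, assuming a 3-D RGB image tensor and ignoring the summary ops (illustrative only):

import tensorflow as tf

def resize_then_center_crop(image, height=224, width=224):
    # Scale the shorter side to 256 while preserving the aspect ratio.
    shape = tf.shape(input=image)
    short_side = tf.cast(tf.minimum(shape[0], shape[1]), tf.float32)
    scale = 256.0 / short_side
    new_hw = tf.cast(tf.cast(shape[:2], tf.float32) * scale, tf.int32)
    image = tf.image.resize(image, new_hw, method=tf.image.ResizeMethod.BILINEAR)
    # Take the centered crop of the requested output size.
    y0 = (new_hw[0] - height) // 2
    x0 = (new_hw[1] - width) // 2
    return tf.image.crop_to_bounding_box(image, y0, x0, height, width)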
- """ - # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): - # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): - with tf.name_scope(scope or 'distort_image'): - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - - # After this point, all image pixels reside in [0,1) - # until the very end, when they're rescaled to (-1, 1). The various - # adjust_* ops all require this range for dtype float. - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - - # Display the bounding box in the first thread only. - if not thread_id: - image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), - bbox) - tf.summary.image( - 'image_with_bounding_boxes', image_with_box) - - # A large fraction of image datasets contain a human-annotated bounding - # box delineating the region of the image containing the object of interest. - # We choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an allowed - # range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image. - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=0.1, - aspect_ratio_range=[0.99, 1.01], - area_range=[0.05, 1.0], - max_attempts=100, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - if not thread_id: - image_with_distorted_box = tf.image.draw_bounding_boxes( - tf.expand_dims(image, 0), distort_bbox) - tf.summary.image( - 'images_with_distorted_bounding_box', - image_with_distorted_box) - - # Crop the image to the specified bounding box. - distorted_image = tf.slice(image, bbox_begin, bbox_size) - - # This resizing operation may distort the images because the aspect - # ratio is not respected. We select a resize method in a round robin - # fashion based on the thread number. - # Note that ResizeMethod contains 4 enumerated resizing methods. - resize_method = thread_id % 4 - if cnn_util.tensorflow_version() >= 11: - distorted_image = tf.image.resize_images( - distorted_image, [height, width], resize_method, align_corners=False) - else: - distorted_image = tf.image.resize_images( - distorted_image, height, width, resize_method, align_corners=False) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. - distorted_image.set_shape([height, width, 3]) - if not thread_id: - tf.summary.image( - 'cropped_resized_image', - tf.expand_dims(distorted_image, 0)) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - - # Randomly distort the colors. - distorted_image = distort_color(distorted_image, thread_id) - - # Note: This ensures the scaling matches the output of eval_image - distorted_image *= 256 - - if not thread_id: - tf.summary.image( - 'final_distorted_image', - tf.expand_dims(distorted_image, 0)) - return distorted_image + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. 
+ + Args: + image: 3-D float Tensor of image + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + thread_id: integer indicating the preprocessing thread. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor of distorted image used for training. + """ + # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): + # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + with tf.compat.v1.name_scope(scope or 'distort_image'): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # After this point, all image pixels reside in [0,1) + # until the very end, when they're rescaled to (-1, 1). The various + # adjust_* ops all require this range for dtype float. + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + # Display the bounding box in the first thread only. + if not thread_id: + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + tf.compat.v1.summary.image( + 'image_with_bounding_boxes', image_with_box) + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an allowed + # range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + image_size=tf.shape(input=image), + bounding_boxes=bbox, + min_object_covered=0.1, + aspect_ratio_range=[0.99, 1.01], + area_range=[0.05, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + if not thread_id: + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distort_bbox) + tf.compat.v1.summary.image( + 'images_with_distorted_bounding_box', + image_with_distorted_box) + + # Crop the image to the specified bounding box. + distorted_image = tf.slice(image, bbox_begin, bbox_size) + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + resize_method = thread_id % 4 + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize( + distorted_image, [height, width], resize_method) + else: + distorted_image = tf.image.resize( + distorted_image, height, width, resize_method) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. + distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.compat.v1.summary.image( + 'cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + + # Randomly distort the colors. 
+ distorted_image = distort_color(distorted_image, thread_id) + + # Note: This ensures the scaling matches the output of eval_image + distorted_image *= 256 + + if not thread_id: + tf.compat.v1.summary.image( + 'final_distorted_image', + tf.expand_dims(distorted_image, 0)) + return distorted_image def distort_color(image, thread_id=0, scope=None): - """Distort the color of the image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather then adding that level of complication, we select a distinct ordering - of color ops for each preprocessing thread. - - Args: - image: Tensor containing single image. - thread_id: preprocessing thread ID. - scope: Optional scope for op_scope. - Returns: - color-distorted image - """ - # with tf.op_scope([image], scope, 'distort_color'): - # with tf.name_scope(scope, 'distort_color', [image]): - with tf.name_scope(scope or 'distort_color'): - color_ordering = thread_id % 2 - - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - - # The random_* ops do not necessarily clamp. - image = tf.clip_by_value(image, 0.0, 1.0) - return image + """Distort the color of the image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: Tensor containing single image. + thread_id: preprocessing thread ID. + scope: Optional scope for op_scope. + Returns: + color-distorted image + """ + # with tf.op_scope([image], scope, 'distort_color'): + # with tf.name_scope(scope, 'distort_color', [image]): + with tf.compat.v1.name_scope(scope or 'distort_color'): + color_ordering = thread_id % 2 + + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + + # The random_* ops do not necessarily clamp. 
+ image = tf.clip_by_value(image, 0.0, 1.0) + return image class ImagePreprocessor(object): - """Preprocessor for input images.""" - - def __init__(self, - height, - width, - batch_size, - device_count, - dtype=tf.float32, - train=True, - distortions=None, - resize_method=None): - self.height = height - self.width = width - self.batch_size = batch_size - self.device_count = device_count - self.dtype = dtype - self.train = train - self.resize_method = resize_method - if distortions is None: - distortions = False - self.distortions = distortions - if self.batch_size % self.device_count != 0: - raise ValueError( - ('batch_size must be a multiple of device_count: ' - 'batch_size %d, device_count: %d') % - (self.batch_size, self.device_count)) - self.batch_size_per_device = self.batch_size // self.device_count - - def preprocess(self, image_buffer, bbox, thread_id): - """Preprocessing image_buffer using thread_id.""" - # Note: Width and height of image is known only at runtime. - image = tf.image.decode_jpeg(image_buffer, channels=3, - dct_method='INTEGER_FAST') - if self.train and self.distortions: - image = distort_image(image, self.height, self.width, bbox, thread_id) - else: - # image = eval_image(image, self.height, self.width, bbox, thread_id, - # self.resize_method) - image = vgg_preprocessing.preprocess_image(image, 224, 224, False) - # Note: image is now float32 [height,width,3] with range [0, 255] - - # image = tf.cast(image, tf.uint8) # HACK TESTING - - return image - - def minibatch(self, dataset, subset): - with tf.name_scope('batch_processing'): - images = [[] for i in range(self.device_count)] - labels = [[] for i in range(self.device_count)] - record_input = data_flow_ops.RecordInput( - file_pattern=dataset.tf_record_pattern(subset), - seed=randint(0, 9000), - parallelism=64, - buffer_size=10000, - batch_size=self.batch_size, - name='record_input') - records = record_input.get_yield_op() - records = tf.split(records, self.batch_size, 0) - records = [tf.reshape(record, []) for record in records] - for i in xrange(self.batch_size): - value = records[i] - image_buffer, label_index, bbox, _ = parse_example_proto(value) - image = self.preprocess(image_buffer, bbox, i % 4) - - device_index = i % self.device_count - images[device_index].append(image) - labels[device_index].append(label_index) - label_index_batch = [None] * self.device_count - for device_index in xrange(self.device_count): - images[device_index] = tf.parallel_stack(images[device_index]) - label_index_batch[device_index] = tf.concat(labels[device_index], 0) - - # dynamic_pad=True) # HACK TESTING dynamic_pad=True - images[device_index] = tf.cast(images[device_index], self.dtype) - depth = 3 - images[device_index] = tf.reshape( - images[device_index], - shape=[self.batch_size_per_device, self.height, self.width, depth]) - label_index_batch[device_index] = tf.reshape( - label_index_batch[device_index], [self.batch_size_per_device]) - # Display the training images in the visualizer. 
- # tf.summary.image('images', images) - - return images, label_index_batch + """Preprocessor for input images.""" + + def __init__(self, + height, + width, + batch_size, + device_count, + dtype=tf.float32, + train=True, + distortions=None, + resize_method=None): + self.height = height + self.width = width + self.batch_size = batch_size + self.device_count = device_count + self.dtype = dtype + self.train = train + self.resize_method = resize_method + if distortions is None: + distortions = False + self.distortions = distortions + if self.batch_size % self.device_count != 0: + raise ValueError( + ('batch_size must be a multiple of device_count: ' + 'batch_size %d, device_count: %d') % + (self.batch_size, self.device_count)) + self.batch_size_per_device = self.batch_size // self.device_count + + def preprocess(self, image_buffer, bbox, thread_id): + """Preprocessing image_buffer using thread_id.""" + # Note: Width and height of image is known only at runtime. + image = tf.image.decode_jpeg(image_buffer, channels=3, + dct_method='INTEGER_FAST') + if self.train and self.distortions: + image = distort_image(image, self.height, self.width, bbox, thread_id) + else: + #image = eval_image(image, self.height, self.width, bbox, thread_id, + # self.resize_method) + image = vgg_preprocessing.preprocess_image(image,224,224,False) + # Note: image is now float32 [height,width,3] with range [0, 255] + + # image = tf.cast(image, tf.uint8) # HACK TESTING + + return image + + def minibatch(self, dataset, subset): + with tf.compat.v1.name_scope('batch_processing'): + images = [[] for i in range(self.device_count)] + labels = [[] for i in range(self.device_count)] + record_input = data_flow_ops.RecordInput( + file_pattern=dataset.tf_record_pattern(subset), + seed=randint(0, 9000), + parallelism=64, + buffer_size=10000, + batch_size=self.batch_size, + name='record_input') + records = record_input.get_yield_op() + records = tf.split(records, self.batch_size, 0) + records = [tf.reshape(record, []) for record in records] + for i in xrange(self.batch_size): + value = records[i] + image_buffer, label_index, bbox, _ = parse_example_proto(value) + image = self.preprocess(image_buffer, bbox, i % 4) + + device_index = i % self.device_count + images[device_index].append(image) + labels[device_index].append(label_index) + label_index_batch = [None] * self.device_count + for device_index in xrange(self.device_count): + images[device_index] = tf.parallel_stack(images[device_index]) + label_index_batch[device_index] = tf.concat(labels[device_index], 0) + + # dynamic_pad=True) # HACK TESTING dynamic_pad=True + images[device_index] = tf.cast(images[device_index], self.dtype) + depth = 3 + images[device_index] = tf.reshape( + images[device_index], + shape=[self.batch_size_per_device, self.height, self.width, depth]) + label_index_batch[device_index] = tf.reshape( + label_index_batch[device_index], [self.batch_size_per_device]) + # Display the training images in the visualizer. 
+ # tf.summary.image('images', images) + + return images, label_index_batch diff --git a/models/image_recognition/tensorflow/resnet101/int8/vgg_preprocessing.py b/models/image_recognition/tensorflow/resnet101/int8/vgg_preprocessing.py index e9009a0ec..96a9920da 100644 --- a/models/image_recognition/tensorflow/resnet101/int8/vgg_preprocessing.py +++ b/models/image_recognition/tensorflow/resnet101/int8/vgg_preprocessing.py @@ -55,8 +55,6 @@ import tensorflow as tf -slim = tf.contrib.slim - _R_MEAN = 123.68 _G_MEAN = 116.78 _B_MEAN = 103.94 @@ -66,244 +64,244 @@ def _crop(image, offset_height, offset_width, crop_height, crop_width): - """Crops the given image using the provided offsets and sizes. + """Crops the given image using the provided offsets and sizes. - Note that the method doesn't assume we know the input image size but it does - assume we know the input image rank. + Note that the method doesn't assume we know the input image size but it does + assume we know the input image rank. - Args: - image: an image of shape [height, width, channels]. - offset_height: a scalar tensor indicating the height offset. - offset_width: a scalar tensor indicating the width offset. - crop_height: the height of the cropped image. - crop_width: the width of the cropped image. + Args: + image: an image of shape [height, width, channels]. + offset_height: a scalar tensor indicating the height offset. + offset_width: a scalar tensor indicating the width offset. + crop_height: the height of the cropped image. + crop_width: the width of the cropped image. - Returns: - the cropped (and resized) image. + Returns: + the cropped (and resized) image. - Raises: - InvalidArgumentError: if the rank is not 3 or if the image dimensions are - less than the crop size. - """ - original_shape = tf.shape(image) + Raises: + InvalidArgumentError: if the rank is not 3 or if the image dimensions are + less than the crop size. + """ + original_shape = tf.shape(input=image) - rank_assertion = tf.Assert( - tf.equal(tf.rank(image), 3), - ['Rank of image must be equal to 3.']) - with tf.control_dependencies([rank_assertion]): - cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]]) + rank_assertion = tf.Assert( + tf.equal(tf.rank(image), 3), + ['Rank of image must be equal to 3.']) + with tf.control_dependencies([rank_assertion]): + cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]]) - size_assertion = tf.Assert( - tf.logical_and( - tf.greater_equal(original_shape[0], crop_height), - tf.greater_equal(original_shape[1], crop_width)), - ['Crop size greater than the image size.']) + size_assertion = tf.Assert( + tf.logical_and( + tf.greater_equal(original_shape[0], crop_height), + tf.greater_equal(original_shape[1], crop_width)), + ['Crop size greater than the image size.']) - offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) + offsets = tf.cast(tf.stack([offset_height, offset_width, 0]), dtype=tf.int32) - # Use tf.slice instead of crop_to_bounding box as it accepts tensors to - # define the crop size. - with tf.control_dependencies([size_assertion]): - image = tf.slice(image, offsets, cropped_shape) - return tf.reshape(image, cropped_shape) + # Use tf.slice instead of crop_to_bounding box as it accepts tensors to + # define the crop size. + with tf.control_dependencies([size_assertion]): + image = tf.slice(image, offsets, cropped_shape) + return tf.reshape(image, cropped_shape) def _random_crop(image_list, crop_height, crop_width): - """Crops the given list of images. 
- - The function applies the same crop to each image in the list. This can be - effectively applied when there are multiple image inputs of the same - dimension such as: - - image, depths, normals = _random_crop([image, depths, normals], 120, 150) - - Args: - image_list: a list of image tensors of the same dimension but possibly - varying channel. - crop_height: the new height. - crop_width: the new width. - - Returns: - the image_list with cropped images. - - Raises: - ValueError: if there are multiple image inputs provided with different size - or the images are smaller than the crop dimensions. - """ - if not image_list: - raise ValueError('Empty image_list.') - - # Compute the rank assertions. - rank_assertions = [] - for i in range(len(image_list)): - image_rank = tf.rank(image_list[i]) - rank_assert = tf.Assert( - tf.equal(image_rank, 3), - ['Wrong rank for tensor %s [expected] [actual]', - image_list[i].name, 3, image_rank]) - rank_assertions.append(rank_assert) - - with tf.control_dependencies([rank_assertions[0]]): - image_shape = tf.shape(image_list[0]) - image_height = image_shape[0] - image_width = image_shape[1] - crop_size_assert = tf.Assert( - tf.logical_and( - tf.greater_equal(image_height, crop_height), - tf.greater_equal(image_width, crop_width)), - ['Crop size greater than the image size.']) - - asserts = [rank_assertions[0], crop_size_assert] - - for i in range(1, len(image_list)): - image = image_list[i] - asserts.append(rank_assertions[i]) - with tf.control_dependencies([rank_assertions[i]]): - shape = tf.shape(image) - height = shape[0] - width = shape[1] - - height_assert = tf.Assert( - tf.equal(height, image_height), - ['Wrong height for tensor %s [expected][actual]', - image.name, height, image_height]) - width_assert = tf.Assert( - tf.equal(width, image_width), - ['Wrong width for tensor %s [expected][actual]', - image.name, width, image_width]) - asserts.extend([height_assert, width_assert]) - - # Create a random bounding box. - # - # Use tf.random_uniform and not numpy.random.rand as doing the former would - # generate random numbers at graph eval time, unlike the latter which - # generates random numbers at graph definition time. - with tf.control_dependencies(asserts): - max_offset_height = tf.reshape(image_height - crop_height + 1, []) - with tf.control_dependencies(asserts): - max_offset_width = tf.reshape(image_width - crop_width + 1, []) - offset_height = tf.random_uniform( - [], maxval=max_offset_height, dtype=tf.int32) - offset_width = tf.random_uniform( - [], maxval=max_offset_width, dtype=tf.int32) - - return [_crop(image, offset_height, offset_width, - crop_height, crop_width) for image in image_list] + """Crops the given list of images. + + The function applies the same crop to each image in the list. This can be + effectively applied when there are multiple image inputs of the same + dimension such as: + + image, depths, normals = _random_crop([image, depths, normals], 120, 150) + + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the new height. + crop_width: the new width. + + Returns: + the image_list with cropped images. + + Raises: + ValueError: if there are multiple image inputs provided with different size + or the images are smaller than the crop dimensions. + """ + if not image_list: + raise ValueError('Empty image_list.') + + # Compute the rank assertions. 
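_random_crop above draws one random offset with tf.random.uniform and applies the same crop to every image in the list. For the common single-image case the stock helper gives the same behavior, as in this minimal sketch (illustrative only):

import tensorflow as tf

def random_crop_single(image, crop_height, crop_width):
    # tf.image.random_crop picks a uniformly random offset at graph-eval time,
    # mirroring the tf.random.uniform offsets computed in _random_crop.
    return tf.image.random_crop(image, size=[crop_height, crop_width, 3])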
+ rank_assertions = [] + for i in range(len(image_list)): + image_rank = tf.rank(image_list[i]) + rank_assert = tf.Assert( + tf.equal(image_rank, 3), + ['Wrong rank for tensor %s [expected] [actual]', + image_list[i].name, 3, image_rank]) + rank_assertions.append(rank_assert) + + with tf.control_dependencies([rank_assertions[0]]): + image_shape = tf.shape(input=image_list[0]) + image_height = image_shape[0] + image_width = image_shape[1] + crop_size_assert = tf.Assert( + tf.logical_and( + tf.greater_equal(image_height, crop_height), + tf.greater_equal(image_width, crop_width)), + ['Crop size greater than the image size.']) + + asserts = [rank_assertions[0], crop_size_assert] + + for i in range(1, len(image_list)): + image = image_list[i] + asserts.append(rank_assertions[i]) + with tf.control_dependencies([rank_assertions[i]]): + shape = tf.shape(input=image) + height = shape[0] + width = shape[1] + + height_assert = tf.Assert( + tf.equal(height, image_height), + ['Wrong height for tensor %s [expected][actual]', + image.name, height, image_height]) + width_assert = tf.Assert( + tf.equal(width, image_width), + ['Wrong width for tensor %s [expected][actual]', + image.name, width, image_width]) + asserts.extend([height_assert, width_assert]) + + # Create a random bounding box. + # + # Use tf.random_uniform and not numpy.random.rand as doing the former would + # generate random numbers at graph eval time, unlike the latter which + # generates random numbers at graph definition time. + with tf.control_dependencies(asserts): + max_offset_height = tf.reshape(image_height - crop_height + 1, []) + with tf.control_dependencies(asserts): + max_offset_width = tf.reshape(image_width - crop_width + 1, []) + offset_height = tf.random.uniform( + [], maxval=max_offset_height, dtype=tf.int32) + offset_width = tf.random.uniform( + [], maxval=max_offset_width, dtype=tf.int32) + + return [_crop(image, offset_height, offset_width, + crop_height, crop_width) for image in image_list] def _central_crop(image_list, crop_height, crop_width): - """Performs central crops of the given image list. + """Performs central crops of the given image list. - Args: - image_list: a list of image tensors of the same dimension but possibly - varying channel. - crop_height: the height of the image following the crop. - crop_width: the width of the image following the crop. + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the height of the image following the crop. + crop_width: the width of the image following the crop. - Returns: - the list of cropped images. - """ - outputs = [] - for image in image_list: - image_height = tf.shape(image)[0] - image_width = tf.shape(image)[1] + Returns: + the list of cropped images. + """ + outputs = [] + for image in image_list: + image_height = tf.shape(input=image)[0] + image_width = tf.shape(input=image)[1] - offset_height = (image_height - crop_height) / 2 - offset_width = (image_width - crop_width) / 2 + offset_height = (image_height - crop_height) / 2 + offset_width = (image_width - crop_width) / 2 - outputs.append(_crop(image, offset_height, offset_width, - crop_height, crop_width)) - return outputs + outputs.append(_crop(image, offset_height, offset_width, + crop_height, crop_width)) + return outputs def _mean_image_subtraction(image, means): - """Subtracts the given means from each image channel. + """Subtracts the given means from each image channel. 
- For example: - means = [123.68, 116.779, 103.939] - image = _mean_image_subtraction(image, means) + For example: + means = [123.68, 116.779, 103.939] + image = _mean_image_subtraction(image, means) - Note that the rank of `image` must be known. + Note that the rank of `image` must be known. - Args: - image: a tensor of size [height, width, C]. - means: a C-vector of values to subtract from each channel. + Args: + image: a tensor of size [height, width, C]. + means: a C-vector of values to subtract from each channel. - Returns: - the centered image. + Returns: + the centered image. - Raises: - ValueError: If the rank of `image` is unknown, if `image` has a rank other - than three or if the number of channels in `image` doesn't match the - number of values in `means`. - """ - if image.get_shape().ndims != 3: - raise ValueError('Input must be of size [height, width, C>0]') - num_channels = image.get_shape().as_list()[-1] - if len(means) != num_channels: - raise ValueError('len(means) must match the number of channels') + Raises: + ValueError: If the rank of `image` is unknown, if `image` has a rank other + than three or if the number of channels in `image` doesn't match the + number of values in `means`. + """ + if image.get_shape().ndims != 3: + raise ValueError('Input must be of size [height, width, C>0]') + num_channels = image.get_shape().as_list()[-1] + if len(means) != num_channels: + raise ValueError('len(means) must match the number of channels') - channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) - for i in range(num_channels): - channels[i] -= means[i] - return tf.concat(axis=2, values=channels) + channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) + for i in range(num_channels): + channels[i] -= means[i] + return tf.concat(axis=2, values=channels) def _smallest_size_at_least(height, width, smallest_side): - """Computes new shape with the smallest side equal to `smallest_side`. + """Computes new shape with the smallest side equal to `smallest_side`. - Computes new shape with the smallest side equal to `smallest_side` while - preserving the original aspect ratio. + Computes new shape with the smallest side equal to `smallest_side` while + preserving the original aspect ratio. - Args: - height: an int32 scalar tensor indicating the current height. - width: an int32 scalar tensor indicating the current width. - smallest_side: A python integer or scalar `Tensor` indicating the size of - the smallest side after resize. + Args: + height: an int32 scalar tensor indicating the current height. + width: an int32 scalar tensor indicating the current width. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. - Returns: - new_height: an int32 scalar tensor indicating the new height. - new_width: and int32 scalar tensor indicating the new width. - """ - smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) + Returns: + new_height: an int32 scalar tensor indicating the new height. + new_width: and int32 scalar tensor indicating the new width. 
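Because the means handed to _mean_image_subtraction above are per-channel scalars, the split/concat form is equivalent to a broadcast subtraction; a minimal sketch assuming a float32 image in height-width-channel layout:

import tensorflow as tf

_R_MEAN, _G_MEAN, _B_MEAN = 123.68, 116.78, 103.94

def mean_subtract(image):
    # Broadcasting subtracts one mean value from each of the three channels.
    return image - tf.constant([_R_MEAN, _G_MEAN, _B_MEAN], dtype=tf.float32)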
+ """ + smallest_side = tf.convert_to_tensor(value=smallest_side, dtype=tf.int32) - height = tf.to_float(height) - width = tf.to_float(width) - smallest_side = tf.to_float(smallest_side) + height = tf.cast(height, dtype=tf.float32) + width = tf.cast(width, dtype=tf.float32) + smallest_side = tf.cast(smallest_side, dtype=tf.float32) - scale = tf.cond(tf.greater(height, width), - lambda: smallest_side / width, - lambda: smallest_side / height) - new_height = tf.to_int32(tf.rint(height * scale)) - new_width = tf.to_int32(tf.rint(width * scale)) - return new_height, new_width + scale = tf.cond(pred=tf.greater(height, width), + true_fn=lambda: smallest_side / width, + false_fn=lambda: smallest_side / height) + new_height = tf.cast(tf.math.rint(height * scale), dtype=tf.int32) + new_width = tf.cast(tf.math.rint(width * scale), dtype=tf.int32) + return new_height, new_width def _aspect_preserving_resize(image, smallest_side): - """Resize images preserving the original aspect ratio. - - Args: - image: A 3-D image `Tensor`. - smallest_side: A python integer or scalar `Tensor` indicating the size of - the smallest side after resize. - - Returns: - resized_image: A 3-D tensor containing the resized image. - """ - smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) - # import pdb - # pdb.set_trace() - shape = tf.shape(image) - - height = shape[0] - width = shape[1] - new_height, new_width = _smallest_size_at_least(height, width, smallest_side) - image = tf.expand_dims(image, 0) - resized_image = tf.image.resize_bilinear(image, [new_height, new_width], - align_corners=False) - resized_image = tf.squeeze(resized_image) - resized_image.set_shape([None, None, 3]) - return resized_image + """Resize images preserving the original aspect ratio. + + Args: + image: A 3-D image `Tensor`. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. + + Returns: + resized_image: A 3-D tensor containing the resized image. + """ + smallest_side = tf.convert_to_tensor(value=smallest_side, dtype=tf.int32) + #import pdb + #pdb.set_trace() + shape = tf.shape(input=image) + + height = shape[0] + width = shape[1] + new_height, new_width = _smallest_size_at_least(height, width, smallest_side) + image = tf.expand_dims(image, 0) + resized_image = tf.image.resize(image, [new_height, new_width], + method=tf.image.ResizeMethod.BILINEAR) + resized_image = tf.squeeze(resized_image) + resized_image.set_shape([None, None, 3]) + return resized_image def preprocess_for_train(image, @@ -311,78 +309,78 @@ def preprocess_for_train(image, output_width, resize_side_min=_RESIZE_SIDE_MIN, resize_side_max=_RESIZE_SIDE_MAX): - """Preprocesses the given image for training. - - Note that the actual resizing scale is sampled from - [`resize_size_min`, `resize_size_max`]. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - resize_side_min: The lower bound for the smallest side of the image for - aspect-preserving resizing. - resize_side_max: The upper bound for the smallest side of the image for - aspect-preserving resizing. - - Returns: - A preprocessed image. 
- """ - resize_side = tf.random_uniform( - [], minval=resize_side_min, maxval=resize_side_max + 1, dtype=tf.int32) - - image = _aspect_preserving_resize(image, resize_side) - image = _random_crop([image], output_height, output_width)[0] - image.set_shape([output_height, output_width, 3]) - image = tf.to_float(image) - image = tf.image.random_flip_left_right(image) - return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + """Preprocesses the given image for training. + + Note that the actual resizing scale is sampled from + [`resize_size_min`, `resize_size_max`]. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. + + Returns: + A preprocessed image. + """ + resize_side = tf.random.uniform( + [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32) + + image = _aspect_preserving_resize(image, resize_side) + image = _random_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.cast(image, dtype=tf.float32) + image = tf.image.random_flip_left_right(image) + return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) def preprocess_for_eval(image, output_height, output_width, resize_side): - """Preprocesses the given image for evaluation. + """Preprocesses the given image for evaluation. - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - resize_side: The smallest side of the image for aspect-preserving resizing. + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + resize_side: The smallest side of the image for aspect-preserving resizing. - Returns: - A preprocessed image. - """ - image = _aspect_preserving_resize(image, resize_side) - image = _central_crop([image], output_height, output_width)[0] - image.set_shape([output_height, output_width, 3]) - image = tf.to_float(image) - return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + Returns: + A preprocessed image. + """ + image = _aspect_preserving_resize(image, resize_side) + image = _central_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.cast(image, dtype=tf.float32) + return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) def preprocess_image(image, output_height, output_width, is_training=False, resize_side_min=_RESIZE_SIDE_MIN, resize_side_max=_RESIZE_SIDE_MAX): - """Preprocesses the given image. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - is_training: `True` if we're preprocessing the image for training and - `False` otherwise. - resize_side_min: The lower bound for the smallest side of the image for - aspect-preserving resizing. If `is_training` is `False`, then this value - is used for rescaling. 
- resize_side_max: The upper bound for the smallest side of the image for - aspect-preserving resizing. If `is_training` is `False`, this value is - ignored. Otherwise, the resize side is sampled from - [resize_size_min, resize_size_max]. - - Returns: - A preprocessed image. - """ - if is_training: - return preprocess_for_train(image, output_height, output_width, - resize_side_min, resize_side_max) - else: - return preprocess_for_eval(image, output_height, output_width, - resize_side_min) + """Preprocesses the given image. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + is_training: `True` if we're preprocessing the image for training and + `False` otherwise. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, then this value + is used for rescaling. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, this value is + ignored. Otherwise, the resize side is sampled from + [resize_size_min, resize_size_max]. + + Returns: + A preprocessed image. + """ + if is_training: + return preprocess_for_train(image, output_height, output_width, + resize_side_min, resize_side_max) + else: + return preprocess_for_eval(image, output_height, output_width, + resize_side_min) diff --git a/models/image_recognition/tensorflow/resnet50/__init__.py b/models/image_recognition/tensorflow/resnet50/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/image_recognition/tensorflow/resnet50/__init__.py +++ b/models/image_recognition/tensorflow/resnet50/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/image_recognition/tensorflow/resnet50/inference/__init__.py b/models/image_recognition/tensorflow/resnet50/inference/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/image_recognition/tensorflow/resnet50/inference/__init__.py +++ b/models/image_recognition/tensorflow/resnet50/inference/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/image_recognition/tensorflow/resnet50/inference/datasets.py b/models/image_recognition/tensorflow/resnet50/inference/datasets.py index 841ef0f88..2beabb30e 100644 --- a/models/image_recognition/tensorflow/resnet50/inference/datasets.py +++ b/models/image_recognition/tensorflow/resnet50/inference/datasets.py @@ -47,51 +47,50 @@ IMAGENET_NUM_VAL_IMAGES = 50000 IMAGENET_NUM_CLASSES = 1000 - class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, data_dir=None): - self.name = name - if data_dir is None: - raise ValueError('Data directory not specified') - self.data_dir = data_dir + def __init__(self, name, data_dir=None): + self.name = name + if data_dir is None: + raise ValueError('Data directory not specified') + self.data_dir = data_dir - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.compat.v1.TFRecordReader() - @abstractmethod - def num_classes(self): - pass + @abstractmethod + def num_classes(self): + pass - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - 
def __str__(self): - return self.name + def __str__(self): + return self.name class ImagenetData(Dataset): - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('ImageNet', data_dir) + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('ImageNet', data_dir) - def num_classes(self): - return IMAGENET_NUM_CLASSES + def num_classes(self): + return IMAGENET_NUM_CLASSES - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - elif subset == 'calibrate' or subset == 'calibration': - return 100 - else: - raise ValueError('Invalid data subset "%s"' % subset) + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + elif subset == 'calibrate' or subset == 'calibration': + return 100 + else: + raise ValueError('Invalid data subset "%s"' % subset) - def get_image_preprocessor(self): - return preprocessing.RecordInputImagePreprocessor + def get_image_preprocessor(self): + return preprocessing.RecordInputImagePreprocessor diff --git a/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py b/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py index 321e1bb52..d8d8487e4 100644 --- a/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py +++ b/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py @@ -22,253 +22,250 @@ from argparse import ArgumentParser import tensorflow as tf -try: - import tensorflow.tools.graph_transforms as graph_transforms -except: - import tensorflow_core.tools.graph_transforms as graph_transforms +from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference +from tensorflow.python.framework import dtypes import datasets import numpy as np INPUTS = 'input' OUTPUTS = 'predict' -OPTIMIZATION = 'strip_unused_nodes remove_nodes(op=Identity, op=CheckNumerics) fold_constants(ignore_errors=true) fold_batch_norms fold_old_batch_norms' RESNET_IMAGE_SIZE = 224 class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph""" - - def __init__(self): - - arg_parser = ArgumentParser(description='Parse args') - - arg_parser.add_argument('-b', "--batch-size", - help="Specify the batch size. If this " - "parameter is not specified or is -1, the " - "largest ideal batch size for the model will " - "be used.", - dest="batch_size", type=int, default=-1) - - arg_parser.add_argument('-e', "--num-inter-threads", - help='The number of inter-thread.', - dest='num_inter_threads', type=int, default=0) - - arg_parser.add_argument('-a', "--num-intra-threads", - help='The number of intra-thread.', - dest='num_intra_threads', type=int, default=0) - - arg_parser.add_argument('-m', "--model-name", - help='Specify the model name to run benchmark for', - dest='model_name') - - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - - arg_parser.add_argument('-d', "--data-location", - help='Specify the location of the data. 
' - 'If this parameter is not specified, ' - 'the benchmark will use random/dummy data.', - dest="data_location", default=None) - - arg_parser.add_argument('-r', "--accuracy-only", - help='For accuracy measurement only.', - dest='accuracy_only', action='store_true') - arg_parser.add_argument('--calibrate', dest='calibrate', - help='Run accuracy with calibration data,' - 'to generate min_max ranges, calibrate=[True/False]', - type=bool, default=False) - arg_parser.add_argument("--results-file-path", - help="File path for the inference results", - dest="results_file_path", default=None) - arg_parser.add_argument("--warmup-steps", type=int, default=10, - help="number of warmup steps") - arg_parser.add_argument("--steps", type=int, default=50, - help="number of steps") - - arg_parser.add_argument( - '--data-num-inter-threads', dest='data_num_inter_threads', - help='number threads across operators', - type=int, default=32) - arg_parser.add_argument( - '--data-num-intra-threads', dest='data_num_intra_threads', - help='number threads for data layer operator', - type=int, default=14) - arg_parser.add_argument( - '--num-cores', dest='num_cores', - help='number of cores', - type=int, default=28) - - self.args = arg_parser.parse_args() - # validate the arguements - self.validate_args() - - def write_results_output(self, predictions, filenames, labels): - # If a results_file_path is provided, write the predictions to the file + """Evaluate image classifier with optimized TensorFlow graph""" + + def __init__(self): + + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--num-inter-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--num-intra-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-m', "--model-name", + help='Specify the model name to run benchmark for', + dest='model_name') + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. 
' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + arg_parser.add_argument('--calibrate', dest='calibrate', + help='Run accuracy with calibration data,' + 'to generate min_max ranges, calibrate=[True/False]', + type=bool, default=False) + arg_parser.add_argument("--results-file-path", + help="File path for the inference results", + dest="results_file_path", default=None) + arg_parser.add_argument("--warmup-steps", type=int, default=10, + help="number of warmup steps") + arg_parser.add_argument("--steps", type=int, default=50, + help="number of steps") + + arg_parser.add_argument( + '--data-num-inter-threads', dest='data_num_inter_threads', + help='number threads across operators', + type=int, default=32) + arg_parser.add_argument( + '--data-num-intra-threads', dest='data_num_intra_threads', + help='number threads for data layer operator', + type=int, default=14) + arg_parser.add_argument( + '--num-cores', dest='num_cores', + help='number of cores', + type=int, default=28) + + self.args = arg_parser.parse_args() + # validate the arguements + self.validate_args() + + def write_results_output(self, predictions, filenames, labels): + # If a results_file_path is provided, write the predictions to the file + if self.args.results_file_path: + top_predictions = np.argmax(predictions, 1) + with open(self.args.results_file_path, "a") as fp: + for filename, expected_label, top_prediction in zip(filenames, labels, top_predictions): + fp.write("{},{},{}\n".format(filename, expected_label, top_prediction)) + + def run(self): + """run benchmark with optimized graph""" + + print("Run inference") + + data_config = tf.compat.v1.ConfigProto() + data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads + data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.compat.v1.ConfigProto() + infer_config.intra_op_parallelism_threads = self.args.num_intra_threads + infer_config.inter_op_parallelism_threads = self.args.num_inter_threads + infer_config.use_per_session_threads = 1 + + data_graph = tf.Graph() + with data_graph.as_default(): + if (self.args.data_location): + print("Inference with real data.") + if self.args.calibrate: + subset = 'calibration' + else: + subset = 'validation' + dataset = datasets.ImagenetData(self.args.data_location) + preprocessor = dataset.get_image_preprocessor()( + RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size, + num_cores=self.args.num_cores, + resize_method='crop') + + images, labels, filenames = preprocessor.minibatch(dataset, subset=subset) + + # If a results file path is provided, then start the prediction output file if self.args.results_file_path: - top_predictions = np.argmax(predictions, 1) - with open(self.args.results_file_path, "a") as fp: - for filename, expected_label, top_prediction in zip(filenames, labels, top_predictions): - fp.write("{},{},{}\n".format(filename, expected_label, top_prediction)) - - def run(self): - """run benchmark with optimized graph""" - - print("Run inference") - - data_config = tf.ConfigProto() - data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads - data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads - data_config.use_per_session_threads = 1 - - infer_config = 
tf.ConfigProto() - infer_config.intra_op_parallelism_threads = self.args.num_intra_threads - infer_config.inter_op_parallelism_threads = self.args.num_inter_threads - infer_config.use_per_session_threads = 1 - - data_graph = tf.Graph() - with data_graph.as_default(): - if (self.args.data_location): - print("Inference with real data.") - if self.args.calibrate: - subset = 'calibration' - else: - subset = 'validation' - dataset = datasets.ImagenetData(self.args.data_location) - preprocessor = dataset.get_image_preprocessor()( - RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size, - num_cores=self.args.num_cores, - resize_method='crop') - - images, labels, filenames = preprocessor.minibatch(dataset, subset=subset) - - # If a results file path is provided, then start the prediction output file - if self.args.results_file_path: - with open(self.args.results_file_path, "w+") as fp: - fp.write("filename,actual,prediction\n") - else: - print("Inference with dummy data.") - input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3] - images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') - - infer_graph = tf.Graph() - with infer_graph.as_default(): - graph_def = tf.GraphDef() - with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: - input_graph_content = input_file.read() - graph_def.ParseFromString(input_graph_content) - - output_graph = graph_transforms.TransformGraph(graph_def, - [INPUTS], [OUTPUTS], [OPTIMIZATION]) - tf.import_graph_def(output_graph, name='') - - # Definite input and output Tensors for detection_graph - input_tensor = infer_graph.get_tensor_by_name('input:0') - output_tensor = infer_graph.get_tensor_by_name('predict:0') - - data_sess = tf.Session(graph=data_graph, config=data_config) - infer_sess = tf.Session(graph=infer_graph, config=infer_config) - - num_processed_images = 0 - num_remaining_images = dataset.num_examples_per_epoch(subset=subset) - num_processed_images \ - if self.args.data_location else (self.args.batch_size * self.args.steps) - - if (not self.args.accuracy_only): - iteration = 0 - warm_up_iteration = self.args.warmup_steps - total_run = self.args.steps - total_time = 0 - - while num_remaining_images >= self.args.batch_size and iteration < total_run: - iteration += 1 - tf_filenames = None - np_labels = None - data_load_start = time.time() - if self.args.results_file_path: - image_np, np_labels, tf_filenames = data_sess.run([images, labels, filenames]) - else: - image_np = data_sess.run(images) - - data_load_time = time.time() - data_load_start - - num_processed_images += self.args.batch_size - num_remaining_images -= self.args.batch_size - - start_time = time.time() - predictions = infer_sess.run(output_tensor, feed_dict={input_tensor: image_np}) - time_consume = time.time() - start_time - - # Write out the file name, expected label, and top prediction - self.write_results_output(predictions, tf_filenames, np_labels) - - # only add data loading time for real data, not for dummy data - if self.args.data_location: - time_consume += data_load_time - - print('Iteration %d: %.6f sec' % (iteration, time_consume)) - if iteration > warm_up_iteration: - total_time += time_consume - - time_average = total_time / (iteration - warm_up_iteration) - print('Average time: %.6f sec' % (time_average)) - - print('Batch size = %d' % self.args.batch_size) - if (self.args.batch_size == 1): - print('Latency: %.3f ms' % (time_average * 1000)) - # print throughput for both batch size 1 and 128 - 
print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) - - else: # accuracy check - total_accuracy1, total_accuracy5 = (0.0, 0.0) - - while num_remaining_images >= self.args.batch_size: - # Reads and preprocess data - tf_filenames = None - if self.args.results_file_path: - np_images, np_labels, tf_filenames = data_sess.run([images, labels, filenames]) - else: - np_images, np_labels = data_sess.run([images, labels]) - num_processed_images += self.args.batch_size - num_remaining_images -= self.args.batch_size - - start_time = time.time() - # Compute inference on the preprocessed data - predictions = infer_sess.run(output_tensor, - {input_tensor: np_images}) - elapsed_time = time.time() - start_time - - # Write out the file name, expected label, and top prediction - self.write_results_output(predictions, tf_filenames, np_labels) - - with tf.Graph().as_default() as accu_graph: - accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) - with tf.Session() as accu_sess: - np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5]) - - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - - print("Iteration time: %0.4f ms" % elapsed_time) - print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) - - def validate_args(self): - """validate the arguments""" - - if not self.args.data_location: - if self.args.accuracy_only: - raise ValueError("You must use real data for accuracy measurement.") + with open(self.args.results_file_path, "w+") as fp: + fp.write("filename,actual,prediction\n") + else: + print("Inference with dummy data.") + input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3] + images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') + + infer_graph = tf.Graph() + with infer_graph.as_default(): + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + + output_graph = optimize_for_inference(graph_def, [INPUTS], + [OUTPUTS], dtypes.float32.as_datatype_enum, False) + tf.import_graph_def(output_graph, name='') + + # Definite input and output Tensors for detection_graph + input_tensor = infer_graph.get_tensor_by_name('input:0') + output_tensor = infer_graph.get_tensor_by_name('predict:0') + + data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config) + infer_sess = tf.compat.v1.Session(graph=infer_graph, config=infer_config) + + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset=subset) - num_processed_images \ + if self.args.data_location else (self.args.batch_size * self.args.steps) + + if (not self.args.accuracy_only): + iteration = 0 + warm_up_iteration = self.args.warmup_steps + total_run = self.args.steps + total_time = 0 + + while num_remaining_images >= self.args.batch_size and iteration < total_run: + iteration += 1 + tf_filenames = None + np_labels = None + data_load_start = time.time() + if self.args.results_file_path: + image_np, np_labels, tf_filenames = data_sess.run([images, labels, filenames]) + else: + image_np = data_sess.run(images) + + data_load_time = 
time.time() - data_load_start + + num_processed_images += self.args.batch_size + num_remaining_images -= self.args.batch_size + + start_time = time.time() + predictions = infer_sess.run(output_tensor, feed_dict={input_tensor: image_np}) + time_consume = time.time() - start_time + + # Write out the file name, expected label, and top prediction + self.write_results_output(predictions, tf_filenames, np_labels) + + # only add data loading time for real data, not for dummy data + if self.args.data_location: + time_consume += data_load_time + + print('Iteration %d: %.6f sec' % (iteration, time_consume)) + if iteration > warm_up_iteration: + total_time += time_consume + + time_average = total_time / (iteration - warm_up_iteration) + print('Average time: %.6f sec' % (time_average)) + + print('Batch size = %d' % self.args.batch_size) + if (self.args.batch_size == 1): + print('Latency: %.3f ms' % (time_average * 1000)) + # print throughput for both batch size 1 and 128 + print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) + + else: # accuracy check + total_accuracy1, total_accuracy5 = (0.0, 0.0) + + while num_remaining_images >= self.args.batch_size: + # Reads and preprocess data + tf_filenames = None + if self.args.results_file_path: + np_images, np_labels, tf_filenames = data_sess.run([images, labels, filenames]) + else: + np_images, np_labels = data_sess.run([images, labels]) + num_processed_images += self.args.batch_size + num_remaining_images -= self.args.batch_size + + start_time = time.time() + # Compute inference on the preprocessed data + predictions = infer_sess.run(output_tensor, + {input_tensor: np_images}) + elapsed_time = time.time() - start_time + + # Write out the file name, expected label, and top prediction + self.write_results_output(predictions, tf_filenames, np_labels) + + with tf.Graph().as_default() as accu_graph: + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) + with tf.compat.v1.Session() as accu_sess: + np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5]) + + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + + print("Iteration time: %0.4f ms" % elapsed_time) + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1 / num_processed_images, + total_accuracy5 / num_processed_images)) + + def validate_args(self): + """validate the arguments""" + + if not self.args.data_location: + if self.args.accuracy_only: + raise ValueError("You must use real data for accuracy measurement.") if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/models/image_recognition/tensorflow/resnet50/inference/preprocessing.py b/models/image_recognition/tensorflow/resnet50/inference/preprocessing.py index ea97b83b8..3aa53ea3d 100644 --- a/models/image_recognition/tensorflow/resnet50/inference/preprocessing.py +++ b/models/image_recognition/tensorflow/resnet50/inference/preprocessing.py @@ -44,134 +44,134 @@ def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - """ - # Dense features in Example proto. 
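The graph-loading change in eval_image_classifier_inference.py above swaps the graph_transforms.TransformGraph call for optimize_for_inference, which strips nodes not needed at inference time and folds batch norms before import. A condensed sketch of that path (load_optimized_graph is an illustrative wrapper, not a function in the patch):

    import tensorflow as tf
    from tensorflow.python.framework import dtypes
    from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference

    def load_optimized_graph(pb_path, input_name='input', output_name='predict'):
        graph_def = tf.compat.v1.GraphDef()
        with tf.io.gfile.GFile(pb_path, 'rb') as f:
            graph_def.ParseFromString(f.read())
        # Strip training-only nodes and fold batch norms, roughly what the
        # old TransformGraph option string did.
        optimized = optimize_for_inference(graph_def, [input_name], [output_name],
                                           dtypes.float32.as_datatype_enum, False)
        graph = tf.Graph()
        with graph.as_default():
            tf.import_graph_def(optimized, name='')
        return graph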
- feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/filename': tf.FixedLenFeature([], dtype=tf.string, - default_value="") - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. - feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - filename = tf.cast(features['image/filename'], dtype=tf.string) - - return features['image/encoded'], label, filename + """Parses an Example proto containing a training example of an image. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/filename': tf.io.FixedLenFeature([], dtype=tf.string, + default_value="") + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + filename = tf.cast(features['image/filename'], dtype=tf.string) + + return features['image/encoded'], label, filename def eval_image(image, height, width, resize_method, central_fraction=0.875, scope=None): - with tf.name_scope('eval_image'): - if resize_method == 'crop': - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256, 256 * shape[1] / shape[0]], - dtype=tf.int32)), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256 * shape[0] / shape[1], 256], - dtype=tf.int32))) - shape = tf.shape(image) - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) - distorted_image.set_shape([height, width, 3]) - return distorted_image - else: # bilinear - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. 
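The parse_example_proto hunk above follows the same tf.io migration used throughout this patch. Reduced to its core (parse_record and the two-feature map are illustrative; the real code also handles bounding boxes and filenames):

    import tensorflow as tf

    def parse_record(serialized_example):
        # tf.io.FixedLenFeature / tf.io.parse_single_example replace the
        # removed tf.FixedLenFeature / tf.parse_single_example calls.
        feature_map = {
            'image/encoded': tf.io.FixedLenFeature([], tf.string, default_value=''),
            'image/class/label': tf.io.FixedLenFeature([1], tf.int64, default_value=-1),
        }
        features = tf.io.parse_single_example(serialized=serialized_example,
                                              features=feature_map)
        label = tf.cast(features['image/class/label'], tf.int32)
        return features['image/encoded'], label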
- image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image + with tf.compat.v1.name_scope('eval_image'): + if resize_method == 'crop': + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256, 256 * shape[1] / shape[0]], + dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256 * shape[0] / shape[1], 256], + dtype=tf.int32))) + shape = tf.shape(input=image) + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) + distorted_image.set_shape([height, width, 3]) + return distorted_image + else: # bilinear + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [height, width], + method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image class RecordInputImagePreprocessor(object): - """Preprocessor for images with RecordInput format.""" - - def __init__(self, - height, - width, - batch_size, - num_cores, - resize_method="bilinear"): - - self.height = height - self.width = width - self.batch_size = batch_size - self.num_cores = num_cores - self.resize_method = resize_method - - def parse_and_preprocess(self, value): - # parse - image_buffer, label_index, filename = parse_example_proto(value) - # preprocess - image = tf.image.decode_jpeg( - image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') - image = eval_image(image, self.height, self.width, self.resize_method) - - return (image, label_index, filename) - - def minibatch(self, dataset, subset, cache_data=False): - - with tf.name_scope('batch_processing'): - - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - - ds = ds.apply( - parallel_interleave( - tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, - sloppy=True, - buffer_output_elements=10000, prefetch_input_elements=10000)) - - if cache_data: - ds = ds.take(1).cache().repeat() - - ds = ds.prefetch(buffer_size=10000) - # ds = ds.prefetch(buffer_size=self.batch_size) - - # num of parallel batches not greater than 56 - max_num_parallel_batches = min(56, 2 * self.num_cores) - ds = ds.apply( - map_and_batch( - map_func=self.parse_and_preprocess, - batch_size=self.batch_size, - num_parallel_batches=max_num_parallel_batches, - num_parallel_calls=None)) - - ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE) - - ds_iterator = ds.make_one_shot_iterator() - images, labels, filename = ds_iterator.get_next() - # reshape - labels = tf.reshape(labels, [self.batch_size]) - filename = tf.reshape(filename, [self.batch_size]) - - return images, labels, filename + """Preprocessor for images with 
RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + num_cores, + resize_method="bilinear"): + + self.height = height + self.width = width + self.batch_size = batch_size + self.num_cores = num_cores + self.resize_method = resize_method + + def parse_and_preprocess(self, value): + # parse + image_buffer, label_index, filename = parse_example_proto(value) + # preprocess + image = tf.image.decode_jpeg( + image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + image = eval_image(image, self.height, self.width, self.resize_method) + + return (image, label_index, filename) + + def minibatch(self, dataset, subset, cache_data=False): + + with tf.compat.v1.name_scope('batch_processing'): + + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + + if cache_data: + ds = ds.take(1).cache().repeat() + + ds = ds.prefetch(buffer_size=10000) + #ds = ds.prefetch(buffer_size=self.batch_size) + + # num of parallel batches not greater than 56 + max_num_parallel_batches = min(56, 2 * self.num_cores) + ds = ds.apply( + map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size, + num_parallel_batches=max_num_parallel_batches, + num_parallel_calls=None)) + + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) + images, labels, filename = ds_iterator.get_next() + # reshape + labels = tf.reshape(labels, [self.batch_size]) + filename = tf.reshape(filename, [self.batch_size]) + + return images, labels, filename diff --git a/models/image_recognition/tensorflow/resnet50/int8/__init__.py b/models/image_recognition/tensorflow/resnet50/int8/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/image_recognition/tensorflow/resnet50/int8/__init__.py +++ b/models/image_recognition/tensorflow/resnet50/int8/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/image_recognition/tensorflow/resnet50/int8/benchmark.py b/models/image_recognition/tensorflow/resnet50/int8/benchmark.py index 0e496cfb5..4094fa801 100644 --- a/models/image_recognition/tensorflow/resnet50/int8/benchmark.py +++ b/models/image_recognition/tensorflow/resnet50/int8/benchmark.py @@ -46,168 +46,168 @@ import tensorflow as tf if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--input_height", default=224, - type=int, help="input height") - parser.add_argument("--input_width", default=224, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--data_location", default=None, - help="dataset location") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="predict", - help="name of output layer") - parser.add_argument("--num_cores", default=28, - type=int, help="number of physical cores") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', 
- help='number threads for an operator', - type=int, default=1) - parser.add_argument( - '--data_num_inter_threads', - help='number threads across data layer operators', - type=int, default=16) - parser.add_argument( - '--data_num_intra_threads', - help='number threads for an data layer operator', - type=int, default=14) - parser.add_argument("--warmup_steps", type=int, default=10, - help="number of warmup steps") - parser.add_argument("--steps", type=int, default=50, help="number of steps") - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--data_location", default=None, + help="dataset location") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="predict", + help="name of output layer") + parser.add_argument("--num_cores", default=28, + type=int, help="number of physical cores") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + parser.add_argument( + '--data_num_inter_threads', + help='number threads across data layer operators', + type=int, default=16) + parser.add_argument( + '--data_num_intra_threads', + help='number threads for an data layer operator', + type=int, default=14) + parser.add_argument("--warmup_steps", type=int, default=10, + help="number of warmup steps") + parser.add_argument("--steps", type=int, default=50, help="number of steps") + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 224 + if args.input_width: + input_width = args.input_width + else: + input_width = 224 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + warmup_steps = args.warmup_steps + steps = args.steps + assert steps > 10, "Benchmark steps should be at least 10." 
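The session setup that follows (and the matching code in eval_image_classifier_inference.py) uses the same tf.compat.v1 pattern: one ConfigProto/Session pair for the data pipeline and one for inference. A minimal sketch with illustrative thread counts (the scripts read the real values from command-line flags):

    import tensorflow as tf

    # Separate configs so the data pipeline and the model use their own thread pools.
    data_config = tf.compat.v1.ConfigProto()
    data_config.intra_op_parallelism_threads = 14
    data_config.inter_op_parallelism_threads = 16
    data_config.use_per_session_threads = 1

    infer_config = tf.compat.v1.ConfigProto()
    infer_config.intra_op_parallelism_threads = 28
    infer_config.inter_op_parallelism_threads = 1
    infer_config.use_per_session_threads = 1

    data_graph = tf.Graph()    # input pipeline ops would be built here
    infer_graph = tf.Graph()   # frozen model graph would be imported here
    data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config)
    infer_sess = tf.compat.v1.Session(graph=infer_graph, config=infer_config)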
+ num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + + data_config = tf.compat.v1.ConfigProto() + data_config.intra_op_parallelism_threads = args.data_num_intra_threads + data_config.inter_op_parallelism_threads = args.data_num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.compat.v1.ConfigProto() + infer_config.intra_op_parallelism_threads = num_intra_threads + infer_config.inter_op_parallelism_threads = num_inter_threads + infer_config.use_per_session_threads = 1 + + data_graph = tf.Graph() + with data_graph.as_default(): + if args.data_location: + print("inference with real data") + # get the images from dataset + dataset = datasets.ImagenetData(args.data_location) + preprocessor = dataset.get_image_preprocessor(benchmark=True)( + input_height, input_width, batch_size, + num_cores=args.num_cores, + resize_method='crop') + images = preprocessor.minibatch(dataset, subset='validation') else: - sys.exit("Please provide a graph file.") - if args.input_height: - input_height = args.input_height - else: - input_height = 224 - if args.input_width: - input_width = args.input_width - else: - input_width = 224 - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - warmup_steps = args.warmup_steps - steps = args.steps - assert steps > 10, "Benchmark steps should be at least 10." - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - - data_config = tf.ConfigProto() - data_config.intra_op_parallelism_threads = args.data_num_intra_threads - data_config.inter_op_parallelism_threads = args.data_num_inter_threads - data_config.use_per_session_threads = 1 - - infer_config = tf.ConfigProto() - infer_config.intra_op_parallelism_threads = num_intra_threads - infer_config.inter_op_parallelism_threads = num_inter_threads - infer_config.use_per_session_threads = 1 - - data_graph = tf.Graph() - with data_graph.as_default(): - if args.data_location: - print("inference with real data") - # get the images from dataset - dataset = datasets.ImagenetData(args.data_location) - preprocessor = dataset.get_image_preprocessor(benchmark=True)( - input_height, input_width, batch_size, - num_cores=args.num_cores, - resize_method='crop') - images = preprocessor.minibatch(dataset, subset='validation') - else: - # synthetic images - print("inference with dummy data") - input_shape = [batch_size, input_height, input_width, 3] - images = tf.random.uniform( - input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') - - infer_graph = tf.Graph() - with infer_graph.as_default(): - graph_def = tf.GraphDef() - with open(model_file, "rb") as f: - graph_def.ParseFromString(f.read()) - tf.import_graph_def(graph_def, name='') - - input_tensor = infer_graph.get_tensor_by_name(input_layer + ":0") - output_tensor = infer_graph.get_tensor_by_name(output_layer + ":0") - tf.global_variables_initializer() - - data_sess = tf.Session(graph=data_graph, config=data_config) - infer_sess = tf.Session(graph=infer_graph, config=infer_config) - - print("[Running warmup steps...]") - step_total_time = 0 - step_total_images = 0 - - for t in range(warmup_steps): - data_start_time = time.time() - image_data = data_sess.run(images) - data_load_time = time.time() - data_start_time - - start_time = time.time() - infer_sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - - # only count the data loading and processing time for real data - if args.data_location: 
- elapsed_time += data_load_time - - step_total_time += elapsed_time - step_total_images += batch_size - - if ((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, step_total_images / step_total_time)) - step_total_time = 0 - step_total_images = 0 - - print("[Running benchmark steps...]") - total_time = 0 - total_images = 0 - - step_total_time = 0 - step_total_images = 0 - - for t in range(steps): - try: - data_start_time = time.time() - image_data = data_sess.run(images) - data_load_time = time.time() - data_start_time - - start_time = time.time() - infer_sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - - # only count the data loading and processing time for real data - if args.data_location: - elapsed_time += data_load_time - - total_time += elapsed_time - total_images += batch_size - - step_total_time += elapsed_time - step_total_images += batch_size - - if ((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, step_total_images / step_total_time)) - step_total_time = 0 - step_total_images = 0 - - except tf.errors.OutOfRangeError: - print("Running out of images from dataset.") - break - - print("Average throughput for batch size {0}: {1} images/sec".format(batch_size, total_images / total_time)) + # synthetic images + print("inference with dummy data") + input_shape = [batch_size, input_height, input_width, 3] + images = tf.random.uniform( + input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') + + infer_graph = tf.Graph() + with infer_graph.as_default(): + graph_def = tf.compat.v1.GraphDef() + with open(model_file, "rb") as f: + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + + input_tensor = infer_graph.get_tensor_by_name(input_layer + ":0") + output_tensor = infer_graph.get_tensor_by_name(output_layer + ":0") + tf.compat.v1.global_variables_initializer() + + data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config) + infer_sess = tf.compat.v1.Session(graph=infer_graph, config=infer_config) + + print("[Running warmup steps...]") + step_total_time = 0 + step_total_images = 0 + + for t in range(warmup_steps): + data_start_time = time.time() + image_data = data_sess.run(images) + data_load_time = time.time() - data_start_time + + start_time = time.time() + infer_sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + + # only count the data loading and processing time for real data + if args.data_location: + elapsed_time += data_load_time + + step_total_time += elapsed_time + step_total_images += batch_size + + if ((t + 1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t + 1, step_total_images / step_total_time)) + step_total_time = 0 + step_total_images = 0 + + print("[Running benchmark steps...]") + total_time = 0 + total_images = 0 + + step_total_time = 0 + step_total_images = 0 + + for t in range(steps): + try: + data_start_time = time.time() + image_data = data_sess.run(images) + data_load_time = time.time() - data_start_time + + start_time = time.time() + infer_sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + + # only count the data loading and processing time for real data + if args.data_location: + elapsed_time += data_load_time + + total_time += elapsed_time + total_images += batch_size + + step_total_time += elapsed_time + step_total_images += batch_size + + if ((t + 1) % 10 == 0): + print("steps = {0}, {1} images/sec" + 
"".format(t + 1, step_total_images / step_total_time)) + step_total_time = 0 + step_total_images = 0 + + except tf.errors.OutOfRangeError: + print("Running out of images from dataset.") + break + + print("Average throughput for batch size {0}: {1} images/sec".format(batch_size, total_images / total_time)) diff --git a/models/image_recognition/tensorflow/resnet50/int8/cnn_util.py b/models/image_recognition/tensorflow/resnet50/int8/cnn_util.py index cd1f0d4d1..b5639df6f 100644 --- a/models/image_recognition/tensorflow/resnet50/int8/cnn_util.py +++ b/models/image_recognition/tensorflow/resnet50/int8/cnn_util.py @@ -40,11 +40,12 @@ def tensorflow_version_tuple(): - v = tf.__version__ - major, minor, patch = v.split('.') - return (int(major), int(minor), patch) + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) def tensorflow_version(): - vt = tensorflow_version_tuple() - return vt[0] * 1000 + vt[1] + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] + diff --git a/models/image_recognition/tensorflow/resnet50/int8/datasets.py b/models/image_recognition/tensorflow/resnet50/int8/datasets.py index 123ca3bd9..f0620e884 100644 --- a/models/image_recognition/tensorflow/resnet50/int8/datasets.py +++ b/models/image_recognition/tensorflow/resnet50/int8/datasets.py @@ -43,72 +43,72 @@ IMAGENET_NUM_TRAIN_IMAGES = 1281167 IMAGENET_NUM_VAL_IMAGES = 50000 - class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, height=None, width=None, depth=None, data_dir=None, - queue_runner_required=False, num_classes=1000): - self.name = name - self.height = height - self.width = width - self.depth = depth or 3 + def __init__(self, name, height=None, width=None, depth=None, data_dir=None, + queue_runner_required=False, num_classes=1000): + self.name = name + self.height = height + self.width = width + self.depth = depth or 3 - self.data_dir = data_dir - self._queue_runner_required = queue_runner_required - self._num_classes = num_classes + self.data_dir = data_dir + self._queue_runner_required = queue_runner_required + self._num_classes = num_classes - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.compat.v1.TFRecordReader() - @property - def num_classes(self): - return self._num_classes + @property + def num_classes(self): + return self._num_classes - @num_classes.setter - def num_classes(self, val): - self._num_classes = val + @num_classes.setter + def num_classes(self, val): + self._num_classes = val - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name - def get_image_preprocessor(self): - return None + def get_image_preprocessor(self): + return None - def queue_runner_required(self): - return self._queue_runner_required + def queue_runner_required(self): + return self._queue_runner_required - def use_synthetic_gpu_images(self): - return not self.data_dir + def use_synthetic_gpu_images(self): + return not self.data_dir class ImagenetData(Dataset): - """Configuration for Imagenet dataset.""" - - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('imagenet', 
300, 300, data_dir=data_dir) - - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - elif subset == 'calibrate' or subset == 'calibration': - return 100 - else: - raise ValueError('Invalid data subset "%s"' % subset) - - def get_image_preprocessor(self, benchmark=False): - if benchmark: - import preprocessing_benchmark - return preprocessing_benchmark.RecordInputImagePreprocessor - else: - import preprocessing - return preprocessing.RecordInputImagePreprocessor + """Configuration for Imagenet dataset.""" + + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) + + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + elif subset == 'calibrate' or subset == 'calibration': + return 100 + else: + raise ValueError('Invalid data subset "%s"' % subset) + + def get_image_preprocessor(self, benchmark=False): + if benchmark: + import preprocessing_benchmark + return preprocessing_benchmark.RecordInputImagePreprocessor + else: + import preprocessing + return preprocessing.RecordInputImagePreprocessor + diff --git a/models/image_recognition/tensorflow/resnet50/int8/generate_calibration_data.py b/models/image_recognition/tensorflow/resnet50/int8/generate_calibration_data.py index 0f5b8ff06..21b68c9c9 100644 --- a/models/image_recognition/tensorflow/resnet50/int8/generate_calibration_data.py +++ b/models/image_recognition/tensorflow/resnet50/int8/generate_calibration_data.py @@ -53,133 +53,131 @@ NUM_TEST_IMAGES = 50000 - def load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() - - import os - file_ext = os.path.splitext(model_file)[1] + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') + import os + file_ext = os.path.splitext(model_file)[1] - return graph + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--data_location", default=None, - help="full path to the validation data") - parser.add_argument("--input_height", default=224, - type=int, help="input height") - parser.add_argument("--input_width", default=224, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="predict", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph - else: - sys.exit("Please provide a graph file.") - if args.input_height: - input_height = 
args.input_height - else: - input_height = 224 - if args.input_width: - input_width = args.input_width - else: - input_width = 224 - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - data_location = args.data_location - dataset = datasets.ImagenetData(data_location) - preprocessor = preprocessing.ImagePreprocessor( - input_height, input_width, batch_size, - 1, # device count - tf.float32, # data_type for input fed to the graph - train=False, # doing inference - resize_method='crop') - images, labels, tf_records = preprocessor.minibatch(dataset, subset='train') - graph = load_graph(model_file) - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") - - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - - total_accuracy1, total_accuracy5 = (0.0, 0.0) - num_processed_images = 0 - num_remaining_images = dataset.num_examples_per_epoch(subset='train') \ - - num_processed_images - - CALIBRATION_POOL_SIZE = 1000 - CALIBRATION_SET_SIZE = 100 - calibration_pool = [] - ImageWithConfidence = namedtuple('ImageWithConfidence', - ['tf_record', 'confidence']) - current_pool_size = 0 - with tf.Session() as sess: - sess_graph = tf.Session(graph=graph, config=config) - while num_remaining_images >= batch_size: - # Reads and preprocess data - np_images, np_labels, serialized_images = sess.run( - [images[0], labels[0], tf_records]) - num_processed_images += batch_size - num_remaining_images -= batch_size - # Compute inference on the preprocessed data - predictions = sess_graph.run(output_tensor, - {input_tensor: np_images}) - selected_img_indices = np.where( - predictions.argmax(axis=1) == np_labels)[0].tolist() - current_pool_size += len(selected_img_indices) - for indx in selected_img_indices: - calibration_pool.append(ImageWithConfidence( - serialized_images[indx], predictions[indx].max())) - - accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) - np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) - if current_pool_size >= CALIBRATION_POOL_SIZE: - break - - writer = tf.python_io.TFRecordWriter('calibration-1-of-1') - calibration_pool = sorted(calibration_pool, - key=attrgetter('confidence'), reverse=True) - for i in range(CALIBRATION_SET_SIZE): - writer.write(calibration_pool[i].tf_record) - writer.close() + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="predict", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 224 + if args.input_width: + input_width = args.input_width + else: + input_width = 224 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + dataset = datasets.ImagenetData(data_location) + preprocessor = preprocessing.ImagePreprocessor( + input_height, input_width, batch_size, + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference + resize_method='crop') + images, labels, tf_records = preprocessor.minibatch(dataset, subset='train') + graph = load_graph(model_file) + input_tensor = graph.get_tensor_by_name(input_layer + ":0") + output_tensor = graph.get_tensor_by_name(output_layer + ":0") + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset='train') \ + - num_processed_images + + CALIBRATION_POOL_SIZE = 1000 + CALIBRATION_SET_SIZE = 100 + calibration_pool = [] + ImageWithConfidence = namedtuple('ImageWithConfidence', + ['tf_record', 'confidence']) + current_pool_size = 0 + with tf.compat.v1.Session() as sess: + sess_graph = tf.compat.v1.Session(graph=graph, config=config) + while num_remaining_images >= batch_size: + # Reads and preprocess data + np_images, np_labels, serialized_images = sess.run( + [images[0], labels[0], tf_records]) + num_processed_images += batch_size + num_remaining_images -= batch_size + # Compute inference on the preprocessed data + predictions = sess_graph.run(output_tensor, + {input_tensor: np_images}) + selected_img_indices = np.where( + predictions.argmax(axis=1) == np_labels)[0].tolist() + current_pool_size += len(selected_img_indices) + for indx in 
selected_img_indices: + calibration_pool.append(ImageWithConfidence( + serialized_images[indx], predictions[indx].max())) + + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) + np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1/num_processed_images, + total_accuracy5/num_processed_images)) + if current_pool_size >= CALIBRATION_POOL_SIZE: + break + + writer = tf.io.TFRecordWriter('calibration-1-of-1') + calibration_pool = sorted(calibration_pool, + key=attrgetter('confidence'), reverse=True) + for i in range(CALIBRATION_SET_SIZE): + writer.write(calibration_pool[i].tf_record) + writer.close() diff --git a/models/image_recognition/tensorflow/resnet50/int8/preprocessing.py b/models/image_recognition/tensorflow/resnet50/int8/preprocessing.py index 82a0ebb9a..0bbf8487a 100644 --- a/models/image_recognition/tensorflow/resnet50/int8/preprocessing.py +++ b/models/image_recognition/tensorflow/resnet50/int8/preprocessing.py @@ -43,379 +43,376 @@ from tensorflow.python.ops import data_flow_ops import cnn_util - def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - - The output of the build_image_data.py image preprocessing script is a dataset - containing serialized Example protocol buffers. Each Example proto contains - the following fields: - - image/height: 462 - image/width: 581 - image/colorspace: 'RGB' - image/channels: 3 - image/class/label: 615 - image/class/synset: 'n03623198' - image/class/text: 'knee pad' - image/object/bbox/xmin: 0.1 - image/object/bbox/xmax: 0.9 - image/object/bbox/ymin: 0.2 - image/object/bbox/ymax: 0.6 - image/object/bbox/label: 615 - image/format: 'JPEG' - image/filename: 'ILSVRC2012_val_00041207.JPEG' - image/encoded: - - Args: - example_serialized: scalar Tensor tf.string containing a serialized - Example protocol buffer. - - Returns: - image_buffer: Tensor tf.string containing the contents of a JPEG file. - label: Tensor tf.int32 containing the label. - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. - text: Tensor tf.string containing the human-readable label. - """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. 
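The calibration set is built by keeping only the images the FP32 graph classifies correctly, ranking them by prediction confidence, and writing the most confident records out as a TFRecord shard. A self-contained sketch of that selection step, assuming the batch predictions, labels, and serialized records are already in hand (names are illustrative):

from collections import namedtuple
from operator import attrgetter
import numpy as np
import tensorflow as tf

ImageWithConfidence = namedtuple('ImageWithConfidence', ['tf_record', 'confidence'])

def select_calibration_records(predictions, labels, serialized_records, set_size=100):
    # Keep only correctly classified examples, tagged with their confidence.
    correct = np.where(predictions.argmax(axis=1) == labels)[0]
    pool = [ImageWithConfidence(serialized_records[i], predictions[i].max())
            for i in correct]
    # Most confident examples first, then write the top `set_size` records.
    pool = sorted(pool, key=attrgetter('confidence'), reverse=True)
    with tf.io.TFRecordWriter('calibration-1-of-1') as writer:
        for item in pool[:set_size]:
            writer.write(item.tf_record)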
- feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) - - # Note that we impose an ordering of (y, x) just to make life difficult. - bbox = tf.concat([ymin, xmin, ymax, xmax], 0) - - # Force the variable number of bounding boxes into the shape - # [1, num_boxes, coords]. - bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) - - return features['image/encoded'], label, bbox, features['image/class/text'] + """Parses an Example proto containing a training example of an image. + + The output of the build_image_data.py image preprocessing script is a dataset + containing serialized Example protocol buffers. Each Example proto contains + the following fields: + + image/height: 462 + image/width: 581 + image/colorspace: 'RGB' + image/channels: 3 + image/class/label: 615 + image/class/synset: 'n03623198' + image/class/text: 'knee pad' + image/object/bbox/xmin: 0.1 + image/object/bbox/xmax: 0.9 + image/object/bbox/ymin: 0.2 + image/object/bbox/ymax: 0.6 + image/object/bbox/label: 615 + image/format: 'JPEG' + image/filename: 'ILSVRC2012_val_00041207.JPEG' + image/encoded: + + Args: + example_serialized: scalar Tensor tf.string containing a serialized + Example protocol buffer. + + Returns: + image_buffer: Tensor tf.string containing the contents of a JPEG file. + label: Tensor tf.int32 containing the label. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + text: Tensor tf.string containing the human-readable label. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. 
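The parse_example_proto changes here are the TF2-style keyword form of the same parsing: tf.io.FixedLenFeature/VarLenFeature plus tf.io.parse_single_example(serialized=..., features=...). A reduced sketch covering only the image and label fields:

import tensorflow as tf

def parse_example(serialized):
    feature_map = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string, default_value=''),
        'image/class/label': tf.io.FixedLenFeature([1], tf.int64, default_value=-1),
    }
    features = tf.io.parse_single_example(serialized=serialized, features=feature_map)
    label = tf.cast(features['image/class/label'], tf.int32)
    return features['image/encoded'], label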
+ bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(a=bbox, perm=[0, 2, 1]) + + return features['image/encoded'], label, bbox, features['image/class/text'] def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): - """Decode a JPEG string into one 3-D float image Tensor. - - Args: - image_buffer: scalar string Tensor. - scope: Optional scope for op_scope. - Returns: - 3-D float Tensor with values ranging from [0, 1). - """ - # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): - # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): - with tf.name_scope(scope or 'decode_jpeg'): - # Decode the string as an RGB JPEG. - # Note that the resulting image contains an unknown height and width - # that is set dynamically by decode_jpeg. In other words, the height - # and width of image is unknown at compile-time. - image = tf.image.decode_jpeg(image_buffer, channels=3, - fancy_upscaling=False, - dct_method='INTEGER_FAST') - - # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') - - return image + """Decode a JPEG string into one 3-D float image Tensor. + + Args: + image_buffer: scalar string Tensor. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor with values ranging from [0, 1). + """ + # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): + # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): + with tf.compat.v1.name_scope(scope or 'decode_jpeg'): + # Decode the string as an RGB JPEG. + # Note that the resulting image contains an unknown height and width + # that is set dynamically by decode_jpeg. In other words, the height + # and width of image is unknown at compile-time. + image = tf.image.decode_jpeg(image_buffer, channels=3, + fancy_upscaling=False, + dct_method='INTEGER_FAST') + + # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') + + return image def eval_image(image, height, width, bbox, thread_id, resize): - """Get the image for model evaluation.""" - with tf.name_scope('eval_image'): - if not thread_id: - tf.summary.image( - 'original_image', tf.expand_dims(image, 0)) - - if resize == 'crop': - # Note: This is much slower than crop_to_bounding_box - # It seems that the redundant pad step has huge overhead - # distorted_image = tf.image.resize_image_with_crop_or_pad(image, - # height, width) - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, tf.convert_to_tensor( - [256, 256 * shape[1] / shape[0]], dtype=tf.int32)), - lambda: tf.image.resize_images(image, tf.convert_to_tensor([256 * shape[0] / shape[1], 256], dtype=tf.int32))) - shape = tf.shape(image) - - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - # y0=tf.random_uniform([],minval=0,maxval=(shape[0] - height + 1), dtype=tf.int32) - # x0=tf.random_uniform([],minval=0,maxval=(shape[1] - width + 1), dtype=tf.int32) - # distorted_image = tf.slice(image, [y0,x0,0], [height,width,3]) - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, - width) - else: - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=0.5, - aspect_ratio_range=[0.90, 1.10], - area_range=[0.10, 1.0], - max_attempts=100, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, _ = sample_distorted_bounding_box - # Crop the image to the specified bounding box. 
- distorted_image = tf.slice(image, bbox_begin, bbox_size) - resize_method = { - 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, - 'bilinear': tf.image.ResizeMethod.BILINEAR, - 'bicubic': tf.image.ResizeMethod.BICUBIC, - 'area': tf.image.ResizeMethod.AREA - }[resize] - # This resizing operation may distort the images because the aspect - # ratio is not respected. - if cnn_util.tensorflow_version() >= 11: - distorted_image = tf.image.resize_images( - distorted_image, [height, width], - resize_method, - align_corners=False) - else: - distorted_image = tf.image.resize_images( - distorted_image, height, width, resize_method, align_corners=False) - distorted_image.set_shape([height, width, 3]) - if not thread_id: - tf.summary.image( - 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) - image = distorted_image - return image + """Get the image for model evaluation.""" + with tf.compat.v1.name_scope('eval_image'): + if not thread_id: + tf.compat.v1.summary.image( + 'original_image', tf.expand_dims(image, 0)) + + if resize == 'crop': + # Note: This is much slower than crop_to_bounding_box + # It seems that the redundant pad step has huge overhead + # distorted_image = tf.image.resize_image_with_crop_or_pad(image, + # height, width) + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, tf.convert_to_tensor(value=[256, 256*shape[1]/shape[0]], dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, tf.convert_to_tensor(value=[256*shape[0]/shape[1], 256], dtype=tf.int32))) + shape = tf.shape(input=image) + + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + #y0=tf.random_uniform([],minval=0,maxval=(shape[0] - height + 1), dtype=tf.int32) + #x0=tf.random_uniform([],minval=0,maxval=(shape[1] - width + 1), dtype=tf.int32) + ## distorted_image = tf.slice(image, [y0,x0,0], [height,width,3]) + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, + width) + else: + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + image_size=tf.shape(input=image), + bounding_boxes=bbox, + min_object_covered=0.5, + aspect_ratio_range=[0.90, 1.10], + area_range=[0.10, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, _ = sample_distorted_bounding_box + # Crop the image to the specified bounding box. + distorted_image = tf.slice(image, bbox_begin, bbox_size) + resize_method = { + 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, + 'bilinear': tf.image.ResizeMethod.BILINEAR, + 'bicubic': tf.image.ResizeMethod.BICUBIC, + 'area': tf.image.ResizeMethod.AREA + }[resize] + # This resizing operation may distort the images because the aspect + # ratio is not respected. + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize( + distorted_image, [height, width], + resize_method) + else: + distorted_image = tf.image.resize( + distorted_image, height, width, resize_method) + distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.compat.v1.summary.image( + 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) + image = distorted_image + return image def distort_image(image, height, width, bbox, thread_id=0, scope=None): - """Distort one image for training a network. - - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not effect the label. 
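In the 'crop' path, eval_image resizes the shorter image side to 256 while preserving the aspect ratio, then takes a centered height x width crop. A compact TF2-style sketch of the same idea (function name is illustrative):

import tensorflow as tf

def central_crop_eval(image, height=224, width=224):
    shape = tf.shape(image)
    h, w = shape[0], shape[1]
    # Scale so the shorter side becomes 256, keeping the aspect ratio.
    new_size = tf.cond(h < w,
                       lambda: tf.stack([256, 256 * w // h]),
                       lambda: tf.stack([256 * h // w, 256]))
    image = tf.image.resize(image, new_size)
    shape = tf.shape(image)
    y0 = (shape[0] - height) // 2
    x0 = (shape[1] - width) // 2
    return tf.image.crop_to_bounding_box(image, y0, x0, height, width)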
- - Args: - image: 3-D float Tensor of image - height: integer - width: integer - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. - thread_id: integer indicating the preprocessing thread. - scope: Optional scope for op_scope. - Returns: - 3-D float Tensor of distorted image used for training. - """ - # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): - # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): - with tf.name_scope(scope or 'distort_image'): - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - - # After this point, all image pixels reside in [0,1) - # until the very end, when they're rescaled to (-1, 1). The various - # adjust_* ops all require this range for dtype float. - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - - # Display the bounding box in the first thread only. - if not thread_id: - image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), - bbox) - tf.summary.image( - 'image_with_bounding_boxes', image_with_box) - - # A large fraction of image datasets contain a human-annotated bounding - # box delineating the region of the image containing the object of interest. - # We choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an allowed - # range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image. - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=0.1, - aspect_ratio_range=[0.99, 1.01], - area_range=[0.05, 1.0], - max_attempts=100, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - if not thread_id: - image_with_distorted_box = tf.image.draw_bounding_boxes( - tf.expand_dims(image, 0), distort_bbox) - tf.summary.image( - 'images_with_distorted_bounding_box', - image_with_distorted_box) - - # Crop the image to the specified bounding box. - distorted_image = tf.slice(image, bbox_begin, bbox_size) - - # This resizing operation may distort the images because the aspect - # ratio is not respected. We select a resize method in a round robin - # fashion based on the thread number. - # Note that ResizeMethod contains 4 enumerated resizing methods. - resize_method = thread_id % 4 - if cnn_util.tensorflow_version() >= 11: - distorted_image = tf.image.resize_images( - distorted_image, [height, width], resize_method, align_corners=False) - else: - distorted_image = tf.image.resize_images( - distorted_image, height, width, resize_method, align_corners=False) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. - distorted_image.set_shape([height, width, 3]) - if not thread_id: - tf.summary.image( - 'cropped_resized_image', - tf.expand_dims(distorted_image, 0)) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - - # Randomly distort the colors. 
- distorted_image = distort_color(distorted_image, thread_id) - - # Note: This ensures the scaling matches the output of eval_image - distorted_image *= 256 - - if not thread_id: - tf.summary.image( - 'final_distorted_image', - tf.expand_dims(distorted_image, 0)) - return distorted_image + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. + + Args: + image: 3-D float Tensor of image + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + thread_id: integer indicating the preprocessing thread. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor of distorted image used for training. + """ + # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): + # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + with tf.compat.v1.name_scope(scope or 'distort_image'): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # After this point, all image pixels reside in [0,1) + # until the very end, when they're rescaled to (-1, 1). The various + # adjust_* ops all require this range for dtype float. + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + # Display the bounding box in the first thread only. + if not thread_id: + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + tf.compat.v1.summary.image( + 'image_with_bounding_boxes', image_with_box) + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an allowed + # range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + image_size=tf.shape(input=image), + bounding_boxes=bbox, + min_object_covered=0.1, + aspect_ratio_range=[0.99, 1.01], + area_range=[0.05, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + if not thread_id: + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distort_bbox) + tf.compat.v1.summary.image( + 'images_with_distorted_bounding_box', + image_with_distorted_box) + + # Crop the image to the specified bounding box. + distorted_image = tf.slice(image, bbox_begin, bbox_size) + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + resize_method = thread_id % 4 + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize( + distorted_image, [height, width], resize_method) + else: + distorted_image = tf.image.resize( + distorted_image, height, width, resize_method) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. 
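For training, distort_image samples a random bounding box (constrained by coverage, aspect ratio, and area), crops to it, resizes, and randomly flips the result. A condensed sketch of that augmentation, assuming a float image in [0, 1) and a bbox tensor shaped [1, num_boxes, 4]:

import tensorflow as tf

def random_bbox_crop(image, bbox, height=224, width=224):
    begin, size, _ = tf.image.sample_distorted_bounding_box(
        image_size=tf.shape(image),
        bounding_boxes=bbox,
        min_object_covered=0.1,
        aspect_ratio_range=[0.99, 1.01],
        area_range=[0.05, 1.0],
        max_attempts=100,
        use_image_if_no_bounding_boxes=True)
    crop = tf.slice(image, begin, size)           # crop to the sampled box
    crop = tf.image.resize(crop, [height, width]) # may distort aspect ratio
    crop.set_shape([height, width, 3])
    return tf.image.random_flip_left_right(crop)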
+ distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.compat.v1.summary.image( + 'cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + + # Randomly distort the colors. + distorted_image = distort_color(distorted_image, thread_id) + + # Note: This ensures the scaling matches the output of eval_image + distorted_image *= 256 + + if not thread_id: + tf.compat.v1.summary.image( + 'final_distorted_image', + tf.expand_dims(distorted_image, 0)) + return distorted_image def distort_color(image, thread_id=0, scope=None): - """Distort the color of the image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather then adding that level of complication, we select a distinct ordering - of color ops for each preprocessing thread. - - Args: - image: Tensor containing single image. - thread_id: preprocessing thread ID. - scope: Optional scope for op_scope. - Returns: - color-distorted image - """ - # with tf.op_scope([image], scope, 'distort_color'): - # with tf.name_scope(scope, 'distort_color', [image]): - with tf.name_scope(scope or 'distort_color'): - color_ordering = thread_id % 2 - - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - - # The random_* ops do not necessarily clamp. - image = tf.clip_by_value(image, 0.0, 1.0) - return image + """Distort the color of the image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: Tensor containing single image. + thread_id: preprocessing thread ID. + scope: Optional scope for op_scope. + Returns: + color-distorted image + """ + # with tf.op_scope([image], scope, 'distort_color'): + # with tf.name_scope(scope, 'distort_color', [image]): + with tf.compat.v1.name_scope(scope or 'distort_color'): + color_ordering = thread_id % 2 + + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + + # The random_* ops do not necessarily clamp. 
+ image = tf.clip_by_value(image, 0.0, 1.0) + return image class ImagePreprocessor(object): - """Preprocessor for input images.""" - - def __init__(self, - height, - width, - batch_size, - device_count, - dtype=tf.float32, - train=True, - distortions=None, - resize_method=None): - self.height = height - self.width = width - self.batch_size = batch_size - self.device_count = device_count - self.dtype = dtype - self.train = train - self.resize_method = resize_method - if distortions is None: - distortions = False - self.distortions = distortions - if self.batch_size % self.device_count != 0: - raise ValueError( - ('batch_size must be a multiple of device_count: ' - 'batch_size %d, device_count: %d') % - (self.batch_size, self.device_count)) - self.batch_size_per_device = self.batch_size // self.device_count - - def preprocess(self, image_buffer, bbox, thread_id): - """Preprocessing image_buffer using thread_id.""" - # Note: Width and height of image is known only at runtime. - image = tf.image.decode_jpeg(image_buffer, channels=3, - dct_method='INTEGER_FAST') - if self.train and self.distortions: - image = distort_image(image, self.height, self.width, bbox, thread_id) - else: - image = eval_image(image, self.height, self.width, bbox, thread_id, - self.resize_method) - # Note: image is now float32 [height,width,3] with range [0, 255] - - # image = tf.cast(image, tf.uint8) # HACK TESTING - - return image - - def minibatch(self, dataset, subset): - with tf.name_scope('batch_processing'): - images = [[] for i in range(self.device_count)] - labels = [[] for i in range(self.device_count)] - record_input = data_flow_ops.RecordInput( - file_pattern=dataset.tf_record_pattern(subset), - seed=randint(0, 9000), - parallelism=64, - buffer_size=10000, - batch_size=self.batch_size, - name='record_input') - records = record_input.get_yield_op() - records = tf.split(records, self.batch_size, 0) - records = [tf.reshape(record, []) for record in records] - for i in xrange(self.batch_size): - value = records[i] - image_buffer, label_index, bbox, _ = parse_example_proto(value) - image = self.preprocess(image_buffer, bbox, i % 4) - device_index = i % self.device_count - images[device_index].append(image) - labels[device_index].append(label_index) - label_index_batch = [None] * self.device_count - for device_index in xrange(self.device_count): - images[device_index] = tf.parallel_stack(images[device_index]) - label_index_batch[device_index] = tf.concat(labels[device_index], 0) - - # dynamic_pad=True) # HACK TESTING dynamic_pad=True - images[device_index] = tf.cast(images[device_index], self.dtype) - depth = 3 - images[device_index] = tf.reshape( - images[device_index], - shape=[self.batch_size_per_device, self.height, self.width, depth]) - label_index_batch[device_index] = tf.reshape( - label_index_batch[device_index], [self.batch_size_per_device]) - # Display the training images in the visualizer. 
- # tf.summary.image('images', images) - - return images, label_index_batch, records + """Preprocessor for input images.""" + + def __init__(self, + height, + width, + batch_size, + device_count, + dtype=tf.float32, + train=True, + distortions=None, + resize_method=None): + self.height = height + self.width = width + self.batch_size = batch_size + self.device_count = device_count + self.dtype = dtype + self.train = train + self.resize_method = resize_method + if distortions is None: + distortions = False + self.distortions = distortions + if self.batch_size % self.device_count != 0: + raise ValueError( + ('batch_size must be a multiple of device_count: ' + 'batch_size %d, device_count: %d') % + (self.batch_size, self.device_count)) + self.batch_size_per_device = self.batch_size // self.device_count + + def preprocess(self, image_buffer, bbox, thread_id): + """Preprocessing image_buffer using thread_id.""" + # Note: Width and height of image is known only at runtime. + image = tf.image.decode_jpeg(image_buffer, channels=3, + dct_method='INTEGER_FAST') + if self.train and self.distortions: + image = distort_image(image, self.height, self.width, bbox, thread_id) + else: + image = eval_image(image, self.height, self.width, bbox, thread_id, + self.resize_method) + # Note: image is now float32 [height,width,3] with range [0, 255] + + # image = tf.cast(image, tf.uint8) # HACK TESTING + + return image + + def minibatch(self, dataset, subset): + with tf.compat.v1.name_scope('batch_processing'): + images = [[] for i in range(self.device_count)] + labels = [[] for i in range(self.device_count)] + record_input = data_flow_ops.RecordInput( + file_pattern=dataset.tf_record_pattern(subset), + seed=randint(0, 9000), + parallelism=64, + buffer_size=10000, + batch_size=self.batch_size, + name='record_input') + records = record_input.get_yield_op() + records = tf.split(records, self.batch_size, 0) + records = [tf.reshape(record, []) for record in records] + for i in xrange(self.batch_size): + value = records[i] + image_buffer, label_index, bbox, _ = parse_example_proto(value) + image = self.preprocess(image_buffer, bbox, i % 4) + device_index = i % self.device_count + images[device_index].append(image) + labels[device_index].append(label_index) + label_index_batch = [None] * self.device_count + for device_index in xrange(self.device_count): + images[device_index] = tf.parallel_stack(images[device_index]) + label_index_batch[device_index] = tf.concat(labels[device_index], 0) + + # dynamic_pad=True) # HACK TESTING dynamic_pad=True + images[device_index] = tf.cast(images[device_index], self.dtype) + depth = 3 + images[device_index] = tf.reshape( + images[device_index], + shape=[self.batch_size_per_device, self.height, self.width, depth]) + label_index_batch[device_index] = tf.reshape( + label_index_batch[device_index], [self.batch_size_per_device]) + # Display the training images in the visualizer. + # tf.summary.image('images', images) + + return images, label_index_batch, records diff --git a/models/image_recognition/tensorflow/resnet50/int8/preprocessing_benchmark.py b/models/image_recognition/tensorflow/resnet50/int8/preprocessing_benchmark.py index 67cfc40a1..cc22be37e 100644 --- a/models/image_recognition/tensorflow/resnet50/int8/preprocessing_benchmark.py +++ b/models/image_recognition/tensorflow/resnet50/int8/preprocessing_benchmark.py @@ -46,127 +46,127 @@ def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. 
- """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. - feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - return features['image/encoded'], label + """Parses an Example proto containing a training example of an image. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + return features['image/encoded'], label def eval_image(image, height, width, resize_method, central_fraction=0.875, scope=None): - with tf.name_scope('eval_image'): - if resize_method == 'crop': - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256, 256 * shape[1] / shape[0]], - dtype=tf.int32)), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256 * shape[0] / shape[1], 256], - dtype=tf.int32))) - shape = tf.shape(image) - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) - distorted_image.set_shape([height, width, 3]) - return distorted_image - else: # bilinear - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. 
- image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image + with tf.compat.v1.name_scope('eval_image'): + if resize_method == 'crop': + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256, 256 * shape[1] / shape[0]], + dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256 * shape[0] / shape[1], 256], + dtype=tf.int32))) + shape = tf.shape(input=image) + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) + distorted_image.set_shape([height, width, 3]) + return distorted_image + else: # bilinear + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [height, width], + method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image class RecordInputImagePreprocessor(object): - """Preprocessor for images with RecordInput format.""" - - def __init__(self, - height, - width, - batch_size, - num_cores, - resize_method): - - self.height = height - self.width = width - self.batch_size = batch_size - self.num_cores = num_cores - self.resize_method = resize_method - - def parse_and_preprocess(self, value): - # parse - image_buffer, label_index = parse_example_proto(value) - # preprocess - image = tf.image.decode_jpeg( - image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') - image = eval_image(image, self.height, self.width, self.resize_method) - - return (image, label_index) - - def minibatch(self, dataset, subset, cache_data=False): - - with tf.name_scope('batch_processing'): - - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - - ds = ds.apply( - parallel_interleave( - tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, - sloppy=True, - buffer_output_elements=10000, prefetch_input_elements=10000)) - - if cache_data: - ds = ds.take(1).cache().repeat() - - ds = ds.prefetch(buffer_size=10000) - # ds = ds.prefetch(buffer_size=self.batch_size) - - # num of parallel batches not greater than 56 - max_num_parallel_batches = min(56, 2 * self.num_cores) - ds = ds.apply( - map_and_batch( - map_func=self.parse_and_preprocess, - batch_size=self.batch_size, - num_parallel_batches=max_num_parallel_batches, - num_parallel_calls=None)) # this number should be tuned - - ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE) # this number can be tuned - - ds_iterator = ds.make_one_shot_iterator() - images, _ = ds_iterator.get_next() - - return images + """Preprocessor for images with RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + num_cores, + resize_method): + + self.height 
= height + self.width = width + self.batch_size = batch_size + self.num_cores = num_cores + self.resize_method = resize_method + + def parse_and_preprocess(self, value): + # parse + image_buffer, label_index = parse_example_proto(value) + # preprocess + image = tf.image.decode_jpeg( + image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + image = eval_image(image, self.height, self.width, self.resize_method) + + return (image, label_index) + + def minibatch(self, dataset, subset, cache_data=False): + + with tf.compat.v1.name_scope('batch_processing'): + + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + + if cache_data: + ds = ds.take(1).cache().repeat() + + ds = ds.prefetch(buffer_size=10000) + # ds = ds.prefetch(buffer_size=self.batch_size) + + # num of parallel batches not greater than 56 + max_num_parallel_batches = min(56, 2*self.num_cores) + ds = ds.apply( + map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size, + num_parallel_batches=max_num_parallel_batches, + num_parallel_calls=None)) # this number should be tuned + + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) + images, _ = ds_iterator.get_next() + + return images diff --git a/models/image_recognition/tensorflow/resnet50v1_5/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/__init__.py index fd96ab7af..5b3325518 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/__init__.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py index fd96ab7af..5b3325518 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/inference/datasets.py b/models/image_recognition/tensorflow/resnet50v1_5/inference/datasets.py index d1416dccc..36c2b3f99 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/inference/datasets.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/inference/datasets.py @@ -47,51 +47,50 @@ IMAGENET_NUM_VAL_IMAGES = 50000 IMAGENET_NUM_CLASSES = 1000 - class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, data_dir=None): - self.name = name - if data_dir is None: - raise ValueError('Data directory not specified') - self.data_dir = data_dir + def __init__(self, name, data_dir=None): + self.name = name + if data_dir is None: + raise ValueError('Data directory not specified') + self.data_dir = data_dir - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return 
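The benchmark input pipeline interleaves TFRecord shards across cores, fuses parse and batch, and prefetches with AUTOTUNE. A rough equivalent written with plain tf.data operators instead of the contrib-era parallel_interleave/map_and_batch helpers (sizes are illustrative):

import tensorflow as tf

def build_dataset(file_names, parse_fn, batch_size, num_cores):
    ds = tf.data.Dataset.from_tensor_slices(file_names)
    # Read several shards in parallel, a few records at a time from each.
    ds = ds.interleave(tf.data.TFRecordDataset,
                       cycle_length=num_cores,
                       block_length=5,
                       num_parallel_calls=tf.data.experimental.AUTOTUNE)
    ds = ds.prefetch(buffer_size=10000)
    ds = ds.map(parse_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    ds = ds.batch(batch_size)
    return ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)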
tf.compat.v1.TFRecordReader() - @abstractmethod - def num_classes(self): - pass + @abstractmethod + def num_classes(self): + pass - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name class ImagenetData(Dataset): - def __init__(self, data_dir=None): - super(ImagenetData, self).__init__('ImageNet', data_dir) + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('ImageNet', data_dir) - def num_classes(self): - return IMAGENET_NUM_CLASSES + def num_classes(self): + return IMAGENET_NUM_CLASSES - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - elif subset == 'calibrate' or subset == 'calibration': - return 100 - else: - raise ValueError('Invalid data subset "%s"' % subset) + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + elif subset == 'calibrate' or subset == 'calibration': + return 100 + else: + raise ValueError('Invalid data subset "%s"' % subset) - def get_image_preprocessor(self): - return preprocessing.RecordInputImagePreprocessor + def get_image_preprocessor(self): + return preprocessing.RecordInputImagePreprocessor diff --git a/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py b/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py index 7d70ae2a7..be6d943b8 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py @@ -22,253 +22,250 @@ from argparse import ArgumentParser import tensorflow as tf -try: - import tensorflow.tools.graph_transforms as graph_transforms -except: - import tensorflow_core.tools.graph_transforms as graph_transforms +from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference +from tensorflow.python.framework import dtypes import datasets import numpy as np -INPUTS = 'input_tensor:0' -OUTPUTS = 'softmax_tensor:0' -OPTIMIZATION = 'strip_unused_nodes remove_nodes(op=Identity, op=CheckNumerics) fold_constants(ignore_errors=true) fold_batch_norms fold_old_batch_norms' +INPUTS = 'input_tensor' +OUTPUTS = 'softmax_tensor' RESNET_IMAGE_SIZE = 224 class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph""" - - def __init__(self): - - arg_parser = ArgumentParser(description='Parse args') - - arg_parser.add_argument('-b', "--batch-size", - help="Specify the batch size. 
If this " - "parameter is not specified or is -1, the " - "largest ideal batch size for the model will " - "be used.", - dest="batch_size", type=int, default=-1) - - arg_parser.add_argument('-e', "--num-inter-threads", - help='The number of inter-thread.', - dest='num_inter_threads', type=int, default=0) - - arg_parser.add_argument('-a', "--num-intra-threads", - help='The number of intra-thread.', - dest='num_intra_threads', type=int, default=0) - - arg_parser.add_argument('-m', "--model-name", - help='Specify the model name to run benchmark for', - dest='model_name') - - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - - arg_parser.add_argument('-d', "--data-location", - help='Specify the location of the data. ' - 'If this parameter is not specified, ' - 'the benchmark will use random/dummy data.', - dest="data_location", default=None) - - arg_parser.add_argument('-r', "--accuracy-only", - help='For accuracy measurement only.', - dest='accuracy_only', action='store_true') - arg_parser.add_argument('--calibrate', dest='calibrate', - help='Run accuracy with calibration data,' - 'to generate min_max ranges, calibrate=[True/False]', - type=bool, default=False) - arg_parser.add_argument("--results-file-path", - help="File path for the inference results", - dest="results_file_path", default=None) - arg_parser.add_argument("--warmup-steps", type=int, default=10, - help="number of warmup steps") - arg_parser.add_argument("--steps", type=int, default=50, - help="number of steps") - - arg_parser.add_argument( - '--data-num-inter-threads', dest='data_num_inter_threads', - help='number threads across operators', - type=int, default=32) - arg_parser.add_argument( - '--data-num-intra-threads', dest='data_num_intra_threads', - help='number threads for data layer operator', - type=int, default=14) - arg_parser.add_argument( - '--num-cores', dest='num_cores', - help='number of cores', - type=int, default=28) - - self.args = arg_parser.parse_args() - # validate the arguements - self.validate_args() - - def write_results_output(self, predictions, filenames, labels): - # If a results_file_path is provided, write the predictions to the file + """Evaluate image classifier with optimized TensorFlow graph""" + + def __init__(self): + + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--num-inter-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--num-intra-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-m', "--model-name", + help='Specify the model name to run benchmark for', + dest='model_name') + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. 
' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + arg_parser.add_argument('--calibrate', dest='calibrate', + help='Run accuracy with calibration data,' + 'to generate min_max ranges, calibrate=[True/False]', + type=bool, default=False) + arg_parser.add_argument("--results-file-path", + help="File path for the inference results", + dest="results_file_path", default=None) + arg_parser.add_argument("--warmup-steps", type=int, default=10, + help="number of warmup steps") + arg_parser.add_argument("--steps", type=int, default=50, + help="number of steps") + + arg_parser.add_argument( + '--data-num-inter-threads', dest='data_num_inter_threads', + help='number threads across operators', + type=int, default=32) + arg_parser.add_argument( + '--data-num-intra-threads', dest='data_num_intra_threads', + help='number threads for data layer operator', + type=int, default=14) + arg_parser.add_argument( + '--num-cores', dest='num_cores', + help='number of cores', + type=int, default=28) + + self.args = arg_parser.parse_args() + # validate the arguements + self.validate_args() + + def write_results_output(self, predictions, filenames, labels): + # If a results_file_path is provided, write the predictions to the file + if self.args.results_file_path: + top_predictions = np.argmax(predictions, 1) + with open(self.args.results_file_path, "a") as fp: + for filename, expected_label, top_prediction in zip(filenames, labels, top_predictions): + fp.write("{},{},{}\n".format(filename, expected_label, top_prediction)) + + def run(self): + """run benchmark with optimized graph""" + + print("Run inference") + + data_config = tf.compat.v1.ConfigProto() + data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads + data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.compat.v1.ConfigProto() + infer_config.intra_op_parallelism_threads = self.args.num_intra_threads + infer_config.inter_op_parallelism_threads = self.args.num_inter_threads + infer_config.use_per_session_threads = 1 + + data_graph = tf.Graph() + with data_graph.as_default(): + if (self.args.data_location): + print("Inference with real data.") + if self.args.calibrate: + subset = 'calibration' + else: + subset = 'validation' + dataset = datasets.ImagenetData(self.args.data_location) + preprocessor = dataset.get_image_preprocessor()( + RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size, + num_cores=self.args.num_cores, + resize_method='crop') + + images, labels, filenames = preprocessor.minibatch(dataset, subset=subset) + + # If a results file path is provided, then start the prediction output file if self.args.results_file_path: - top_predictions = np.argmax(predictions, 1) - with open(self.args.results_file_path, "a") as fp: - for filename, expected_label, top_prediction in zip(filenames, labels, top_predictions): - fp.write("{},{},{}\n".format(filename, expected_label, top_prediction)) - - def run(self): - """run benchmark with optimized graph""" - - print("Run inference") - - data_config = tf.ConfigProto() - data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads - data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads - data_config.use_per_session_threads = 1 - - infer_config = 
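When no --data-location is given, the benchmark feeds synthetic inputs instead of real ImageNet data; the dummy-data path boils down to one op (batch size illustrative):

import tensorflow as tf

batch_size = 32  # illustrative
images = tf.random.uniform([batch_size, 224, 224, 3], 0.0, 255.0,
                           dtype=tf.float32, name='synthetic_images')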
tf.ConfigProto() - infer_config.intra_op_parallelism_threads = self.args.num_intra_threads - infer_config.inter_op_parallelism_threads = self.args.num_inter_threads - infer_config.use_per_session_threads = 1 - - data_graph = tf.Graph() - with data_graph.as_default(): - if (self.args.data_location): - print("Inference with real data.") - if self.args.calibrate: - subset = 'calibration' - else: - subset = 'validation' - dataset = datasets.ImagenetData(self.args.data_location) - preprocessor = dataset.get_image_preprocessor()( - RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size, - num_cores=self.args.num_cores, - resize_method='crop') - - images, labels, filenames = preprocessor.minibatch(dataset, subset=subset) - - # If a results file path is provided, then start the prediction output file - if self.args.results_file_path: - with open(self.args.results_file_path, "w+") as fp: - fp.write("filename,actual,prediction\n") - else: - print("Inference with dummy data.") - input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3] - images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') - - infer_graph = tf.Graph() - with infer_graph.as_default(): - graph_def = tf.GraphDef() - with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: - input_graph_content = input_file.read() - graph_def.ParseFromString(input_graph_content) - - output_graph = graph_transforms.TransformGraph(graph_def, - [INPUTS], [OUTPUTS], [OPTIMIZATION]) - tf.import_graph_def(output_graph, name='') - - # Definite input and output Tensors for detection_graph - input_tensor = infer_graph.get_tensor_by_name('input_tensor:0') - output_tensor = infer_graph.get_tensor_by_name('softmax_tensor:0') - - data_sess = tf.Session(graph=data_graph, config=data_config) - infer_sess = tf.Session(graph=infer_graph, config=infer_config) - - num_processed_images = 0 - num_remaining_images = dataset.num_examples_per_epoch(subset=subset) - num_processed_images \ - if self.args.data_location else datasets.IMAGENET_NUM_VAL_IMAGES - - if (not self.args.accuracy_only): - iteration = 0 - warm_up_iteration = self.args.warmup_steps - total_run = self.args.steps - total_time = 0 - - while num_remaining_images >= self.args.batch_size and iteration < total_run: - iteration += 1 - tf_filenames = None - np_labels = None - data_load_start = time.time() - if self.args.results_file_path: - image_np, np_labels, tf_filenames = data_sess.run([images, labels, filenames]) - else: - image_np = data_sess.run(images) - - data_load_time = time.time() - data_load_start - - num_processed_images += self.args.batch_size - num_remaining_images -= self.args.batch_size - - start_time = time.time() - predictions = infer_sess.run(output_tensor, feed_dict={input_tensor: image_np}) - time_consume = time.time() - start_time - - # Write out the file name, expected label, and top prediction - self.write_results_output(predictions, tf_filenames, np_labels) - - # only add data loading time for real data, not for dummy data - if self.args.data_location: - time_consume += data_load_time - - print('Iteration %d: %.6f sec' % (iteration, time_consume)) - if iteration > warm_up_iteration: - total_time += time_consume - - time_average = total_time / (iteration - warm_up_iteration) - print('Average time: %.6f sec' % (time_average)) - - print('Batch size = %d' % self.args.batch_size) - if (self.args.batch_size == 1): - print('Latency: %.3f ms' % (time_average * 1000)) - # print throughput for both batch size 1 and 128 - 
print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) - - else: # accuracy check - total_accuracy1, total_accuracy5 = (0.0, 0.0) - - while num_remaining_images >= self.args.batch_size: - # Reads and preprocess data - tf_filenames = None - if self.args.results_file_path: - np_images, np_labels, tf_filenames = data_sess.run([images, labels, filenames]) - else: - np_images, np_labels = data_sess.run([images, labels]) - num_processed_images += self.args.batch_size - num_remaining_images -= self.args.batch_size - - start_time = time.time() - # Compute inference on the preprocessed data - predictions = infer_sess.run(output_tensor, - {input_tensor: np_images}) - elapsed_time = time.time() - start_time - - # Write out the file name, expected label, and top prediction - self.write_results_output(predictions, tf_filenames, np_labels) - - with tf.Graph().as_default() as accu_graph: - accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) - with tf.Session() as accu_sess: - np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5]) - - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - - print("Iteration time: %0.4f ms" % elapsed_time) - print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) - - def validate_args(self): - """validate the arguments""" - - if not self.args.data_location: - if self.args.accuracy_only: - raise ValueError("You must use real data for accuracy measurement.") + with open(self.args.results_file_path, "w+") as fp: + fp.write("filename,actual,prediction\n") + else: + print("Inference with dummy data.") + input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3] + images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') + + infer_graph = tf.Graph() + with infer_graph.as_default(): + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + + output_graph = optimize_for_inference(graph_def, [INPUTS], + [OUTPUTS], dtypes.float32.as_datatype_enum, False) + tf.import_graph_def(output_graph, name='') + + # Definite input and output Tensors for detection_graph + input_tensor = infer_graph.get_tensor_by_name('input_tensor:0') + output_tensor = infer_graph.get_tensor_by_name('softmax_tensor:0') + + data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config) + infer_sess = tf.compat.v1.Session(graph=infer_graph, config=infer_config) + + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset=subset) - num_processed_images \ + if self.args.data_location else datasets.IMAGENET_NUM_VAL_IMAGES + + if (not self.args.accuracy_only): + iteration = 0 + warm_up_iteration = self.args.warmup_steps + total_run = self.args.steps + total_time = 0 + + while num_remaining_images >= self.args.batch_size and iteration < total_run: + iteration += 1 + tf_filenames = None + np_labels = None + data_load_start = time.time() + if self.args.results_file_path: + image_np, np_labels, tf_filenames = data_sess.run([images, labels, filenames]) + else: + image_np = data_sess.run(images) + + data_load_time = 
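The inference script drops the graph_transforms TransformGraph call and instead runs the frozen GraphDef through optimize_for_inference, which takes node names rather than tensor names (hence INPUTS/OUTPUTS losing their ':0' suffix). A sketch of that load-and-optimize step, assuming a frozen graph file path:

import tensorflow as tf
from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
from tensorflow.python.framework import dtypes

def load_optimized_graph(graph_path,
                         inputs=['input_tensor'], outputs=['softmax_tensor']):
    graph_def = tf.compat.v1.GraphDef()
    with tf.io.gfile.GFile(graph_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    # Fold batch norms, strip training-only nodes, etc.
    graph_def = optimize_for_inference(graph_def, inputs, outputs,
                                       dtypes.float32.as_datatype_enum, False)
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    return graph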
time.time() - data_load_start + + num_processed_images += self.args.batch_size + num_remaining_images -= self.args.batch_size + + start_time = time.time() + predictions = infer_sess.run(output_tensor, feed_dict={input_tensor: image_np}) + time_consume = time.time() - start_time + + # Write out the file name, expected label, and top prediction + self.write_results_output(predictions, tf_filenames, np_labels) + + # only add data loading time for real data, not for dummy data + if self.args.data_location: + time_consume += data_load_time + + print('Iteration %d: %.6f sec' % (iteration, time_consume)) + if iteration > warm_up_iteration: + total_time += time_consume + + time_average = total_time / (iteration - warm_up_iteration) + print('Average time: %.6f sec' % (time_average)) + + print('Batch size = %d' % self.args.batch_size) + if (self.args.batch_size == 1): + print('Latency: %.3f ms' % (time_average * 1000)) + # print throughput for both batch size 1 and 128 + print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) + + else: # accuracy check + total_accuracy1, total_accuracy5 = (0.0, 0.0) + + while num_remaining_images >= self.args.batch_size: + # Reads and preprocess data + tf_filenames = None + if self.args.results_file_path: + np_images, np_labels, tf_filenames = data_sess.run([images, labels, filenames]) + else: + np_images, np_labels = data_sess.run([images, labels]) + num_processed_images += self.args.batch_size + num_remaining_images -= self.args.batch_size + + start_time = time.time() + # Compute inference on the preprocessed data + predictions = infer_sess.run(output_tensor, + {input_tensor: np_images}) + elapsed_time = time.time() - start_time + + # Write out the file name, expected label, and top prediction + self.write_results_output(predictions, tf_filenames, np_labels) + + with tf.Graph().as_default() as accu_graph: + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) + with tf.compat.v1.Session() as accu_sess: + np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5]) + + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + + print("Iteration time: %0.4f ms" % elapsed_time) + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1 / num_processed_images, + total_accuracy5 / num_processed_images)) + + def validate_args(self): + """validate the arguments""" + + if not self.args.data_location: + if self.args.accuracy_only: + raise ValueError("You must use real data for accuracy measurement.") if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/models/image_recognition/tensorflow/resnet50v1_5/inference/preprocessing.py b/models/image_recognition/tensorflow/resnet50v1_5/inference/preprocessing.py index 7f839c45b..61f43d872 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/inference/preprocessing.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/inference/preprocessing.py @@ -16,6 +16,7 @@ # limitations under the License. # +# # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
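Each accuracy batch builds a small graph around tf.nn.in_top_k just to count top-1/top-5 hits; the same count can be done directly in NumPy, which may make the intent clearer (helper name is illustrative):

import numpy as np

def topk_correct(predictions, labels, k):
    # predictions: batch x classes scores, labels: batch of integer class ids.
    topk = np.argsort(predictions, axis=1)[:, -k:]
    return float(sum(label in row for row, label in zip(topk, labels)))

# total_top1 += topk_correct(predictions, np_labels, 1)
# total_top5 += topk_correct(predictions, np_labels, 5)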
# @@ -43,135 +44,134 @@ def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/filename': tf.FixedLenFeature([], dtype=tf.string, - default_value="") - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. - feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - filename = tf.cast(features['image/filename'], dtype=tf.string) - - return features['image/encoded'], label, filename + """Parses an Example proto containing a training example of an image. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/filename': tf.io.FixedLenFeature([], dtype=tf.string, + default_value="") + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + filename = tf.cast(features['image/filename'], dtype=tf.string) + + return features['image/encoded'], label, filename def eval_image(image, height, width, resize_method, central_fraction=0.875, scope=None): - with tf.name_scope('eval_image'): - if resize_method == 'crop': - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256, 256 * shape[1] / shape[0]], - dtype=tf.int32)), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256 * shape[0] / shape[1], 256], - dtype=tf.int32))) - - shape = tf.shape(image) - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) - distorted_image.set_shape([height, width, 3]) - means = tf.broadcast_to([123.68, 116.78, 103.94], tf.shape(distorted_image)) - return distorted_image - means - else: # bilinear - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. 
- image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image - + with tf.compat.v1.name_scope('eval_image'): + if resize_method == 'crop': + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256, 256 * shape[1] / shape[0]], + dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256 * shape[0] / shape[1], 256], + dtype=tf.int32))) + + shape = tf.shape(input=image) + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) + distorted_image.set_shape([height, width, 3]) + means = tf.broadcast_to([123.68, 116.78, 103.94], tf.shape(input=distorted_image)) + return distorted_image - means + else: # bilinear + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [height, width], + method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image class RecordInputImagePreprocessor(object): - """Preprocessor for images with RecordInput format.""" - - def __init__(self, - height, - width, - batch_size, - num_cores, - resize_method="bilinear"): - - self.height = height - self.width = width - self.batch_size = batch_size - self.num_cores = num_cores - self.resize_method = resize_method - - def parse_and_preprocess(self, value): - # parse - image_buffer, label_index, filename = parse_example_proto(value) - # preprocess - image = tf.image.decode_jpeg( - image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') - image = eval_image(image, self.height, self.width, self.resize_method) - return (image, label_index, filename) - - def minibatch(self, dataset, subset, cache_data=False): - - with tf.name_scope('batch_processing'): - - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - - ds = ds.apply( - parallel_interleave( - tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, - sloppy=True, - buffer_output_elements=10000, prefetch_input_elements=10000)) - - if cache_data: - ds = ds.take(1).cache().repeat() - - ds = ds.prefetch(buffer_size=10000) - # ds = ds.prefetch(buffer_size=self.batch_size) - - # num of parallel batches not greater than 56 - max_num_parallel_batches = min(56, 2 * self.num_cores) - ds = ds.apply( - map_and_batch( - map_func=self.parse_and_preprocess, - batch_size=self.batch_size, - num_parallel_batches=max_num_parallel_batches, - num_parallel_calls=None)) - - ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE) - - ds_iterator = ds.make_one_shot_iterator() - images, labels, filename = ds_iterator.get_next() - # reshape - labels = tf.reshape(labels, [self.batch_size]) - filename = tf.reshape(filename, 
[self.batch_size]) - - return images, labels, filename + """Preprocessor for images with RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + num_cores, + resize_method="bilinear"): + + self.height = height + self.width = width + self.batch_size = batch_size + self.num_cores = num_cores + self.resize_method = resize_method + + def parse_and_preprocess(self, value): + # parse + image_buffer, label_index, filename = parse_example_proto(value) + # preprocess + image = tf.image.decode_jpeg( + image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + image = eval_image(image, self.height, self.width, self.resize_method) + return (image, label_index, filename) + + def minibatch(self, dataset, subset, cache_data=False): + + with tf.compat.v1.name_scope('batch_processing'): + + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + + if cache_data: + ds = ds.take(1).cache().repeat() + + ds = ds.prefetch(buffer_size=10000) + #ds = ds.prefetch(buffer_size=self.batch_size) + + # num of parallel batches not greater than 56 + max_num_parallel_batches = min(56, 2 * self.num_cores) + ds = ds.apply( + map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size, + num_parallel_batches=max_num_parallel_batches, + num_parallel_calls=None)) + + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) + images, labels, filename = ds_iterator.get_next() + # reshape + labels = tf.reshape(labels, [self.batch_size]) + filename = tf.reshape(filename, [self.batch_size]) + + return images, labels, filename diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/__init__.py index fd96ab7af..5b3325518 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/int8/__init__.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/benchmark.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/benchmark.py index 66a2b3707..b5938b988 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/int8/benchmark.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/benchmark.py @@ -46,168 +46,168 @@ import tensorflow as tf if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--input_height", default=224, - type=int, help="input height") - parser.add_argument("--input_width", default=224, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--data_location", default=None, - help="dataset location") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="predict", - help="name of output layer") - parser.add_argument("--num_cores", default=28, - type=int, help="number of physical cores") - parser.add_argument( - 
'--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - parser.add_argument( - '--data_num_inter_threads', - help='number threads across data layer operators', - type=int, default=16) - parser.add_argument( - '--data_num_intra_threads', - help='number threads for an data layer operator', - type=int, default=14) - parser.add_argument("--warmup_steps", type=int, default=10, - help="number of warmup steps") - parser.add_argument("--steps", type=int, default=50, help="number of steps") - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--data_location", default=None, + help="dataset location") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="predict", + help="name of output layer") + parser.add_argument("--num_cores", default=28, + type=int, help="number of physical cores") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + parser.add_argument( + '--data_num_inter_threads', + help='number threads across data layer operators', + type=int, default=16) + parser.add_argument( + '--data_num_intra_threads', + help='number threads for an data layer operator', + type=int, default=14) + parser.add_argument("--warmup_steps", type=int, default=10, + help="number of warmup steps") + parser.add_argument("--steps", type=int, default=50, help="number of steps") + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 224 + if args.input_width: + input_width = args.input_width + else: + input_width = 224 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + warmup_steps = args.warmup_steps + steps = args.steps + assert steps > 10, "Benchmark steps should be at least 10." 
+ num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + + data_config = tf.compat.v1.ConfigProto() + data_config.intra_op_parallelism_threads = args.data_num_intra_threads + data_config.inter_op_parallelism_threads = args.data_num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.compat.v1.ConfigProto() + infer_config.intra_op_parallelism_threads = num_intra_threads + infer_config.inter_op_parallelism_threads = num_inter_threads + infer_config.use_per_session_threads = 1 + + data_graph = tf.Graph() + with data_graph.as_default(): + if args.data_location: + print("inference with real data") + # get the images from dataset + dataset = datasets.ImagenetData(args.data_location) + preprocessor = dataset.get_image_preprocessor(benchmark=True)( + input_height, input_width, batch_size, + num_cores=args.num_cores, + resize_method='crop') + images = preprocessor.minibatch(dataset, subset='validation') else: - sys.exit("Please provide a graph file.") - if args.input_height: - input_height = args.input_height - else: - input_height = 224 - if args.input_width: - input_width = args.input_width - else: - input_width = 224 - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - warmup_steps = args.warmup_steps - steps = args.steps - assert steps > 10, "Benchmark steps should be at least 10." - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - - data_config = tf.ConfigProto() - data_config.intra_op_parallelism_threads = args.data_num_intra_threads - data_config.inter_op_parallelism_threads = args.data_num_inter_threads - data_config.use_per_session_threads = 1 - - infer_config = tf.ConfigProto() - infer_config.intra_op_parallelism_threads = num_intra_threads - infer_config.inter_op_parallelism_threads = num_inter_threads - infer_config.use_per_session_threads = 1 - - data_graph = tf.Graph() - with data_graph.as_default(): - if args.data_location: - print("inference with real data") - # get the images from dataset - dataset = datasets.ImagenetData(args.data_location) - preprocessor = dataset.get_image_preprocessor(benchmark=True)( - input_height, input_width, batch_size, - num_cores=args.num_cores, - resize_method='crop') - images = preprocessor.minibatch(dataset, subset='validation') - else: - # synthetic images - print("inference with dummy data") - input_shape = [batch_size, input_height, input_width, 3] - images = tf.random.uniform( - input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') - - infer_graph = tf.Graph() - with infer_graph.as_default(): - graph_def = tf.GraphDef() - with open(model_file, "rb") as f: - graph_def.ParseFromString(f.read()) - tf.import_graph_def(graph_def, name='') - - input_tensor = infer_graph.get_tensor_by_name(input_layer + ":0") - output_tensor = infer_graph.get_tensor_by_name(output_layer + ":0") - tf.global_variables_initializer() - - data_sess = tf.Session(graph=data_graph, config=data_config) - infer_sess = tf.Session(graph=infer_graph, config=infer_config) - - print("[Running warmup steps...]") - step_total_time = 0 - step_total_images = 0 - - for t in range(warmup_steps): - data_start_time = time.time() - image_data = data_sess.run(images) - data_load_time = time.time() - data_start_time - - start_time = time.time() - infer_sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - - # only count the data loading and processing time for real data - if args.data_location: 
- elapsed_time += data_load_time - - step_total_time += elapsed_time - step_total_images += batch_size - - if ((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, step_total_images / step_total_time)) - step_total_time = 0 - step_total_images = 0 - - print("[Running benchmark steps...]") - total_time = 0 - total_images = 0 - - step_total_time = 0 - step_total_images = 0 - - for t in range(steps): - try: - data_start_time = time.time() - image_data = data_sess.run(images) - data_load_time = time.time() - data_start_time - - start_time = time.time() - infer_sess.run(output_tensor, {input_tensor: image_data}) - elapsed_time = time.time() - start_time - - # only count the data loading and processing time for real data - if args.data_location: - elapsed_time += data_load_time - - total_time += elapsed_time - total_images += batch_size - - step_total_time += elapsed_time - step_total_images += batch_size - - if ((t + 1) % 10 == 0): - print("steps = {0}, {1} images/sec" - "".format(t + 1, step_total_images / step_total_time)) - step_total_time = 0 - step_total_images = 0 - - except tf.errors.OutOfRangeError: - print("Running out of images from dataset.") - break - - print("Average throughput for batch size {0}: {1} images/sec".format(batch_size, total_images / total_time)) + # synthetic images + print("inference with dummy data") + input_shape = [batch_size, input_height, input_width, 3] + images = tf.random.uniform( + input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') + + infer_graph = tf.Graph() + with infer_graph.as_default(): + graph_def = tf.compat.v1.GraphDef() + with open(model_file, "rb") as f: + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + + input_tensor = infer_graph.get_tensor_by_name(input_layer + ":0") + output_tensor = infer_graph.get_tensor_by_name(output_layer + ":0") + tf.compat.v1.global_variables_initializer() + + data_sess = tf.compat.v1.Session(graph=data_graph, config=data_config) + infer_sess = tf.compat.v1.Session(graph=infer_graph, config=infer_config) + + print("[Running warmup steps...]") + step_total_time = 0 + step_total_images = 0 + + for t in range(warmup_steps): + data_start_time = time.time() + image_data = data_sess.run(images) + data_load_time = time.time() - data_start_time + + start_time = time.time() + infer_sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + + # only count the data loading and processing time for real data + if args.data_location: + elapsed_time += data_load_time + + step_total_time += elapsed_time + step_total_images += batch_size + + if ((t + 1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t + 1, step_total_images / step_total_time)) + step_total_time = 0 + step_total_images = 0 + + print("[Running benchmark steps...]") + total_time = 0 + total_images = 0 + + step_total_time = 0 + step_total_images = 0 + + for t in range(steps): + try: + data_start_time = time.time() + image_data = data_sess.run(images) + data_load_time = time.time() - data_start_time + + start_time = time.time() + infer_sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + + # only count the data loading and processing time for real data + if args.data_location: + elapsed_time += data_load_time + + total_time += elapsed_time + total_images += batch_size + + step_total_time += elapsed_time + step_total_images += batch_size + + if ((t + 1) % 10 == 0): + print("steps = {0}, {1} images/sec" + 
"".format(t + 1, step_total_images / step_total_time)) + step_total_time = 0 + step_total_images = 0 + + except tf.errors.OutOfRangeError: + print("Running out of images from dataset.") + break + + print("Average throughput for batch size {0}: {1} images/sec".format(batch_size, total_images / total_time)) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/cnn_util.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/cnn_util.py index a74ec447f..6d9762371 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/int8/cnn_util.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/cnn_util.py @@ -40,11 +40,12 @@ def tensorflow_version_tuple(): - v = tf.__version__ - major, minor, patch = v.split('.') - return (int(major), int(minor), patch) + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) def tensorflow_version(): - vt = tensorflow_version_tuple() - return vt[0] * 1000 + vt[1] + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/datasets.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/datasets.py index d62df7763..5112fda37 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/int8/datasets.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/datasets.py @@ -43,72 +43,72 @@ IMAGENET_NUM_TRAIN_IMAGES = 1281167 IMAGENET_NUM_VAL_IMAGES = 50000 - class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" + """Abstract class for cnn benchmarks dataset.""" - def __init__(self, name, height=None, width=None, depth=None, data_dir=None, - queue_runner_required=False, num_classes=1000): - self.name = name - self.height = height - self.width = width - self.depth = depth or 3 + def __init__(self, name, height=None, width=None, depth=None, data_dir=None, + queue_runner_required=False, num_classes=1000): + self.name = name + self.height = height + self.width = width + self.depth = depth or 3 - self.data_dir = data_dir - self._queue_runner_required = queue_runner_required - self._num_classes = num_classes + self.data_dir = data_dir + self._queue_runner_required = queue_runner_required + self._num_classes = num_classes - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) - def reader(self): - return tf.TFRecordReader() + def reader(self): + return tf.compat.v1.TFRecordReader() - @property - def num_classes(self): - return self._num_classes + @property + def num_classes(self): + return self._num_classes - @num_classes.setter - def num_classes(self, val): - self._num_classes = val + @num_classes.setter + def num_classes(self, val): + self._num_classes = val - @abstractmethod - def num_examples_per_epoch(self, subset): - pass + @abstractmethod + def num_examples_per_epoch(self, subset): + pass - def __str__(self): - return self.name + def __str__(self): + return self.name - def get_image_preprocessor(self): - return None + def get_image_preprocessor(self): + return None - def queue_runner_required(self): - return self._queue_runner_required + def queue_runner_required(self): + return self._queue_runner_required - def use_synthetic_gpu_images(self): - return not self.data_dir + def use_synthetic_gpu_images(self): + return not self.data_dir class ImagenetData(Dataset): - """Configuration for Imagenet dataset.""" - - def __init__(self, data_dir=None): - 
super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) - - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - elif subset == 'calibrate' or subset == 'calibration': - return 100 - else: - raise ValueError('Invalid data subset "%s"' % subset) - - def get_image_preprocessor(self, benchmark=False): - if benchmark: - import preprocessing_benchmark - return preprocessing_benchmark.RecordInputImagePreprocessor - else: - import preprocessing - return preprocessing.RecordInputImagePreprocessor + """Configuration for Imagenet dataset.""" + + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) + + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + elif subset == 'calibrate' or subset == 'calibration': + return 100 + else: + raise ValueError('Invalid data subset "%s"' % subset) + + def get_image_preprocessor(self, benchmark=False): + if benchmark: + import preprocessing_benchmark + return preprocessing_benchmark.RecordInputImagePreprocessor + else: + import preprocessing + return preprocessing.RecordInputImagePreprocessor + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/generate_calibration_data.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/generate_calibration_data.py index 734a9d058..d8a9d1534 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/int8/generate_calibration_data.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/generate_calibration_data.py @@ -53,133 +53,131 @@ NUM_TEST_IMAGES = 50000 - def load_graph(model_file): - graph = tf.Graph() - graph_def = tf.GraphDef() - - import os - file_ext = os.path.splitext(model_file)[1] + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - with graph.as_default(): - tf.import_graph_def(graph_def, name='') + import os + file_ext = os.path.splitext(model_file)[1] - return graph + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_graph", default=None, - help="graph/model to be executed") - parser.add_argument("--data_location", default=None, - help="full path to the validation data") - parser.add_argument("--input_height", default=224, - type=int, help="input height") - parser.add_argument("--input_width", default=224, - type=int, help="input width") - parser.add_argument("--batch_size", default=32, - type=int, help="batch size") - parser.add_argument("--input_layer", default="input", - help="name of input layer") - parser.add_argument("--output_layer", default="predict", - help="name of output layer") - parser.add_argument( - '--num_inter_threads', - help='number threads across operators', - type=int, default=1) - parser.add_argument( - '--num_intra_threads', - help='number threads for an operator', - type=int, default=1) - args = parser.parse_args() - - if args.input_graph: - model_file = args.input_graph - else: - sys.exit("Please provide a graph 
file.") - if args.input_height: - input_height = args.input_height - else: - input_height = 224 - if args.input_width: - input_width = args.input_width - else: - input_width = 224 - batch_size = args.batch_size - input_layer = args.input_layer - output_layer = args.output_layer - num_inter_threads = args.num_inter_threads - num_intra_threads = args.num_intra_threads - data_location = args.data_location - dataset = datasets.ImagenetData(data_location) - preprocessor = preprocessing.ImagePreprocessor( - input_height, input_width, batch_size, - 1, # device count - tf.float32, # data_type for input fed to the graph - train=False, # doing inference - resize_method='crop') - images, labels, tf_records = preprocessor.minibatch(dataset, subset='train') - graph = load_graph(model_file) - input_tensor = graph.get_tensor_by_name(input_layer + ":0") - output_tensor = graph.get_tensor_by_name(output_layer + ":0") - - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - - total_accuracy1, total_accuracy5 = (0.0, 0.0) - num_processed_images = 0 - num_remaining_images = dataset.num_examples_per_epoch(subset='train') \ - - num_processed_images - - CALIBRATION_POOL_SIZE = 1000 - CALIBRATION_SET_SIZE = 100 - calibration_pool = [] - ImageWithConfidence = namedtuple('ImageWithConfidence', - ['tf_record', 'confidence']) - current_pool_size = 0 - with tf.Session() as sess: - sess_graph = tf.Session(graph=graph, config=config) - while num_remaining_images >= batch_size: - # Reads and preprocess data - np_images, np_labels, serialized_images = sess.run( - [images[0], labels[0], tf_records]) - num_processed_images += batch_size - num_remaining_images -= batch_size - # Compute inference on the preprocessed data - predictions = sess_graph.run(output_tensor, - {input_tensor: np_images}) - selected_img_indices = np.where( - predictions.argmax(axis=1) == np_labels)[0].tolist() - current_pool_size += len(selected_img_indices) - for indx in selected_img_indices: - calibration_pool.append(ImageWithConfidence( - serialized_images[indx], predictions[indx].max())) - - accuracy1 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 1), tf.float32)) - - accuracy5 = tf.reduce_sum( - tf.cast(tf.nn.in_top_k(tf.constant(predictions), - tf.constant(np_labels), 5), tf.float32)) - np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) - total_accuracy1 += np_accuracy1 - total_accuracy5 += np_accuracy5 - print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" - % (num_processed_images, total_accuracy1 / num_processed_images, - total_accuracy5 / num_processed_images)) - if current_pool_size >= CALIBRATION_POOL_SIZE: - break - - writer = tf.python_io.TFRecordWriter('calibration-1-of-1') - calibration_pool = sorted(calibration_pool, - key=attrgetter('confidence'), reverse=True) - for i in range(CALIBRATION_SET_SIZE): - writer.write(calibration_pool[i].tf_record) - writer.close() + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="predict", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 224 + if args.input_width: + input_width = args.input_width + else: + input_width = 224 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + dataset = datasets.ImagenetData(data_location) + preprocessor = preprocessing.ImagePreprocessor( + input_height, input_width, batch_size, + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference + resize_method='crop') + images, labels, tf_records = preprocessor.minibatch(dataset, subset='train') + graph = load_graph(model_file) + input_tensor = graph.get_tensor_by_name(input_layer + ":0") + output_tensor = graph.get_tensor_by_name(output_layer + ":0") + + config = tf.compat.v1.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset='train') \ + - num_processed_images + + CALIBRATION_POOL_SIZE = 1000 + CALIBRATION_SET_SIZE = 100 + calibration_pool = [] + ImageWithConfidence = namedtuple('ImageWithConfidence', + ['tf_record', 'confidence']) + current_pool_size = 0 + with tf.compat.v1.Session() as sess: + sess_graph = tf.compat.v1.Session(graph=graph, config=config) + while num_remaining_images >= batch_size: + # Reads and preprocess data + np_images, np_labels, serialized_images = sess.run( + [images[0], labels[0], tf_records]) + num_processed_images += batch_size + num_remaining_images -= batch_size + # Compute inference on the preprocessed data + predictions = sess_graph.run(output_tensor, + {input_tensor: np_images}) + selected_img_indices = np.where( + predictions.argmax(axis=1) == np_labels)[0].tolist() + current_pool_size += len(selected_img_indices) + for indx in 
selected_img_indices: + calibration_pool.append(ImageWithConfidence( + serialized_images[indx], predictions[indx].max())) + + accuracy1 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=1), tf.float32)) + + accuracy5 = tf.reduce_sum( + input_tensor=tf.cast(tf.nn.in_top_k(predictions=tf.constant(predictions), + targets=tf.constant(np_labels), k=5), tf.float32)) + np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1/num_processed_images, + total_accuracy5/num_processed_images)) + if current_pool_size >= CALIBRATION_POOL_SIZE: + break + + writer = tf.io.TFRecordWriter('calibration-1-of-1') + calibration_pool = sorted(calibration_pool, + key=attrgetter('confidence'), reverse=True) + for i in range(CALIBRATION_SET_SIZE): + writer.write(calibration_pool[i].tf_record) + writer.close() diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing.py index 9c4cb5b5b..65c91f322 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing.py @@ -43,379 +43,376 @@ from tensorflow.python.ops import data_flow_ops import cnn_util - def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - - The output of the build_image_data.py image preprocessing script is a dataset - containing serialized Example protocol buffers. Each Example proto contains - the following fields: - - image/height: 462 - image/width: 581 - image/colorspace: 'RGB' - image/channels: 3 - image/class/label: 615 - image/class/synset: 'n03623198' - image/class/text: 'knee pad' - image/object/bbox/xmin: 0.1 - image/object/bbox/xmax: 0.9 - image/object/bbox/ymin: 0.2 - image/object/bbox/ymax: 0.6 - image/object/bbox/label: 615 - image/format: 'JPEG' - image/filename: 'ILSVRC2012_val_00041207.JPEG' - image/encoded: - - Args: - example_serialized: scalar Tensor tf.string containing a serialized - Example protocol buffer. - - Returns: - image_buffer: Tensor tf.string containing the contents of a JPEG file. - label: Tensor tf.int32 containing the label. - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. - text: Tensor tf.string containing the human-readable label. - """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. 
- feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) - - # Note that we impose an ordering of (y, x) just to make life difficult. - bbox = tf.concat([ymin, xmin, ymax, xmax], 0) - - # Force the variable number of bounding boxes into the shape - # [1, num_boxes, coords]. - bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) - - return features['image/encoded'], label, bbox, features['image/class/text'] + """Parses an Example proto containing a training example of an image. + + The output of the build_image_data.py image preprocessing script is a dataset + containing serialized Example protocol buffers. Each Example proto contains + the following fields: + + image/height: 462 + image/width: 581 + image/colorspace: 'RGB' + image/channels: 3 + image/class/label: 615 + image/class/synset: 'n03623198' + image/class/text: 'knee pad' + image/object/bbox/xmin: 0.1 + image/object/bbox/xmax: 0.9 + image/object/bbox/ymin: 0.2 + image/object/bbox/ymax: 0.6 + image/object/bbox/label: 615 + image/format: 'JPEG' + image/filename: 'ILSVRC2012_val_00041207.JPEG' + image/encoded: + + Args: + example_serialized: scalar Tensor tf.string containing a serialized + Example protocol buffer. + + Returns: + image_buffer: Tensor tf.string containing the contents of a JPEG file. + label: Tensor tf.int32 containing the label. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + text: Tensor tf.string containing the human-readable label. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. 
+ bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(a=bbox, perm=[0, 2, 1]) + + return features['image/encoded'], label, bbox, features['image/class/text'] def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): - """Decode a JPEG string into one 3-D float image Tensor. - - Args: - image_buffer: scalar string Tensor. - scope: Optional scope for op_scope. - Returns: - 3-D float Tensor with values ranging from [0, 1). - """ - # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): - # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): - with tf.name_scope(scope or 'decode_jpeg'): - # Decode the string as an RGB JPEG. - # Note that the resulting image contains an unknown height and width - # that is set dynamically by decode_jpeg. In other words, the height - # and width of image is unknown at compile-time. - image = tf.image.decode_jpeg(image_buffer, channels=3, - fancy_upscaling=False, - dct_method='INTEGER_FAST') - - # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') - - return image + """Decode a JPEG string into one 3-D float image Tensor. + + Args: + image_buffer: scalar string Tensor. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor with values ranging from [0, 1). + """ + # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): + # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): + with tf.compat.v1.name_scope(scope or 'decode_jpeg'): + # Decode the string as an RGB JPEG. + # Note that the resulting image contains an unknown height and width + # that is set dynamically by decode_jpeg. In other words, the height + # and width of image is unknown at compile-time. + image = tf.image.decode_jpeg(image_buffer, channels=3, + fancy_upscaling=False, + dct_method='INTEGER_FAST') + + # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') + + return image def eval_image(image, height, width, bbox, thread_id, resize): - """Get the image for model evaluation.""" - with tf.name_scope('eval_image'): - if not thread_id: - tf.summary.image( - 'original_image', tf.expand_dims(image, 0)) - - if resize == 'crop': - # Note: This is much slower than crop_to_bounding_box - # It seems that the redundant pad step has huge overhead - # distorted_image = tf.image.resize_image_with_crop_or_pad(image, - # height, width) - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, tf.convert_to_tensor( - [256, 256 * shape[1] / shape[0]], dtype=tf.int32)), - lambda: tf.image.resize_images(image, tf.convert_to_tensor([256 * shape[0] / shape[1], 256], dtype=tf.int32))) - shape = tf.shape(image) - - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - # y0=tf.random_uniform([],minval=0,maxval=(shape[0] - height + 1), dtype=tf.int32) - # x0=tf.random_uniform([],minval=0,maxval=(shape[1] - width + 1), dtype=tf.int32) - # distorted_image = tf.slice(image, [y0,x0,0], [height,width,3]) - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, - width) - else: - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=0.5, - aspect_ratio_range=[0.90, 1.10], - area_range=[0.10, 1.0], - max_attempts=100, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, _ = sample_distorted_bounding_box - # Crop the image to the specified bounding box. 
- distorted_image = tf.slice(image, bbox_begin, bbox_size) - resize_method = { - 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, - 'bilinear': tf.image.ResizeMethod.BILINEAR, - 'bicubic': tf.image.ResizeMethod.BICUBIC, - 'area': tf.image.ResizeMethod.AREA - }[resize] - # This resizing operation may distort the images because the aspect - # ratio is not respected. - if cnn_util.tensorflow_version() >= 11: - distorted_image = tf.image.resize_images( - distorted_image, [height, width], - resize_method, - align_corners=False) - else: - distorted_image = tf.image.resize_images( - distorted_image, height, width, resize_method, align_corners=False) - distorted_image.set_shape([height, width, 3]) - if not thread_id: - tf.summary.image( - 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) - image = distorted_image - return image + """Get the image for model evaluation.""" + with tf.compat.v1.name_scope('eval_image'): + if not thread_id: + tf.compat.v1.summary.image( + 'original_image', tf.expand_dims(image, 0)) + + if resize == 'crop': + # Note: This is much slower than crop_to_bounding_box + # It seems that the redundant pad step has huge overhead + # distorted_image = tf.image.resize_image_with_crop_or_pad(image, + # height, width) + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, tf.convert_to_tensor(value=[256, 256*shape[1]/shape[0]], dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, tf.convert_to_tensor(value=[256*shape[0]/shape[1], 256], dtype=tf.int32))) + shape = tf.shape(input=image) + + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + #y0=tf.random_uniform([],minval=0,maxval=(shape[0] - height + 1), dtype=tf.int32) + #x0=tf.random_uniform([],minval=0,maxval=(shape[1] - width + 1), dtype=tf.int32) + ## distorted_image = tf.slice(image, [y0,x0,0], [height,width,3]) + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, + width) + else: + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + image_size=tf.shape(input=image), + bounding_boxes=bbox, + min_object_covered=0.5, + aspect_ratio_range=[0.90, 1.10], + area_range=[0.10, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, _ = sample_distorted_bounding_box + # Crop the image to the specified bounding box. + distorted_image = tf.slice(image, bbox_begin, bbox_size) + resize_method = { + 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, + 'bilinear': tf.image.ResizeMethod.BILINEAR, + 'bicubic': tf.image.ResizeMethod.BICUBIC, + 'area': tf.image.ResizeMethod.AREA + }[resize] + # This resizing operation may distort the images because the aspect + # ratio is not respected. + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize( + distorted_image, [height, width], + resize_method) + else: + distorted_image = tf.image.resize( + distorted_image, height, width, resize_method) + distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.compat.v1.summary.image( + 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) + image = distorted_image + return image def distort_image(image, height, width, bbox, thread_id=0, scope=None): - """Distort one image for training a network. - - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not effect the label. 
- - Args: - image: 3-D float Tensor of image - height: integer - width: integer - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. - thread_id: integer indicating the preprocessing thread. - scope: Optional scope for op_scope. - Returns: - 3-D float Tensor of distorted image used for training. - """ - # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): - # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): - with tf.name_scope(scope or 'distort_image'): - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - - # After this point, all image pixels reside in [0,1) - # until the very end, when they're rescaled to (-1, 1). The various - # adjust_* ops all require this range for dtype float. - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - - # Display the bounding box in the first thread only. - if not thread_id: - image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), - bbox) - tf.summary.image( - 'image_with_bounding_boxes', image_with_box) - - # A large fraction of image datasets contain a human-annotated bounding - # box delineating the region of the image containing the object of interest. - # We choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an allowed - # range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image. - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=0.1, - aspect_ratio_range=[0.99, 1.01], - area_range=[0.05, 1.0], - max_attempts=100, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - if not thread_id: - image_with_distorted_box = tf.image.draw_bounding_boxes( - tf.expand_dims(image, 0), distort_bbox) - tf.summary.image( - 'images_with_distorted_bounding_box', - image_with_distorted_box) - - # Crop the image to the specified bounding box. - distorted_image = tf.slice(image, bbox_begin, bbox_size) - - # This resizing operation may distort the images because the aspect - # ratio is not respected. We select a resize method in a round robin - # fashion based on the thread number. - # Note that ResizeMethod contains 4 enumerated resizing methods. - resize_method = thread_id % 4 - if cnn_util.tensorflow_version() >= 11: - distorted_image = tf.image.resize_images( - distorted_image, [height, width], resize_method, align_corners=False) - else: - distorted_image = tf.image.resize_images( - distorted_image, height, width, resize_method, align_corners=False) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. - distorted_image.set_shape([height, width, 3]) - if not thread_id: - tf.summary.image( - 'cropped_resized_image', - tf.expand_dims(distorted_image, 0)) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - - # Randomly distort the colors. 
- distorted_image = distort_color(distorted_image, thread_id) - - # Note: This ensures the scaling matches the output of eval_image - distorted_image *= 256 - - if not thread_id: - tf.summary.image( - 'final_distorted_image', - tf.expand_dims(distorted_image, 0)) - return distorted_image + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. + + Args: + image: 3-D float Tensor of image + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + thread_id: integer indicating the preprocessing thread. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor of distorted image used for training. + """ + # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): + # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + with tf.compat.v1.name_scope(scope or 'distort_image'): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # After this point, all image pixels reside in [0,1) + # until the very end, when they're rescaled to (-1, 1). The various + # adjust_* ops all require this range for dtype float. + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + # Display the bounding box in the first thread only. + if not thread_id: + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + tf.compat.v1.summary.image( + 'image_with_bounding_boxes', image_with_box) + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an allowed + # range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + image_size=tf.shape(input=image), + bounding_boxes=bbox, + min_object_covered=0.1, + aspect_ratio_range=[0.99, 1.01], + area_range=[0.05, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + if not thread_id: + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distort_bbox) + tf.compat.v1.summary.image( + 'images_with_distorted_bounding_box', + image_with_distorted_box) + + # Crop the image to the specified bounding box. + distorted_image = tf.slice(image, bbox_begin, bbox_size) + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + resize_method = thread_id % 4 + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize( + distorted_image, [height, width], resize_method) + else: + distorted_image = tf.image.resize( + distorted_image, height, width, resize_method) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. 
+ distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.compat.v1.summary.image( + 'cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + + # Randomly distort the colors. + distorted_image = distort_color(distorted_image, thread_id) + + # Note: This ensures the scaling matches the output of eval_image + distorted_image *= 256 + + if not thread_id: + tf.compat.v1.summary.image( + 'final_distorted_image', + tf.expand_dims(distorted_image, 0)) + return distorted_image def distort_color(image, thread_id=0, scope=None): - """Distort the color of the image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather then adding that level of complication, we select a distinct ordering - of color ops for each preprocessing thread. - - Args: - image: Tensor containing single image. - thread_id: preprocessing thread ID. - scope: Optional scope for op_scope. - Returns: - color-distorted image - """ - # with tf.op_scope([image], scope, 'distort_color'): - # with tf.name_scope(scope, 'distort_color', [image]): - with tf.name_scope(scope or 'distort_color'): - color_ordering = thread_id % 2 - - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - - # The random_* ops do not necessarily clamp. - image = tf.clip_by_value(image, 0.0, 1.0) - return image + """Distort the color of the image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: Tensor containing single image. + thread_id: preprocessing thread ID. + scope: Optional scope for op_scope. + Returns: + color-distorted image + """ + # with tf.op_scope([image], scope, 'distort_color'): + # with tf.name_scope(scope, 'distort_color', [image]): + with tf.compat.v1.name_scope(scope or 'distort_color'): + color_ordering = thread_id % 2 + + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + + # The random_* ops do not necessarily clamp. 
+ image = tf.clip_by_value(image, 0.0, 1.0) + return image class ImagePreprocessor(object): - """Preprocessor for input images.""" - - def __init__(self, - height, - width, - batch_size, - device_count, - dtype=tf.float32, - train=True, - distortions=None, - resize_method=None): - self.height = height - self.width = width - self.batch_size = batch_size - self.device_count = device_count - self.dtype = dtype - self.train = train - self.resize_method = resize_method - if distortions is None: - distortions = False - self.distortions = distortions - if self.batch_size % self.device_count != 0: - raise ValueError( - ('batch_size must be a multiple of device_count: ' - 'batch_size %d, device_count: %d') % - (self.batch_size, self.device_count)) - self.batch_size_per_device = self.batch_size // self.device_count - - def preprocess(self, image_buffer, bbox, thread_id): - """Preprocessing image_buffer using thread_id.""" - # Note: Width and height of image is known only at runtime. - image = tf.image.decode_jpeg(image_buffer, channels=3, - dct_method='INTEGER_FAST') - if self.train and self.distortions: - image = distort_image(image, self.height, self.width, bbox, thread_id) - else: - image = eval_image(image, self.height, self.width, bbox, thread_id, - self.resize_method) - # Note: image is now float32 [height,width,3] with range [0, 255] - - # image = tf.cast(image, tf.uint8) # HACK TESTING - - return image - - def minibatch(self, dataset, subset): - with tf.name_scope('batch_processing'): - images = [[] for i in range(self.device_count)] - labels = [[] for i in range(self.device_count)] - record_input = data_flow_ops.RecordInput( - file_pattern=dataset.tf_record_pattern(subset), - seed=randint(0, 9000), - parallelism=64, - buffer_size=10000, - batch_size=self.batch_size, - name='record_input') - records = record_input.get_yield_op() - records = tf.split(records, self.batch_size, 0) - records = [tf.reshape(record, []) for record in records] - for i in xrange(self.batch_size): - value = records[i] - image_buffer, label_index, bbox, _ = parse_example_proto(value) - image = self.preprocess(image_buffer, bbox, i % 4) - device_index = i % self.device_count - images[device_index].append(image) - labels[device_index].append(label_index) - label_index_batch = [None] * self.device_count - for device_index in xrange(self.device_count): - images[device_index] = tf.parallel_stack(images[device_index]) - label_index_batch[device_index] = tf.concat(labels[device_index], 0) - - # dynamic_pad=True) # HACK TESTING dynamic_pad=True - images[device_index] = tf.cast(images[device_index], self.dtype) - depth = 3 - images[device_index] = tf.reshape( - images[device_index], - shape=[self.batch_size_per_device, self.height, self.width, depth]) - label_index_batch[device_index] = tf.reshape( - label_index_batch[device_index], [self.batch_size_per_device]) - # Display the training images in the visualizer. 
- # tf.summary.image('images', images) - - return images, label_index_batch, records + """Preprocessor for input images.""" + + def __init__(self, + height, + width, + batch_size, + device_count, + dtype=tf.float32, + train=True, + distortions=None, + resize_method=None): + self.height = height + self.width = width + self.batch_size = batch_size + self.device_count = device_count + self.dtype = dtype + self.train = train + self.resize_method = resize_method + if distortions is None: + distortions = False + self.distortions = distortions + if self.batch_size % self.device_count != 0: + raise ValueError( + ('batch_size must be a multiple of device_count: ' + 'batch_size %d, device_count: %d') % + (self.batch_size, self.device_count)) + self.batch_size_per_device = self.batch_size // self.device_count + + def preprocess(self, image_buffer, bbox, thread_id): + """Preprocessing image_buffer using thread_id.""" + # Note: Width and height of image is known only at runtime. + image = tf.image.decode_jpeg(image_buffer, channels=3, + dct_method='INTEGER_FAST') + if self.train and self.distortions: + image = distort_image(image, self.height, self.width, bbox, thread_id) + else: + image = eval_image(image, self.height, self.width, bbox, thread_id, + self.resize_method) + # Note: image is now float32 [height,width,3] with range [0, 255] + + # image = tf.cast(image, tf.uint8) # HACK TESTING + + return image + + def minibatch(self, dataset, subset): + with tf.compat.v1.name_scope('batch_processing'): + images = [[] for i in range(self.device_count)] + labels = [[] for i in range(self.device_count)] + record_input = data_flow_ops.RecordInput( + file_pattern=dataset.tf_record_pattern(subset), + seed=randint(0, 9000), + parallelism=64, + buffer_size=10000, + batch_size=self.batch_size, + name='record_input') + records = record_input.get_yield_op() + records = tf.split(records, self.batch_size, 0) + records = [tf.reshape(record, []) for record in records] + for i in xrange(self.batch_size): + value = records[i] + image_buffer, label_index, bbox, _ = parse_example_proto(value) + image = self.preprocess(image_buffer, bbox, i % 4) + device_index = i % self.device_count + images[device_index].append(image) + labels[device_index].append(label_index) + label_index_batch = [None] * self.device_count + for device_index in xrange(self.device_count): + images[device_index] = tf.parallel_stack(images[device_index]) + label_index_batch[device_index] = tf.concat(labels[device_index], 0) + + # dynamic_pad=True) # HACK TESTING dynamic_pad=True + images[device_index] = tf.cast(images[device_index], self.dtype) + depth = 3 + images[device_index] = tf.reshape( + images[device_index], + shape=[self.batch_size_per_device, self.height, self.width, depth]) + label_index_batch[device_index] = tf.reshape( + label_index_batch[device_index], [self.batch_size_per_device]) + # Display the training images in the visualizer. + # tf.summary.image('images', images) + + return images, label_index_batch, records diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing_benchmark.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing_benchmark.py index ff0e231b4..2f6941648 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing_benchmark.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing_benchmark.py @@ -46,128 +46,128 @@ def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. 
- """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. - feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - return features['image/encoded'], label + """Parses an Example proto containing a training example of an image. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + return features['image/encoded'], label def eval_image(image, height, width, resize_method, central_fraction=0.875, scope=None): - with tf.name_scope('eval_image'): - if resize_method == 'crop': - shape = tf.shape(image) - image = tf.cond(tf.less(shape[0], shape[1]), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256, 256 * shape[1] / shape[0]], - dtype=tf.int32)), - lambda: tf.image.resize_images(image, - tf.convert_to_tensor([256 * shape[0] / shape[1], 256], - dtype=tf.int32))) - shape = tf.shape(image) - y0 = (shape[0] - height) // 2 - x0 = (shape[1] - width) // 2 - distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) - distorted_image.set_shape([height, width, 3]) - means = tf.broadcast_to([123.68, 116.78, 103.94], tf.shape(distorted_image)) - return distorted_image - means - else: # bilinear - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. 
- image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image + with tf.compat.v1.name_scope('eval_image'): + if resize_method == 'crop': + shape = tf.shape(input=image) + image = tf.cond(pred=tf.less(shape[0], shape[1]), + true_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256, 256 * shape[1] / shape[0]], + dtype=tf.int32)), + false_fn=lambda: tf.image.resize(image, + tf.convert_to_tensor(value=[256 * shape[0] / shape[1], 256], + dtype=tf.int32))) + shape = tf.shape(input=image) + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) + distorted_image.set_shape([height, width, 3]) + means = tf.broadcast_to([123.68, 116.78, 103.94], tf.shape(input=distorted_image)) + return distorted_image - means + else: # bilinear + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [height, width], + method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image class RecordInputImagePreprocessor(object): - """Preprocessor for images with RecordInput format.""" - - def __init__(self, - height, - width, - batch_size, - num_cores, - resize_method): - - self.height = height - self.width = width - self.batch_size = batch_size - self.num_cores = num_cores - self.resize_method = resize_method - - def parse_and_preprocess(self, value): - # parse - image_buffer, label_index = parse_example_proto(value) - # preprocess - image = tf.image.decode_jpeg( - image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') - image = eval_image(image, self.height, self.width, self.resize_method) - - return (image, label_index) - - def minibatch(self, dataset, subset, cache_data=False): - - with tf.name_scope('batch_processing'): - - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - - ds = ds.apply( - parallel_interleave( - tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, - sloppy=True, - buffer_output_elements=10000, prefetch_input_elements=10000)) - - if cache_data: - ds = ds.take(1).cache().repeat() - - ds = ds.prefetch(buffer_size=10000) - # ds = ds.prefetch(buffer_size=self.batch_size) - - # num of parallel batches not greater than 56 - max_num_parallel_batches = min(56, 2 * self.num_cores) - ds = ds.apply( - map_and_batch( - map_func=self.parse_and_preprocess, - batch_size=self.batch_size, - num_parallel_batches=max_num_parallel_batches, - num_parallel_calls=None)) # this number should be tuned - - ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE) # this number can be tuned - - ds_iterator = ds.make_one_shot_iterator() - images, _ = ds_iterator.get_next() - - return images + """Preprocessor for images with RecordInput format.""" + + def 
__init__(self, + height, + width, + batch_size, + num_cores, + resize_method): + + self.height = height + self.width = width + self.batch_size = batch_size + self.num_cores = num_cores + self.resize_method = resize_method + + def parse_and_preprocess(self, value): + # parse + image_buffer, label_index = parse_example_proto(value) + # preprocess + image = tf.image.decode_jpeg( + image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + image = eval_image(image, self.height, self.width, self.resize_method) + + return (image, label_index) + + def minibatch(self, dataset, subset, cache_data=False): + + with tf.compat.v1.name_scope('batch_processing'): + + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + + if cache_data: + ds = ds.take(1).cache().repeat() + + ds = ds.prefetch(buffer_size=10000) + # ds = ds.prefetch(buffer_size=self.batch_size) + + # num of parallel batches not greater than 56 + max_num_parallel_batches = min(56, 2*self.num_cores) + ds = ds.apply( + map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size, + num_parallel_batches=max_num_parallel_batches, + num_parallel_calls=None)) # this number should be tuned + + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) + images, _ = ds_iterator.get_next() + + return images diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/training/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/__init__.py new file mode 100644 index 000000000..abc80bb29 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/__init__.py @@ -0,0 +1 @@ +from . import mlperf_log diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_gnmt_tags.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_gnmt_tags.py new file mode 100644 index 000000000..097e97b83 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_gnmt_tags.py @@ -0,0 +1,51 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Keys which only appear in GNMT RNN Translation. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +# Loss smoothing factor +MODEL_HP_LOSS_SMOOTHING = "model_hp_loss_smoothing" + +# Number of layers in encoder and in decoder +MODEL_HP_NUM_LAYERS = "model_hp_num_layers" + +# RNN hidden size +MODEL_HP_HIDDEN_SIZE = "model_hp_hidden_size" + +# Dropout +MODEL_HP_DROPOUT = "model_hp_dropout" + +# Beam size for beam search +EVAL_HP_BEAM_SIZE = "eval_hp_beam_size" + +# Maximum sequence length for training +TRAIN_HP_MAX_SEQ_LEN = "train_hp_max_sequence_length" + +# Maximum sequence length for evaluation +EVAL_HP_MAX_SEQ_LEN = "eval_hp_max_sequence_length" + +# Length normalization constant for beam search +EVAL_HP_LEN_NORM_CONST = "eval_hp_length_normalization_constant" + +# Length normalization factor for beam search +EVAL_HP_LEN_NORM_FACTOR = "eval_hp_length_normalization_factor" + +# Coverage penalty factor for beam search +EVAL_HP_COV_PENALTY_FACTOR = "eval_hp_coverage_penalty_factor" diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_maskrcnn_tags.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_maskrcnn_tags.py new file mode 100644 index 000000000..1ec5ecdf1 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_maskrcnn_tags.py @@ -0,0 +1,53 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Keys which only appear in MASKRCNN. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +# Anchor overlap threshop +FG_IOU_THRESHOLD = "foreground_iou_threshold" +BG_IOU_THRESHOLD = "background_iou_threshold" + +# Top ROIs to be selected before and after NMS +RPN_PRE_NMS_TOP_N_TRAIN = "rpn_pre_nms_top_n_train" +RPN_PRE_NMS_TOP_N_TEST = "rpn_pre_nms_top_n_test" +RPN_POST_NMS_TOP_N_TRAIN = "rpn_post_nms_top_n_train" +RPN_POST_NMS_TOP_N_TEST = "rpn_post_nms_top_n_test" + +#Global batch size during training +GLOBAL_BATCH_SIZE = "global_batch_size" + +# Batch size during eval +BATCH_SIZE_TEST = "batch_size_test" + + +# Pretrained classifer model +BACKBONE = "backbone" + +# Anchor aspect ratio +ASPECT_RATIOS = "aspect_ratios" + +# Overlap threshold for NMS +NMS_THRESHOLD = "nms_threshold" + +# data pipeline +MIN_IMAGE_SIZE = "min_image_size" +MAX_IMAGE_SIZE = "max_image_size" +RANDOM_FLIP_PROBABILITY = "random_flip_probability" +INPUT_NORMALIZATION_STD = "input_normalization_std" diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_ncf_tags.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_ncf_tags.py new file mode 100644 index 000000000..a1235b818 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_ncf_tags.py @@ -0,0 +1,60 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Keys which only appear in NCF Recommendation. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# The minimum number of ratings for a user to be included. +PREPROC_HP_MIN_RATINGS = "preproc_hp_min_ratings" + +# The number of false negatives to use during evaluation. +PREPROC_HP_NUM_EVAL = "preproc_hp_num_eval" + +# Are evaluation negatives sampled with replacement? +PREPROC_HP_SAMPLE_EVAL_REPLACEMENT = "preproc_hp_sample_eval_replacement" + + +# The number of false negatives per postive generated during training. +INPUT_HP_NUM_NEG = "input_hp_num_neg" + +# Are training negatives sampled with replacement? +INPUT_HP_SAMPLE_TRAIN_REPLACEMENT = "input_hp_sample_train_replacement" + +# This tag should be emitted each time the submission begins construction of the +# false negatives for a trainging epoch. +INPUT_STEP_TRAIN_NEG_GEN = "input_step_train_neg_gen" + +# This tag should be emitted when the evaluation negatives are selected. This +# should occur only once. +INPUT_STEP_EVAL_NEG_GEN = "input_step_eval_neg_gen" + +# The number of users in the evaluation set. This should be the same as the +# number of users in the training set. +EVAL_HP_NUM_USERS = "eval_hp_num_users" + +# The number of false negatives per positive which actually appear during +# evaluation. This should match PREPROC_HP_NUM_EVAL. 
+EVAL_HP_NUM_NEG = "eval_hp_num_neg" + + +# The dimensionality of the matrix factorization portion of the model. +MODEL_HP_MF_DIM = "model_hp_mf_dim" + +# The sizes of the fully connected layers in the dense section of the model. +MODEL_HP_MLP_LAYER_SIZES = "model_hp_mlp_layer_sizes" + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_resnet_tags.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_resnet_tags.py new file mode 100644 index 000000000..7aa9360a3 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_resnet_tags.py @@ -0,0 +1,47 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Keys which only appear in ResNet. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +BOTTLENECK_BLOCK = "bottleneck_block" + +# The ResNet reference specifies that evaluation occurs once every four epochs. +# This can result in a quantization penalty for batch sizes which converge on +# certain epochs. For instance a batch size which tends to converge on epoch 81 +# or 82 would be unduly punished by evaluating at epochs 80 and 84. In order to +# address this, submissions may select an offset between 0 and 3 for the first +# evaluation. So in the example above, the submitter could select an offset of +# 1. In that case the first evaluation would occur on epoch 2, with later +# evaluations correspondingly offset. Because this would trigger an eval on +# epoch 82, the submission in this example can exit at a natural time. +EVAL_EPOCH_OFFSET = "eval_offset" + +# ============================================================================== +# == Topology ================================================================== +# ============================================================================== + +MODEL_HP_INITIAL_MAX_POOL = "model_hp_initial_max_pool" +MODEL_HP_BEGIN_BLOCK = "model_hp_begin_block" +MODEL_HP_END_BLOCK = "model_hp_end_block" +MODEL_HP_BLOCK_TYPE = "model_hp_block_type" +MODEL_HP_PROJECTION_SHORTCUT = "model_hp_projection_shortcut" +MODEL_HP_SHORTCUT_ADD = "model_hp_shorcut_add" + +MODEL_HP_RESNET_TOPOLOGY = "model_hp_resnet_topology" diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_ssd_tags.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_ssd_tags.py new file mode 100644 index 000000000..f1a87bea7 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_ssd_tags.py @@ -0,0 +1,42 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Keys which only appear in SSD. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +# Pretrained classifer model +BACKBONE = "backbone" + +FEATURE_SIZES = "feature_sizes" +STEPS = "steps" +SCALES = "scales" +ASPECT_RATIOS = "aspect_ratios" +NUM_DEFAULTS_PER_CELL = "num_defaults_per_cell" +LOC_CONF_OUT_CHANNELS = "loc_conf_out_channels" +NUM_DEFAULTS = "num_default_boxes" + +# Overlap threshold for NMS +NMS_THRESHOLD = "nms_threshold" +NMS_MAX_DETECTIONS = "nms_max_detections" + +# data pipeline +NUM_CROPPING_ITERATIONS = "num_cropping_iterations" +RANDOM_FLIP_PROBABILITY = "random_flip_probability" +DATA_NORMALIZATION_MEAN = "data_normalization_mean" +DATA_NORMALIZATION_STD = "data_normalization_std" diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_transformer_tags.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_transformer_tags.py new file mode 100644 index 000000000..7355828cf --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/_transformer_tags.py @@ -0,0 +1,35 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keys which only appear in transformer. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +INPUT_MAX_LENGTH = "input_max_length" + +MODEL_HP_INITIALIZER_GAIN = "model_hp_initializer_gain" +MODEL_HP_VOCAB_SIZE = "model_hp_vocab_size" +MODEL_HP_NUM_HIDDEN_LAYERS = "model_hp_hidden_layers" +MODEL_HP_EMBEDDING_SHARED_WEIGHTS = "model_hp_embedding_shared_weights" +MODEL_HP_ATTENTION_DENSE = "model_hp_attention_dense" +MODEL_HP_ATTENTION_DROPOUT = "model_hp_attention_dropout" +MODEL_HP_FFN_OUTPUT_DENSE = "model_hp_ffn_output_dense" +MODEL_HP_FFN_FILTER_DENSE = "model_hp_ffn_filter_dense" +MODEL_HP_RELU_DROPOUT = "model_hp_relu_dropout" +MODEL_HP_LAYER_POSTPROCESS_DROPOUT = "model_hp_layer_postprocess_dropout" +MODEL_HP_NORM = "model_hp_norm" +MODEL_HP_SEQ_BEAM_SEARCH = "model_hp_sequence_beam_search" diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/mlperf_log.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/mlperf_log.py new file mode 100644 index 000000000..afe340ba0 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/mlperf_log.py @@ -0,0 +1,202 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Convenience function for logging compliance tags to stdout. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import inspect +import logging +import json +import os +import re +import sys +import time +import uuid + +from mlperf_compliance.tags import * + +ROOT_DIR_GNMT = None +ROOT_DIR_MASKRCNN = None +ROOT_DIR_MINIGO = None +ROOT_DIR_NCF = None + +# Set by imagenet_main.py +ROOT_DIR_RESNET = None + +ROOT_DIR_SSD = None + +# Set by transformer_main.py and process_data.py +ROOT_DIR_TRANSFORMER = None + + +PATTERN = re.compile('[a-zA-Z0-9]+') + +LOG_FILE = os.getenv("COMPLIANCE_FILE") +# create logger with 'spam_application' +LOGGER = logging.getLogger('mlperf_compliance') +LOGGER.setLevel(logging.DEBUG) + +_STREAM_HANDLER = logging.StreamHandler(stream=sys.stdout) +_STREAM_HANDLER.setLevel(logging.INFO) +LOGGER.addHandler(_STREAM_HANDLER) + +if LOG_FILE: + _FILE_HANDLER = logging.FileHandler(LOG_FILE) + _FILE_HANDLER.setLevel(logging.DEBUG) + LOGGER.addHandler(_FILE_HANDLER) +else: + _STREAM_HANDLER.setLevel(logging.DEBUG) + + + +def get_caller(stack_index=2, root_dir=None): + ''' Returns file.py:lineno of your caller. A stack_index of 2 will provide + the caller of the function calling this function. Notice that stack_index + of 2 or more will fail if called from global scope. ''' + caller = inspect.getframeinfo(inspect.stack()[stack_index][0]) + + # Trim the filenames for readability. 
+ filename = caller.filename + if root_dir is not None: + filename = re.sub("^" + root_dir + "/", "", filename) + return "%s:%d" % (filename, caller.lineno) + + +def _mlperf_print(key, value=None, benchmark=None, stack_offset=0, + tag_set=None, deferred=False, root_dir=None, + extra_print=False, prefix=""): + ''' Prints out an MLPerf Log Line. + + key: The MLPerf log key such as 'CLOCK' or 'QUALITY'. See the list of log keys in the spec. + value: The value which contains no newlines. + benchmark: The short code for the benchmark being run, see the MLPerf log spec. + stack_offset: Increase the value to go deeper into the stack to find the callsite. For example, if this + is being called by a wraper/helper you may want to set stack_offset=1 to use the callsite + of the wraper/helper itself. + tag_set: The set of tags in which key must belong. + deferred: The value is not presently known. In that case, a unique ID will + be assigned as the value of this call and will be returned. The + caller can then include said unique ID when the value is known + later. + root_dir: Directory prefix which will be trimmed when reporting calling file + for compliance logging. + extra_print: Print a blank line before logging to clear any text in the line. + prefix: String with which to prefix the log message. Useful for + differentiating raw lines if stitching will be required. + + Example output: + :::MLP-1537375353 MINGO[17] (eval.py:42) QUALITY: 43.7 + ''' + + return_value = None + + if (tag_set is None and not PATTERN.match(key)) or key not in tag_set: + raise ValueError('Invalid key for MLPerf print: ' + str(key)) + + if value is not None and deferred: + raise ValueError("deferred is set to True, but a value was provided") + + if deferred: + return_value = str(uuid.uuid4()) + value = "DEFERRED: {}".format(return_value) + + if value is None: + tag = key + else: + str_json = json.dumps(value) + tag = '{key}: {value}'.format(key=key, value=str_json) + + callsite = get_caller(2 + stack_offset, root_dir=root_dir) + now = time.time() + + message = '{prefix}:::MLPv0.5.0 {benchmark} {secs:.9f} ({callsite}) {tag}'.format( + prefix=prefix, secs=now, benchmark=benchmark, callsite=callsite, tag=tag) + + if extra_print: + print() # There could be prior text on a line + + if tag in STDOUT_TAG_SET: + LOGGER.info(message) + else: + LOGGER.debug(message) + + return return_value + + +GNMT_TAG_SET = set(GNMT_TAGS) +def gnmt_print(key, value=None, stack_offset=1, deferred=False, prefix=""): + return _mlperf_print(key=key, value=value, benchmark=GNMT, + stack_offset=stack_offset, tag_set=GNMT_TAG_SET, + deferred=deferred, root_dir=ROOT_DIR_GNMT) + + +MASKRCNN_TAG_SET = set(MASKRCNN_TAGS) +def maskrcnn_print(key, value=None, stack_offset=1, deferred=False, + extra_print=True, prefix=""): + return _mlperf_print(key=key, value=value, benchmark=MASKRCNN, + stack_offset=stack_offset, tag_set=MASKRCNN_TAG_SET, + deferred=deferred, extra_print=extra_print, + root_dir=ROOT_DIR_MASKRCNN, prefix=prefix) + + +MINIGO_TAG_SET = set(MINIGO_TAGS) +def minigo_print(key, value=None, stack_offset=1, deferred=False, prefix=""): + return _mlperf_print(key=key, value=value, benchmark=MINIGO, + stack_offset=stack_offset, tag_set=MINIGO_TAG_SET, + deferred=deferred, root_dir=ROOT_DIR_MINIGO, + prefix=prefix) + + +NCF_TAG_SET = set(NCF_TAGS) +def ncf_print(key, value=None, stack_offset=1, deferred=False, + extra_print=True, prefix=""): + # Extra print is needed for the reference NCF because of tqdm. 
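+  # For example, ncf_print(EVAL_ACCURACY, {'epoch': 7, 'accuracy': 43.7})
+  # emits a line roughly of the form
+  #   :::MLPv0.5.0 ncf <seconds> (<file>:<lineno>) eval_accuracy: {"epoch": 7, "accuracy": 43.7}
+  # (timestamp and callsite depend on where and when it is called).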
+ return _mlperf_print(key=key, value=value, benchmark=NCF, + stack_offset=stack_offset, tag_set=NCF_TAG_SET, + deferred=deferred, extra_print=extra_print, + root_dir=ROOT_DIR_NCF, prefix=prefix) + + +RESNET_TAG_SET = set(RESNET_TAGS) +def resnet_print(key, value=None, stack_offset=1, deferred=False, prefix=""): + return _mlperf_print(key=key, value=value, benchmark=RESNET, + stack_offset=stack_offset, tag_set=RESNET_TAG_SET, + deferred=deferred, root_dir=ROOT_DIR_RESNET, + prefix=prefix) + + +SSD_TAG_SET = set(SSD_TAGS) +def ssd_print(key, value=None, stack_offset=1, deferred=False, + extra_print=True, prefix=""): + return _mlperf_print(key=key, value=value, benchmark=SSD, + stack_offset=stack_offset, tag_set=SSD_TAG_SET, + deferred=deferred, extra_print=extra_print, + root_dir=ROOT_DIR_SSD, prefix=prefix) + + +TRANSFORMER_TAG_SET = set(TRANSFORMER_TAGS) +def transformer_print(key, value=None, stack_offset=1, deferred=False, prefix=""): + return _mlperf_print(key=key, value=value, benchmark=TRANSFORMER, + stack_offset=stack_offset, tag_set=TRANSFORMER_TAG_SET, + deferred=deferred, root_dir=ROOT_DIR_TRANSFORMER, + prefix=prefix) + + +if __name__ == '__main__': + ncf_print(EVAL_ACCURACY, {'epoch': 7, 'accuracy': 43.7}) + ncf_print(INPUT_SIZE, 1024) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/resnet_log_helper.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/resnet_log_helper.py new file mode 100644 index 000000000..eb7ff8e7a --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/resnet_log_helper.py @@ -0,0 +1,84 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Convenience functions for logging ResNet topology. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from mlperf_compliance import mlperf_log + +_STACK_OFFSET = 2 + +def _get_shape(input_tensor): + return "({})".format(", ".join( + [str(i) for i in input_tensor.shape.as_list()[1:]])) + + +def _in_out_shape(input_tensor, output_tensor): + return "{} -> {}".format( _get_shape(input_tensor), _get_shape(output_tensor)) + + +def log_max_pool(input_tensor, output_tensor): + mlperf_log.resnet_print( + key=mlperf_log.MODEL_HP_INITIAL_MAX_POOL, value=_in_out_shape( + input_tensor=input_tensor, output_tensor=output_tensor), + stack_offset=_STACK_OFFSET) + + +def log_begin_block(input_tensor, block_type): + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_BEGIN_BLOCK, + value={"block_type": block_type}, + stack_offset=_STACK_OFFSET) + mlperf_log.resnet_print( + key=mlperf_log.MODEL_HP_RESNET_TOPOLOGY, + value=" Block Input: {}".format(_get_shape(input_tensor)), + stack_offset=_STACK_OFFSET) + + +def log_end_block(output_tensor): + mlperf_log.resnet_print( + key=mlperf_log.MODEL_HP_END_BLOCK, + value=" Block Output: {}".format(_get_shape(output_tensor)), + stack_offset=_STACK_OFFSET) + + +def log_projection(input_tensor, output_tensor): + mlperf_log.resnet_print( + key=mlperf_log.MODEL_HP_PROJECTION_SHORTCUT, + value=_in_out_shape(input_tensor, output_tensor), + stack_offset=_STACK_OFFSET) + + +def log_conv2d(input_tensor, output_tensor, stride, filters, initializer, + use_bias): + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_CONV2D_FIXED_PADDING, + value=_in_out_shape(input_tensor, output_tensor), + stack_offset=_STACK_OFFSET) + mlperf_log.resnet_print( + key=mlperf_log.MODEL_HP_CONV2D_FIXED_PADDING, + value={"stride": stride, "filters": filters, "initializer": initializer, + "use_bias": use_bias}, + stack_offset=_STACK_OFFSET) + + +def log_batch_norm(input_tensor, output_tensor, momentum, epsilon, center, + scale, training): + assert _get_shape(input_tensor) == _get_shape(output_tensor) + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_BATCH_NORM, value={ + "shape": _get_shape(input_tensor), "momentum": momentum, "epsilon": epsilon, + "center": center, "scale": scale, "training": training}, + stack_offset=_STACK_OFFSET) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/tags.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/tags.py new file mode 100644 index 000000000..94bd4bb27 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/tags.py @@ -0,0 +1,620 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Master list of MLPerf tags to be logged for benchmark submissions. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from mlperf_compliance._gnmt_tags import * +from mlperf_compliance._ncf_tags import * +from mlperf_compliance._resnet_tags import * +from mlperf_compliance._ssd_tags import * +from mlperf_compliance._transformer_tags import * +from mlperf_compliance._maskrcnn_tags import * + +# ============================================================================== +# == Benchmarks ================================================================ +# ============================================================================== + +# rnn_translator +GNMT = "gnmt" + +# reinforcement/ +MINIGO = "minigo" + +# recommendation/ +NCF = "ncf" + +# image_classification/ +RESNET = "resnet" + +# single_stage_detector/ +SSD = "ssd" + +# object_detection/ +MASKRCNN = "maskrcnn" + +# translation/ +TRANSFORMER = "transformer" + +# ============================================================================== +# == Tags ====================================================================== +# ============================================================================== +""" +Tags may be used by all models, a subset of models, or only one model. A +specification for which models require which tags can be found below the tag +definitions. +""" + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# All models: Tags which should appear in absolutely every MLPerf model. +# ////////////////////////////////////////////////////////////////////////////// + +# This tag signals to start the timer. Emission of this tag need not be (and +# generally will not be) the first part of a submission script. Rather, this +# tag must be emitted prior to performing any work which the MLPerf rules +# state must be timed. This tag is generally emitted directly before the first +# step which invokes random number generation or the first step which must be +# performed on the system under test. (Whichever comes first.) If clarification +# is needed, please file an issue under: +# https://github.com/mlperf/policies +RUN_START = "run_start" + +# This tag signals that a submission has reached the relevant stopping criteria, +# and has completed all tasks which are performed in the reference. The wall +# time for a submission will be computed as the difference between the time +# when this tag is emitted and the time whe the RUN_START is emitted. +RUN_STOP = "run_stop" + +# This tag should be emitted immediately before ending a run, and should be the +# last tag emitted. This tag should indicate the completion of untimed post +# processing work such as system specific cleanup. +RUN_FINAL = "run_final" + + +# Emit this tag in the place(s) where random seeds are set. +RUN_SET_RANDOM_SEED = "run_set_random_seed" + +# Emit this tag when training data has been purged from volatile caches prior +# to run start. +RUN_CLEAR_CACHES = "run_clear_caches" + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# Common Values: Constants which are expected to be reported across many models. +# These values are included for convenience. 
+# ////////////////////////////////////////////////////////////////////////////// +BCE = "binary_cross_entropy" +CCE = "categorical_cross_entropy" + +SGD = "stochastic_gradient_descent" + +# Some conventions distinguish between "vanilla" SGD and SGD with momentum +# (where vanilla SGD would be the specific case of momentum=0) +SGD_WITH_MOMENTUM = "stochastic_gradient_descent_with_momentum" + +ADAM = "adam" +LAZY_ADAM = "lazy_adam" + +TRUNCATED_NORMAL = "truncated_normal" + +RELU = "relu" + + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# Preprocessing: Tags for generic preprocessing steps +# ////////////////////////////////////////////////////////////////////////////// + +# The number of training examples in a single epoch +PREPROC_NUM_TRAIN_EXAMPLES = "preproc_num_train_examples" + +# The number of evaluation examples in a single epoch +PREPROC_NUM_EVAL_EXAMPLES = "preproc_num_eval_examples" + +# This tag is used to declare what part of code tokenizes the training data. +PREPROC_TOKENIZE_TRAINING = "preproc_tokenize_training" + +# This tag is used to declare what part of code tokenizes the evaluation data. +PREPROC_TOKENIZE_EVAL = "preproc_tokenize_eval" + +# The vocabulary size used for tokenization. +PREPROC_VOCAB_SIZE = "preproc_vocab_size" + + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# Input: Tags for the timed portion of the data input pipeline +# ////////////////////////////////////////////////////////////////////////////// + +# The number of examples in the training portion of the data pipeline. Generally +# this should match PREPROC_NUM_TRAIN_EXAMPLES. If it does not (for instance +# if certain examples are dropped in compliance with MLPerf rules), the +# call which declares this tag is a good place for a comment stating why the +# disparity is expected. +INPUT_SIZE = "input_size" + +# The size of a training minibatch size. If this value is variable, please emit +# "-1" and then log an implementation specific characterization of the batch +# size which is a reasonable analog to the reference. (For instance log that +# all but the last batch has size 64, and the last batch is a partial batch) +INPUT_BATCH_SIZE = "input_batch_size" + +# This tag indicates where the location of the code which defines the order in +# which training examples are traversed. It is not necessary to describe the +# method in the tag emission (though comments are always welcome). Rather, this +# should simply provide a good starting point to an interested party. +INPUT_ORDER = "input_order" + +# The shard size (in items) when shuffling in the input pipeline. 
+INPUT_SHARD = "input_shard" + +# The number of samples iver which BN stats are computed for normalization during training +INPUT_BN_SPAN = "input_bn_span" + + +# -------------------------------------- +# -- Data Augmentation and Alteration -- +# -------------------------------------- + +# ResNet random cropping +INPUT_CENTRAL_CROP = "input_central_crop" + +INPUT_CROP_USES_BBOXES = "input_crop_uses_bboxes" + +INPUT_DISTORTED_CROP_MIN_OBJ_COV = "input_distorted_crop_min_object_covered" +INPUT_DISTORTED_CROP_RATIO_RANGE = "input_distorted_crop_aspect_ratio_range" +INPUT_DISTORTED_CROP_AREA_RANGE = "input_distorted_crop_area_range" +INPUT_DISTORTED_CROP_MAX_ATTEMPTS = "input_distorted_crop_max_attempts" + +INPUT_MEAN_SUBTRACTION = "input_mean_subtraction" + +# Random flip of an image for data augmentation +INPUT_RANDOM_FLIP = "input_random_flip" + +INPUT_RESIZE = "input_resize" +INPUT_RESIZE_ASPECT_PRESERVING = "input_resize_aspect_preserving" + + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# Opt: Tags for declaring optimizer specific information. Submissions should +# declare and log explicit values rather than relying on defaults. +# ////////////////////////////////////////////////////////////////////////////// + +# The name of the optimizer used. (SGD, Adam, etc.) +OPT_NAME = "opt_name" + +OPT_LR = "opt_learning_rate" +OPT_MOMENTUM = "opt_momentum" + +OPT_WEIGHT_DECAY = "opt_weight_decay" + +# beta1, beta2, and epsilon are optimizer hyperparameters associated with the +# Adam optimizer and its variants (e.g. LazyAdam). +OPT_HP_ADAM_BETA1 = "opt_hp_Adam_beta1" +OPT_HP_ADAM_BETA2 = "opt_hp_Adam_beta2" +OPT_HP_ADAM_EPSILON = "opt_hp_Adam_epsilon" + +# The number of warm-up steps (SGD). +OPT_LR_WARMUP_STEPS = "opt_learning_rate_warmup_steps" + + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# Train: Tags for control flow during model training. +# ////////////////////////////////////////////////////////////////////////////// + +# This tag is emitted when a model first enters its training loop. This is not +# necessarily when it begins to apply gradients; rather, it should be placed at +# a location which logically partitions the submission code. +TRAIN_LOOP = "train_loop" + +# The current epoch as said epoch begins training. +TRAIN_EPOCH = "train_epoch" + +# This tag is used to indicate approximately where checkpoints are written. Some +# frameworks abstract away checkpoint saving; in such cases simply choose a +# logical place in the code which signals that the framework has been instructed +# to save checkpoints, along with an explanatory comment. +TRAIN_CHECKPOINT = "train_checkpoint" + + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# Eval: Tags for control flow during model evaluation. +# ////////////////////////////////////////////////////////////////////////////// + +# This tag should be emitted whenever the submission begins an evaluation pass +# for a given set of weights. +EVAL_START = "eval_start" + +# The number of examples on which evaluation is performed. +EVAL_SIZE = "eval_size" + +# The target quality at which the model may stop training. +EVAL_TARGET = "eval_target" + +# The observed accuracy of the model at a given epoch. +EVAL_ACCURACY = "eval_accuracy" + +# This tag should be emitted whenever the submission ends an evaluation pass +# for a given set of weights. +EVAL_STOP = "eval_stop" + + +# The observed accuracy of the model at a given iteration. 
This is only for +# models which evaluate at certain iterations instead of epochs. +EVAL_ITERATION_ACCURACY = "eval_iteration_accuracy" + +# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ +# Model: Tags for logging topology specific information. +# ////////////////////////////////////////////////////////////////////////////// + +# The loss function (cross entropy, squared error, etc.) used by the model. For +# more exotic loss functions such as those encountered in object detection +# models, additional benchmark specific subcomponents should also be logged. +MODEL_HP_LOSS_FN = "model_hp_loss_fn" + +MODEL_HP_INITIAL_SHAPE = "model_hp_initial_shape" +MODEL_HP_FINAL_SHAPE = "model_hp_final_shape" + +MODEL_L2_REGULARIZATION = "model_l2_regularization" +MODEL_EXCLUDE_BN_FROM_L2 = "model_exclude_bn_from_l2" + +MODEL_HP_RELU = "model_hp_relu" +MODEL_HP_CONV2D_FIXED_PADDING = "model_hp_conv2d_fixed_padding" +MODEL_HP_BATCH_NORM = "model_hp_batch_norm" +MODEL_HP_DENSE = "model_hp_dense" + + +# ============================================================================== +# == Stdout tags =============================================================== +# ============================================================================== + +# These tags are always logged to stdout. The rest will be logged to a file if +# one is available. +STDOUT_TAG_SET = { + RUN_START, + RUN_STOP, + RUN_FINAL, + + TRAIN_LOOP, + TRAIN_EPOCH, + + EVAL_START, + EVAL_SIZE, + EVAL_TARGET, + EVAL_ACCURACY, + EVAL_STOP, +} + + +# ============================================================================== +# == Benchmark tag sets ======================================================== +# ============================================================================== +ALL_USED_TAGS = set() + +GNMT_TAGS = ( + RUN_START, + RUN_STOP, + RUN_FINAL, + RUN_SET_RANDOM_SEED, + RUN_CLEAR_CACHES, + + PREPROC_VOCAB_SIZE, + PREPROC_TOKENIZE_TRAINING, + PREPROC_TOKENIZE_EVAL, + PREPROC_NUM_TRAIN_EXAMPLES, + PREPROC_NUM_EVAL_EXAMPLES, + + INPUT_SIZE, + INPUT_BATCH_SIZE, + INPUT_ORDER, + INPUT_SHARD, + + OPT_NAME, + OPT_LR, + OPT_LR_WARMUP_STEPS, + OPT_HP_ADAM_BETA1, + OPT_HP_ADAM_BETA2, + OPT_HP_ADAM_EPSILON, + + TRAIN_LOOP, + TRAIN_EPOCH, + TRAIN_CHECKPOINT, + TRAIN_HP_MAX_SEQ_LEN, + + EVAL_START, + EVAL_SIZE, + EVAL_TARGET, + EVAL_ACCURACY, + EVAL_STOP, + EVAL_HP_BEAM_SIZE, + EVAL_HP_MAX_SEQ_LEN, + EVAL_HP_LEN_NORM_CONST, + EVAL_HP_LEN_NORM_FACTOR, + EVAL_HP_COV_PENALTY_FACTOR, + + MODEL_HP_LOSS_FN, + MODEL_HP_LOSS_SMOOTHING, + MODEL_HP_NUM_LAYERS, + MODEL_HP_HIDDEN_SIZE, + MODEL_HP_DROPOUT +) + +MASKRCNN_TAGS = ( + RUN_START, + RUN_STOP, + RUN_FINAL, + RUN_CLEAR_CACHES, + + INPUT_SIZE, + INPUT_BATCH_SIZE, + GLOBAL_BATCH_SIZE, + INPUT_ORDER, + INPUT_SHARD, + + BACKBONE, + NMS_THRESHOLD, + NMS_MAX_DETECTIONS, + + OPT_NAME, + OPT_LR, + OPT_LR_WARMUP_STEPS, + OPT_MOMENTUM, + OPT_WEIGHT_DECAY, + + TRAIN_LOOP, + TRAIN_EPOCH, + + EVAL_START, + EVAL_SIZE, + EVAL_TARGET, + EVAL_ACCURACY, + EVAL_STOP, + INPUT_MEAN_SUBTRACTION, + INPUT_NORMALIZATION_STD, + INPUT_RESIZE, + INPUT_RESIZE_ASPECT_PRESERVING, + MIN_IMAGE_SIZE, + MAX_IMAGE_SIZE, + RUN_SET_RANDOM_SEED, + INPUT_RANDOM_FLIP, + RANDOM_FLIP_PROBABILITY, + FG_IOU_THRESHOLD, + BG_IOU_THRESHOLD, + RPN_PRE_NMS_TOP_N_TRAIN, + RPN_PRE_NMS_TOP_N_TEST, + RPN_POST_NMS_TOP_N_TRAIN, + RPN_POST_NMS_TOP_N_TEST, + BATCH_SIZE_TEST, + ASPECT_RATIOS, +) + +MINIGO_TAGS = ( + RUN_START, + RUN_STOP, + RUN_FINAL, + RUN_SET_RANDOM_SEED, + RUN_CLEAR_CACHES, + + INPUT_SHARD, + + 
TRAIN_LOOP, + TRAIN_EPOCH, + + EVAL_START, + EVAL_SIZE, + EVAL_TARGET, + EVAL_ACCURACY, + EVAL_STOP, +) + +NCF_TAGS = ( + RUN_START, + RUN_STOP, + RUN_FINAL, + RUN_CLEAR_CACHES, + + PREPROC_HP_MIN_RATINGS, + PREPROC_HP_NUM_EVAL, + PREPROC_HP_SAMPLE_EVAL_REPLACEMENT, + + INPUT_SIZE, + INPUT_BATCH_SIZE, + INPUT_ORDER, + INPUT_SHARD, + INPUT_HP_NUM_NEG, + INPUT_HP_SAMPLE_TRAIN_REPLACEMENT, + INPUT_STEP_TRAIN_NEG_GEN, + INPUT_STEP_EVAL_NEG_GEN, + + OPT_NAME, + OPT_LR, + OPT_HP_ADAM_BETA1, + OPT_HP_ADAM_BETA2, + OPT_HP_ADAM_EPSILON, + + TRAIN_LOOP, + TRAIN_EPOCH, + + EVAL_START, + EVAL_SIZE, + EVAL_TARGET, + EVAL_ACCURACY, + EVAL_STOP, + EVAL_HP_NUM_USERS, + EVAL_HP_NUM_NEG, + + MODEL_HP_LOSS_FN, + MODEL_HP_MF_DIM, + MODEL_HP_MLP_LAYER_SIZES, +) + +RESNET_TAGS = ( + RUN_START, + RUN_STOP, + RUN_FINAL, + RUN_SET_RANDOM_SEED, + RUN_CLEAR_CACHES, + + PREPROC_NUM_TRAIN_EXAMPLES, + PREPROC_NUM_EVAL_EXAMPLES, + + INPUT_SIZE, + INPUT_BATCH_SIZE, + INPUT_ORDER, + INPUT_SHARD, + INPUT_CENTRAL_CROP, + INPUT_CROP_USES_BBOXES, + INPUT_DISTORTED_CROP_MIN_OBJ_COV, + INPUT_DISTORTED_CROP_RATIO_RANGE, + INPUT_DISTORTED_CROP_AREA_RANGE, + INPUT_DISTORTED_CROP_MAX_ATTEMPTS, + INPUT_MEAN_SUBTRACTION, + INPUT_RANDOM_FLIP, + INPUT_RESIZE, + INPUT_RESIZE_ASPECT_PRESERVING, + INPUT_BN_SPAN, + + OPT_NAME, + OPT_LR, + OPT_LR_WARMUP_STEPS, + OPT_MOMENTUM, + + TRAIN_LOOP, + TRAIN_EPOCH, + + EVAL_START, + EVAL_SIZE, + EVAL_TARGET, + EVAL_ACCURACY, + EVAL_STOP, + EVAL_EPOCH_OFFSET, + + MODEL_HP_LOSS_FN, + MODEL_L2_REGULARIZATION, + MODEL_EXCLUDE_BN_FROM_L2, + + MODEL_HP_INITIAL_SHAPE, + MODEL_HP_FINAL_SHAPE, + MODEL_HP_INITIAL_MAX_POOL, + MODEL_HP_BEGIN_BLOCK, + MODEL_HP_END_BLOCK, + MODEL_HP_BLOCK_TYPE, + MODEL_HP_PROJECTION_SHORTCUT, + MODEL_HP_SHORTCUT_ADD, + MODEL_HP_RELU, + MODEL_HP_CONV2D_FIXED_PADDING, + MODEL_HP_BATCH_NORM, + MODEL_HP_DENSE, + MODEL_HP_RESNET_TOPOLOGY, +) + +SSD_TAGS = ( + RUN_START, + RUN_STOP, + RUN_FINAL, + RUN_CLEAR_CACHES, + + INPUT_SIZE, + INPUT_BATCH_SIZE, + INPUT_ORDER, + INPUT_SHARD, + INPUT_BN_SPAN, + + BACKBONE, + FEATURE_SIZES, + STEPS, + SCALES, + ASPECT_RATIOS, + NUM_DEFAULTS_PER_CELL, + LOC_CONF_OUT_CHANNELS, + NUM_DEFAULTS, + NMS_THRESHOLD, + NMS_MAX_DETECTIONS, + + NUM_CROPPING_ITERATIONS, + RANDOM_FLIP_PROBABILITY, + DATA_NORMALIZATION_MEAN, + DATA_NORMALIZATION_STD, + + OPT_NAME, + OPT_LR, + OPT_MOMENTUM, + OPT_WEIGHT_DECAY, + OPT_LR_WARMUP_STEPS, + + TRAIN_LOOP, + TRAIN_EPOCH, + + EVAL_START, + EVAL_SIZE, + EVAL_TARGET, + EVAL_ACCURACY, + EVAL_STOP, + EVAL_ITERATION_ACCURACY, +) + +TRANSFORMER_TAGS = ( + RUN_START, + RUN_STOP, + RUN_FINAL, + RUN_SET_RANDOM_SEED, + RUN_CLEAR_CACHES, + + PREPROC_NUM_TRAIN_EXAMPLES, + PREPROC_NUM_EVAL_EXAMPLES, + PREPROC_TOKENIZE_TRAINING, + PREPROC_TOKENIZE_EVAL, + PREPROC_VOCAB_SIZE, + + INPUT_BATCH_SIZE, + INPUT_MAX_LENGTH, + INPUT_ORDER, + INPUT_SHARD, + + OPT_NAME, + OPT_LR, + OPT_LR_WARMUP_STEPS, + OPT_HP_ADAM_BETA1, + OPT_HP_ADAM_BETA2, + OPT_HP_ADAM_EPSILON, + + TRAIN_LOOP, + TRAIN_EPOCH, + + EVAL_START, + EVAL_TARGET, + EVAL_ACCURACY, + EVAL_STOP, + + MODEL_HP_INITIALIZER_GAIN, + MODEL_HP_VOCAB_SIZE, + MODEL_HP_NUM_HIDDEN_LAYERS, + MODEL_HP_EMBEDDING_SHARED_WEIGHTS, + MODEL_HP_ATTENTION_DENSE, + MODEL_HP_ATTENTION_DROPOUT, + MODEL_HP_FFN_OUTPUT_DENSE, + MODEL_HP_FFN_FILTER_DENSE, + MODEL_HP_RELU_DROPOUT, + MODEL_HP_LAYER_POSTPROCESS_DROPOUT, + MODEL_HP_NORM, + MODEL_HP_SEQ_BEAM_SEARCH, +) + +ALL_USED_TAGS.update(GNMT_TAGS) +ALL_USED_TAGS.update(MASKRCNN_TAGS) +ALL_USED_TAGS.update(NCF_TAGS) +ALL_USED_TAGS.update(RESNET_TAGS) 
+ALL_USED_TAGS.update(SSD_TAGS) +ALL_USED_TAGS.update(TRANSFORMER_TAGS) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/test_tag_set.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/test_tag_set.py new file mode 100644 index 000000000..7c6bcb4af --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/test_tag_set.py @@ -0,0 +1,69 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Verification script to check model tag sets. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import defaultdict +import re + +from mlperf_compliance import _gnmt_tags +from mlperf_compliance import _ncf_tags +from mlperf_compliance import _resnet_tags +from mlperf_compliance import _ssd_tags +from mlperf_compliance import _transformer_tags +from mlperf_compliance import tags + + +_MODEL_TAG_MODULES = [_gnmt_tags, _ncf_tags, _resnet_tags, _ssd_tags, + _transformer_tags] + +TAG_PATTERN = re.compile("^[A-Za-z0-9_]+$") + + +def extract_tags(module): + output = [] + for i in dir(module): + if i.startswith("_") or not isinstance(getattr(module, i), str): + continue + output.append(i) + return output + + +def check_collisions(): + defining_modules = defaultdict(list) + for module in _MODEL_TAG_MODULES: + name = module.__name__ + for tag in extract_tags(module): + defining_modules[tag].append(name) + + duplicate_defs = {k: v for k, v in defining_modules.items() if len(v) > 1} + for key in duplicate_defs.keys(): + print("Variable {} defined multiple times".format(key)) + + +def check_format(): + for tag in sorted(tags.ALL_USED_TAGS): + if not TAG_PATTERN.match(tag): + print("Malformed tag: {}".format(tag)) + + +if __name__ == "__main__": + check_collisions() + check_format() + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/tf_mlperf_log.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/tf_mlperf_log.py new file mode 100644 index 000000000..7dfa2aa17 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_compliance/tf_mlperf_log.py @@ -0,0 +1,95 @@ +# Copyright 2018 MLBenchmark Group. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Convenience function for extracting the values for logging calls. + +Because TensorFlow generally defers computation of values to a session run call, +it is impractical to log the values of tensors when they are defined. Instead, +the definition of a tensor is logged as normal using the log function in +mlperf_log.py and a tf.print statement helper function can be used to report +the relevant values as they are computed. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import uuid + +import tensorflow as tf + + +def log_deferred(op, log_id, every_n=1, first_n=None): + """Helper method inserting compliance logging ops. + + Note: This helper is not guaranteed to be efficient, as it will insert ops + and control dependencies. If this proves to be a bottleneck, submitters + may wish to consider other methods such as extracting values from an + .events file. + + Args: + op: A tf op to be printed. + log_id: a uuid provided by the logger in mlperf_log.py + every_n: If repeat is True, with what frequency should the input op be ' + logged. If repeat is False, this argument is ignored. + first_n: Only log this many values. This arg does not interact with every_n. + The first_n refers to the first n that would have been logged. + """ + + prefix = ":::MLPv0.5.0 [{}]".format(log_id) + if not first_n is not None and first_n == 1: + return tf.compat.v1.Print(op, [tf.timestamp(), op], message=prefix, first_n=1) + + counter = tf.Variable(tf.zeros(shape=(), dtype=tf.int32) - 1, + aggregation=tf.VariableAggregation.MEAN) + increment = tf.compat.v1.assign_add(counter, 1, use_locking=True) + return tf.cond( + pred=tf.equal(tf.math.mod(increment, every_n), 0), + true_fn=lambda :tf.compat.v1.Print(op, [tf.timestamp(), op], message=prefix, + first_n=first_n), + false_fn=lambda :op + ) + + +def sum_metric(tensor, name): + sum_var = tf.compat.v1.Variable( + initial_value=tf.zeros(shape=(), dtype=tensor.dtype), + trainable=False, + collections=[ + tf.compat.v1.GraphKeys.LOCAL_VARIABLES, + tf.compat.v1.GraphKeys.METRIC_VARIABLES, + ], + name="{}_total".format(name), + aggregation=tf.VariableAggregation.SUM + ) + + update_op = tf.identity(tf.compat.v1.assign_add(sum_var, tensor)) + return tf.identity(sum_var, name=name), update_op + + +def _example(): + for kwargs in [dict(first_n=1), dict(), dict(every_n=2), + dict(first_n=2, every_n=2)]: + op = tf.compat.v1.assign_add(tf.Variable(tf.zeros(shape=(), dtype=tf.int32) - 1), 1) + op = log_deferred(op, str(uuid.uuid4()), **kwargs) + init = [tf.compat.v1.local_variables_initializer(), tf.compat.v1.global_variables_initializer()] + print("-" * 5) + with tf.compat.v1.Session().as_default() as sess: + sess.run(init) + for _ in range(6): + sess.run(op) + + +if __name__ == "__main__": + _example() diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/imagenet_main.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/imagenet_main.py new file mode 100644 index 000000000..cbfcdeff0 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/imagenet_main.py @@ -0,0 +1,351 @@ +# Copyright 2017 The TensorFlow 
Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Runs a ResNet model on the ImageNet dataset.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import random + +import numpy.random +import tensorflow as tf # pylint: disable=g-bad-import-order + +from mlperf_compliance import mlperf_log +from mlperf_resnet import imagenet_preprocessing +from mlperf_resnet import resnet_model +from mlperf_resnet import resnet_run_loop + +# import horovod if the above resnet_run_loop indiciates MPI +if resnet_run_loop.is_mpi: + import horovod.tensorflow as hvd + +_DEFAULT_IMAGE_SIZE = 224 +_NUM_CHANNELS = 3 +_NUM_CLASSES = 1001 + +_NUM_IMAGES = { + 'train': 1281167, + 'validation': 50000, +} + +_NUM_TRAIN_FILES = 1024 +_SHUFFLE_BUFFER = 1500 + + +_BASE_LR = 0.128 + +############################################################################### +# Data processing +############################################################################### +def get_filenames(is_training, data_dir): + """Return filenames for dataset.""" + if is_training: + return [ + os.path.join(data_dir, 'train-%05d-of-01024' % i) + for i in range(_NUM_TRAIN_FILES)] + else: + return [ + os.path.join(data_dir, 'validation-%05d-of-00128' % i) + for i in range(128)] + + +def _parse_example_proto(example_serialized): + """Parses an Example proto containing a training example of an image. + + The output of the build_image_data.py image preprocessing script is a dataset + containing serialized Example protocol buffers. Each Example proto contains + the following fields (values are included as examples): + + image/height: 462 + image/width: 581 + image/colorspace: 'RGB' + image/channels: 3 + image/class/label: 615 + image/class/synset: 'n03623198' + image/class/text: 'knee pad' + image/object/bbox/xmin: 0.1 + image/object/bbox/xmax: 0.9 + image/object/bbox/ymin: 0.2 + image/object/bbox/ymax: 0.6 + image/object/bbox/label: 615 + image/format: 'JPEG' + image/filename: 'ILSVRC2012_val_00041207.JPEG' + image/encoded: + + Args: + example_serialized: scalar Tensor tf.string containing a serialized + Example protocol buffer. + + Returns: + image_buffer: Tensor tf.string containing the contents of a JPEG file. + label: Tensor tf.int32 containing the label. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.io.FixedLenFeature([], dtype=tf.int64, + default_value=-1), + 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. 
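+  # Note: this version parses the bounding-box features below but returns only
+  # the encoded image and the label; cropping later samples a distorted box over
+  # the whole image instead (INPUT_CROP_USES_BBOXES is logged as False in
+  # imagenet_preprocessing.py).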
+ feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + return features['image/encoded'], label + + +def parse_record(raw_record, is_training, dtype): + """Parses a record containing a training example of an image. + + The input record is parsed into a label and image, and the image is passed + through preprocessing steps (cropping, flipping, and so on). + + Args: + raw_record: scalar Tensor tf.string containing a serialized + Example protocol buffer. + is_training: A boolean denoting whether the input is for training. + dtype: data type to use for images/features. + + Returns: + Tuple with processed image tensor and one-hot-encoded label tensor. + """ + image_buffer, label = _parse_example_proto(raw_record) + + image = imagenet_preprocessing.preprocess_image( + image_buffer=image_buffer, + output_height=_DEFAULT_IMAGE_SIZE, + output_width=_DEFAULT_IMAGE_SIZE, + num_channels=_NUM_CHANNELS, + is_training=is_training) + image = tf.cast(image, dtype) + + return image, label + + +def input_fn(is_training, data_dir, batch_size, num_epochs=1, num_gpus=None, + dtype=tf.float32): + """Input function which provides batches for train or eval. + + Args: + is_training: A boolean denoting whether the input is for training. + data_dir: The directory containing the input data. + batch_size: The number of samples per batch. + num_epochs: The number of epochs to repeat the dataset. + num_gpus: The number of gpus used for training. + dtype: Data type to use for images/features + + Returns: + A dataset that can be used for iteration. + """ + mlperf_log.resnet_print(key=mlperf_log.INPUT_ORDER) + filenames = get_filenames(is_training, data_dir) + dataset = tf.data.Dataset.from_tensor_slices(filenames) + + if is_training: + # Shuffle the input files + dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES) + + # Convert to individual records + dataset = dataset.flat_map(tf.data.TFRecordDataset) + + return resnet_run_loop.process_record_dataset( + dataset=dataset, + is_training=is_training, + batch_size=batch_size, + shuffle_buffer=_SHUFFLE_BUFFER, + parse_record_fn=parse_record, + num_epochs=num_epochs, + num_gpus=num_gpus, + examples_per_epoch=_NUM_IMAGES['train'] if is_training else None, + dtype=dtype + ) + + +def get_synth_input_fn(): + return resnet_run_loop.get_synth_input_fn( + _DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS, _NUM_CLASSES) + + +############################################################################### +# Running the model +############################################################################### +class ImagenetModel(resnet_model.Model): + """Model class with appropriate defaults for Imagenet data.""" + + def __init__(self, resnet_size, data_format=None, num_classes=_NUM_CLASSES, + version=resnet_model.DEFAULT_VERSION, + dtype=resnet_model.DEFAULT_DTYPE): + """These are the parameters that work for Imagenet data. + + Args: + resnet_size: The number of convolutional layers needed in the model. + data_format: Either 'channels_first' or 'channels_last', specifying which + data format to use when setting up the model. + num_classes: The number of output classes needed from the model. This + enables users to extend the same model to their own datasets. 
+ version: Integer representing which version of the ResNet network to use. + See README for details. Valid values: [1, 2] + dtype: The TensorFlow dtype to use for calculations. + """ + + # For bigger models, we want to use "bottleneck" layers + if resnet_size < 50: + bottleneck = False + final_size = 512 + else: + bottleneck = True + final_size = 2048 + + super(ImagenetModel, self).__init__( + resnet_size=resnet_size, + bottleneck=bottleneck, + num_classes=num_classes, + num_filters=64, + kernel_size=7, + conv_stride=2, + first_pool_size=3, + first_pool_stride=2, + second_pool_size=7, + second_pool_stride=1, + block_sizes=_get_block_sizes(resnet_size), + block_strides=[1, 2, 2, 2], + final_size=final_size, + version=version, + data_format=data_format, + dtype=dtype + ) + + +def _get_block_sizes(resnet_size): + """Retrieve the size of each block_layer in the ResNet model. + + The number of block layers used for the Resnet model varies according + to the size of the model. This helper grabs the layer set we want, throwing + an error if a non-standard size has been selected. + + Args: + resnet_size: The number of convolutional layers needed in the model. + + Returns: + A list of block sizes to use in building the model. + + Raises: + KeyError: if invalid resnet_size is received. + """ + choices = { + 18: [2, 2, 2, 2], + 34: [3, 4, 6, 3], + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3], + 200: [3, 24, 36, 3] + } + + try: + return choices[resnet_size] + except KeyError: + err = ('Could not find layers for selected Resnet size.\n' + 'Size received: {}; sizes allowed: {}.'.format( + resnet_size, choices.keys())) + raise ValueError(err) + + +def imagenet_model_fn(features, labels, mode, params): + """Our model_fn for ResNet to be used with our Estimator.""" + + # Warmup and higher lr may not be valid for fine tuning with small batches + # and smaller numbers of training images. 
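+  # For example: with a single worker and a global batch size of 256, the
+  # learning_rate_with_decay call below starts from _BASE_LR * 256 / 256 = 0.128,
+  # warms up linearly over the first 5 epochs (non-LARS path), and then applies
+  # the decay factors 1, 0.1, 0.01, 0.001 and 1e-4 across the epoch boundaries
+  # 30, 60, 80 and 90.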
+ if params['fine_tune']: + base_lr = .1 + else: + base_lr = .128 + + num_workers = 1 if resnet_run_loop.is_mpi == 0 else hvd.size() + global_batch_size = params['batch_size'] * num_workers + learning_rate_fn = resnet_run_loop.learning_rate_with_decay( + batch_size=global_batch_size, batch_denom=256, + num_images=_NUM_IMAGES['train'], boundary_epochs=[30, 60, 80, 90], + decay_rates=[1, 0.1, 0.01, 0.001, 1e-4], base_lr=_BASE_LR, + enable_lars=params['enable_lars']) + + return resnet_run_loop.resnet_model_fn( + features=features, + labels=labels, + mode=mode, + model_class=ImagenetModel, + resnet_size=params['resnet_size'], + weight_decay=params['weight_decay'], + learning_rate_fn=learning_rate_fn, + momentum=0.9, + data_format=params['data_format'], + version=params['version'], + loss_scale=params['loss_scale'], + loss_filter_fn=None, + dtype=params['dtype'], + label_smoothing=params['label_smoothing'], + enable_lars=params['enable_lars'], + use_bfloat16=params['use_bfloat16'] + ) + + +def main(argv): + parser = resnet_run_loop.ResnetArgParser( + resnet_size_choices=[18, 34, 50, 101, 152, 200]) + + parser.set_defaults( + train_epochs=90, + version=1 + ) + + flags = parser.parse_args(args=argv[2:]) + + seed = int(argv[1]) + print('Setting random seed = ', seed) + print('special seeding') + mlperf_log.resnet_print(key=mlperf_log.RUN_SET_RANDOM_SEED, value=seed) + random.seed(seed) + tf.compat.v1.set_random_seed(seed) + numpy.random.seed(seed) + + mlperf_log.resnet_print(key=mlperf_log.PREPROC_NUM_TRAIN_EXAMPLES, + value=_NUM_IMAGES['train']) + mlperf_log.resnet_print(key=mlperf_log.PREPROC_NUM_EVAL_EXAMPLES, + value=_NUM_IMAGES['validation']) + input_function = flags.use_synthetic_data and get_synth_input_fn() or input_fn + + resnet_run_loop.resnet_main(seed, + flags, imagenet_model_fn, input_function, + shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS]) + + +if __name__ == '__main__': + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) + mlperf_log.ROOT_DIR_RESNET = os.path.split(os.path.abspath(__file__))[0] + main(argv=sys.argv) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/imagenet_preprocessing.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/imagenet_preprocessing.py new file mode 100644 index 000000000..1fa0dea62 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/imagenet_preprocessing.py @@ -0,0 +1,290 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Provides utilities to preprocess images. + +Training images are sampled using the provided bounding boxes, and subsequently +cropped to the sampled bounding box. Images are additionally flipped randomly, +then resized to the target output size (without aspect-ratio preservation). 
+ +Images used during evaluation are resized (with aspect-ratio preservation) and +centrally cropped. + +All images undergo mean color subtraction. + +Note that these steps are colloquially referred to as "ResNet preprocessing," +and they differ from "VGG preprocessing," which does not use bounding boxes +and instead does an aspect-preserving resize followed by random crop during +training. (These both differ from "Inception preprocessing," which introduces +color distortion steps.) + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from mlperf_compliance import mlperf_log + + +_R_MEAN = 123.68 +_G_MEAN = 116.78 +_B_MEAN = 103.94 +_CHANNEL_MEANS = [_R_MEAN, _G_MEAN, _B_MEAN] + +# The lower bound for the smallest side of the image for aspect-preserving +# resizing. For example, if an image is 500 x 1000, it will be resized to +# _RESIZE_MIN x (_RESIZE_MIN * 2). +_RESIZE_MIN = 256 + + +def _decode_crop_and_flip(image_buffer, num_channels): + """Crops the given image to a random part of the image, and randomly flips. + + We use the fused decode_and_crop op, which performs better than the two ops + used separately in series, but note that this requires that the image be + passed in as an un-decoded string Tensor. + + Args: + image_buffer: scalar string Tensor representing the raw JPEG image buffer. + num_channels: Integer depth of the image buffer for decoding. + + Returns: + 3-D tensor with cropped image. + + """ + # A large fraction of image datasets contain a human-annotated bounding box + # delineating the region of the image containing the object of interest. We + # choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an + # allowed range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + + min_object_covered=0.1 + aspect_ratio_range=[0.75, 1.33] + area_range=[0.05, 1.0] + max_attempts=100 + + mlperf_log.resnet_print(key=mlperf_log.INPUT_DISTORTED_CROP_MIN_OBJ_COV, + value=min_object_covered) + mlperf_log.resnet_print(key=mlperf_log.INPUT_DISTORTED_CROP_RATIO_RANGE, + value=aspect_ratio_range) + mlperf_log.resnet_print(key=mlperf_log.INPUT_DISTORTED_CROP_AREA_RANGE, + value=area_range) + mlperf_log.resnet_print(key=mlperf_log.INPUT_DISTORTED_CROP_MAX_ATTEMPTS, + value=max_attempts) + mlperf_log.resnet_print(key=mlperf_log.INPUT_CROP_USES_BBOXES, value=False) + + bbox = tf.constant([0.0, 0.0, 1.0, 1.0], + dtype=tf.float32, shape=[1, 1, 4]) #From the entire image + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + image_size=tf.image.extract_jpeg_shape(image_buffer), + bounding_boxes=bbox, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, _ = sample_distorted_bounding_box + + # Reassemble the bounding box in the format the crop op requires. + offset_y, offset_x, _ = tf.unstack(bbox_begin) + target_height, target_width, _ = tf.unstack(bbox_size) + crop_window = tf.stack([offset_y, offset_x, target_height, target_width]) + + # Use the fused decode and crop op here, which is faster than each in series. 
+ cropped = tf.image.decode_and_crop_jpeg( + image_buffer, crop_window, channels=num_channels) + + # Flip to add a little more random distortion in. + mlperf_log.resnet_print(key=mlperf_log.INPUT_RANDOM_FLIP) + cropped = tf.image.random_flip_left_right(cropped) + return cropped + + +def _central_crop(image, crop_height, crop_width): + """Performs central crops of the given image list. + + Args: + image: a 3-D image tensor + crop_height: the height of the image following the crop. + crop_width: the width of the image following the crop. + + Returns: + 3-D tensor with cropped image. + """ + shape = tf.shape(input=image) + height, width = shape[0], shape[1] + + mlperf_log.resnet_print(key=mlperf_log.INPUT_CENTRAL_CROP, + value=[crop_height, crop_width]) + + amount_to_be_cropped_h = (height - crop_height) + crop_top = amount_to_be_cropped_h // 2 + amount_to_be_cropped_w = (width - crop_width) + crop_left = amount_to_be_cropped_w // 2 + return tf.slice( + image, [crop_top, crop_left, 0], [crop_height, crop_width, -1]) + + +def _mean_image_subtraction(image, means, num_channels): + """Subtracts the given means from each image channel. + + For example: + means = [123.68, 116.779, 103.939] + image = _mean_image_subtraction(image, means) + + Note that the rank of `image` must be known. + + Args: + image: a tensor of size [height, width, C]. + means: a C-vector of values to subtract from each channel. + num_channels: number of color channels in the image that will be distorted. + + Returns: + the centered image. + + Raises: + ValueError: If the rank of `image` is unknown, if `image` has a rank other + than three or if the number of channels in `image` doesn't match the + number of values in `means`. + """ + if image.get_shape().ndims != 3: + raise ValueError('Input must be of size [height, width, C>0]') + + if len(means) != num_channels: + raise ValueError('len(means) must match the number of channels') + + mlperf_log.resnet_print(key=mlperf_log.INPUT_MEAN_SUBTRACTION, + value=means) + + # We have a 1-D tensor of means; convert to 3-D. + means = tf.expand_dims(tf.expand_dims(means, 0), 0) + + return image - means + + +def _smallest_size_at_least(height, width, resize_min): + """Computes new shape with the smallest side equal to `smallest_side`. + + Computes new shape with the smallest side equal to `smallest_side` while + preserving the original aspect ratio. + + Args: + height: an int32 scalar tensor indicating the current height. + width: an int32 scalar tensor indicating the current width. + resize_min: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. + + Returns: + new_height: an int32 scalar tensor indicating the new height. + new_width: an int32 scalar tensor indicating the new width. + """ + resize_min = tf.cast(resize_min, tf.float32) + + # Convert to floats to make subsequent calculations go smoothly. + height, width = tf.cast(height, tf.float32), tf.cast(width, tf.float32) + + smaller_dim = tf.minimum(height, width) + scale_ratio = resize_min / smaller_dim + + # Convert back to ints to make heights and widths that TF ops will accept. + new_height = tf.cast(height * scale_ratio, tf.int32) + new_width = tf.cast(width * scale_ratio, tf.int32) + + return new_height, new_width + + +def _aspect_preserving_resize(image, resize_min): + """Resize images preserving the original aspect ratio. + + Args: + image: A 3-D image `Tensor`. + resize_min: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. 
+ + Returns: + resized_image: A 3-D tensor containing the resized image. + """ + mlperf_log.resnet_print(key=mlperf_log.INPUT_RESIZE_ASPECT_PRESERVING, + value={"min": resize_min}) + + shape = tf.shape(input=image) + height, width = shape[0], shape[1] + + new_height, new_width = _smallest_size_at_least(height, width, resize_min) + + return _resize_image(image, new_height, new_width) + + +def _resize_image(image, height, width): + """Simple wrapper around tf.resize_images. + + This is primarily to make sure we use the same `ResizeMethod` and other + details each time. + + Args: + image: A 3-D image `Tensor`. + height: The target height for the resized image. + width: The target width for the resized image. + + Returns: + resized_image: A 3-D tensor containing the resized image. The first two + dimensions have the shape [height, width]. + """ + return tf.image.resize( + image, [height, width], method=tf.image.ResizeMethod.BILINEAR) + + +def preprocess_image(image_buffer, output_height, output_width, + num_channels, is_training=False): + """Preprocesses the given image. + + Preprocessing includes decoding, cropping, and resizing for both training + and eval images. Training preprocessing, however, introduces some random + distortion of the image to improve accuracy. + + Args: + image_buffer: scalar string Tensor representing the raw JPEG image buffer. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + num_channels: Integer depth of the image buffer for decoding. + is_training: `True` if we're preprocessing the image for training and + `False` otherwise. + + Returns: + A preprocessed image. + """ + if is_training: + # For training, we want to randomize some of the distortions. + image = _decode_crop_and_flip(image_buffer, num_channels) + + mlperf_log.resnet_print(key=mlperf_log.INPUT_RESIZE, + value=[output_height, output_width]) + image = _resize_image(image, output_height, output_width) + else: + # For validation, we want to decode, resize, then just crop the middle. + image = tf.image.decode_jpeg(image_buffer, channels=num_channels) + image = _aspect_preserving_resize(image, _RESIZE_MIN) + + mlperf_log.resnet_print(key=mlperf_log.INPUT_RESIZE, + value=[output_height, output_width]) + image = _central_crop(image, output_height, output_width) + + image.set_shape([output_height, output_width, num_channels]) + + return _mean_image_subtraction(image, _CHANNEL_MEANS, num_channels) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/resnet_model.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/resnet_model.py new file mode 100644 index 000000000..68922ceb8 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/resnet_model.py @@ -0,0 +1,467 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Contains definitions for Residual Networks. + +Residual networks ('v1' ResNets) were originally proposed in: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 + +The full preactivation 'v2' ResNet variant was introduced by: +[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 + +The key difference of the full preactivation 'v2' variant compared to the +'v1' variant in [1] is the use of batch normalization before every weight layer +rather than after. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from mlperf_compliance import mlperf_log +from mlperf_compliance import resnet_log_helper + + +_BATCH_NORM_DECAY = 0.9 +_BATCH_NORM_EPSILON = 1e-5 +DEFAULT_VERSION = 2 +DEFAULT_DTYPE = tf.float32 +CASTABLE_TYPES = (tf.float16,tf.bfloat16) +ALLOWED_TYPES = (DEFAULT_DTYPE,) + CASTABLE_TYPES + + +################################################################################ +# Convenience functions for building the ResNet model. +################################################################################ +def batch_norm(inputs, training, data_format): + """Performs a batch normalization using a standard set of parameters.""" + # We set fused=True for a significant performance boost. See + # https://www.tensorflow.org/performance/performance_guide#common_fused_ops + outputs = tf.compat.v1.layers.batch_normalization( + inputs=inputs, axis=1 if data_format == 'channels_first' else 3, + momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True, + scale=True, training=training, fused=True) + + resnet_log_helper.log_batch_norm( + input_tensor=inputs, output_tensor=outputs, momentum=_BATCH_NORM_DECAY, + epsilon=_BATCH_NORM_EPSILON, center=True, scale=True, training=training) + + return outputs + + +def fixed_padding(inputs, kernel_size, data_format): + """Pads the input along the spatial dimensions independently of input size. + + Args: + inputs: A tensor of size [batch, channels, height_in, width_in] or + [batch, height_in, width_in, channels] depending on data_format. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + Should be a positive integer. + data_format: The input format ('channels_last' or 'channels_first'). + + Returns: + A tensor with the same format as the input with the data either intact + (if kernel_size == 1) or padded (if kernel_size > 1). + """ + pad_total = kernel_size - 1 + pad_beg = pad_total // 2 + pad_end = pad_total - pad_beg + + if data_format == 'channels_first': + padded_inputs = tf.pad(tensor=inputs, paddings=[[0, 0], [0, 0], + [pad_beg, pad_end], [pad_beg, pad_end]]) + else: + padded_inputs = tf.pad(tensor=inputs, paddings=[[0, 0], [pad_beg, pad_end], + [pad_beg, pad_end], [0, 0]]) + return padded_inputs + + +def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format): + """Strided 2-D convolution with explicit padding.""" + # The padding is consistent and is based only on `kernel_size`, not on the + # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone). 
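+  # For example, the initial ImageNet convolution (kernel_size=7, strides=2) pads
+  # a 224x224 input by (7 - 1) // 2 = 3 pixels on each side, so the VALID
+  # convolution sees a 230x230 image and produces a 112x112 output.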
+ + inputs_for_logging = inputs + if strides > 1: + inputs = fixed_padding(inputs, kernel_size, data_format) + + outputs = tf.compat.v1.layers.conv2d( + inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, + padding=('SAME' if strides == 1 else 'VALID'), use_bias=False, + kernel_initializer=tf.compat.v1.variance_scaling_initializer( + distribution="truncated_normal"), + data_format=data_format) + + resnet_log_helper.log_conv2d( + input_tensor=inputs_for_logging, output_tensor=outputs, stride=strides, + filters=filters, initializer=mlperf_log.TRUNCATED_NORMAL, use_bias=False) + + return outputs + + +################################################################################ +# ResNet block definitions. +################################################################################ +def _building_block_v1(inputs, filters, training, projection_shortcut, strides, + data_format): + raise NotImplementedError + + +def _building_block_v2(inputs, filters, training, projection_shortcut, strides, + data_format): + raise NotImplementedError + + +def _bottleneck_block_v1(inputs, filters, training, projection_shortcut, + strides, data_format): + """A single block for ResNet v1, with a bottleneck. + + Similar to _building_block_v1(), except using the "bottleneck" blocks + described in: + Convolution then batch normalization then ReLU as described by: + Deep Residual Learning for Image Recognition + https://arxiv.org/pdf/1512.03385.pdf + by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015. + + Args: + inputs: A tensor of size [batch, channels, height_in, width_in] or + [batch, height_in, width_in, channels] depending on data_format. + filters: The number of filters for the convolutions. + training: A Boolean for whether the model is in training or inference + mode. Needed for batch normalization. + projection_shortcut: The function to use for projection shortcuts + (typically a 1x1 convolution when downsampling the input). + strides: The block's stride. If greater than 1, this block will ultimately + downsample the input. + data_format: The input format ('channels_last' or 'channels_first'). + + Returns: + The output tensor of the block; shape should match inputs. + """ + resnet_log_helper.log_begin_block( + input_tensor=inputs, block_type=mlperf_log.BOTTLENECK_BLOCK) + + shortcut = inputs + + if projection_shortcut is not None: + shortcut = projection_shortcut(inputs) + resnet_log_helper.log_projection(input_tensor=inputs, + output_tensor=shortcut) + shortcut = batch_norm(inputs=shortcut, training=training, + data_format=data_format) + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=filters, kernel_size=1, strides=1, + data_format=data_format) + inputs = batch_norm(inputs, training, data_format) + + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_RELU) + inputs = tf.nn.relu(inputs) + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=filters, kernel_size=3, strides=strides, + data_format=data_format) + inputs = batch_norm(inputs, training, data_format) + + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_RELU) + inputs = tf.nn.relu(inputs) + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=4 * filters, kernel_size=1, strides=1, + data_format=data_format) + inputs = batch_norm(inputs, training, data_format) + + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_SHORTCUT_ADD) + # TODO(nhasabni): temporarily replacing Add by AddN for performance. + # Remove it later once we optimize this in graph. 
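+  # add_n([inputs, shortcut]) computes the same residual sum as inputs + shortcut;
+  # only the op emitted into the graph differs.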
+ inputs = tf.math.add_n([inputs, shortcut]) + + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_RELU) + inputs = tf.nn.relu(inputs) + + resnet_log_helper.log_end_block(output_tensor=inputs) + return inputs + + +def _bottleneck_block_v2(inputs, filters, training, projection_shortcut, + strides, data_format): + raise NotImplementedError + + +def block_layer(inputs, filters, bottleneck, block_fn, blocks, strides, + training, name, data_format): + """Creates one layer of blocks for the ResNet model. + + Args: + inputs: A tensor of size [batch, channels, height_in, width_in] or + [batch, height_in, width_in, channels] depending on data_format. + filters: The number of filters for the first convolution of the layer. + bottleneck: Is the block created a bottleneck block. + block_fn: The block to use within the model, either `building_block` or + `bottleneck_block`. + blocks: The number of blocks contained in the layer. + strides: The stride to use for the first convolution of the layer. If + greater than 1, this layer will ultimately downsample the input. + training: Either True or False, whether we are currently training the + model. Needed for batch norm. + name: A string name for the tensor output of the block layer. + data_format: The input format ('channels_last' or 'channels_first'). + + Returns: + The output tensor of the block layer. + """ + + # Bottleneck blocks end with 4x the number of filters as they start with + filters_out = filters * 4 if bottleneck else filters + + def projection_shortcut(inputs): + return conv2d_fixed_padding( + inputs=inputs, filters=filters_out, kernel_size=1, strides=strides, + data_format=data_format) + + # Only the first block per block_layer uses projection_shortcut and strides + inputs = block_fn(inputs, filters, training, projection_shortcut, strides, + data_format) + + for _ in range(1, blocks): + inputs = block_fn(inputs, filters, training, None, 1, data_format) + + return tf.identity(inputs, name) + + +class Model(object): + """Base class for building the Resnet Model.""" + + def __init__(self, resnet_size, bottleneck, num_classes, num_filters, + kernel_size, + conv_stride, first_pool_size, first_pool_stride, + second_pool_size, second_pool_stride, block_sizes, block_strides, + final_size, version=DEFAULT_VERSION, data_format=None, + dtype=DEFAULT_DTYPE): + """Creates a model for classifying an image. + + Args: + resnet_size: A single integer for the size of the ResNet model. + bottleneck: Use regular blocks or bottleneck blocks. + num_classes: The number of classes used as labels. + num_filters: The number of filters to use for the first block layer + of the model. This number is then doubled for each subsequent block + layer. + kernel_size: The kernel size to use for convolution. + conv_stride: stride size for the initial convolutional layer + first_pool_size: Pool size to be used for the first pooling layer. + If none, the first pooling layer is skipped. + first_pool_stride: stride size for the first pooling layer. Not used + if first_pool_size is None. + second_pool_size: Pool size to be used for the second pooling layer. + second_pool_stride: stride size for the final pooling layer + block_sizes: A list containing n values, where n is the number of sets of + block layers desired. Each value should be the number of blocks in the + i-th set. + block_strides: List of integers representing the desired stride size for + each of the sets of block layers. Should be same length as block_sizes. 
+ final_size: The expected size of the model after the second pooling. + version: Integer representing which version of the ResNet network to use. + See README for details. Valid values: [1, 2] + data_format: Input format ('channels_last', 'channels_first', or None). + If set to None, the format is dependent on whether a GPU is available. + dtype: The TensorFlow dtype to use for calculations. If not specified + tf.float32 is used. + + Raises: + ValueError: if invalid version is selected. + """ + self.resnet_size = resnet_size + + if not data_format: + data_format = ( + 'channels_first' if tf.test.is_built_with_cuda() else 'channels_last') + + self.resnet_version = version + if version not in (1, 2): + raise ValueError( + 'Resnet version should be 1 or 2. See README for citations.') + + self.bottleneck = bottleneck + if bottleneck: + if version == 1: + self.block_fn = _bottleneck_block_v1 + else: + self.block_fn = _bottleneck_block_v2 + else: + if version == 1: + self.block_fn = _building_block_v1 + else: + self.block_fn = _building_block_v2 + + if dtype not in ALLOWED_TYPES: + raise ValueError('dtype must be one of: {}'.format(ALLOWED_TYPES)) + + self.data_format = data_format + self.num_classes = num_classes + self.num_filters = num_filters + self.kernel_size = kernel_size + self.conv_stride = conv_stride + self.first_pool_size = first_pool_size + self.first_pool_stride = first_pool_stride + self.second_pool_size = second_pool_size + self.second_pool_stride = second_pool_stride + self.block_sizes = block_sizes + self.block_strides = block_strides + self.final_size = final_size + self.dtype = dtype + self.pre_activation = version == 2 + + def _custom_dtype_getter(self, getter, name, shape=None, dtype=DEFAULT_DTYPE, + *args, **kwargs): + """Creates variables in fp32, then casts to fp16 if necessary. + + This function is a custom getter. A custom getter is a function with the + same signature as tf.get_variable, except it has an additional getter + parameter. Custom getters can be passed as the `custom_getter` parameter of + tf.variable_scope. Then, tf.get_variable will call the custom getter, + instead of directly getting a variable itself. This can be used to change + the types of variables that are retrieved with tf.get_variable. + The `getter` parameter is the underlying variable getter, that would have + been called if no custom getter was used. Custom getters typically get a + variable with `getter`, then modify it in some way. + + This custom getter will create an fp32 variable. If a low precision + (e.g. float16) variable was requested it will then cast the variable to the + requested dtype. The reason we do not directly create variables in low + precision dtypes is that applying small gradients to such variables may + cause the variable not to change. + + Args: + getter: The underlying variable getter, that has the same signature as + tf.get_variable and returns a variable. + name: The name of the variable to get. + shape: The shape of the variable to get. + dtype: The dtype of the variable to get. Note that if this is a low + precision dtype, the variable will be created as a tf.float32 variable, + then cast to the appropriate dtype + *args: Additional arguments to pass unmodified to getter. + **kwargs: Additional keyword arguments to pass unmodified to getter. + + Returns: + A variable which is cast to fp16 if necessary. 
+ """ + + if dtype in CASTABLE_TYPES: + var = getter(name, shape, tf.float32, *args, **kwargs) + return tf.cast(var, dtype=dtype, name=name + '_cast') + else: + return getter(name, shape, dtype, *args, **kwargs) + + def _model_variable_scope(self): + """Returns a variable scope that the model should be created under. + + If self.dtype is a castable type, model variable will be created in fp32 + then cast to self.dtype before being used. + + Returns: + A variable scope for the model. + """ + + return tf.compat.v1.variable_scope('resnet_model', + custom_getter=self._custom_dtype_getter) + + def __call__(self, inputs, training): + """Add operations to classify a batch of input images. + + Args: + inputs: A Tensor representing a batch of input images. + training: A boolean. Set to True to add operations required only when + training the classifier. + + Returns: + A logits Tensor with shape [, self.num_classes]. + """ + + # Drop batch size from shape logging. + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_INITIAL_SHAPE, + value=inputs.shape.as_list()[1:]) + + with self._model_variable_scope(): + if self.data_format == 'channels_first': + # Convert the inputs from channels_last (NHWC) to channels_first (NCHW). + # This provides a large performance boost on GPU. See + # https://www.tensorflow.org/performance/performance_guide#data_formats + inputs = tf.transpose(a=inputs, perm=[0, 3, 1, 2]) + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=self.num_filters, kernel_size=self.kernel_size, + strides=self.conv_stride, data_format=self.data_format) + inputs = tf.identity(inputs, 'initial_conv') + + # We do not include batch normalization or activation functions in V2 + # for the initial conv1 because the first ResNet unit will perform these + # for both the shortcut and non-shortcut paths as part of the first + # block's projection. Cf. Appendix of [2]. + if self.resnet_version == 1: + inputs = batch_norm(inputs, training, self.data_format) + + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_RELU) + inputs = tf.nn.relu(inputs) + + if self.first_pool_size: + pooled_inputs = tf.compat.v1.layers.max_pooling2d( + inputs=inputs, pool_size=self.first_pool_size, + strides=self.first_pool_stride, padding='SAME', + data_format=self.data_format) + resnet_log_helper.log_max_pool(input_tensor=inputs, output_tensor=pooled_inputs) + inputs = tf.identity(pooled_inputs, 'initial_max_pool') + + for i, num_blocks in enumerate(self.block_sizes): + num_filters = self.num_filters * (2**i) + inputs = block_layer( + inputs=inputs, filters=num_filters, bottleneck=self.bottleneck, + block_fn=self.block_fn, blocks=num_blocks, + strides=self.block_strides[i], training=training, + name='block_layer{}'.format(i + 1), data_format=self.data_format) + + # Only apply the BN and ReLU for model that does pre_activation in each + # building/bottleneck block, eg resnet V2. + if self.pre_activation: + inputs = batch_norm(inputs, training, self.data_format) + + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_RELU) + inputs = tf.nn.relu(inputs) + + # The current top layer has shape + # `batch_size x pool_size x pool_size x final_size`. + # ResNet does an Average Pooling layer over pool_size, + # but that is the same as doing a reduce_mean. We do a reduce_mean + # here because it performs better than AveragePooling2D. 
+ axes = [2, 3] if self.data_format == 'channels_first' else [1, 2] + inputs = tf.reduce_mean(input_tensor=inputs, axis=axes, keepdims=True) + inputs = tf.identity(inputs, 'final_reduce_mean') + + inputs = tf.reshape(inputs, [-1, self.final_size]) + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_DENSE, + value=self.num_classes) + inputs = tf.compat.v1.layers.dense( + inputs=inputs, + units=self.num_classes, + kernel_initializer=tf.compat.v1.random_normal_initializer(stddev=.01)) + inputs = tf.identity(inputs, 'final_dense') + + # Drop batch size from shape logging. + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_FINAL_SHAPE, + value=inputs.shape.as_list()[1:]) + return inputs diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/resnet_run_loop.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/resnet_run_loop.py new file mode 100644 index 000000000..0b9f2e78d --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_resnet/resnet_run_loop.py @@ -0,0 +1,692 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains utility and supporting functions for ResNet. + + This module contains ResNet code which does not directly build layers. This +includes dataset management, hyperparameter and optimizer code, and argument +parsing. Code for defining the ResNet layers can be found in resnet_model.py. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from mlperf_compliance import mlperf_log +from mlperf_compliance import tf_mlperf_log +from mlperf_resnet import resnet_model +from mlperf_utils.arg_parsers import parsers +from mlperf_utils.export import export +from mlperf_utils.logs import hooks_helper +from mlperf_utils.logs import logger +from mlperf_utils.misc import model_helpers + +global is_mpi +try: + import horovod.tensorflow as hvd + hvd.init() + is_mpi = hvd.size() +except ImportError: + is_mpi = 0 + print("No MPI horovod support, this is running in no-MPI mode!") + + +_NUM_EXAMPLES_NAME = "num_examples" +_NUM_IMAGES = { + 'train': 1281167, + 'validation': 50000 +} + + +################################################################################ +# Functions for input processing. +################################################################################ +def process_record_dataset(dataset, is_training, batch_size, shuffle_buffer, + parse_record_fn, num_epochs=1, num_gpus=None, + examples_per_epoch=None, dtype=tf.float32): + """Given a Dataset with raw records, return an iterator over the records. + + Args: + dataset: A Dataset representing raw records + is_training: A boolean denoting whether the input is for training. + batch_size: The number of samples per batch. 
+ shuffle_buffer: The buffer size to use when shuffling records. A larger + value results in better randomness, but smaller values reduce startup + time and use less memory. + parse_record_fn: A function that takes a raw record and returns the + corresponding (image, label) pair. + num_epochs: The number of epochs to repeat the dataset. + num_gpus: The number of gpus used for training. + examples_per_epoch: The number of examples in an epoch. + dtype: Data type to use for images/features. + + Returns: + Dataset of (image, label) pairs ready for iteration. + """ + + # We prefetch a batch at a time, This can help smooth out the time taken to + # load input files as we go through shuffling and processing. + dataset = dataset.prefetch(buffer_size=batch_size) + if is_training: + if is_mpi: + dataset = dataset.shard(hvd.size(), hvd.rank()) + # Shuffle the records. Note that we shuffle before repeating to ensure + # that the shuffling respects epoch boundaries. + mlperf_log.resnet_print(key=mlperf_log.INPUT_ORDER) + dataset = dataset.shuffle(buffer_size=shuffle_buffer) + + # If we are training over multiple epochs before evaluating, repeat the + # dataset for the appropriate number of epochs. + dataset = dataset.repeat(num_epochs) + + # Parse the raw records into images and labels. Testing has shown that setting + # num_parallel_batches > 1 produces no improvement in throughput, since + # batch_size is almost always much greater than the number of CPU cores. + dataset = dataset.apply( + tf.data.experimental.map_and_batch( + lambda value: parse_record_fn(value, is_training, dtype), + batch_size=batch_size, + num_parallel_batches=1)) + + # Operations between the final prefetch and the get_next call to the iterator + # will happen synchronously during run time. We prefetch here again to + # background all of the above processing work and keep it out of the + # critical training path. Setting buffer_size to tf.contrib.data.AUTOTUNE + # allows DistributionStrategies to adjust how many batches to fetch based + # on how many devices are present. + dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + + return dataset + + +def get_synth_input_fn(height, width, num_channels, num_classes): + """Returns an input function that returns a dataset with zeroes. + + This is useful in debugging input pipeline performance, as it removes all + elements of file reading and image preprocessing. + + Args: + height: Integer height that will be used to create a fake image tensor. + width: Integer width that will be used to create a fake image tensor. + num_channels: Integer depth that will be used to create a fake image tensor. + num_classes: Number of classes that should be represented in the fake labels + tensor + + Returns: + An input_fn that can be used in place of a real one to return a dataset + that can be used for iteration. + """ + def input_fn(is_training, data_dir, batch_size, *args, **kwargs): # pylint: disable=unused-argument + images = tf.zeros((batch_size, height, width, num_channels), tf.float32) + labels = tf.zeros((batch_size, num_classes), tf.int32) + return tf.data.Dataset.from_tensors((images, labels)).repeat() + + return input_fn + + +################################################################################ +# Functions for running training/eval/validation loops for the model. 
+################################################################################ +def learning_rate_with_decay( + batch_size, batch_denom, num_images, boundary_epochs, decay_rates, + base_lr=0.1, enable_lars=False): + """Get a learning rate that decays step-wise as training progresses. + + Args: + batch_size: the number of examples processed in each training batch. + batch_denom: this value will be used to scale the base learning rate. + `0.1 * batch size` is divided by this number, such that when + batch_denom == batch_size, the initial learning rate will be 0.1. + num_images: total number of images that will be used for training. + boundary_epochs: list of ints representing the epochs at which we + decay the learning rate. + decay_rates: list of floats representing the decay rates to be used + for scaling the learning rate. It should have one more element + than `boundary_epochs`, and all elements should have the same type. + base_lr: Initial learning rate scaled based on batch_denom. + + Returns: + Returns a function that takes a single argument - the number of batches + trained so far (global_step)- and returns the learning rate to be used + for training the next batch. + """ + initial_learning_rate = base_lr * batch_size / batch_denom + batches_per_epoch = num_images / batch_size + + # Multiply the learning rate by 0.1 at 100, 150, and 200 epochs. + boundaries = [int(batches_per_epoch * epoch) for epoch in boundary_epochs] + vals = [initial_learning_rate * decay for decay in decay_rates] + + def learning_rate_fn(global_step): + lr = tf.compat.v1.train.piecewise_constant(global_step, boundaries, vals) + warmup_steps = int(batches_per_epoch * 5) + warmup_lr = ( + initial_learning_rate * tf.cast(global_step, tf.float32) / tf.cast( + warmup_steps, tf.float32)) + return tf.cond(pred=global_step < warmup_steps, true_fn=lambda: warmup_lr, false_fn=lambda: lr) + + def poly_rate_fn(global_step): + """Handles linear scaling rule, gradual warmup, and LR decay. + + The learning rate starts at 0, then it increases linearly per step. After + flags.poly_warmup_epochs, we reach the base learning rate (scaled to account + for batch size). The learning rate is then decayed using a polynomial rate + decay schedule with power 2.0. + + Args: + global_step: the current global_step + + Returns: + returns the current learning rate + """ + + # Learning rate schedule for LARS polynomial schedule + if batch_size <= 4096: + plr = 5.0 + w_epochs = 5 + elif batch_size <= 8192: + plr = 10.0 + w_epochs = 5 + elif batch_size <= 16384: + plr = 25.0 + w_epochs = 5 + else: # e.g. 
32768 + plr = 33.0 + w_epochs = 25 + + w_steps = int(w_epochs * batches_per_epoch) + wrate = (plr * tf.cast(global_step, tf.float32) / tf.cast( + w_steps, tf.float32)) + + num_epochs = flags.train_epochs + train_steps = batches_per_epoch * num_epochs + + min_step = tf.constant(1, dtype=tf.int64) + decay_steps = tf.maximum(min_step, tf.subtract(global_step, w_steps)) + poly_rate = tf.compat.v1.train.polynomial_decay( + plr, + decay_steps, + train_steps - w_steps + 1, + power=2.0) + return tf.compat.v1.where(global_step <= w_steps, wrate, poly_rate) + + # For LARS we have a new learning rate schedule + if enable_lars: + return poly_rate_fn + + return learning_rate_fn + + +def resnet_model_fn(features, labels, mode, model_class, + resnet_size, weight_decay, learning_rate_fn, momentum, + data_format, version, loss_scale, loss_filter_fn=None, + dtype=resnet_model.DEFAULT_DTYPE, + label_smoothing=0.0, enable_lars=False, + use_bfloat16=False): + """Shared functionality for different resnet model_fns. + + Initializes the ResnetModel representing the model layers + and uses that model to build the necessary EstimatorSpecs for + the `mode` in question. For training, this means building losses, + the optimizer, and the train op that get passed into the EstimatorSpec. + For evaluation and prediction, the EstimatorSpec is returned without + a train op, but with the necessary parameters for the given mode. + + Args: + features: tensor representing input images + labels: tensor representing class labels for all input images + mode: current estimator mode; should be one of + `tf.estimator.ModeKeys.TRAIN`, `EVALUATE`, `PREDICT` + model_class: a class representing a TensorFlow model that has a __call__ + function. We assume here that this is a subclass of ResnetModel. + resnet_size: A single integer for the size of the ResNet model. + weight_decay: weight decay loss rate used to regularize learned variables. + learning_rate_fn: function that returns the current learning rate given + the current global_step + momentum: momentum term used for optimization + data_format: Input format ('channels_last', 'channels_first', or None). + If set to None, the format is dependent on whether a GPU is available. + version: Integer representing which version of the ResNet network to use. + See README for details. Valid values: [1, 2] + loss_scale: The factor to scale the loss for numerical stability. A detailed + summary is present in the arg parser help text. + loss_filter_fn: function that takes a string variable name and returns + True if the var should be included in loss calculation, and False + otherwise. If None, batch_normalization variables will be excluded + from the loss. + dtype: the TensorFlow dtype to use for calculations. + use_bfloat16: Whether to use bfloat16 type for calculations. + + Returns: + EstimatorSpec parameterized according to the input params and the + current mode. + """ + + # Generate a summary node for the images + tf.compat.v1.summary.image('images', features, max_outputs=6) + + # Checks that features/images have same data type being used for calculations. + assert features.dtype == dtype + + if use_bfloat16 == True: + dtype = tf.bfloat16 + + features = tf.cast(features, dtype) + + model = model_class(resnet_size, data_format, version=version, dtype=dtype) + + logits = model(features, mode == tf.estimator.ModeKeys.TRAIN) + + # This acts as a no-op if the logits are already in fp32 (provided logits are + # not a SparseTensor). 
If dtype is is low precision, logits must be cast to + # fp32 for numerical stability. + logits = tf.cast(logits, tf.float32) + + num_examples_metric = tf_mlperf_log.sum_metric(tensor=tf.shape(input=logits)[0], name=_NUM_EXAMPLES_NAME) + + predictions = { + 'classes': tf.argmax(input=logits, axis=1), + 'probabilities': tf.nn.softmax(logits, name='softmax_tensor') + } + + + if mode == tf.estimator.ModeKeys.PREDICT: + # Return the predictions and the specification for serving a SavedModel + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + export_outputs={ + 'predict': tf.estimator.export.PredictOutput(predictions) + }) + + # Calculate loss, which includes softmax cross entropy and L2 regularization. + mlperf_log.resnet_print(key=mlperf_log.MODEL_HP_LOSS_FN, value=mlperf_log.CCE) + + if label_smoothing != 0.0: + one_hot_labels = tf.one_hot(labels, 1001) + cross_entropy = tf.compat.v1.losses.softmax_cross_entropy( + logits=logits, onehot_labels=one_hot_labels, + label_smoothing=label_smoothing) + else: + cross_entropy = tf.compat.v1.losses.sparse_softmax_cross_entropy( + logits=logits, labels=labels) + + # Create a tensor named cross_entropy for logging purposes. + tf.identity(cross_entropy, name='cross_entropy') + tf.compat.v1.summary.scalar('cross_entropy', cross_entropy) + + # If no loss_filter_fn is passed, assume we want the default behavior, + # which is that batch_normalization variables are excluded from loss. + def exclude_batch_norm(name): + return 'batch_normalization' not in name + loss_filter_fn = loss_filter_fn or exclude_batch_norm + + mlperf_log.resnet_print(key=mlperf_log.MODEL_EXCLUDE_BN_FROM_L2, + value=not loss_filter_fn('batch_normalization')) + + # Add weight decay to the loss. + mlperf_log.resnet_print(key=mlperf_log.MODEL_L2_REGULARIZATION, + value=weight_decay) + l2_loss = weight_decay * tf.add_n( + # loss is computed using fp32 for numerical stability. + [tf.nn.l2_loss(tf.cast(v, tf.float32)) for v in tf.compat.v1.trainable_variables() + if loss_filter_fn(v.name)]) + tf.compat.v1.summary.scalar('l2_loss', l2_loss) + loss = cross_entropy + l2_loss + + if mode == tf.estimator.ModeKeys.TRAIN: + global_step = tf.compat.v1.train.get_or_create_global_step() + + learning_rate = learning_rate_fn(global_step) + + log_id = mlperf_log.resnet_print(key=mlperf_log.OPT_LR, deferred=True) + learning_rate = tf_mlperf_log.log_deferred(op=learning_rate, log_id=log_id, + every_n=100) + + # Create a tensor named learning_rate for logging purposes + tf.identity(learning_rate, name='learning_rate') + tf.compat.v1.summary.scalar('learning_rate', learning_rate) + + mlperf_log.resnet_print(key=mlperf_log.OPT_NAME, + value=mlperf_log.SGD_WITH_MOMENTUM) + mlperf_log.resnet_print(key=mlperf_log.OPT_MOMENTUM, value=momentum) + + if enable_lars: + optimizer = tf.contrib.opt.LARSOptimizer( + learning_rate, + momentum=momentum, + weight_decay=weight_decay, + skip_list=['batch_normalization', 'bias']) + else: + optimizer = tf.compat.v1.train.MomentumOptimizer( + learning_rate=learning_rate, + momentum=momentum + ) + if is_mpi: + optimizer = hvd.DistributedOptimizer(optimizer) + + if loss_scale != 1: + # When computing fp16 gradients, often intermediate tensor values are + # so small, they underflow to 0. To avoid this, we multiply the loss by + # loss_scale to make these tensor values loss_scale times bigger. 
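+      # For example, with loss_scale=128 the gradients are computed on 128 * loss
+      # and divided by 128 again below, so the applied update is unchanged while
+      # intermediate gradient values stay representable in fp16.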
+ scaled_grad_vars = optimizer.compute_gradients(loss * loss_scale) + + # Once the gradient computation is complete we can scale the gradients + # back to the correct scale before passing them to the optimizer. + unscaled_grad_vars = [(grad / loss_scale, var) + for grad, var in scaled_grad_vars] + minimize_op = optimizer.apply_gradients(unscaled_grad_vars, global_step) + else: + minimize_op = optimizer.minimize(loss, global_step) + + update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) + train_op = tf.group(minimize_op, update_ops, num_examples_metric[1]) + else: + train_op = None + + accuracy = tf.compat.v1.metrics.accuracy(labels, predictions['classes']) + accuracy_top_5 = tf.compat.v1.metrics.mean(tf.nn.in_top_k(predictions=logits, + targets=labels, + k=5, + name='top_5_op')) + + metrics = {'accuracy': accuracy, + 'accuracy_top_5': accuracy_top_5, + _NUM_EXAMPLES_NAME: num_examples_metric} + + # Create a tensor named train_accuracy for logging purposes + tf.identity(accuracy[1], name='train_accuracy') + tf.identity(accuracy_top_5[1], name='train_accuracy_top_5') + tf.compat.v1.summary.scalar('train_accuracy', accuracy[1]) + tf.compat.v1.summary.scalar('train_accuracy_top_5', accuracy_top_5[1]) + + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + loss=loss, + train_op=train_op, + eval_metric_ops=metrics) + + +def per_device_batch_size(batch_size, num_gpus): + """For multi-gpu, batch-size must be a multiple of the number of GPUs. + + Note that this should eventually be handled by DistributionStrategies + directly. Multi-GPU support is currently experimental, however, + so doing the work here until that feature is in place. + + Args: + batch_size: Global batch size to be divided among devices. This should be + equal to num_gpus times the single-GPU batch_size for multi-gpu training. + num_gpus: How many GPUs are used with DistributionStrategies. + + Returns: + Batch size per device. + + Raises: + ValueError: if batch_size is not divisible by number of devices + """ + if num_gpus <= 1: + return batch_size + + remainder = batch_size % num_gpus + if remainder: + err = ('When running with multiple GPUs, batch size ' + 'must be a multiple of the number of available GPUs. Found {} ' + 'GPUs with a batch size of {}; try --batch_size={} instead.' + ).format(num_gpus, batch_size, batch_size - remainder) + raise ValueError(err) + return int(batch_size / num_gpus) + + +def resnet_main(seed, flags, model_function, input_function, shape=None): + """Shared main loop for ResNet Models. + + Args: + flags: FLAGS object that contains the params for running. See + ResnetArgParser for created flags. + model_function: the function that instantiates the Model and builds the + ops for train/eval. This will be passed directly into the estimator. + input_function: the function that processes the dataset and returns a + dataset that the estimator can train on. This will be wrapped with + all the relevant flags for running and passed to estimator. + shape: list of ints representing the shape of the images used for training. + This is only used if flags.export_dir is passed. + """ + + mlperf_log.resnet_print(key=mlperf_log.RUN_START) + + # Using the Winograd non-fused algorithms provides a small performance boost. + os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' + + # Create session config based on values of inter_op_parallelism_threads and + # intra_op_parallelism_threads. 
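# Editor's note: per_device_batch_size above only checks divisibility and splits the
# global batch evenly across devices; hypothetical calls and their results:
#
#   per_device_batch_size(256, num_gpus=4)   # -> 64
#   per_device_batch_size(256, num_gpus=0)   # -> 256 (CPU / single-device path)
#   per_device_batch_size(250, num_gpus=4)   # -> ValueError, message suggests --batch_size=248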
Note that we default to having + # allow_soft_placement = True, which is required for multi-GPU and not + # harmful for other modes. + session_config = tf.compat.v1.ConfigProto( + inter_op_parallelism_threads=flags.inter_op_parallelism_threads, + intra_op_parallelism_threads=flags.intra_op_parallelism_threads, + allow_soft_placement=True) + + if flags.num_gpus == 0: + distribution = tf.distribute.OneDeviceStrategy('device:CPU:0') + elif flags.num_gpus == 1: + distribution = tf.distribute.OneDeviceStrategy('device:GPU:0') + else: + distribution = tf.distribute.MirroredStrategy( + num_gpus=flags.num_gpus + ) + + mlperf_log.resnet_print(key=mlperf_log.RUN_SET_RANDOM_SEED, value=seed) + run_config = tf.estimator.RunConfig(train_distribute=distribution, + session_config=session_config, + log_step_count_steps=10, # output logs more frequently + tf_random_seed=seed) + + mlperf_log.resnet_print(key=mlperf_log.INPUT_BATCH_SIZE, + value=flags.batch_size) + + if is_mpi: + if hvd.rank() == 0: + model_dir = os.path.join(flags.model_dir,"main") + else: + model_dir = os.path.join(flags.model_dir,"tmp{}".format(hvd.rank())) + benchmark_log_dir = flags.benchmark_log_dir if hvd.rank() == 0 else None + else: + model_dir = flags.model_dir + benchmark_log_dir = flags.benchmark_log_dir + + classifier = tf.estimator.Estimator( + model_fn=model_function, model_dir=model_dir, config=run_config, + params={ + 'resnet_size': flags.resnet_size, + 'data_format': flags.data_format, + 'batch_size': flags.batch_size, + 'version': flags.version, + 'loss_scale': flags.loss_scale, + 'dtype': flags.dtype, + 'label_smoothing': flags.label_smoothing, + 'enable_lars': flags.enable_lars, + 'weight_decay': flags.weight_decay, + 'fine_tune': flags.fine_tune, + 'use_bfloat16': flags.use_bfloat16 + }) + + if benchmark_log_dir is not None: + benchmark_logger = logger.BenchmarkLogger(benchmark_log_dir) + benchmark_logger.log_run_info('resnet') + else: + benchmark_logger = None + + mlperf_log.resnet_print(key=mlperf_log.TRAIN_LOOP) + + # for MPI only to figure out the steps per epoch or per eval, per worker + if is_mpi: + num_eval_steps = _NUM_IMAGES['validation'] // flags.batch_size + steps_per_epoch = _NUM_IMAGES['train'] // flags.batch_size + steps_per_epoch_per_worker = steps_per_epoch // hvd.size() + steps_per_eval_per_worker = steps_per_epoch_per_worker * flags.epochs_between_evals + + # The reference performs the first evaluation on the fourth epoch. (offset + # eval by 3 epochs) + mlperf_log.resnet_print(key=mlperf_log.EVAL_EPOCH_OFFSET, value=3) + success = False + for i in range(flags.train_epochs // flags.epochs_between_evals): + # Data for epochs_between_evals (i.e. 4 epochs between evals) worth of + # epochs is concatenated and run as a single block inside a session. For + # this reason we declare all of the epochs that will be run at the start. + # Submitters may report in a way which is reasonable for their control flow. 
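# Editor's note: with Horovod (is_mpi) the per-worker step counts computed above work
# out as follows for an illustrative configuration; the image count, batch size and
# worker count are assumptions for the example, not values read from this patch:
num_train_images = 1281167                 # e.g. ImageNet train split
batch_size = 256                           # per-worker batch size
num_workers = 4                            # hvd.size()
epochs_between_evals = 4

steps_per_epoch = num_train_images // batch_size               # 5004
steps_per_epoch_per_worker = steps_per_epoch // num_workers    # 1251
steps_per_eval_per_worker = (steps_per_epoch_per_worker *
                             epochs_between_evals)             # 5004 steps between evals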
+ for j in range(flags.epochs_between_evals): + mlperf_log.resnet_print(key=mlperf_log.TRAIN_EPOCH, + value=i * flags.epochs_between_evals + j) + + flags.hooks += ["examplespersecondhook"] + if is_mpi: + train_hooks = [hvd.BroadcastGlobalVariablesHook(0)] + train_hooks = train_hooks + hooks_helper.get_train_hooks( + flags.hooks, + batch_size=flags.batch_size*hvd.size(), + benchmark_log_dir=flags.benchmark_log_dir) + else: + train_hooks = hooks_helper.get_train_hooks( + flags.hooks, + batch_size=flags.batch_size, + benchmark_log_dir=flags.benchmark_log_dir) + + _log_cache = [] + def formatter(x): + """Abuse side effects to get tensors out of the model_fn.""" + if _log_cache: + _log_cache.pop() + _log_cache.append(x.copy()) + return str(x) + + compliance_hook = tf.estimator.LoggingTensorHook( + tensors={_NUM_EXAMPLES_NAME: _NUM_EXAMPLES_NAME}, + every_n_iter=int(1e10), + at_end=True, + formatter=formatter) + + print('Starting a training cycle.') + + def input_fn_train(): + return input_function( + is_training=True, + data_dir=flags.data_dir, + batch_size=per_device_batch_size(flags.batch_size, flags.num_gpus), + num_epochs=flags.epochs_between_evals, + num_gpus=flags.num_gpus, + dtype=flags.dtype + ) + if is_mpi: + # if max step is set, use max_step, not the steps_per_eval_per_worker + # assuming max_train_steps is smaller than steps_per_eval_per_worker + # Also assuming when -- steps is specified, the train epochs should + # be set to be equal to epochs_between_evals so that the + # range(flags.train_epochs // flags.epochs_between_evals) gets to be 1 + if flags.max_train_steps < steps_per_eval_per_worker: + train_steps = flags.max_train_steps + else: + train_steps = steps_per_eval_per_worker + + classifier.train(input_fn=input_fn_train, hooks=train_hooks + [compliance_hook], + steps=train_steps) + else: + classifier.train(input_fn=input_fn_train, hooks=train_hooks + [compliance_hook], max_steps=flags.max_train_steps) + + #train_examples = int(_log_cache.pop()[_NUM_EXAMPLES_NAME]) + #mlperf_log.resnet_print(key=mlperf_log.INPUT_SIZE, value=train_examples) + + print('Starting to evaluate.') + # Evaluate the model and print results + def input_fn_eval(): + return input_function( + is_training=False, + data_dir=flags.data_dir, + batch_size=per_device_batch_size(flags.batch_size, flags.num_gpus), + num_epochs=1, + dtype=flags.dtype + ) + + + mlperf_log.resnet_print(key=mlperf_log.EVAL_START) + # flags.max_train_steps is generally associated with testing and profiling. + # As a result it is frequently called with synthetic data, which will + # iterate forever. Passing steps=flags.max_train_steps allows the eval + # (which is generally unimportant in those circumstances) to terminate. + # Note that eval will run for max_train_steps each loop, regardless of the + # global_step count. 
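# Editor's note: compliance_hook above relies on LoggingTensorHook invoking its
# formatter with the evaluated {tag: value} dict, so the closure can stash the latest
# values for use after training ends. A stripped-down, framework-free sketch of the
# same pattern (the tensor name and value are placeholders):
_example_log_cache = []

def example_formatter(tensor_values):
    """Keep only the most recent snapshot of the logged values."""
    if _example_log_cache:
        _example_log_cache.pop()
    _example_log_cache.append(dict(tensor_values))
    return str(tensor_values)

# Simulate the single at_end call the hook would make:
example_formatter({"num_examples": 1281024})
train_examples = _example_log_cache.pop()["num_examples"]   # -> 1281024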
+ eval_results = classifier.evaluate(input_fn=input_fn_eval, + steps=flags.max_train_steps) + mlperf_log.resnet_print(key=mlperf_log.EVAL_STOP) + mlperf_log.resnet_print(key=mlperf_log.EVAL_SIZE, value=int(eval_results[_NUM_EXAMPLES_NAME])) + mlperf_log.resnet_print(key=mlperf_log.EVAL_ACCURACY, value=float(eval_results['accuracy'])) + mlperf_log.resnet_print(key=mlperf_log.EVAL_TARGET, value=flags.stop_threshold) + print(eval_results) + + if benchmark_logger: + benchmark_logger.log_estimator_evaluation_result(eval_results) + + if model_helpers.past_stop_threshold( + flags.stop_threshold, eval_results['accuracy']): + success = True + break + + mlperf_log.resnet_print(key=mlperf_log.RUN_STOP, value={"success": success}) + mlperf_log.resnet_print(key=mlperf_log.RUN_FINAL) + + +class ResnetArgParser(argparse.ArgumentParser): + """Arguments for configuring and running a Resnet Model.""" + + def __init__(self, resnet_size_choices=None): + super(ResnetArgParser, self).__init__(parents=[ + parsers.BaseParser(multi_gpu=False), + parsers.PerformanceParser(num_parallel_calls=False), + parsers.ImageModelParser(), + parsers.ExportParser(), + parsers.BenchmarkParser(), + ]) + + self.add_argument( + '--version', '-v', type=int, choices=[1, 2], + default=resnet_model.DEFAULT_VERSION, + help='Version of ResNet. (1 or 2) See README.md for details.' + ) + + self.add_argument( + '--resnet_size', '-rs', type=int, default=50, + choices=resnet_size_choices, + help='[default: %(default)s] The size of the ResNet model to use.', + metavar='' if resnet_size_choices is None else None + ) + + self.add_argument( + '--use_bfloat16', action='store_true', default=False, + help='Whether to use bfloat16 type for computations.' + ) + + def parse_args(self, args=None, namespace=None): + args = super(ResnetArgParser, self).parse_args( + args=args, namespace=namespace) + + # handle coupling between dtype and loss_scale + parsers.parse_dtype_info(args) + + return args diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/arg_parsers/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/arg_parsers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/arg_parsers/parsers.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/arg_parsers/parsers.py new file mode 100644 index 000000000..4747ceb15 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/arg_parsers/parsers.py @@ -0,0 +1,413 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Collection of parsers which are shared among the official models. + +The parsers in this module are intended to be used as parents to all arg +parsers in official models. For instance, one might define a new class: + +class ExampleParser(argparse.ArgumentParser): + def __init__(self): + super(ExampleParser, self).__init__(parents=[ + arg_parsers.LocationParser(data_dir=True, model_dir=True), + arg_parsers.DummyParser(use_synthetic_data=True), + ]) + + self.add_argument( + "--application_specific_arg", "-asa", type=int, default=123, + help="[default: %(default)s] This arg is application specific.", + metavar="" + ) + +Notes about add_argument(): + Argparse will automatically template in default values in help messages if + the "%(default)s" string appears in the message. Using the example above: + + parser = ExampleParser() + parser.set_defaults(application_specific_arg=3141592) + parser.parse_args(["-h"]) + + When the help text is generated, it will display 3141592 to the user. (Even + though the default was 123 when the flag was created.) + + + The metavar variable determines how the flag will appear in help text. If + not specified, the convention is to use name.upper(). Thus rather than: + + --app_specific_arg APP_SPECIFIC_ARG, -asa APP_SPECIFIC_ARG + + if metavar="" is set, the user sees: + + --app_specific_arg , -asa + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse + +import tensorflow as tf + + +# Map string to (TensorFlow dtype, default loss scale) +DTYPE_MAP = { + "fp16": (tf.float16, 128), + "fp32": (tf.float32, 1), +} + + +def parse_dtype_info(flags): + """Convert dtype string to tf dtype, and set loss_scale default as needed. + + Args: + flags: namespace object returned by arg parser. + + Raises: + ValueError: If an invalid dtype is provided. + """ + if flags.dtype in (i[0] for i in DTYPE_MAP.values()): + return # Make function idempotent + + try: + flags.dtype, default_loss_scale = DTYPE_MAP[flags.dtype] + except KeyError: + raise ValueError("Invalid dtype: {}".format(flags.dtype)) + + flags.loss_scale = flags.loss_scale or default_loss_scale + + +class BaseParser(argparse.ArgumentParser): + """Parser to contain flags which will be nearly universal across models. + + Args: + add_help: Create the "--help" flag. False if class instance is a parent. + data_dir: Create a flag for specifying the input data directory. + model_dir: Create a flag for specifying the model file directory. + train_epochs: Create a flag to specify the number of training epochs. + epochs_between_evals: Create a flag to specify the frequency of testing. + stop_threshold: Create a flag to specify a threshold accuracy or other + eval metric which should trigger the end of training. + batch_size: Create a flag to specify the global batch size. + multi_gpu: Create a flag to allow the use of all available GPUs. + num_gpu: Create a flag to specify the number of GPUs used. + hooks: Create a flag to specify hooks for logging. 
+ """ + + def __init__(self, add_help=False, data_dir=True, model_dir=True, + train_epochs=True, epochs_between_evals=True, + stop_threshold=True, batch_size=True, + multi_gpu=False, num_gpu=True, hooks=True, + enable_lars=True, label_smoothing=True, weight_decay=True, fine_tune=True): + super(BaseParser, self).__init__(add_help=add_help) + + if data_dir: + self.add_argument( + "--data_dir", "-dd", default="/tmp", + help="[default: %(default)s] The location of the input data.", + metavar="
", + ) + + if model_dir: + self.add_argument( + "--model_dir", "-md", default="/tmp", + help="[default: %(default)s] The location of the model checkpoint " + "files.", + metavar="", + ) + + if train_epochs: + self.add_argument( + "--train_epochs", "-te", type=int, default=1, + help="[default: %(default)s] The number of epochs used to train.", + metavar="" + ) + + if epochs_between_evals: + self.add_argument( + "--epochs_between_evals", "-ebe", type=int, default=1, + help="[default: %(default)s] The number of training epochs to run " + "between evaluations.", + metavar="" + ) + + if stop_threshold: + self.add_argument( + "--stop_threshold", "-st", type=float, default=None, + help="[default: %(default)s] If passed, training will stop at " + "the earlier of train_epochs and when the evaluation metric is " + "greater than or equal to stop_threshold.", + metavar="" + ) + + if batch_size: + self.add_argument( + "--batch_size", "-bs", type=int, default=32, + help="[default: %(default)s] Global batch size for training and " + "evaluation.", + metavar="" + ) + + if enable_lars: + self.add_argument( + "--enable_lars", "-el", action='store_true', + help='[default: %(default)s] Enable LARS optimizer for large batch training.' + ) + + if label_smoothing: + self.add_argument( + "--label_smoothing", "-lsm", type=float, default=0.0, + help='[default: %(default)s] Label smoothing parameter used in the softmax_cross_entropy', + metavar="" + ) + + if weight_decay: + self.add_argument( + "--weight_decay", "-wd", type=float, default=1e-4, + help='[default: %(default)s] Weight decay coefficiant for l2 regularization.', + metavar="" + ) + + if fine_tune: + self.add_argument( + "--fine_tune", "-ft", action='store_true', + help="[default: %(default)s] fine_tune: If True only train the dense layers(final layers." + ) + + assert not (multi_gpu and num_gpu) + + if multi_gpu: + self.add_argument( + "--multi_gpu", action="store_true", + help="If set, run across all available GPUs." + ) + + if num_gpu: + self.add_argument( + "--num_gpus", "-ng", + type=int, + default=1 if tf.test.is_built_with_cuda() else 0, + help="[default: %(default)s] How many GPUs to use with the " + "DistributionStrategies API. The default is 1 if TensorFlow was" + "built with CUDA, and 0 otherwise.", + metavar="" + ) + + if hooks: + self.add_argument( + "--hooks", "-hk", nargs="+", default=["LoggingTensorHook"], + help="[default: %(default)s] A list of strings to specify the names " + "of train hooks. " + "Example: --hooks LoggingTensorHook ExamplesPerSecondHook. " + "Allowed hook names (case-insensitive): LoggingTensorHook, " + "ProfilerHook, ExamplesPerSecondHook, LoggingMetricHook." + "See official.utils.logs.hooks_helper for details.", + metavar="" + ) + + +class PerformanceParser(argparse.ArgumentParser): + """Default parser for specifying performance tuning arguments. + + Args: + add_help: Create the "--help" flag. False if class instance is a parent. + num_parallel_calls: Create a flag to specify parallelism of data loading. + inter_op: Create a flag to allow specification of inter op threads. + intra_op: Create a flag to allow specification of intra op threads. 
+ """ + + def __init__(self, add_help=False, num_parallel_calls=True, inter_op=True, + intra_op=True, use_synthetic_data=True, max_train_steps=True, + dtype=True): + super(PerformanceParser, self).__init__(add_help=add_help) + + if num_parallel_calls: + self.add_argument( + "--num_parallel_calls", "-npc", + type=int, default=5, + help="[default: %(default)s] The number of records that are " + "processed in parallel during input processing. This can be " + "optimized per data set but for generally homogeneous data " + "sets, should be approximately the number of available CPU " + "cores.", + metavar="" + ) + + if inter_op: + self.add_argument( + "--inter_op_parallelism_threads", "-inter", + type=int, default=0, + help="[default: %(default)s Number of inter_op_parallelism_threads " + "to use for CPU. See TensorFlow config.proto for details.", + metavar="" + ) + + if intra_op: + self.add_argument( + "--intra_op_parallelism_threads", "-intra", + type=int, default=0, + help="[default: %(default)s Number of intra_op_parallelism_threads " + "to use for CPU. See TensorFlow config.proto for details.", + metavar="" + ) + + if use_synthetic_data: + self.add_argument( + "--use_synthetic_data", "-synth", + action="store_true", + help="If set, use fake data (zeroes) instead of a real dataset. " + "This mode is useful for performance debugging, as it removes " + "input processing steps, but will not learn anything." + ) + + if max_train_steps: + self.add_argument( + "--max_train_steps", "-mts", type=int, default=None, + help="[default: %(default)s] The model will stop training if the " + "global_step reaches this value. If not set, training will run" + "until the specified number of epochs have run as usual. It is" + "generally recommended to set --train_epochs=1 when using this" + "flag.", + metavar="" + ) + + if dtype: + self.add_argument( + "--dtype", "-dt", + default="fp32", + choices=list(DTYPE_MAP.keys()), + help="[default: %(default)s] {%(choices)s} The TensorFlow datatype " + "used for calculations. Variables may be cast to a higher" + "precision on a case-by-case basis for numerical stability.", + metavar="
" + ) + + self.add_argument( + "--loss_scale", "-ls", + type=int, + help="[default: %(default)s] The amount to scale the loss by when " + "the model is run. Before gradients are computed, the loss is " + "multiplied by the loss scale, making all gradients loss_scale " + "times larger. To adjust for this, gradients are divided by the " + "loss scale before being applied to variables. This is " + "mathematically equivalent to training without a loss scale, " + "but the loss scale helps avoid some intermediate gradients " + "from underflowing to zero. If not provided the default for " + "fp16 is 128 and 1 for all other dtypes.", + ) + + +class ImageModelParser(argparse.ArgumentParser): + """Default parser for specification image specific behavior. + + Args: + add_help: Create the "--help" flag. False if class instance is a parent. + data_format: Create a flag to specify image axis convention. + """ + + def __init__(self, add_help=False, data_format=True): + super(ImageModelParser, self).__init__(add_help=add_help) + if data_format: + self.add_argument( + "--data_format", "-df", + default=None, + choices=["channels_first", "channels_last"], + help="A flag to override the data format used in the model. " + "channels_first provides a performance boost on GPU but is not " + "always compatible with CPU. If left unspecified, the data " + "format will be chosen automatically based on whether TensorFlow" + "was built for CPU or GPU.", + metavar="" + ) + + +class ExportParser(argparse.ArgumentParser): + """Parsing options for exporting saved models or other graph defs. + + This is a separate parser for now, but should be made part of BaseParser + once all models are brought up to speed. + + Args: + add_help: Create the "--help" flag. False if class instance is a parent. + export_dir: Create a flag to specify where a SavedModel should be exported. + """ + + def __init__(self, add_help=False, export_dir=True): + super(ExportParser, self).__init__(add_help=add_help) + if export_dir: + self.add_argument( + "--export_dir", "-ed", + help="[default: %(default)s] If set, a SavedModel serialization of " + "the model will be exported to this directory at the end of " + "training. See the README for more details and relevant links.", + metavar="" + ) + + +class BenchmarkParser(argparse.ArgumentParser): + """Default parser for benchmark logging. + + Args: + add_help: Create the "--help" flag. False if class instance is a parent. + benchmark_log_dir: Create a flag to specify location for benchmark logging. 
+ """ + + def __init__(self, add_help=False, benchmark_log_dir=True, + bigquery_uploader=True): + super(BenchmarkParser, self).__init__(add_help=add_help) + if benchmark_log_dir: + self.add_argument( + "--benchmark_log_dir", "-bld", default=None, + help="[default: %(default)s] The location of the benchmark logging.", + metavar="" + ) + if bigquery_uploader: + self.add_argument( + "--gcp_project", "-gp", default=None, + help="[default: %(default)s] The GCP project name where the benchmark" + " will be uploaded.", + metavar="" + ) + self.add_argument( + "--bigquery_data_set", "-bds", default="test_benchmark", + help="[default: %(default)s] The Bigquery dataset name where the" + " benchmark will be uploaded.", + metavar="" + ) + self.add_argument( + "--bigquery_run_table", "-brt", default="benchmark_run", + help="[default: %(default)s] The Bigquery table name where the" + " benchmark run information will be uploaded.", + metavar="" + ) + self.add_argument( + "--bigquery_metric_table", "-bmt", default="benchmark_metric", + help="[default: %(default)s] The Bigquery table name where the" + " benchmark metric information will be uploaded.", + metavar="" + ) + + +class EagerParser(BaseParser): + """Remove options not relevant for Eager from the BaseParser.""" + + def __init__(self, add_help=False, data_dir=True, model_dir=True, + train_epochs=True, batch_size=True): + super(EagerParser, self).__init__( + add_help=add_help, data_dir=data_dir, model_dir=model_dir, + train_epochs=train_epochs, epochs_between_evals=False, + stop_threshold=False, batch_size=batch_size, multi_gpu=False, + hooks=False) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/export/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/export/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/export/export.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/export/export.py new file mode 100644 index 000000000..8061c2881 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/export/export.py @@ -0,0 +1,49 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Convenience functions for exporting models as SavedModels or other types.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + + +def build_tensor_serving_input_receiver_fn(shape, dtype=tf.float32, + batch_size=1): + """Returns a input_receiver_fn that can be used during serving. + + This expects examples to come through as float tensors, and simply + wraps them as TensorServingInputReceivers. + + Arguably, this should live in tf.estimator.export. Testing here first. 
+ + Args: + shape: list representing target size of a single example. + dtype: the expected datatype for the input example + batch_size: number of input tensors that will be passed for prediction + + Returns: + A function that itself returns a TensorServingInputReceiver. + """ + def serving_input_receiver_fn(): + # Prep a placeholder where the input example will be fed in + features = tf.compat.v1.placeholder( + dtype=dtype, shape=[batch_size] + shape, name='input_tensor') + + return tf.estimator.export.TensorServingInputReceiver( + features=features, receiver_tensors=features) + + return serving_input_receiver_fn diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/benchmark_uploader.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/benchmark_uploader.py new file mode 100644 index 000000000..39a61b22a --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/benchmark_uploader.py @@ -0,0 +1,129 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Library to upload benchmark generated by BenchmarkLogger to remote repo. + +This library require google cloud bigquery lib as dependency, which can be +installed with: + > pip install --upgrade google-cloud-bigquery +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import os +import sys +import uuid + +from google.cloud import bigquery + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from official.utils.arg_parsers import parsers +from official.utils.logs import logger + + +class BigQueryUploader(object): + """Upload the benchmark and metric info to BigQuery.""" + + def __init__(self, logging_dir, gcp_project=None, credentials=None): + """Initialized BigQueryUploader with proper setting. + + Args: + logging_dir: string, logging directory that contains the benchmark log. + gcp_project: string, the name of the GCP project that the log will be + uploaded to. The default project name will be detected from local + environment if no value is provided. + credentials: google.auth.credentials. The credential to access the + BigQuery service. The default service account credential will be + detected from local environment if no value is provided. Please use + google.oauth2.service_account.Credentials to load credential from local + file for the case that the test is run out side of GCP. 
+ """ + self._logging_dir = logging_dir + self._bq_client = bigquery.Client( + project=gcp_project, credentials=credentials) + + def upload_benchmark_run(self, dataset_name, table_name, run_id): + """Upload benchmark run information to Bigquery. + + Args: + dataset_name: string, the name of bigquery dataset where the data will be + uploaded. + table_name: string, the name of bigquery table under the dataset where + the data will be uploaded. + run_id: string, a unique ID that will be attached to the data, usually + this is a UUID4 format. + """ + expected_file = os.path.join( + self._logging_dir, logger.BENCHMARK_RUN_LOG_FILE_NAME) + with tf.io.gfile.GFile(expected_file) as f: + benchmark_json = json.load(f) + benchmark_json["model_id"] = run_id + table_ref = self._bq_client.dataset(dataset_name).table(table_name) + errors = self._bq_client.insert_rows_json(table_ref, [benchmark_json]) + if errors: + tf.compat.v1.logging.error( + "Failed to upload benchmark info to bigquery: {}".format(errors)) + + def upload_metric(self, dataset_name, table_name, run_id): + """Upload metric information to Bigquery. + + Args: + dataset_name: string, the name of bigquery dataset where the data will be + uploaded. + table_name: string, the name of bigquery table under the dataset where + the metric data will be uploaded. This is different from the + benchmark_run table. + run_id: string, a unique ID that will be attached to the data, usually + this is a UUID4 format. This should be the same as the benchmark run_id. + """ + expected_file = os.path.join( + self._logging_dir, logger.METRIC_LOG_FILE_NAME) + with tf.io.gfile.GFile(expected_file) as f: + lines = f.readlines() + metrics = [] + for line in filter(lambda l: l.strip(), lines): + metric = json.loads(line) + metric["run_id"] = run_id + metrics.append(metric) + table_ref = self._bq_client.dataset(dataset_name).table(table_name) + errors = self._bq_client.insert_rows_json(table_ref, metrics) + if errors: + tf.compat.v1.logging.error( + "Failed to upload benchmark info to bigquery: {}".format(errors)) + + +def main(argv): + parser = parsers.BenchmarkParser() + flags = parser.parse_args(args=argv[1:]) + if not flags.benchmark_log_dir: + print("Usage: benchmark_uploader.py --benchmark_log_dir=/some/dir") + sys.exit(1) + + uploader = BigQueryUploader( + flags.benchmark_log_dir, + gcp_project=flags.gcp_project) + run_id = str(uuid.uuid4()) + uploader.upload_benchmark_run( + flags.bigquery_data_set, flags.bigquery_run_table, run_id) + uploader.upload_metric( + flags.bigquery_data_set, flags.bigquery_metric_table, run_id) + + +if __name__ == "__main__": + main(argv=sys.argv) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/hooks.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/hooks.py new file mode 100644 index 000000000..325f6652a --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/hooks.py @@ -0,0 +1,117 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Hook that counts examples per second every N steps or seconds.""" + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + + +class ExamplesPerSecondHook(tf.estimator.SessionRunHook): + """Hook to print out examples per second. + + Total time is tracked and then divided by the total number of steps + to get the average step time and then batch_size is used to determine + the running average of examples per second. The examples per second for the + most recent interval is also logged. + """ + + def __init__(self, + batch_size, + every_n_steps=None, + every_n_secs=None, + warm_steps=0): + """Initializer for ExamplesPerSecondHook. + + Args: + batch_size: Total batch size across all workers used to calculate + examples/second from global time. + every_n_steps: Log stats every n steps. + every_n_secs: Log stats every n seconds. Exactly one of the + `every_n_steps` or `every_n_secs` should be set. + warm_steps: The number of steps to be skipped before logging and running + average calculation. warm_steps steps refers to global steps across all + workers, not on each worker + + Raises: + ValueError: if neither `every_n_steps` or `every_n_secs` is set, or + both are set. + """ + + if (every_n_steps is None) == (every_n_secs is None): + raise ValueError('exactly one of every_n_steps' + ' and every_n_secs should be provided.') + + self._timer = tf.estimator.SecondOrStepTimer( + every_steps=every_n_steps, every_secs=every_n_secs) + + self._step_train_time = 0 + self._total_steps = 0 + self._total_measured_steps = 0 + self._batch_size = batch_size + self._warm_steps = warm_steps + + def begin(self): + """Called once before using the session to check global step.""" + self._global_step_tensor = tf.compat.v1.train.get_global_step() + if self._global_step_tensor is None: + raise RuntimeError( + 'Global step should be created to use StepCounterHook.') + + def before_run(self, run_context): # pylint: disable=unused-argument + """Called before each call to run(). + + Args: + run_context: A SessionRunContext object. + + Returns: + A SessionRunArgs object or None if never triggered. + """ + return tf.estimator.SessionRunArgs(self._global_step_tensor) + + def after_run(self, run_context, run_values): # pylint: disable=unused-argument + """Called after each call to run(). + + Args: + run_context: A SessionRunContext object. + run_values: A SessionRunValues object. 
+ """ + global_step = run_values.results + if self._timer.should_trigger_for_step(global_step): + elapsed_time, elapsed_steps = self._timer.update_last_triggered_step(global_step) + if elapsed_time is not None: + # current examples per second is based on the elapsed training steps + # and training time per batch + current_examples_per_sec = self._batch_size * ( + elapsed_steps / elapsed_time) + self._total_steps += elapsed_steps + if global_step > self._warm_steps: + self._step_train_time += elapsed_time + self._total_measured_steps += elapsed_steps + # average examples per second is based on the total (accumulative) + # training steps and training time so far + average_examples_per_sec = self._batch_size * ( + self._total_measured_steps / self._step_train_time) + # Current examples/sec followed by average examples/sec + tf.compat.v1.logging.info('Batch [%g]: last %g steps exp/sec = %g, total average exp/sec = ' + '%g', self._total_steps, elapsed_steps, current_examples_per_sec, average_examples_per_sec) + else: + # Current examples/sec followed by completed warmup steps + tf.compat.v1.logging.info('Batch [%g]: last %g steps exp/sec = %g, completed %g/%g wamrup steps' + , self._total_steps, elapsed_steps, current_examples_per_sec, self._total_steps, self._warm_steps) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/hooks_helper.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/hooks_helper.py new file mode 100644 index 000000000..e92dc5d23 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/hooks_helper.py @@ -0,0 +1,159 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Hooks helper to return a list of TensorFlow hooks for training by name. + +More hooks can be added to this set. To add a new hook, 1) add the new hook to +the registry in HOOKS, 2) add a corresponding function that parses out necessary +parameters. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from mlperf_utils.logs import hooks +from mlperf_utils.logs import metric_hook + +_TENSORS_TO_LOG = dict((x, x) for x in ['learning_rate', + 'cross_entropy', + 'train_accuracy']) + + +def get_train_hooks(name_list, **kwargs): + """Factory for getting a list of TensorFlow hooks for training by name. + + Args: + name_list: a list of strings to name desired hook classes. Allowed: + LoggingTensorHook, ProfilerHook, ExamplesPerSecondHook, which are defined + as keys in HOOKS + **kwargs: a dictionary of arguments to the hooks. + + Returns: + list of instantiated hooks, ready to be used in a classifier.train call. + + Raises: + ValueError: if an unrecognized name is passed. 
+ """ + + if not name_list: + return [] + + train_hooks = [] + for name in name_list: + hook_name = HOOKS.get(name.strip().lower()) + if hook_name is None: + raise ValueError('Unrecognized training hook requested: {}'.format(name)) + else: + train_hooks.append(hook_name(**kwargs)) + + return train_hooks + + +def get_logging_tensor_hook(every_n_iter=100, tensors_to_log=None, **kwargs): # pylint: disable=unused-argument + """Function to get LoggingTensorHook. + + Args: + every_n_iter: `int`, print the values of `tensors` once every N local + steps taken on the current worker. + tensors_to_log: List of tensor names or dictionary mapping labels to tensor + names. If not set, log _TENSORS_TO_LOG by default. + **kwargs: a dictionary of arguments to LoggingTensorHook. + + Returns: + Returns a LoggingTensorHook with a standard set of tensors that will be + printed to stdout. + """ + if tensors_to_log is None: + tensors_to_log = _TENSORS_TO_LOG + + return tf.estimator.LoggingTensorHook( + tensors=tensors_to_log, + every_n_iter=every_n_iter) + + +def get_profiler_hook(save_steps=1000, **kwargs): # pylint: disable=unused-argument + """Function to get ProfilerHook. + + Args: + save_steps: `int`, print profile traces every N steps. + **kwargs: a dictionary of arguments to ProfilerHook. + + Returns: + Returns a ProfilerHook that writes out timelines that can be loaded into + profiling tools like chrome://tracing. + """ + return tf.estimator.ProfilerHook(save_steps=save_steps) + + +def get_examples_per_second_hook(every_n_steps=100, + batch_size=128, + warm_steps=500, + **kwargs): # pylint: disable=unused-argument + """Function to get ExamplesPerSecondHook. + + Args: + every_n_steps: `int`, print current and average examples per second every + N steps. + batch_size: `int`, total batch size used to calculate examples/second from + global time. + warm_steps: skip this number of steps before logging and running average. + **kwargs: a dictionary of arguments to ExamplesPerSecondHook. + + Returns: + Returns a ProfilerHook that writes out timelines that can be loaded into + profiling tools like chrome://tracing. + """ + return hooks.ExamplesPerSecondHook(every_n_steps=every_n_steps, + batch_size=batch_size, + warm_steps=warm_steps) + + +def get_logging_metric_hook(benchmark_log_dir=None, + tensors_to_log=None, + every_n_secs=600, + **kwargs): # pylint: disable=unused-argument + """Function to get LoggingMetricHook. + + Args: + benchmark_log_dir: `string`, directory path to save the metric log. + tensors_to_log: List of tensor names or dictionary mapping labels to tensor + names. If not set, log _TENSORS_TO_LOG by default. + every_n_secs: `int`, the frequency for logging the metric. Default to every + 10 mins. + + Returns: + Returns a ProfilerHook that writes out timelines that can be loaded into + profiling tools like chrome://tracing. 
+ """ + if benchmark_log_dir is None: + raise ValueError("metric_log_dir should be provided to use metric logger") + if tensors_to_log is None: + tensors_to_log = _TENSORS_TO_LOG + return metric_hook.LoggingMetricHook( + tensors=tensors_to_log, + log_dir=benchmark_log_dir, + every_n_secs=every_n_secs) + + +# A dictionary to map one hook name and its corresponding function +HOOKS = { + 'loggingtensorhook': get_logging_tensor_hook, + 'profilerhook': get_profiler_hook, + 'examplespersecondhook': get_examples_per_second_hook, + 'loggingmetrichook': get_logging_metric_hook, +} diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/logger.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/logger.py new file mode 100644 index 000000000..ae3da6e60 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/logger.py @@ -0,0 +1,196 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Logging utilities for benchmark. + +For collecting local environment metrics like CPU and memory, certain python +packages need be installed. See README for details. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import datetime +import json +import multiprocessing +import numbers +import os + +import tensorflow as tf +from tensorflow.python.client import device_lib + +METRIC_LOG_FILE_NAME = "metric.log" +BENCHMARK_RUN_LOG_FILE_NAME = "benchmark_run.log" +_DATE_TIME_FORMAT_PATTERN = "%Y-%m-%dT%H:%M:%S.%fZ" + + +class BenchmarkLogger(object): + """Class to log the benchmark information to local disk.""" + + def __init__(self, logging_dir): + self._logging_dir = logging_dir + if not tf.io.gfile.isdir(self._logging_dir): + tf.io.gfile.makedirs(self._logging_dir) + + def log_estimator_evaluation_result(self, eval_results): + """Log the evaluation result for a estimator. + + The evaluate result is a directory that contains metrics defined in + model_fn. It also contains a entry for global_step which contains the value + of the global step when evaluation was performed. + + Args: + eval_results: dict, the result of evaluate() from a estimator. + """ + if not isinstance(eval_results, dict): + tf.compat.v1.logging.warning("eval_results should be directory for logging. Got %s", + type(eval_results)) + return + global_step = eval_results[tf.compat.v1.GraphKeys.GLOBAL_STEP] + for key in sorted(eval_results): + if key != tf.compat.v1.GraphKeys.GLOBAL_STEP: + self.log_metric(key, eval_results[key], global_step=global_step) + + def log_metric(self, name, value, unit=None, global_step=None, extras=None): + """Log the benchmark metric information to local file. + + Currently the logging is done in a synchronized way. This should be updated + to log asynchronously. + + Args: + name: string, the name of the metric to log. 
+ value: number, the value of the metric. The value will not be logged if it + is not a number type. + unit: string, the unit of the metric, E.g "image per second". + global_step: int, the global_step when the metric is logged. + extras: map of string:string, the extra information about the metric. + """ + if not isinstance(value, numbers.Number): + tf.compat.v1.logging.warning( + "Metric value to log should be a number. Got %s", type(value)) + return + if extras: + extras = [{"name": k, "value": v} for k, v in sorted(extras.items())] + else: + extras = [] + with tf.io.gfile.GFile( + os.path.join(self._logging_dir, METRIC_LOG_FILE_NAME), "a") as f: + metric = { + "name": name, + "value": float(value), + "unit": unit, + "global_step": global_step, + "timestamp": datetime.datetime.now().strftime( + _DATE_TIME_FORMAT_PATTERN), + "extras": extras} + try: + json.dump(metric, f) + f.write("\n") + except (TypeError, ValueError) as e: + tf.compat.v1.logging.warning("Failed to dump metric to log file: " + "name %s, value %s, error %s", name, value, e) + + def log_run_info(self, model_name): + """Collect most of the TF runtime information for the local env. + + The schema of the run info follows official/benchmark/datastore/schema. + + Args: + model_name: string, the name of the model. + """ + run_info = { + "model_name": model_name, + "machine_config": {}, + "run_date": datetime.datetime.now().strftime(_DATE_TIME_FORMAT_PATTERN)} + _collect_tensorflow_info(run_info) + _collect_tensorflow_environment_variables(run_info) + _collect_cpu_info(run_info) + _collect_gpu_info(run_info) + _collect_memory_info(run_info) + + with tf.io.gfile.GFile(os.path.join( + self._logging_dir, BENCHMARK_RUN_LOG_FILE_NAME), "w") as f: + try: + json.dump(run_info, f) + f.write("\n") + except (TypeError, ValueError) as e: + tf.compat.v1.logging.warning("Failed to dump benchmark run info to log file: %s", + e) + + +def _collect_tensorflow_info(run_info): + run_info["tensorflow_version"] = { + "version": tf.version.VERSION, "git_hash": tf.version.GIT_VERSION} + + +def _collect_tensorflow_environment_variables(run_info): + run_info["tensorflow_environment_variables"] = [ + {"name": k, "value": v} + for k, v in sorted(os.environ.items()) if k.startswith("TF_")] + + +# The following code is mirrored from tensorflow/tools/test/system_info_lib +# which is not exposed for import. +def _collect_cpu_info(run_info): + """Collect the CPU information for the local environment.""" + cpu_info = {} + + cpu_info["num_cores"] = multiprocessing.cpu_count() + + # Note: cpuinfo is not installed in the TensorFlow OSS tree. + # It is installable via pip. 
+ import cpuinfo # pylint: disable=g-import-not-at-top + + info = cpuinfo.get_cpu_info() + cpu_info["cpu_info"] = info["brand"] + cpu_info["mhz_per_cpu"] = info["hz_advertised_raw"][0] / 1.0e6 + + run_info["machine_config"]["cpu_info"] = cpu_info + + +def _collect_gpu_info(run_info): + """Collect local GPU information by TF device library.""" + gpu_info = {} + local_device_protos = device_lib.list_local_devices() + + gpu_info["count"] = len([d for d in local_device_protos + if d.device_type == "GPU"]) + # The device description usually is a JSON string, which contains the GPU + # model info, eg: + # "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0" + for d in local_device_protos: + if d.device_type == "GPU": + gpu_info["model"] = _parse_gpu_model(d.physical_device_desc) + # Assume all the GPU connected are same model + break + run_info["machine_config"]["gpu_info"] = gpu_info + + +def _collect_memory_info(run_info): + # Note: psutil is not installed in the TensorFlow OSS tree. + # It is installable via pip. + import psutil # pylint: disable=g-import-not-at-top + vmem = psutil.virtual_memory() + run_info["machine_config"]["memory_total"] = vmem.total + run_info["machine_config"]["memory_available"] = vmem.available + + +def _parse_gpu_model(physical_device_desc): + # Assume all the GPU connected are same model + for kv in physical_device_desc.split(","): + k, _, v = kv.partition(":") + if k.strip() == "name": + return v.strip() + return None diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/metric_hook.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/metric_hook.py new file mode 100644 index 000000000..1f2df049c --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/logs/metric_hook.py @@ -0,0 +1,106 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Session hook for logging benchmark metric.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from mlperf_utils.logs import logger + + +class LoggingMetricHook(tf.estimator.LoggingTensorHook): + """Hook to log benchmark metric information. + + This hook is very similar as tf.train.LoggingTensorHook, which logs given + tensors every N local steps, every N seconds, or at the end. The metric + information will be logged to given log_dir or via metric_logger in JSON + format, which can be consumed by data analysis pipeline later. + + Note that if `at_end` is True, `tensors` should not include any tensor + whose evaluation produces a side effect such as consuming additional inputs. + """ + + def __init__(self, tensors, log_dir=None, metric_logger=None, + every_n_iter=None, every_n_secs=None, at_end=False): + """Initializer for LoggingMetricHook. 
+ + Args: + tensors: `dict` that maps string-valued tags to tensors/tensor names, + or `iterable` of tensors/tensor names. + log_dir: `string`, directory path that metric hook should write log to. + metric_logger: instance of `BenchmarkLogger`, the benchmark logger that + hook should use to write the log. Exactly one of the `log_dir` and + `metric_logger` should be provided. + every_n_iter: `int`, print the values of `tensors` once every N local + steps taken on the current worker. + every_n_secs: `int` or `float`, print the values of `tensors` once every N + seconds. Exactly one of `every_n_iter` and `every_n_secs` should be + provided. + at_end: `bool` specifying whether to print the values of `tensors` at the + end of the run. + + Raises: + ValueError: + 1. `every_n_iter` is non-positive, or + 2. Exactly one of every_n_iter and every_n_secs should be provided. + 3. Exactly one of log_dir and metric_logger should be provided. + """ + super(LoggingMetricHook, self).__init__( + tensors=tensors, + every_n_iter=every_n_iter, + every_n_secs=every_n_secs, + at_end=at_end) + + if (log_dir is None) == (metric_logger is None): + raise ValueError( + "exactly one of log_dir and metric_logger should be provided.") + + if log_dir is not None: + self._logger = logger.BenchmarkLogger(log_dir) + else: + self._logger = metric_logger + + def begin(self): + super(LoggingMetricHook, self).begin() + self._global_step_tensor = tf.compat.v1.train.get_global_step() + if self._global_step_tensor is None: + raise RuntimeError( + "Global step should be created to use LoggingMetricHook.") + if self._global_step_tensor.name not in self._current_tensors: + self._current_tensors[self._global_step_tensor.name] = ( + self._global_step_tensor) + + def after_run(self, unused_run_context, run_values): + # should_trigger is a internal state that populated at before_run, and it is + # using self_timer to determine whether it should trigger. + if self._should_trigger: + self._log_metric(run_values.results) + + self._iter_count += 1 + + def end(self, session): + if self._log_at_end: + values = session.run(self._current_tensors) + self._log_metric(values) + + def _log_metric(self, tensor_values): + self._timer.update_last_triggered_step(self._iter_count) + global_step = tensor_values[self._global_step_tensor.name] + # self._tag_order is populated during the init of LoggingTensorHook + for tag in self._tag_order: + self._logger.log_metric(tag, tensor_values[tag], global_step=global_step) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/misc/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/misc/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/misc/model_helpers.py b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/misc/model_helpers.py new file mode 100644 index 000000000..2878c4f1b --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/training/mlperf_utils/misc/model_helpers.py @@ -0,0 +1,55 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Miscellaneous functions that can be called by models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numbers + +import tensorflow as tf + + +def past_stop_threshold(stop_threshold, eval_metric): + """Return a boolean representing whether a model should be stopped. + + Args: + stop_threshold: float, the threshold above which a model should stop + training. + eval_metric: float, the current value of the relevant metric to check. + + Returns: + True if training should stop, False otherwise. + + Raises: + ValueError: if either stop_threshold or eval_metric is not a number + """ + if stop_threshold is None: + return False + + if not isinstance(stop_threshold, numbers.Number): + raise ValueError("Threshold for checking stop conditions must be a number.") + if not isinstance(eval_metric, numbers.Number): + raise ValueError("Eval metric being checked against stop conditions " + "must be a number.") + + if eval_metric >= stop_threshold: + tf.compat.v1.logging.info( + "Stop threshold of {} was passed with metric value {}.".format( + stop_threshold, eval_metric)) + return True + + return False diff --git a/models/image_segmentation/__init__.py b/models/image_segmentation/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/image_segmentation/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/image_segmentation/tensorflow/__init__.py b/models/image_segmentation/tensorflow/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/image_segmentation/tensorflow/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# diff --git a/models/image_segmentation/tensorflow/maskrcnn/__init__.py b/models/image_segmentation/tensorflow/maskrcnn/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/image_segmentation/tensorflow/maskrcnn/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/image_segmentation/tensorflow/maskrcnn/coco.py b/models/image_segmentation/tensorflow/maskrcnn/coco.py deleted file mode 100644 index facce2645..000000000 --- a/models/image_segmentation/tensorflow/maskrcnn/coco.py +++ /dev/null @@ -1,588 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -""" -Mask R-CNN -Configurations and data loading code for MS COCO. - -Copyright (c) 2017 Matterport, Inc. -Licensed under the MIT License (see LICENSE for details) -Written by Waleed Abdulla - ------------------------------------------------------------- - -Usage: import the module (see Jupyter notebooks for examples), or run from - the command line as such: - - # Train a new model starting from pre-trained COCO weights - python3 coco.py train --dataset=/path/to/coco/ --model=coco - - # Train a new model starting from ImageNet weights - python3 coco.py train --dataset=/path/to/coco/ --model=imagenet - - # Continue training a model that you had trained earlier - python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 - - # Continue training the last model you trained - python3 coco.py train --dataset=/path/to/coco/ --model=last - - # Run COCO evaluatoin on the last model you trained - python3 coco.py evaluate --dataset=/path/to/coco/ --model=last -""" - -import os -import time -import numpy as np -import subprocess -from pdb import set_trace as bp - -# Download and install the Python COCO tools from https://github.com/waleedka/coco -# That's a fork from the original https://github.com/pdollar/coco with a bug -# fix for Python 3. -# I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 -# If the PR is merged then use the original repo. -# Note: Edit PythonAPI/Makefile and replace "python" with "python3". 
-from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval -from pycocotools import mask as maskUtils - -import zipfile -import urllib.request -import shutil - -from config import Config -import utils -import model as modellib - -# Path to trained weights file -COCO_MODEL_PATH = os.path.join(os.environ["MOUNT_EXTERNAL_MODELS_SOURCE"], "mask_rcnn_coco.h5") - -# Directory to save logs and model checkpoints, if not provided -# through the command line argument --logs -DEFAULT_LOGS_DIR = os.path.join(os.environ["MOUNT_BENCHMARK"], "common/tensorflow/logs") -DEFAULT_DATASET_YEAR = "2014" - -############################################################ -# Configurations -############################################################ - - -class CocoConfig(Config): - """Configuration for training on MS COCO. - Derives from the base Config class and overrides values specific - to the COCO dataset. - """ - # Give the configuration a recognizable name - NAME = "coco" - - # We use a GPU with 12GB memory, which can fit two images. - # Adjust down if you use a smaller GPU. - IMAGES_PER_GPU = 2 - - # Uncomment to train on 8 GPUs (default is 1) - # GPU_COUNT = 8 - - # Number of classes (including background) - NUM_CLASSES = 1 + 80 # COCO has 80 classes - - -############################################################ -# Dataset -############################################################ - -class CocoDataset(utils.Dataset): - def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, - class_map=None, return_coco=False, auto_download=False): - """Load a subset of the COCO dataset. - dataset_dir: The root directory of the COCO dataset. - subset: What to load (train, val, minival, valminusminival) - year: What dataset year to load (2014, 2017) as a string, not an integer - class_ids: If provided, only loads images that have the given classes. - class_map: TODO: Not implemented yet. Supports maping classes from - different datasets to the same class ID. - return_coco: If True, returns the COCO object. - auto_download: Automatically download and unzip MS-COCO images and annotations - """ - - if auto_download is True: - self.auto_download(dataset_dir, subset, year) - - coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) - if subset == "minival" or subset == "valminusminival": - subset = "val" - image_dir = "{}/{}{}".format(dataset_dir, subset, year) - - # Load all classes or a subset? - if not class_ids: - # All classes - class_ids = sorted(coco.getCatIds()) - - # All images or a subset? - if class_ids: - image_ids = [] - for id in class_ids: - image_ids.extend(list(coco.getImgIds(catIds=[id]))) - # Remove duplicates - image_ids = list(set(image_ids)) - else: - # All images - image_ids = list(coco.imgs.keys()) - - # Add classes - for i in class_ids: - self.add_class("coco", i, coco.loadCats(i)[0]["name"]) - - # Add images - for i in image_ids: - self.add_image( - "coco", image_id=i, - path=os.path.join(image_dir, coco.imgs[i]['file_name']), - width=coco.imgs[i]["width"], - height=coco.imgs[i]["height"], - annotations=coco.loadAnns(coco.getAnnIds( - imgIds=[i], catIds=class_ids, iscrowd=None))) - if return_coco: - return coco - - def auto_download(self, dataDir, dataType, dataYear): - """Download the COCO dataset/annotations if requested. - dataDir: The root directory of the COCO dataset. 
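# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch).  load_coco() above builds its
# image index directly from the pycocotools COCO API.  A standalone sketch of
# the same calls, assuming pycocotools is installed and /path/to/coco points
# at a standard MS-COCO layout:
from pycocotools.coco import COCO

coco = COCO("/path/to/coco/annotations/instances_val2014.json")

class_ids = sorted(coco.getCatIds())                 # all 80 category ids
class_names = [c["name"] for c in coco.loadCats(class_ids)]

# Images containing at least one "person" instance, de-duplicated as above.
person_id = coco.getCatIds(catNms=["person"])[0]
image_ids = list(set(coco.getImgIds(catIds=[person_id])))

# Annotations for the first of those images, mirroring the add_image() call.
anns = coco.loadAnns(coco.getAnnIds(imgIds=image_ids[:1], iscrowd=None))
print(len(class_names), "classes,", len(image_ids), "images,", len(anns), "annotations")
# ---------------------------------------------------------------------------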
- dataType: What to load (train, val, minival, valminusminival) - dataYear: What dataset year to load (2014, 2017) as a string, not an integer - Note: - For 2014, use "train", "val", "minival", or "valminusminival" - For 2017, only "train" and "val" annotations are available - """ - - # Setup paths and file names - if dataType == "minival" or dataType == "valminusminival": - imgDir = "{}/{}{}".format(dataDir, "val", dataYear) - imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) - imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) - else: - imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) - imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) - imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) - # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) - - # Create main folder if it doesn't exist yet - if not os.path.exists(dataDir): - os.makedirs(dataDir) - - # Download images if not available locally - if not os.path.exists(imgDir): - os.makedirs(imgDir) - print("Downloading images to " + imgZipFile + " ...") - with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: - shutil.copyfileobj(resp, out) - print("... done downloading.") - print("Unzipping " + imgZipFile) - with zipfile.ZipFile(imgZipFile, "r") as zip_ref: - zip_ref.extractall(dataDir) - print("... done unzipping") - print("Will use images in " + imgDir) - - # Setup annotations data paths - annDir = "{}/annotations".format(dataDir) - if dataType == "minival": - annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) - annFile = "{}/instances_minival2014.json".format(annDir) - annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" - unZipDir = annDir - elif dataType == "valminusminival": - annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) - annFile = "{}/instances_valminusminival2014.json".format(annDir) - annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" - unZipDir = annDir - else: - annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) - annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) - annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) - unZipDir = dataDir - # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) - - # Download annotations if not available locally - if not os.path.exists(annDir): - os.makedirs(annDir) - if not os.path.exists(annFile): - if not os.path.exists(annZipFile): - print("Downloading zipped annotations to " + annZipFile + " ...") - with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: - shutil.copyfileobj(resp, out) - print("... done downloading.") - print("Unzipping " + annZipFile) - with zipfile.ZipFile(annZipFile, "r") as zip_ref: - zip_ref.extractall(unZipDir) - print("... done unzipping") - print("Will use annotations in " + annFile) - - def load_mask(self, image_id): - """Load instance masks for the given image. - - Different datasets use different ways to store masks. This - function converts the different mask format to one format - in the form of a bitmap [height, width, instances]. - - Returns: - masks: A bool array of shape [height, width, instance count] with - one mask per instance. - class_ids: a 1D array of class IDs of the instance masks. - """ - # If not a COCO image, delegate to parent class. 
- image_info = self.image_info[image_id] - if image_info["source"] != "coco": - return super(CocoDataset, self).load_mask(image_id) - - instance_masks = [] - class_ids = [] - annotations = self.image_info[image_id]["annotations"] - # Build mask of shape [height, width, instance_count] and list - # of class IDs that correspond to each channel of the mask. - for annotation in annotations: - class_id = self.map_source_class_id( - "coco.{}".format(annotation['category_id'])) - if class_id: - m = self.annToMask(annotation, image_info["height"], - image_info["width"]) - # Some objects are so small that they're less than 1 pixel area - # and end up rounded out. Skip those objects. - if m.max() < 1: - continue - # Is it a crowd? If so, use a negative class ID. - if annotation['iscrowd']: - # Use negative class ID for crowds - class_id *= -1 - # For crowd masks, annToMask() sometimes returns a mask - # smaller than the given dimensions. If so, resize it. - if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: - m = np.ones([image_info["height"], image_info["width"]], dtype=bool) - instance_masks.append(m) - class_ids.append(class_id) - - # Pack instance masks into an array - if class_ids: - mask = np.stack(instance_masks, axis=2) - class_ids = np.array(class_ids, dtype=np.int32) - return mask, class_ids - else: - # Call super class to return an empty mask - return super(CocoDataset, self).load_mask(image_id) - - def image_reference(self, image_id): - """Return a link to the image in the COCO Website.""" - info = self.image_info[image_id] - if info["source"] == "coco": - return "http://cocodataset.org/#explore?id={}".format(info["id"]) - else: - super(CocoDataset, self).image_reference(image_id) - - # The following two functions are from pycocotools with a few changes. - - def annToRLE(self, ann, height, width): - """ - Convert annotation which can be polygons, uncompressed RLE to RLE. - :return: binary mask (numpy 2D array) - """ - segm = ann['segmentation'] - if isinstance(segm, list): - # polygon -- a single object might consist of multiple parts - # we merge all parts into one mask rle code - rles = maskUtils.frPyObjects(segm, height, width) - rle = maskUtils.merge(rles) - elif isinstance(segm['counts'], list): - # uncompressed RLE - rle = maskUtils.frPyObjects(segm, height, width) - else: - # rle - rle = ann['segmentation'] - return rle - - def annToMask(self, ann, height, width): - """ - Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 
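# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch).  annToRLE() above normalises
# a COCO segmentation (polygon list or uncompressed RLE) into compressed RLE,
# and annToMask() (continued just below) decodes it into a binary mask.  The
# same pycocotools calls on a toy polygon, assuming pycocotools is installed:
from pycocotools import mask as maskUtils

height, width = 20, 20
# One square polygon, flattened as [x1, y1, x2, y2, ...] like COCO stores it.
polygon = [[5.0, 5.0, 15.0, 5.0, 15.0, 15.0, 5.0, 15.0]]

rles = maskUtils.frPyObjects(polygon, height, width)  # polygon parts -> RLEs
rle = maskUtils.merge(rles)                           # merge parts into one RLE
m = maskUtils.decode(rle)                             # RLE -> binary numpy mask

print(m.shape, m.dtype, int(m.sum()))                 # (20, 20) uint8, ~100 pixels set
# ---------------------------------------------------------------------------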
- :return: binary mask (numpy 2D array) - """ - rle = self.annToRLE(ann, height, width) - m = maskUtils.decode(rle) - return m - - -############################################################ -# COCO Evaluation -############################################################ - -def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): - """Arrange resutls to match COCO specs in http://cocodataset.org/#format - """ - # If no results, return an empty list - if rois is None: - return [] - - results = [] - for image_id in image_ids: - # Loop through detections - for i in range(rois.shape[0]): - class_id = class_ids[i] - score = scores[i] - bbox = np.around(rois[i], 1) - mask = masks[:, :, i] - - result = { - "image_id": image_id, - "category_id": dataset.get_source_class_id(class_id, "coco"), - "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], - "score": score, - "segmentation": maskUtils.encode(np.asfortranarray(mask)) - } - results.append(result) - return results - - -def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, warmup=0, image_ids=None): - """Runs official COCO evaluation. - dataset: A Dataset object with valiadtion data - eval_type: "bbox" or "segm" for bounding box or segmentation evaluation - limit: if not 0, it's the number of images to use for evaluation - """ - # Pick COCO images from the dataset - image_ids = image_ids or dataset.image_ids - - limit = int(limit / config.BATCH_SIZE) * config.BATCH_SIZE - - # Limit to a subset - if limit: - image_ids = image_ids[:limit] - - # Get corresponding COCO image IDs. - coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] - - t_prediction = 0 - t_start = time.time() - - results = [] - for i, image_id in enumerate(image_ids): - # Load image - # image = dataset.load_image(image_id) - - if (i % config.BATCH_SIZE != 0): - continue - image_list = [] - for j in range(0, config.BATCH_SIZE): - print("i image_id", i + j, image_id + j) - image = dataset.load_image(image_id + j) - image_list.append(image) - - # Run detection - t = time.time() - r = model.detect(image_list, verbose=0)[0] - t1 = time.time() - t - # t_prediction += (time.time() - t) - if (i / config.BATCH_SIZE >= warmup): - t_prediction += t1 - print("pred time:", i, t1) - - # Convert results to COCO format - image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], - r["rois"], r["class_ids"], - r["scores"], r["masks"]) - results.extend(image_results) - - # Load results. This modifies results with additional attributes. 
- coco_results = coco.loadRes(results) - - # Evaluate - cocoEval = COCOeval(coco, coco_results, eval_type) - cocoEval.params.imgIds = coco_image_ids - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - print("Batch size: %d" % (config.BATCH_SIZE)) - print("Time spent per BATCH: %.4f ms" % (t_prediction / (len(image_ids) / config.BATCH_SIZE - warmup) * 1000)) - print("Total samples/sec: %.4f samples/s" % - ((len(image_ids) / config.BATCH_SIZE - warmup) * config.BATCH_SIZE / t_prediction)) - print("Total time: ", time.time() - t_start) - - -############################################################ -# Training -############################################################ - - -if __name__ == '__main__': - import argparse - - # Parse command line arguments - parser = argparse.ArgumentParser( - description='Train Mask R-CNN on MS COCO.') - parser.add_argument("command", - metavar="", - help="'train' or 'evaluate' on MS COCO") - parser.add_argument("--trainbs", required=False, - default=2, - metavar="", - help="Batchsize to train (default=2)") - parser.add_argument("--infbs", required=False, - default=1, - metavar="", - help="Batchsize to inference (default=1)") - parser.add_argument("--num_intra_threads", required=False, - default=56, - metavar="", - help="Num intra threads (default=56)") - parser.add_argument("--num_inter_threads", required=False, - default=1, - metavar="", - help="Num inter threads (default=1)") - parser.add_argument('--dataset', required=True, - metavar="/path/to/coco/", - help='Directory of the MS-COCO dataset') - parser.add_argument('--year', required=False, - default=DEFAULT_DATASET_YEAR, - metavar="", - help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') - parser.add_argument('--model', required=True, - metavar="/path/to/weights.h5", - help="Path to weights .h5 file or 'coco'") - parser.add_argument('--cp', required=False, - default=DEFAULT_LOGS_DIR, - metavar="/path/to/logs/", - help='Logs and checkpoints directory (default=logs/)') - parser.add_argument('--nb', required=False, - default=50, - metavar="", - help='Images to use for evaluation (default=500)') - parser.add_argument('--nw', required=False, - default=5, - metavar="", - help='Images to use for evaluation warmup (default=10)') - parser.add_argument('--download', required=False, - default=False, - metavar="", - help='Automatically download and unzip MS-COCO files (default=False)', - type=bool) - args = parser.parse_args() - print("Command: ", args.command) - print("Model: ", args.model) - print("Dataset: ", args.dataset) - print("Year: ", args.year) - print("Logs: ", args.cp) - print("Auto Download: ", args.download) - - # For pycocotools updates - ppath = subprocess.Popen(["python3", "-m", "site", "--user-site"], - stdout=subprocess.PIPE).communicate()[0].decode("utf-8") - ppath = ppath[:-1] + "/pycocotools/coco.py" - ret = subprocess.Popen(["sed", "-i", "s/unicode/bytes/", ppath], - stdout=subprocess.PIPE).communicate()[0] - - # Configurations - if args.command == "train": - class TrainConfig(CocoConfig): - GPU_COUNT = 1 - IMAGES_PER_GPU = int(args.trainbs) - STEPS_PER_EPOCH = int(args.nb) - config = TrainConfig() - else: - class InferenceConfig(CocoConfig): - # Set batch size to 1 since we'll be running inference on - # one image at a time. 
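# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch).  evaluate_coco() above follows
# the standard pycocotools flow: collect detections in the COCO results
# format, register them with loadRes(), then run COCOeval.  A hedged outline
# of just that flow; the annotation path and the single toy detection are
# placeholders, not real results:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO("/path/to/coco/annotations/instances_minival2014.json")

results = [{
    "image_id": coco_gt.getImgIds()[0],
    "category_id": 1,                      # "person" in the COCO label map
    "bbox": [10.0, 20.0, 50.0, 80.0],      # [x, y, width, height]
    "score": 0.9,
}]

coco_dt = coco_gt.loadRes(results)
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
coco_eval.params.imgIds = coco_gt.getImgIds()[:1]
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()                      # prints the familiar AP/AR table
# ---------------------------------------------------------------------------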
Batch size = GPU_COUNT * IMAGES_PER_GPU - GPU_COUNT = 1 - IMAGES_PER_GPU = int(args.infbs) - DETECTION_MIN_CONFIDENCE = 0 - config = InferenceConfig() - config.NUM_INTRA = int(args.num_intra_threads) - config.NUM_INTER = int(args.num_inter_threads) - config.display() - - # Create model - if args.command == "train": - model = modellib.MaskRCNN(mode="training", config=config, - model_dir=args.cp) - else: - model = modellib.MaskRCNN(mode="inference", config=config, - model_dir=args.cp) - - # Select weights file to load - if args.model.lower() == "coco": - model_path = COCO_MODEL_PATH - elif args.model.lower() == "last": - # Find last trained weights - model_path = model.find_last()[1] - elif args.model.lower() == "imagenet": - # Start from ImageNet trained weights - model_path = model.get_imagenet_weights() - else: - model_path = args.model - - # Load weights - print("Loading weights ", model_path) - model.load_weights(model_path, by_name=True) - - # Train or evaluate - if args.command == "train": - # Training dataset. Use the training set and 35K from the - # validation set, as as in the Mask RCNN paper. - dataset_train = CocoDataset() - dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) - dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) - dataset_train.prepare() - - # Validation dataset - dataset_val = CocoDataset() - dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download) - dataset_val.prepare() - - # *** This training schedule is an example. Update to your needs *** - - # Training - Stage 1 - print("Training network heads") - model.train(dataset_train, dataset_val, - learning_rate=config.LEARNING_RATE, - epochs=1, # 40, - layers='heads', warmup=int(args.nw)) - - # Training - Stage 2 - # Finetune layers from ResNet stage 4 and up - print("Fine tune Resnet stage 4 and up") - model.train(dataset_train, dataset_val, - learning_rate=config.LEARNING_RATE, - epochs=2, # 120, - layers='4+', warmup=int(args.nw)) - - # Training - Stage 3 - # Fine tune all layers - print("Fine tune all layers") - model.train(dataset_train, dataset_val, - learning_rate=config.LEARNING_RATE / 10, - epochs=3, # 160, - layers='all', warmup=int(args.nw)) - - elif args.command == "evaluate": - # Validation dataset - dataset_val = CocoDataset() - coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, - return_coco=True, auto_download=args.download) - dataset_val.prepare() - print("Running COCO evaluation on {} images.".format(args.nb)) - evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.nb), warmup=int(args.nw)) - else: - print("'{}' is not recognized. " - "Use 'train' or 'evaluate'".format(args.command)) diff --git a/models/image_segmentation/tensorflow/maskrcnn/config.py b/models/image_segmentation/tensorflow/maskrcnn/config.py deleted file mode 100644 index 7c14975da..000000000 --- a/models/image_segmentation/tensorflow/maskrcnn/config.py +++ /dev/null @@ -1,188 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -""" -Mask R-CNN -Base Configurations class. - -Copyright (c) 2017 Matterport, Inc. -Licensed under the MIT License (see LICENSE for details) -Written by Waleed Abdulla -""" - -import math -import numpy as np - - -# Base Configuration Class -# Don't use this class directly. Instead, sub-class it and override -# the configurations you need to change. - -class Config(object): - """Base configuration class. For custom configurations, create a - sub-class that inherits from this one and override properties - that need to be changed. - """ - # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. - # Useful if your code needs to do things differently depending on which - # experiment is running. - NAME = None # Override in sub-classes - - # NUMBER OF GPUs to use. For CPU training, use 1 - GPU_COUNT = 1 - - # Number of images to train with on each GPU. A 12GB GPU can typically - # handle 2 images of 1024x1024px. - # Adjust based on your GPU memory and image sizes. Use the highest - # number that your GPU can handle for best performance. - IMAGES_PER_GPU = 2 - - # Number of training steps per epoch - # This doesn't need to match the size of the training set. Tensorboard - # updates are saved at the end of each epoch, so setting this to a - # smaller number means getting more frequent TensorBoard updates. - # Validation stats are also calculated at each epoch end and they - # might take a while, so don't set this too small to avoid spending - # a lot of time on validation stats. - STEPS_PER_EPOCH = 1000 - - # Number of validation steps to run at the end of every training epoch. - # A bigger number improves accuracy of validation stats, but slows - # down the training. - VALIDATION_STEPS = 50 - - # The strides of each layer of the FPN Pyramid. These values - # are based on a Resnet101 backbone. - BACKBONE_STRIDES = [4, 8, 16, 32, 64] - - # Number of classification classes (including background) - NUM_CLASSES = 1 # Override in sub-classes - - # Length of square anchor side in pixels - RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) - - # Ratios of anchors at each cell (width/height) - # A value of 1 represents a square anchor, and 0.5 is a wide anchor - RPN_ANCHOR_RATIOS = [0.5, 1, 2] - - # Anchor stride - # If 1 then anchors are created for each cell in the backbone feature map. - # If 2, then anchors are created for every other cell, and so on. - RPN_ANCHOR_STRIDE = 1 - - # Non-max suppression threshold to filter RPN proposals. - # You can reduce this during training to generate more propsals. - RPN_NMS_THRESHOLD = 0.7 - - # How many anchors per image to use for RPN training - RPN_TRAIN_ANCHORS_PER_IMAGE = 256 - - # ROIs kept after non-maximum supression (training and inference) - POST_NMS_ROIS_TRAINING = 2000 - POST_NMS_ROIS_INFERENCE = 1000 - - # If enabled, resizes instance masks to a smaller size to reduce - # memory load. Recommended when using high-resolution images. 
- USE_MINI_MASK = True - MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask - - # Input image resing - # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and - # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't - # be satisfied together the IMAGE_MAX_DIM is enforced. - IMAGE_MIN_DIM = 800 - IMAGE_MAX_DIM = 1024 - # If True, pad images with zeros such that they're (max_dim by max_dim) - IMAGE_PADDING = True # currently, the False option is not supported - - # Image mean (RGB) - MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) - - # Number of ROIs per image to feed to classifier/mask heads - # The Mask RCNN paper uses 512 but often the RPN doesn't generate - # enough positive proposals to fill this and keep a positive:negative - # ratio of 1:3. You can increase the number of proposals by adjusting - # the RPN NMS threshold. - TRAIN_ROIS_PER_IMAGE = 200 - - # Percent of positive ROIs used to train classifier/mask heads - ROI_POSITIVE_RATIO = 0.33 - - # Pooled ROIs - POOL_SIZE = 7 - MASK_POOL_SIZE = 14 - MASK_SHAPE = [28, 28] - - # Maximum number of ground truth instances to use in one image - MAX_GT_INSTANCES = 100 - - # Bounding box refinement standard deviation for RPN and final detections. - RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) - BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) - - # Max number of final detections - DETECTION_MAX_INSTANCES = 100 - - # Minimum probability value to accept a detected instance - # ROIs below this threshold are skipped - DETECTION_MIN_CONFIDENCE = 0.7 - - # Non-maximum suppression threshold for detection - DETECTION_NMS_THRESHOLD = 0.3 - - # Learning rate and momentum - # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes - # weights to explode. Likely due to differences in optimzer - # implementation. - LEARNING_RATE = 0.001 - LEARNING_MOMENTUM = 0.9 - - # Weight decay regularization - WEIGHT_DECAY = 0.0001 - - # Use RPN ROIs or externally generated ROIs for training - # Keep this True for most situations. Set to False if you want to train - # the head branches on ROI generated by code rather than the ROIs from - # the RPN. For example, to debug the classifier head without having to - # train the RPN. - USE_RPN_ROIS = True - - def __init__(self): - """Set values of computed attributes.""" - # Effective batch size - self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT - - # Input image size - self.IMAGE_SHAPE = np.array( - [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) - - # Compute backbone size from input image size - self.BACKBONE_SHAPES = np.array( - [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), - int(math.ceil(self.IMAGE_SHAPE[1] / stride))] - for stride in self.BACKBONE_STRIDES]) - - def display(self): - """Display Configuration values.""" - print("\nConfigurations:") - for a in dir(self): - if not a.startswith("__") and not callable(getattr(self, a)): - print("{:30} {}".format(a, getattr(self, a))) - print("\n") diff --git a/models/image_segmentation/tensorflow/maskrcnn/model.py b/models/image_segmentation/tensorflow/maskrcnn/model.py deleted file mode 100644 index 4cb56f604..000000000 --- a/models/image_segmentation/tensorflow/maskrcnn/model.py +++ /dev/null @@ -1,2612 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
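# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch).  Config.__init__ above derives
# everything from a handful of knobs; a quick standalone check of that
# arithmetic for the defaults (GPU_COUNT=1, IMAGES_PER_GPU=2,
# IMAGE_MAX_DIM=1024, BACKBONE_STRIDES=[4, 8, 16, 32, 64]):
import math

GPU_COUNT, IMAGES_PER_GPU = 1, 2
IMAGE_MAX_DIM = 1024
BACKBONE_STRIDES = [4, 8, 16, 32, 64]

batch_size = IMAGES_PER_GPU * GPU_COUNT                 # effective batch size: 2
backbone_shapes = [(int(math.ceil(IMAGE_MAX_DIM / stride)),) * 2
                   for stride in BACKBONE_STRIDES]      # feature-map size per stride
print(batch_size, backbone_shapes)
# 2 [(256, 256), (128, 128), (64, 64), (32, 32), (16, 16)]
# ---------------------------------------------------------------------------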
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -""" -Mask R-CNN -The main Mask R-CNN model implemenetation. - -Copyright (c) 2017 Matterport, Inc. -Licensed under the MIT License (see LICENSE for details) -Written by Waleed Abdulla -""" - -import os -import sys -import glob -import random -import math -import datetime -import itertools -import json -import re -import logging -from collections import OrderedDict -import numpy as np -import scipy.misc -import tensorflow as tf -import keras -import keras.backend as K -import keras.layers as KL -import keras.initializers as KI -import keras.engine as KE -import keras.models as KM - -import utils -import time - -# Requires TensorFlow 1.3+ and Keras 2.0.8+. -from distutils.version import LooseVersion -assert LooseVersion(tf.__version__) >= LooseVersion("1.3") -assert LooseVersion(keras.__version__) >= LooseVersion('2.0.8') - - -############################################################ -# Utility Functions -############################################################ -class TimeHistory(keras.callbacks.Callback): - def __init__(self, warmup, batch_size): - self.warmup = warmup - self.batch_size = batch_size - - def on_train_begin(self, logs={}): - self.time_start = 0 - self.total_time = 0 - self.count = 0 - - def on_batch_begin(self, batch, logs={}): - self.time_start = time.time() - - def on_batch_end(self, batch, logs={}): - self.time_end = time.time() - print(" Elapsed Time: %f (sec/step) " % (self.time_end - self.time_start)) - if self.count >= self.warmup: - self.total_time += self.time_end - self.time_start - self.count += 1 - - def on_epoch_end(self, epoch, logs={}): - print("Batchsize: %d" % (self.batch_size)) - print("Time spent per BATCH: %.4f ms" % (self.total_time / (self.count - self.warmup) * 1000)) - print("Total samples/sec: %.4f samples/s" % ((self.count - self.warmup) * self.batch_size / self.total_time)) - - -def log(text, array=None): - """Prints a text message. And, optionally, if a Numpy array is provided it - prints it's shape, min, and max values. - """ - if array is not None: - text = text.ljust(25) - text += ("shape: {:20} min: {:10.5f} max: {:10.5f}".format( - str(array.shape), - array.min() if array.size else "", - array.max() if array.size else "")) - print(text) - - -class BatchNorm(KL.BatchNormalization): - """Batch Normalization class. Subclasses the Keras BN class and - hardcodes training=False so the BN layer doesn't update - during training. - - Batch normalization has a negative effect on training if batches are small - so we disable it here. 
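# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch).  TimeHistory above reports
#   samples/sec = (steps_counted - warmup) * batch_size / total_time
# where total_time only accumulates for post-warmup steps.  A toy numeric
# check with made-up timings:
warmup, batch_size = 5, 2
step_times = [0.50] * 5 + [0.20] * 20        # 5 slow warmup steps, then 20 steady steps

total_time = sum(step_times[warmup:])        # 4.0 s over the measured steps
steps_counted = len(step_times)              # 25
per_batch_ms = total_time / (steps_counted - warmup) * 1000
samples_per_sec = (steps_counted - warmup) * batch_size / total_time

print("%.1f ms/batch, %.1f samples/s" % (per_batch_ms, samples_per_sec))  # 200.0 ms/batch, 10.0 samples/s
# ---------------------------------------------------------------------------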
- """ - - def call(self, inputs, training=None): - return super(self.__class__, self).call(inputs, training=False) - - -############################################################ -# Resnet Graph -############################################################ - -# Code adopted from: -# https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py - -def identity_block(input_tensor, kernel_size, filters, stage, block, - use_bias=True): - """The identity_block is the block that has no conv layer at shortcut - # Arguments - input_tensor: input tensor - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - """ - nb_filter1, nb_filter2, nb_filter3 = filters - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = KL.Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', - use_bias=use_bias)(input_tensor) - x = BatchNorm(axis=3, name=bn_name_base + '2a')(x) - x = KL.Activation('relu')(x) - - x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', - name=conv_name_base + '2b', use_bias=use_bias)(x) - x = BatchNorm(axis=3, name=bn_name_base + '2b')(x) - x = KL.Activation('relu')(x) - - x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', - use_bias=use_bias)(x) - x = BatchNorm(axis=3, name=bn_name_base + '2c')(x) - - x = KL.Add()([x, input_tensor]) - x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) - return x - - -def conv_block(input_tensor, kernel_size, filters, stage, block, - strides=(2, 2), use_bias=True): - """conv_block is the block that has a conv layer at shortcut - # Arguments - input_tensor: input tensor - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - Note that from stage 3, the first conv layer at main path is with subsample=(2,2) - And the shortcut should have subsample=(2,2) as well - """ - nb_filter1, nb_filter2, nb_filter3 = filters - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = KL.Conv2D(nb_filter1, (1, 1), strides=strides, - name=conv_name_base + '2a', use_bias=use_bias)(input_tensor) - x = BatchNorm(axis=3, name=bn_name_base + '2a')(x) - x = KL.Activation('relu')(x) - - x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', - name=conv_name_base + '2b', use_bias=use_bias)(x) - x = BatchNorm(axis=3, name=bn_name_base + '2b')(x) - x = KL.Activation('relu')(x) - - x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + - '2c', use_bias=use_bias)(x) - x = BatchNorm(axis=3, name=bn_name_base + '2c')(x) - - shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides, - name=conv_name_base + '1', use_bias=use_bias)(input_tensor) - shortcut = BatchNorm(axis=3, name=bn_name_base + '1')(shortcut) - - x = KL.Add()([x, shortcut]) - x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) - return x - - -def resnet_graph(input_image, architecture, stage5=False): - assert architecture in ["resnet50", "resnet101"] - # Stage 1 - x = KL.ZeroPadding2D((3, 3))(input_image) - x = KL.Conv2D(64, (7, 7), 
strides=(2, 2), name='conv1', use_bias=True)(x) - x = BatchNorm(axis=3, name='bn_conv1')(x) - x = KL.Activation('relu')(x) - C1 = x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x) - # Stage 2 - x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) - x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') - C2 = x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - # Stage 3 - x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') - C3 = x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') - # Stage 4 - x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') - block_count = {"resnet50": 5, "resnet101": 22}[architecture] - for i in range(block_count): - x = identity_block(x, 3, [256, 256, 1024], stage=4, block=chr(98 + i)) - C4 = x - # Stage 5 - if stage5: - x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') - C5 = x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - else: - C5 = None - return [C1, C2, C3, C4, C5] - - -############################################################ -# Proposal Layer -############################################################ - -def apply_box_deltas_graph(boxes, deltas): - """Applies the given deltas to the given boxes. - boxes: [N, 4] where each row is y1, x1, y2, x2 - deltas: [N, 4] where each row is [dy, dx, log(dh), log(dw)] - """ - # Convert to y, x, h, w - height = boxes[:, 2] - boxes[:, 0] - width = boxes[:, 3] - boxes[:, 1] - center_y = boxes[:, 0] + 0.5 * height - center_x = boxes[:, 1] + 0.5 * width - # Apply deltas - center_y += deltas[:, 0] * height - center_x += deltas[:, 1] * width - height *= tf.exp(deltas[:, 2]) - width *= tf.exp(deltas[:, 3]) - # Convert back to y1, x1, y2, x2 - y1 = center_y - 0.5 * height - x1 = center_x - 0.5 * width - y2 = y1 + height - x2 = x1 + width - result = tf.stack([y1, x1, y2, x2], axis=1, name="apply_box_deltas_out") - return result - - -def clip_boxes_graph(boxes, window): - """ - boxes: [N, 4] each row is y1, x1, y2, x2 - window: [4] in the form y1, x1, y2, x2 - """ - # Split corners - wy1, wx1, wy2, wx2 = tf.split(window, 4) - y1, x1, y2, x2 = tf.split(boxes, 4, axis=1) - # Clip - y1 = tf.maximum(tf.minimum(y1, wy2), wy1) - x1 = tf.maximum(tf.minimum(x1, wx2), wx1) - y2 = tf.maximum(tf.minimum(y2, wy2), wy1) - x2 = tf.maximum(tf.minimum(x2, wx2), wx1) - clipped = tf.concat([y1, x1, y2, x2], axis=1, name="clipped_boxes") - clipped.set_shape((clipped.shape[0], 4)) - return clipped - - -class ProposalLayer(KE.Layer): - """Receives anchor scores and selects a subset to pass as proposals - to the second stage. Filtering is done based on anchor scores and - non-max suppression to remove overlaps. It also applies bounding - box refinement deltas to anchors. 
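# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch).  apply_box_deltas_graph()
# above shifts and scales a (y1, x1, y2, x2) box by (dy, dx, log(dh), log(dw)).
# The same arithmetic in plain numpy for a single box:
import numpy as np

box = np.array([10.0, 10.0, 30.0, 50.0])          # y1, x1, y2, x2  (h=20, w=40)
delta = np.array([0.1, -0.05, np.log(2.0), 0.0])  # dy, dx, log(dh), log(dw)

h, w = box[2] - box[0], box[3] - box[1]
cy, cx = box[0] + 0.5 * h, box[1] + 0.5 * w
cy, cx = cy + delta[0] * h, cx + delta[1] * w      # shift centre by dy*h, dx*w
h, w = h * np.exp(delta[2]), w * np.exp(delta[3])  # scale height and width

refined = np.array([cy - 0.5 * h, cx - 0.5 * w,
                    cy - 0.5 * h + h, cx - 0.5 * w + w])
print(refined)  # [ 2.  8. 42. 48.]  height doubled, centre nudged down and left
# ---------------------------------------------------------------------------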
- - Inputs: - rpn_probs: [batch, anchors, (bg prob, fg prob)] - rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))] - - Returns: - Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)] - """ - - def __init__(self, proposal_count, nms_threshold, anchors, - config=None, **kwargs): - """ - anchors: [N, (y1, x1, y2, x2)] anchors defined in image coordinates - """ - super(ProposalLayer, self).__init__(**kwargs) - self.config = config - self.proposal_count = proposal_count - self.nms_threshold = nms_threshold - self.anchors = anchors.astype(np.float32) - - def call(self, inputs): - # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1] - scores = inputs[0][:, :, 1] - # Box deltas [batch, num_rois, 4] - deltas = inputs[1] - deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4]) - # Base anchors - anchors = self.anchors - - # Improve performance by trimming to top anchors by score - # and doing the rest on the smaller subset. - pre_nms_limit = min(6000, self.anchors.shape[0]) - ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True, - name="top_anchors").indices - scores = utils.batch_slice([scores, ix], lambda x, y: tf.gather(x, y), - self.config.IMAGES_PER_GPU) - deltas = utils.batch_slice([deltas, ix], lambda x, y: tf.gather(x, y), - self.config.IMAGES_PER_GPU) - anchors = utils.batch_slice(ix, lambda x: tf.gather(anchors, x), - self.config.IMAGES_PER_GPU, - names=["pre_nms_anchors"]) - - # Apply deltas to anchors to get refined anchors. - # [batch, N, (y1, x1, y2, x2)] - boxes = utils.batch_slice([anchors, deltas], - lambda x, y: apply_box_deltas_graph(x, y), - self.config.IMAGES_PER_GPU, - names=["refined_anchors"]) - - # Clip to image boundaries. [batch, N, (y1, x1, y2, x2)] - height, width = self.config.IMAGE_SHAPE[:2] - window = np.array([0, 0, height, width]).astype(np.float32) - boxes = utils.batch_slice(boxes, - lambda x: clip_boxes_graph(x, window), - self.config.IMAGES_PER_GPU, - names=["refined_anchors_clipped"]) - - # Filter out small boxes - # According to Xinlei Chen's paper, this reduces detection accuracy - # for small objects, so we're skipping it. - - # Normalize dimensions to range of 0 to 1. - normalized_boxes = boxes / np.array([[height, width, height, width]]) - - # Non-max suppression - def nms(normalized_boxes, scores): - indices = tf.image.non_max_suppression( - normalized_boxes, scores, self.proposal_count, - self.nms_threshold, name="rpn_non_max_suppression") - proposals = tf.gather(normalized_boxes, indices) - # Pad if needed - padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0) - proposals = tf.pad(proposals, [(0, padding), (0, 0)]) - return proposals - proposals = utils.batch_slice([normalized_boxes, scores], nms, - self.config.IMAGES_PER_GPU) - return proposals - - def compute_output_shape(self, input_shape): - return (None, self.proposal_count, 4) - - -############################################################ -# ROIAlign Layer -############################################################ - -def log2_graph(x): - """Implementatin of Log2. TF doesn't have a native implemenation.""" - return tf.log(x) / tf.log(2.0) - - -class PyramidROIAlign(KE.Layer): - """Implements ROI Pooling on multiple levels of the feature pyramid. - - Params: - - pool_shape: [height, width] of the output pooled regions. Usually [7, 7] - - image_shape: [height, width, channels]. Shape of input image in pixels - - Inputs: - - boxes: [batch, num_boxes, (y1, x1, y2, x2)] in normalized - coordinates. 
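# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch).  The nms() closure above is a
# thin wrapper around tf.image.non_max_suppression plus zero-padding to a
# fixed proposal_count.  A standalone toy run of that call, assuming
# TensorFlow 2.x eager mode:
import tensorflow as tf

boxes = tf.constant([[0.0, 0.0, 0.50, 0.50],
                     [0.0, 0.0, 0.48, 0.52],    # heavy overlap with box 0
                     [0.5, 0.5, 1.00, 1.00]])   # disjoint box
scores = tf.constant([0.9, 0.8, 0.7])
proposal_count = 5

keep = tf.image.non_max_suppression(boxes, scores, max_output_size=proposal_count,
                                    iou_threshold=0.7)
proposals = tf.gather(boxes, keep)
# Pad with zero boxes so every image yields the same number of proposals.
padding = tf.maximum(proposal_count - tf.shape(proposals)[0], 0)
proposals = tf.pad(proposals, [(0, padding), (0, 0)])
print(keep.numpy(), proposals.shape)            # [0 2] (5, 4)
# ---------------------------------------------------------------------------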
Possibly padded with zeros if not enough - boxes to fill the array. - - Feature maps: List of feature maps from different levels of the pyramid. - Each is [batch, height, width, channels] - - Output: - Pooled regions in the shape: [batch, num_boxes, height, width, channels]. - The width and height are those specific in the pool_shape in the layer - constructor. - """ - - def __init__(self, pool_shape, image_shape, **kwargs): - super(PyramidROIAlign, self).__init__(**kwargs) - self.pool_shape = tuple(pool_shape) - self.image_shape = tuple(image_shape) - - def call(self, inputs): - # Crop boxes [batch, num_boxes, (y1, x1, y2, x2)] in normalized coords - boxes = inputs[0] - - # Feature Maps. List of feature maps from different level of the - # feature pyramid. Each is [batch, height, width, channels] - feature_maps = inputs[1:] - - # Assign each ROI to a level in the pyramid based on the ROI area. - y1, x1, y2, x2 = tf.split(boxes, 4, axis=2) - h = y2 - y1 - w = x2 - x1 - # Equation 1 in the Feature Pyramid Networks paper. Account for - # the fact that our coordinates are normalized here. - # e.g. a 224x224 ROI (in pixels) maps to P4 - image_area = tf.cast( - self.image_shape[0] * self.image_shape[1], tf.float32) - roi_level = log2_graph(tf.sqrt(h * w) / (224.0 / tf.sqrt(image_area))) - roi_level = tf.minimum(5, tf.maximum( - 2, 4 + tf.cast(tf.round(roi_level), tf.int32))) - roi_level = tf.squeeze(roi_level, 2) - - # Loop through levels and apply ROI pooling to each. P2 to P5. - pooled = [] - box_to_level = [] - for i, level in enumerate(range(2, 6)): - ix = tf.where(tf.equal(roi_level, level)) - level_boxes = tf.gather_nd(boxes, ix) - - # Box indicies for crop_and_resize. - box_indices = tf.cast(ix[:, 0], tf.int32) - - # Keep track of which box is mapped to which level - box_to_level.append(ix) - - # Stop gradient propogation to ROI proposals - level_boxes = tf.stop_gradient(level_boxes) - box_indices = tf.stop_gradient(box_indices) - - # Crop and Resize - # From Mask R-CNN paper: "We sample four regular locations, so - # that we can evaluate either max or average pooling. In fact, - # interpolating only a single value at each bin center (without - # pooling) is nearly as effective." - # - # Here we use the simplified approach of a single value per bin, - # which is how it's done in tf.crop_and_resize() - # Result: [batch * num_boxes, pool_height, pool_width, channels] - pooled.append(tf.image.crop_and_resize( - feature_maps[i], level_boxes, box_indices, self.pool_shape, - method="bilinear")) - - # Pack pooled features into one tensor - pooled = tf.concat(pooled, axis=0) - - # Pack box_to_level mapping into one array and add another - # column representing the order of pooled boxes - box_to_level = tf.concat(box_to_level, axis=0) - box_range = tf.expand_dims(tf.range(tf.shape(box_to_level)[0]), 1) - box_to_level = tf.concat([tf.cast(box_to_level, tf.int32), box_range], - axis=1) - - # Rearrange pooled features to match the order of the original boxes - # Sort box_to_level by batch then box index - # TF doesn't have a way to sort by two columns, so merge them and sort. 
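# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch).  PyramidROIAlign above routes
# each ROI to a pyramid level with
#   level = clip(4 + round(log2(sqrt(h*w) / (224 / sqrt(image_area)))), 2, 5)
# where h and w are in normalized coordinates.  Quick numeric check for a
# 1024x1024 input, matching the "224x224 ROI maps to P4" comment above:
import numpy as np

image_area = 1024.0 * 1024.0
for roi_pixels in (112.0, 224.0, 448.0):
    h = w = roi_pixels / 1024.0                   # normalized height/width
    roi_level = np.log2(np.sqrt(h * w) / (224.0 / np.sqrt(image_area)))
    level = int(np.clip(4 + np.round(roi_level), 2, 5))
    print("%.0f px ROI -> P%d" % (roi_pixels, level))  # 112->P3, 224->P4, 448->P5
# ---------------------------------------------------------------------------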
- sorting_tensor = box_to_level[:, 0] * 100000 + box_to_level[:, 1] - ix = tf.nn.top_k(sorting_tensor, k=tf.shape( - box_to_level)[0]).indices[::-1] - ix = tf.gather(box_to_level[:, 2], ix) - pooled = tf.gather(pooled, ix) - - # Re-add the batch dimension - pooled = tf.expand_dims(pooled, 0) - return pooled - - def compute_output_shape(self, input_shape): - return input_shape[0][:2] + self.pool_shape + (input_shape[1][-1], ) - - -############################################################ -# Detection Target Layer -############################################################ - -def overlaps_graph(boxes1, boxes2): - """Computes IoU overlaps between two sets of boxes. - boxes1, boxes2: [N, (y1, x1, y2, x2)]. - """ - # 1. Tile boxes2 and repeate boxes1. This allows us to compare - # every boxes1 against every boxes2 without loops. - # TF doesn't have an equivalent to np.repeate() so simulate it - # using tf.tile() and tf.reshape. - b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1), - [1, 1, tf.shape(boxes2)[0]]), [-1, 4]) - b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1]) - # 2. Compute intersections - b1_y1, b1_x1, b1_y2, b1_x2 = tf.split(b1, 4, axis=1) - b2_y1, b2_x1, b2_y2, b2_x2 = tf.split(b2, 4, axis=1) - y1 = tf.maximum(b1_y1, b2_y1) - x1 = tf.maximum(b1_x1, b2_x1) - y2 = tf.minimum(b1_y2, b2_y2) - x2 = tf.minimum(b1_x2, b2_x2) - intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0) - # 3. Compute unions - b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1) - b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1) - union = b1_area + b2_area - intersection - # 4. Compute IoU and reshape to [boxes1, boxes2] - iou = intersection / union - overlaps = tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]]) - return overlaps - - -def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config): - """Generates detection targets for one image. Subsamples proposals and - generates target class IDs, bounding box deltas, and masks for each. - - Inputs: - proposals: [N, (y1, x1, y2, x2)] in normalized coordinates. Might - be zero padded if there are not enough proposals. - gt_class_ids: [MAX_GT_INSTANCES] int class IDs - gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates. - gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type. - - Returns: Target ROIs and corresponding class IDs, bounding box shifts, - and masks. - rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates - class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded. - deltas: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (dy, dx, log(dh), log(dw))] - Class-specific bbox refinements. - masks: [TRAIN_ROIS_PER_IMAGE, height, width). Masks cropped to bbox - boundaries and resized to neural network output size. - - Note: Returned arrays might be zero padded if not enough target ROIs. - """ - # Assertions - asserts = [ - tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], - name="roi_assertion"), - ] - with tf.control_dependencies(asserts): - proposals = tf.identity(proposals) - - # Remove zero padding - proposals, _ = trim_zeros_graph(proposals, name="trim_proposals") - gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes") - gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros, - name="trim_gt_class_ids") - gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2, - name="trim_gt_masks") - - # Handle COCO crowds - # A crowd box in COCO is a bounding box around several instances. Exclude - # them from training. A crowd box is given a negative class ID. 
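# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch).  overlaps_graph() above is a
# vectorised pairwise IoU; the same quantity for one pair of (y1, x1, y2, x2)
# boxes in plain numpy:
import numpy as np

b1 = np.array([0.0, 0.0, 10.0, 10.0])
b2 = np.array([5.0, 5.0, 15.0, 15.0])

y1, x1 = np.maximum(b1[0], b2[0]), np.maximum(b1[1], b2[1])
y2, x2 = np.minimum(b1[2], b2[2]), np.minimum(b1[3], b2[3])
intersection = max(y2 - y1, 0) * max(x2 - x1, 0)              # 25.0
union = ((b1[2] - b1[0]) * (b1[3] - b1[1])
         + (b2[2] - b2[0]) * (b2[3] - b2[1]) - intersection)  # 175.0
print(intersection / union)                                   # ~0.1429
# ---------------------------------------------------------------------------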
- crowd_ix = tf.where(gt_class_ids < 0)[:, 0] - non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0] - crowd_boxes = tf.gather(gt_boxes, crowd_ix) - crowd_masks = tf.gather(gt_masks, crowd_ix, axis=2) - gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix) - gt_boxes = tf.gather(gt_boxes, non_crowd_ix) - gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2) - - # Compute overlaps matrix [proposals, gt_boxes] - overlaps = overlaps_graph(proposals, gt_boxes) - - # Compute overlaps with crowd boxes [anchors, crowds] - crowd_overlaps = overlaps_graph(proposals, crowd_boxes) - crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1) - no_crowd_bool = (crowd_iou_max < 0.001) - - # Determine postive and negative ROIs - roi_iou_max = tf.reduce_max(overlaps, axis=1) - # 1. Positive ROIs are those with >= 0.5 IoU with a GT box - positive_roi_bool = (roi_iou_max >= 0.5) - positive_indices = tf.where(positive_roi_bool)[:, 0] - # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds. - negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0] - - # Subsample ROIs. Aim for 33% positive - # Positive ROIs - positive_count = int(config.TRAIN_ROIS_PER_IMAGE * - config.ROI_POSITIVE_RATIO) - positive_indices = tf.random_shuffle(positive_indices)[:positive_count] - positive_count = tf.shape(positive_indices)[0] - # Negative ROIs. Add enough to maintain positive:negative ratio. - r = 1.0 / config.ROI_POSITIVE_RATIO - negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count - negative_indices = tf.random_shuffle(negative_indices)[:negative_count] - # Gather selected ROIs - positive_rois = tf.gather(proposals, positive_indices) - negative_rois = tf.gather(proposals, negative_indices) - - # Assign positive ROIs to GT boxes. - positive_overlaps = tf.gather(overlaps, positive_indices) - roi_gt_box_assignment = tf.argmax(positive_overlaps, axis=1) - roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) - roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment) - - # Compute bbox refinement for positive ROIs - deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes) - deltas /= config.BBOX_STD_DEV - - # Assign positive ROIs to GT masks - # Permute masks to [N, height, width, 1] - transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1) - # Pick the right mask for each ROI - roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment) - - # Compute mask targets - boxes = positive_rois - if config.USE_MINI_MASK: - # Transform ROI corrdinates from normalized image space - # to normalized mini-mask space. - y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1) - gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1) - gt_h = gt_y2 - gt_y1 - gt_w = gt_x2 - gt_x1 - y1 = (y1 - gt_y1) / gt_h - x1 = (x1 - gt_x1) / gt_w - y2 = (y2 - gt_y1) / gt_h - x2 = (x2 - gt_x1) / gt_w - boxes = tf.concat([y1, x1, y2, x2], 1) - box_ids = tf.range(0, tf.shape(roi_masks)[0]) - masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes, - box_ids, - config.MASK_SHAPE) - # Remove the extra dimension from masks. - masks = tf.squeeze(masks, axis=3) - - # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with - # binary cross entropy loss. - masks = tf.round(masks) - - # Append negative ROIs and pad bbox deltas and masks that - # are not used for negative ROIs with zeros. 
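# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch).  detection_targets_graph()
# above samples ROIs to keep roughly a 1:2 positive:negative split
# (ROI_POSITIVE_RATIO = 0.33).  The sampling arithmetic with the default
# TRAIN_ROIS_PER_IMAGE = 200, assuming enough proposals of each kind exist:
TRAIN_ROIS_PER_IMAGE = 200
ROI_POSITIVE_RATIO = 0.33

positive_count = int(TRAIN_ROIS_PER_IMAGE * ROI_POSITIVE_RATIO)          # 66
negative_count = (int((1.0 / ROI_POSITIVE_RATIO) * positive_count)
                  - positive_count)                                      # 134
print(positive_count, negative_count, positive_count + negative_count)  # 66 134 200
# ---------------------------------------------------------------------------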
- rois = tf.concat([positive_rois, negative_rois], axis=0) - N = tf.shape(negative_rois)[0] - P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0) - rois = tf.pad(rois, [(0, P), (0, 0)]) - roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)]) - roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)]) - deltas = tf.pad(deltas, [(0, N + P), (0, 0)]) - masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)]) - - return rois, roi_gt_class_ids, deltas, masks - - -class DetectionTargetLayer(KE.Layer): - """Subsamples proposals and generates target box refinement, class_ids, - and masks for each. - - Inputs: - proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates. Might - be zero padded if there are not enough proposals. - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs. - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized - coordinates. - gt_masks: [batch, height, width, MAX_GT_INSTANCES] of boolean type - - Returns: Target ROIs and corresponding class IDs, bounding box shifts, - and masks. - rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized - coordinates - target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs. - target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, - (dy, dx, log(dh), log(dw), class_id)] - Class-specific bbox refinements. - target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width) - Masks cropped to bbox boundaries and resized to neural - network output size. - - Note: Returned arrays might be zero padded if not enough target ROIs. - """ - - def __init__(self, config, **kwargs): - super(DetectionTargetLayer, self).__init__(**kwargs) - self.config = config - - def call(self, inputs): - proposals = inputs[0] - gt_class_ids = inputs[1] - gt_boxes = inputs[2] - gt_masks = inputs[3] - - # Slice the batch and run a graph for each slice - # TODO: Rename target_bbox to target_deltas for clarity - names = ["rois", "target_class_ids", "target_bbox", "target_mask"] - outputs = utils.batch_slice( - [proposals, gt_class_ids, gt_boxes, gt_masks], - lambda w, x, y, z: detection_targets_graph( - w, x, y, z, self.config), - self.config.IMAGES_PER_GPU, names=names) - return outputs - - def compute_output_shape(self, input_shape): - return [ - (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # rois - (None, 1), # class_ids - (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # deltas - (None, self.config.TRAIN_ROIS_PER_IMAGE, self.config.MASK_SHAPE[0], - self.config.MASK_SHAPE[1]) # masks - ] - - def compute_mask(self, inputs, mask=None): - return [None, None, None, None] - - -############################################################ -# Detection Layer -############################################################ - -def clip_to_window(window, boxes): - """ - window: (y1, x1, y2, x2). The window in the image we want to clip to. - boxes: [N, (y1, x1, y2, x2)] - """ - boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], window[2]), window[0]) - boxes[:, 1] = np.maximum(np.minimum(boxes[:, 1], window[3]), window[1]) - boxes[:, 2] = np.maximum(np.minimum(boxes[:, 2], window[2]), window[0]) - boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], window[3]), window[1]) - return boxes - - -def refine_detections_graph(rois, probs, deltas, window, config): - """Refine classified proposals and filter overlaps and return final - detections. - - Inputs: - rois: [N, (y1, x1, y2, x2)] in normalized coordinates - probs: [N, num_classes]. Class probabilities. - deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. 
Class-specific - bounding box deltas. - window: (y1, x1, y2, x2) in image coordinates. The part of the image - that contains the image excluding the padding. - - Returns detections shaped: [N, (y1, x1, y2, x2, class_id, score)] where - coordinates are in image domain. - """ - # Class IDs per ROI - class_ids = tf.argmax(probs, axis=1, output_type=tf.int32) - # Class probability of the top class of each ROI - indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1) - class_scores = tf.gather_nd(probs, indices) - # Class-specific bounding box deltas - deltas_specific = tf.gather_nd(deltas, indices) - # Apply bounding box deltas - # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates - refined_rois = apply_box_deltas_graph( - rois, deltas_specific * config.BBOX_STD_DEV) - # Convert coordiates to image domain - # TODO: better to keep them normalized until later - height, width = config.IMAGE_SHAPE[:2] - refined_rois *= tf.constant([height, width, height, width], dtype=tf.float32) - # Clip boxes to image window - refined_rois = clip_boxes_graph(refined_rois, window) - # Round and cast to int since we're deadling with pixels now - refined_rois = tf.to_int32(tf.rint(refined_rois)) - - # TODO: Filter out boxes with zero area - - # Filter out background boxes - keep = tf.where(class_ids > 0)[:, 0] - # Filter out low confidence boxes - if config.DETECTION_MIN_CONFIDENCE: - conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0] - keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), - tf.expand_dims(conf_keep, 0)) - keep = tf.sparse_tensor_to_dense(keep)[0] - - # Apply per-class NMS - # 1. Prepare variables - pre_nms_class_ids = tf.gather(class_ids, keep) - pre_nms_scores = tf.gather(class_scores, keep) - pre_nms_rois = tf.gather(refined_rois, keep) - unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] - - def nms_keep_map(class_id): - """Apply Non-Maximum Suppression on ROIs of the given class.""" - # Indices of ROIs of the given class - ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0] - # Apply NMS - class_keep = tf.image.non_max_suppression( - tf.to_float(tf.gather(pre_nms_rois, ixs)), - tf.gather(pre_nms_scores, ixs), - max_output_size=config.DETECTION_MAX_INSTANCES, - iou_threshold=config.DETECTION_NMS_THRESHOLD) - # Map indicies - class_keep = tf.gather(keep, tf.gather(ixs, class_keep)) - # Pad with -1 so returned tensors have the same shape - gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0] - class_keep = tf.pad(class_keep, [(0, gap)], - mode='CONSTANT', constant_values=-1) - # Set shape so map_fn() can infer result shape - class_keep.set_shape([config.DETECTION_MAX_INSTANCES]) - return class_keep - - # 2. Map over class IDs - nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, - dtype=tf.int64) - # 3. Merge results into one list, and remove -1 padding - nms_keep = tf.reshape(nms_keep, [-1]) - nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0]) - # 4. 
Compute intersection between keep and nms_keep - keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), - tf.expand_dims(nms_keep, 0)) - keep = tf.sparse_tensor_to_dense(keep)[0] - # Keep top detections - roi_count = config.DETECTION_MAX_INSTANCES - class_scores_keep = tf.gather(class_scores, keep) - num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) - top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] - keep = tf.gather(keep, top_ids) - - # Arrange output as [N, (y1, x1, y2, x2, class_id, score)] - # Coordinates are in image domain. - detections = tf.concat([ - tf.to_float(tf.gather(refined_rois, keep)), - tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis], - tf.gather(class_scores, keep)[..., tf.newaxis] - ], axis=1) - - # Pad with zeros if detections < DETECTION_MAX_INSTANCES - gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0] - detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT") - return detections - - -class DetectionLayer(KE.Layer): - """Takes classified proposal boxes and their bounding box deltas and - returns the final detection boxes. - - Returns: - [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] where - coordinates are in image domain - """ - - def __init__(self, config=None, **kwargs): - super(DetectionLayer, self).__init__(**kwargs) - self.config = config - - def call(self, inputs): - rois = inputs[0] - mrcnn_class = inputs[1] - mrcnn_bbox = inputs[2] - image_meta = inputs[3] - - # Run detection refinement graph on each item in the batch - _, _, window, _ = parse_image_meta_graph(image_meta) - detections_batch = utils.batch_slice( - [rois, mrcnn_class, mrcnn_bbox, window], - lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config), - self.config.IMAGES_PER_GPU) - - # Reshape output - # [batch, num_detections, (y1, x1, y2, x2, class_score)] in pixels - return tf.reshape( - detections_batch, - [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6]) - - def compute_output_shape(self, input_shape): - return (None, self.config.DETECTION_MAX_INSTANCES, 6) - - -# Region Proposal Network (RPN) - -def rpn_graph(feature_map, anchors_per_location, anchor_stride): - """Builds the computation graph of Region Proposal Network. - - feature_map: backbone features [batch, height, width, depth] - anchors_per_location: number of anchors per pixel in the feature map - anchor_stride: Controls the density of anchors. Typically 1 (anchors for - every pixel in the feature map), or 2 (every other pixel). - - Returns: - rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax) - rpn_probs: [batch, H, W, 2] Anchor classifier probabilities. - rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be - applied to anchors. - """ - # TODO: check if stride of 2 causes alignment issues if the featuremap - # is not even. - # Shared convolutional base of the RPN - shared = KL.Conv2D(512, (3, 3), padding='same', activation='relu', - strides=anchor_stride, - name='rpn_conv_shared')(feature_map) - - # Anchor Score. [batch, height, width, anchors per location * 2]. - x = KL.Conv2D(2 * anchors_per_location, (1, 1), padding='valid', - activation='linear', name='rpn_class_raw')(shared) - - # Reshape to [batch, anchors, 2] - rpn_class_logits = KL.Lambda( - lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 2]))(x) - - # Softmax on last dimension of BG/FG. - rpn_probs = KL.Activation( - "softmax", name="rpn_class_xxx")(rpn_class_logits) - - # Bounding box refinement. 
[batch, H, W, anchors per location, depth]
-    # where depth is [x, y, log(w), log(h)]
-    x = KL.Conv2D(anchors_per_location * 4, (1, 1), padding="valid",
-                  activation='linear', name='rpn_bbox_pred')(shared)
-
-    # Reshape to [batch, anchors, 4]
-    rpn_bbox = KL.Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 4]))(x)
-
-    return [rpn_class_logits, rpn_probs, rpn_bbox]
-
-
-def build_rpn_model(anchor_stride, anchors_per_location, depth):
-    """Builds a Keras model of the Region Proposal Network.
-    It wraps the RPN graph so it can be used multiple times with shared
-    weights.
-
-    anchors_per_location: number of anchors per pixel in the feature map
-    anchor_stride: Controls the density of anchors. Typically 1 (anchors for
-                   every pixel in the feature map), or 2 (every other pixel).
-    depth: Depth of the backbone feature map.
-
-    Returns a Keras Model object. The model outputs, when called, are:
-    rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax)
-    rpn_probs: [batch, H, W, 2] Anchor classifier probabilities.
-    rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be
-              applied to anchors.
-    """
-    input_feature_map = KL.Input(shape=[None, None, depth],
-                                 name="input_rpn_feature_map")
-    outputs = rpn_graph(input_feature_map, anchors_per_location, anchor_stride)
-    return KM.Model([input_feature_map], outputs, name="rpn_model")
-
-
-############################################################
-# Feature Pyramid Network Heads
-############################################################
-
-def fpn_classifier_graph(rois, feature_maps,
-                         image_shape, pool_size, num_classes):
-    """Builds the computation graph of the feature pyramid network classifier
-    and regressor heads.
-
-    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
-          coordinates.
-    feature_maps: List of feature maps from different layers of the pyramid,
-                  [P2, P3, P4, P5]. Each has a different resolution.
-    image_shape: [height, width, depth]
-    pool_size: The width of the square feature map generated from ROI Pooling.
-    num_classes: number of classes, which determines the depth of the results
-
-    Returns:
-        logits: [N, NUM_CLASSES] classifier logits (before softmax)
-        probs: [N, NUM_CLASSES] classifier probabilities
-        bbox_deltas: [N, (dy, dx, log(dh), log(dw))] Deltas to apply to
-                     proposal boxes
-    """
-    # ROI Pooling
-    # Shape: [batch, num_boxes, pool_height, pool_width, channels]
-    x = PyramidROIAlign([pool_size, pool_size], image_shape,
-                        name="roi_align_classifier")([rois] + feature_maps)
-    # Two 1024 FC layers (implemented with Conv2D for consistency)
-    x = KL.TimeDistributed(KL.Conv2D(1024, (pool_size, pool_size), padding="valid"),
-                           name="mrcnn_class_conv1")(x)
-    x = KL.TimeDistributed(BatchNorm(axis=3), name='mrcnn_class_bn1')(x)
-    x = KL.Activation('relu')(x)
-    x = KL.TimeDistributed(KL.Conv2D(1024, (1, 1)),
-                           name="mrcnn_class_conv2")(x)
-    x = KL.TimeDistributed(BatchNorm(axis=3),
-                           name='mrcnn_class_bn2')(x)
-    x = KL.Activation('relu')(x)
-
-    shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
-                       name="pool_squeeze")(x)
-
-    # Classifier head
-    mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes),
-                                            name='mrcnn_class_logits')(shared)
-    mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"),
-                                     name="mrcnn_class")(mrcnn_class_logits)
-
-    # BBox head
-    # [batch, boxes, num_classes * (dy, dx, log(dh), log(dw))]
-    x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'),
-                           name='mrcnn_bbox_fc')(shared)
-    # Reshape to [batch, boxes, num_classes, (dy, dx, log(dh), log(dw))]
-    s = K.int_shape(x)
-    mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x)
-
-    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
-
-
-def build_fpn_mask_graph(rois, feature_maps,
-                         image_shape, pool_size, num_classes):
-    """Builds the computation graph of the mask head of Feature Pyramid Network.
-
-    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
-          coordinates.
-    feature_maps: List of feature maps from different layers of the pyramid,
-                  [P2, P3, P4, P5]. Each has a different resolution.
-    image_shape: [height, width, depth]
-    pool_size: The width of the square feature map generated from ROI Pooling.
- num_classes: number of classes, which determines the depth of the results - - Returns: Masks [batch, roi_count, height, width, num_classes] - """ - # ROI Pooling - # Shape: [batch, boxes, pool_height, pool_width, channels] - x = PyramidROIAlign([pool_size, pool_size], image_shape, - name="roi_align_mask")([rois] + feature_maps) - - # Conv layers - x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), - name="mrcnn_mask_conv1")(x) - x = KL.TimeDistributed(BatchNorm(axis=3), - name='mrcnn_mask_bn1')(x) - x = KL.Activation('relu')(x) - - x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), - name="mrcnn_mask_conv2")(x) - x = KL.TimeDistributed(BatchNorm(axis=3), - name='mrcnn_mask_bn2')(x) - x = KL.Activation('relu')(x) - - x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), - name="mrcnn_mask_conv3")(x) - x = KL.TimeDistributed(BatchNorm(axis=3), - name='mrcnn_mask_bn3')(x) - x = KL.Activation('relu')(x) - - x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), - name="mrcnn_mask_conv4")(x) - x = KL.TimeDistributed(BatchNorm(axis=3), - name='mrcnn_mask_bn4')(x) - x = KL.Activation('relu')(x) - - x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"), - name="mrcnn_mask_deconv")(x) - x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"), - name="mrcnn_mask")(x) - return x - - -############################################################ -# Loss Functions -############################################################ - -def smooth_l1_loss(y_true, y_pred): - """Implements Smooth-L1 loss. - y_true and y_pred are typicallly: [N, 4], but could be any shape. - """ - diff = K.abs(y_true - y_pred) - less_than_one = K.cast(K.less(diff, 1.0), "float32") - loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5) - return loss - - -def rpn_class_loss_graph(rpn_match, rpn_class_logits): - """RPN anchor classifier loss. - - rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, - -1=negative, 0=neutral anchor. - rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for FG/BG. - """ - # Squeeze last dim to simplify - rpn_match = tf.squeeze(rpn_match, -1) - # Get anchor classes. Convert the -1/+1 match to 0/1 values. - anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32) - # Positive and Negative anchors contribute to the loss, - # but neutral anchors (match value = 0) don't. - indices = tf.where(K.not_equal(rpn_match, 0)) - # Pick rows that contribute to the loss and filter out the rest. - rpn_class_logits = tf.gather_nd(rpn_class_logits, indices) - anchor_class = tf.gather_nd(anchor_class, indices) - # Crossentropy loss - loss = K.sparse_categorical_crossentropy(target=anchor_class, - output=rpn_class_logits, - from_logits=True) - loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) - return loss - - -def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox): - """Return the RPN bounding box loss graph. - - config: the model config object. - target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))]. - Uses 0 padding to fill in unsed bbox deltas. - rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, - -1=negative, 0=neutral anchor. - rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))] - """ - # Positive anchors contribute to the loss, but negative and - # neutral anchors (match value of 0 or -1) don't. 
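# [Editorial aside, not part of the removed file] The smooth-L1 penalty defined in
# smooth_l1_loss above, and re-implemented inline in rpn_bbox_loss_graph below, can
# be sketched in plain NumPy; the values are made up purely for illustration.
import numpy as np

def smooth_l1(y_true, y_pred):
    # 0.5 * d**2 where |d| < 1, and |d| - 0.5 elsewhere, applied elementwise
    d = np.abs(y_true - y_pred)
    return np.where(d < 1.0, 0.5 * d ** 2, d - 0.5)

# smooth_l1(np.zeros(2), np.array([0.5, 2.0])) -> array([0.125, 1.5])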
- rpn_match = K.squeeze(rpn_match, -1) - indices = tf.where(K.equal(rpn_match, 1)) - - # Pick bbox deltas that contribute to the loss - rpn_bbox = tf.gather_nd(rpn_bbox, indices) - - # Trim target bounding box deltas to the same length as rpn_bbox. - batch_counts = K.sum(K.cast(K.equal(rpn_match, 1), tf.int32), axis=1) - target_bbox = batch_pack_graph(target_bbox, batch_counts, - config.IMAGES_PER_GPU) - - # TODO: use smooth_l1_loss() rather than reimplementing here - # to reduce code duplication - diff = K.abs(target_bbox - rpn_bbox) - less_than_one = K.cast(K.less(diff, 1.0), "float32") - loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5) - - loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) - return loss - - -def mrcnn_class_loss_graph(target_class_ids, pred_class_logits, - active_class_ids): - """Loss for the classifier head of Mask RCNN. - - target_class_ids: [batch, num_rois]. Integer class IDs. Uses zero - padding to fill in the array. - pred_class_logits: [batch, num_rois, num_classes] - active_class_ids: [batch, num_classes]. Has a value of 1 for - classes that are in the dataset of the image, and 0 - for classes that are not in the dataset. - """ - target_class_ids = tf.cast(target_class_ids, 'int64') - - # Find predictions of classes that are not in the dataset. - pred_class_ids = tf.argmax(pred_class_logits, axis=2) - # TODO: Update this line to work with batch > 1. Right now it assumes all - # images in a batch have the same active_class_ids - pred_active = tf.gather(active_class_ids[0], pred_class_ids) - - # Loss - loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=target_class_ids, logits=pred_class_logits) - - # Erase losses of predictions of classes that are not in the active - # classes of the image. - loss = loss * pred_active - - # Computer loss mean. Use only predictions that contribute - # to the loss to get a correct mean. - loss = tf.reduce_sum(loss) / tf.reduce_sum(pred_active) - return loss - - -def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox): - """Loss for Mask R-CNN bounding box refinement. - - target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))] - target_class_ids: [batch, num_rois]. Integer class IDs. - pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))] - """ - # Reshape to merge batch and roi dimensions for simplicity. - target_class_ids = K.reshape(target_class_ids, (-1,)) - target_bbox = K.reshape(target_bbox, (-1, 4)) - pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4)) - - # Only positive ROIs contribute to the loss. And only - # the right class_id of each ROI. Get their indicies. - positive_roi_ix = tf.where(target_class_ids > 0)[:, 0] - positive_roi_class_ids = tf.cast( - tf.gather(target_class_ids, positive_roi_ix), tf.int64) - indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1) - - # Gather the deltas (predicted and true) that contribute to loss - target_bbox = tf.gather(target_bbox, positive_roi_ix) - pred_bbox = tf.gather_nd(pred_bbox, indices) - - # Smooth-L1 Loss - loss = K.switch(tf.size(target_bbox) > 0, - smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox), - tf.constant(0.0)) - loss = K.mean(loss) - loss = K.reshape(loss, [1, 1]) - return loss - - -def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks): - """Mask binary cross-entropy loss for the masks head. - - target_masks: [batch, num_rois, height, width]. - A float32 tensor of values 0 or 1. Uses zero padding to fill array. 
- target_class_ids: [batch, num_rois]. Integer class IDs. Zero padded. - pred_masks: [batch, proposals, height, width, num_classes] float32 tensor - with values from 0 to 1. - """ - # Reshape for simplicity. Merge first two dimensions into one. - target_class_ids = K.reshape(target_class_ids, (-1,)) - mask_shape = tf.shape(target_masks) - target_masks = K.reshape(target_masks, (-1, mask_shape[2], mask_shape[3])) - pred_shape = tf.shape(pred_masks) - pred_masks = K.reshape(pred_masks, - (-1, pred_shape[2], pred_shape[3], pred_shape[4])) - # Permute predicted masks to [N, num_classes, height, width] - pred_masks = tf.transpose(pred_masks, [0, 3, 1, 2]) - - # Only positive ROIs contribute to the loss. And only - # the class specific mask of each ROI. - positive_ix = tf.where(target_class_ids > 0)[:, 0] - positive_class_ids = tf.cast( - tf.gather(target_class_ids, positive_ix), tf.int64) - indices = tf.stack([positive_ix, positive_class_ids], axis=1) - - # Gather the masks (predicted and true) that contribute to loss - y_true = tf.gather(target_masks, positive_ix) - y_pred = tf.gather_nd(pred_masks, indices) - - # Compute binary cross entropy. If no positive ROIs, then return 0. - # shape: [batch, roi, num_classes] - loss = K.switch(tf.size(y_true) > 0, - K.binary_crossentropy(target=y_true, output=y_pred), - tf.constant(0.0)) - loss = K.mean(loss) - loss = K.reshape(loss, [1, 1]) - return loss - - -############################################################ -# Data Generator -############################################################ - -def load_image_gt(dataset, config, image_id, augment=False, - use_mini_mask=False): - """Load and return ground truth data for an image (image, mask, bounding boxes). - - augment: If true, apply random image augmentation. Currently, only - horizontal flipping is offered. - use_mini_mask: If False, returns full-size masks that are the same height - and width as the original image. These can be big, for example - 1024x1024x100 (for 100 instances). Mini masks are smaller, typically, - 224x224 and are generated by extracting the bounding box of the - object and resizing it to MINI_MASK_SHAPE. - - Returns: - image: [height, width, 3] - shape: the original shape of the image before resizing and cropping. - class_ids: [instance_count] Integer class IDs - bbox: [instance_count, (y1, x1, y2, x2)] - mask: [height, width, instance_count]. The height and width are those - of the image unless use_mini_mask is True, in which case they are - defined in MINI_MASK_SHAPE. - """ - # Load image and mask - image = dataset.load_image(image_id) - mask, class_ids = dataset.load_mask(image_id) - shape = image.shape - image, window, scale, padding = utils.resize_image( - image, - min_dim=config.IMAGE_MIN_DIM, - max_dim=config.IMAGE_MAX_DIM, - padding=config.IMAGE_PADDING) - mask = utils.resize_mask(mask, scale, padding) - - # Random horizontal flips. - if augment: - if random.randint(0, 1): - image = np.fliplr(image) - mask = np.fliplr(mask) - - # Bounding boxes. Note that some boxes might be all zeros - # if the corresponding mask got cropped out. - # bbox: [num_instances, (y1, x1, y2, x2)] - bbox = utils.extract_bboxes(mask) - - # Active classes - # Different datasets have different classes, so track the - # classes supported in the dataset of this image. 
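# [Editorial aside, not part of the removed file] A toy version of the
# active_class_ids bookkeeping built just below, assuming an image whose source
# dataset only provides class IDs {0, 1, 3} out of five classes in total.
import numpy as np

num_classes = 5
source_class_ids = [0, 1, 3]                      # hypothetical per-dataset class IDs
active_class_ids = np.zeros(num_classes, dtype=np.int32)
active_class_ids[source_class_ids] = 1            # -> array([1, 1, 0, 1, 0], dtype=int32)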
- active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32) - source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]] - active_class_ids[source_class_ids] = 1 - - # Resize masks to smaller size to reduce memory usage - if use_mini_mask: - mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE) - - # Image meta data - image_meta = compose_image_meta(image_id, shape, window, active_class_ids) - - return image, image_meta, class_ids, bbox, mask - - -def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, gt_masks, config): - """Generate targets for training Stage 2 classifier and mask heads. - This is not used in normal training. It's useful for debugging or to train - the Mask RCNN heads without using the RPN head. - - Inputs: - rpn_rois: [N, (y1, x1, y2, x2)] proposal boxes. - gt_class_ids: [instance count] Integer class IDs - gt_boxes: [instance count, (y1, x1, y2, x2)] - gt_masks: [height, width, instance count] Grund truth masks. Can be full - size or mini-masks. - - Returns: - rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] - class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. - bboxes: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (y, x, log(h), log(w))]. Class-specific - bbox refinements. - masks: [TRAIN_ROIS_PER_IMAGE, height, width, NUM_CLASSES). Class specific masks cropped - to bbox boundaries and resized to neural network output size. - """ - assert rpn_rois.shape[0] > 0 - assert gt_class_ids.dtype == np.int32, "Expected int but got {}".format( - gt_class_ids.dtype) - assert gt_boxes.dtype == np.int32, "Expected int but got {}".format( - gt_boxes.dtype) - assert gt_masks.dtype == np.bool_, "Expected bool but got {}".format( - gt_masks.dtype) - - # It's common to add GT Boxes to ROIs but we don't do that here because - # according to XinLei Chen's paper, it doesn't help. - - # Trim empty padding in gt_boxes and gt_masks parts - instance_ids = np.where(gt_class_ids > 0)[0] - assert instance_ids.shape[0] > 0, "Image must contain instances." - gt_class_ids = gt_class_ids[instance_ids] - gt_boxes = gt_boxes[instance_ids] - gt_masks = gt_masks[:, :, instance_ids] - - # Compute areas of ROIs and ground truth boxes. - rpn_roi_area = (rpn_rois[:, 2] - rpn_rois[:, 0]) * \ - (rpn_rois[:, 3] - rpn_rois[:, 1]) - gt_box_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * \ - (gt_boxes[:, 3] - gt_boxes[:, 1]) - - # Compute overlaps [rpn_rois, gt_boxes] - overlaps = np.zeros((rpn_rois.shape[0], gt_boxes.shape[0])) - for i in range(overlaps.shape[1]): - gt = gt_boxes[i] - overlaps[:, i] = utils.compute_iou( - gt, rpn_rois, gt_box_area[i], rpn_roi_area) - - # Assign ROIs to GT boxes - rpn_roi_iou_argmax = np.argmax(overlaps, axis=1) - rpn_roi_iou_max = overlaps[np.arange( - overlaps.shape[0]), rpn_roi_iou_argmax] - # GT box assigned to each ROI - rpn_roi_gt_boxes = gt_boxes[rpn_roi_iou_argmax] - rpn_roi_gt_class_ids = gt_class_ids[rpn_roi_iou_argmax] - - # Positive ROIs are those with >= 0.5 IoU with a GT box. - fg_ids = np.where(rpn_roi_iou_max > 0.5)[0] - - # Negative ROIs are those with max IoU 0.1-0.5 (hard example mining) - # TODO: To hard example mine or not to hard example mine, that's the question -# bg_ids = np.where((rpn_roi_iou_max >= 0.1) & (rpn_roi_iou_max < 0.5))[0] - bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] - - # Subsample ROIs. Aim for 33% foreground. 
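# [Editorial aside, not part of the removed file] A worked example of the
# subsampling arithmetic below, assuming the usual Mask R-CNN defaults of
# TRAIN_ROIS_PER_IMAGE = 200 and ROI_POSITIVE_RATIO = 0.33 (both assumed here,
# not taken from this patch):
fg_roi_count = int(200 * 0.33)      # at most 66 foreground ROIs per image
bg_roi_count = 200 - fg_roi_count   # the remaining 134 slots go to background ROIs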
- # FG - fg_roi_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO) - if fg_ids.shape[0] > fg_roi_count: - keep_fg_ids = np.random.choice(fg_ids, fg_roi_count, replace=False) - else: - keep_fg_ids = fg_ids - # BG - remaining = config.TRAIN_ROIS_PER_IMAGE - keep_fg_ids.shape[0] - if bg_ids.shape[0] > remaining: - keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) - else: - keep_bg_ids = bg_ids - # Combine indicies of ROIs to keep - keep = np.concatenate([keep_fg_ids, keep_bg_ids]) - # Need more? - remaining = config.TRAIN_ROIS_PER_IMAGE - keep.shape[0] - if remaining > 0: - # Looks like we don't have enough samples to maintain the desired - # balance. Reduce requirements and fill in the rest. This is - # likely different from the Mask RCNN paper. - - # There is a small chance we have neither fg nor bg samples. - if keep.shape[0] == 0: - # Pick bg regions with easier IoU threshold - bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] - assert bg_ids.shape[0] >= remaining - keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) - assert keep_bg_ids.shape[0] == remaining - keep = np.concatenate([keep, keep_bg_ids]) - else: - # Fill the rest with repeated bg rois. - keep_extra_ids = np.random.choice( - keep_bg_ids, remaining, replace=True) - keep = np.concatenate([keep, keep_extra_ids]) - assert keep.shape[0] == config.TRAIN_ROIS_PER_IMAGE, \ - "keep doesn't match ROI batch size {}, {}".format( - keep.shape[0], config.TRAIN_ROIS_PER_IMAGE) - - # Reset the gt boxes assigned to BG ROIs. - rpn_roi_gt_boxes[keep_bg_ids, :] = 0 - rpn_roi_gt_class_ids[keep_bg_ids] = 0 - - # For each kept ROI, assign a class_id, and for FG ROIs also add bbox refinement. - rois = rpn_rois[keep] - roi_gt_boxes = rpn_roi_gt_boxes[keep] - roi_gt_class_ids = rpn_roi_gt_class_ids[keep] - roi_gt_assignment = rpn_roi_iou_argmax[keep] - - # Class-aware bbox deltas. [y, x, log(h), log(w)] - bboxes = np.zeros((config.TRAIN_ROIS_PER_IMAGE, - config.NUM_CLASSES, 4), dtype=np.float32) - pos_ids = np.where(roi_gt_class_ids > 0)[0] - bboxes[pos_ids, roi_gt_class_ids[pos_ids]] = utils.box_refinement( - rois[pos_ids], roi_gt_boxes[pos_ids, :4]) - # Normalize bbox refinements - bboxes /= config.BBOX_STD_DEV - - # Generate class-specific target masks. 
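# [Editorial aside, not part of the removed file] The mask resizing below relies on
# scipy.misc.imresize, which was deprecated and later removed from SciPy. A rough
# equivalent nearest-neighbour resize of a boolean mask, sketched with skimage:
import numpy as np
from skimage.transform import resize

mask = np.zeros((10, 10), dtype=bool)
mask[2:8, 3:7] = True
resized = resize(mask.astype(float), (28, 28), order=0,
                 preserve_range=True).astype(bool)   # order=0 -> nearest neighbour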
- masks = np.zeros((config.TRAIN_ROIS_PER_IMAGE, config.MASK_SHAPE[0], config.MASK_SHAPE[1], config.NUM_CLASSES), - dtype=np.float32) - for i in pos_ids: - class_id = roi_gt_class_ids[i] - assert class_id > 0, "class id must be greater than 0" - gt_id = roi_gt_assignment[i] - class_mask = gt_masks[:, :, gt_id] - - if config.USE_MINI_MASK: - # Create a mask placeholder, the size of the image - placeholder = np.zeros(config.IMAGE_SHAPE[:2], dtype=bool) - # GT box - gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[gt_id] - gt_w = gt_x2 - gt_x1 - gt_h = gt_y2 - gt_y1 - # Resize mini mask to size of GT box - placeholder[gt_y1:gt_y2, gt_x1:gt_x2] = \ - np.round(scipy.misc.imresize(class_mask.astype(float), (gt_h, gt_w), - interp='nearest') / 255.0).astype(bool) - # Place the mini batch in the placeholder - class_mask = placeholder - - # Pick part of the mask and resize it - y1, x1, y2, x2 = rois[i].astype(np.int32) - m = class_mask[y1:y2, x1:x2] - mask = scipy.misc.imresize( - m.astype(float), config.MASK_SHAPE, interp='nearest') / 255.0 - masks[i, :, :, class_id] = mask - - return rois, roi_gt_class_ids, bboxes, masks - - -def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config): - """Given the anchors and GT boxes, compute overlaps and identify positive - anchors and deltas to refine them to match their corresponding GT boxes. - - anchors: [num_anchors, (y1, x1, y2, x2)] - gt_class_ids: [num_gt_boxes] Integer class IDs. - gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)] - - Returns: - rpn_match: [N] (int32) matches between anchors and GT boxes. - 1 = positive anchor, -1 = negative anchor, 0 = neutral - rpn_bbox: [N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. - """ - # RPN Match: 1 = positive anchor, -1 = negative anchor, 0 = neutral - rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32) - # RPN bounding boxes: [max anchors per image, (dy, dx, log(dh), log(dw))] - rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4)) - - # Handle COCO crowds - # A crowd box in COCO is a bounding box around several instances. Exclude - # them from training. A crowd box is given a negative class ID. - crowd_ix = np.where(gt_class_ids < 0)[0] - if crowd_ix.shape[0] > 0: - # Filter out crowds from ground truth class IDs and boxes - non_crowd_ix = np.where(gt_class_ids > 0)[0] - crowd_boxes = gt_boxes[crowd_ix] - gt_class_ids = gt_class_ids[non_crowd_ix] - gt_boxes = gt_boxes[non_crowd_ix] - # Compute overlaps with crowd boxes [anchors, crowds] - crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes) - crowd_iou_max = np.amax(crowd_overlaps, axis=1) - no_crowd_bool = (crowd_iou_max < 0.001) - else: - # All anchors don't intersect a crowd - no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool) - - # Compute overlaps [num_anchors, num_gt_boxes] - overlaps = utils.compute_overlaps(anchors, gt_boxes) - - # Match anchors to GT Boxes - # If an anchor overlaps a GT box with IoU >= 0.7 then it's positive. - # If an anchor overlaps a GT box with IoU < 0.3 then it's negative. - # Neutral anchors are those that don't match the conditions above, - # and they don't influence the loss function. - # However, don't keep any GT box unmatched (rare, but happens). Instead, - # match it to the closest anchor (even if its max IoU is < 0.3). - # - # 1. Set negative anchors first. They get overwritten below if a GT box is - # matched to them. Skip boxes in crowd areas. 
- anchor_iou_argmax = np.argmax(overlaps, axis=1) - anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax] - rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1 - # 2. Set an anchor for each GT box (regardless of IoU value). - # TODO: If multiple anchors have the same IoU match all of them - gt_iou_argmax = np.argmax(overlaps, axis=0) - rpn_match[gt_iou_argmax] = 1 - # 3. Set anchors with high overlap as positive. - rpn_match[anchor_iou_max >= 0.7] = 1 - - # Subsample to balance positive and negative anchors - # Don't let positives be more than half the anchors - ids = np.where(rpn_match == 1)[0] - extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2) - if extra > 0: - # Reset the extra ones to neutral - ids = np.random.choice(ids, extra, replace=False) - rpn_match[ids] = 0 - # Same for negative proposals - ids = np.where(rpn_match == -1)[0] - extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE - - np.sum(rpn_match == 1)) - if extra > 0: - # Rest the extra ones to neutral - ids = np.random.choice(ids, extra, replace=False) - rpn_match[ids] = 0 - - # For positive anchors, compute shift and scale needed to transform them - # to match the corresponding GT boxes. - ids = np.where(rpn_match == 1)[0] - ix = 0 # index into rpn_bbox - # TODO: use box_refinement() rather than duplicating the code here - for i, a in zip(ids, anchors[ids]): - # Closest gt box (it might have IoU < 0.7) - gt = gt_boxes[anchor_iou_argmax[i]] - - # Convert coordinates to center plus width/height. - # GT Box - gt_h = gt[2] - gt[0] - gt_w = gt[3] - gt[1] - gt_center_y = gt[0] + 0.5 * gt_h - gt_center_x = gt[1] + 0.5 * gt_w - # Anchor - a_h = a[2] - a[0] - a_w = a[3] - a[1] - a_center_y = a[0] + 0.5 * a_h - a_center_x = a[1] + 0.5 * a_w - - # Compute the bbox refinement that the RPN should predict. - rpn_bbox[ix] = [ - (gt_center_y - a_center_y) / a_h, - (gt_center_x - a_center_x) / a_w, - np.log(gt_h / a_h), - np.log(gt_w / a_w), - ] - # Normalize - rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV - ix += 1 - - return rpn_match, rpn_bbox - - -def generate_random_rois(image_shape, count, gt_class_ids, gt_boxes): - """Generates ROI proposals similar to what a region proposal network - would generate. - - image_shape: [Height, Width, Depth] - count: Number of ROIs to generate - gt_class_ids: [N] Integer ground truth class IDs - gt_boxes: [N, (y1, x1, y2, x2)] Ground truth boxes in pixels. - - Returns: [count, (y1, x1, y2, x2)] ROI boxes in pixels. - """ - # placeholder - rois = np.zeros((count, 4), dtype=np.int32) - - # Generate random ROIs around GT boxes (90% of count) - rois_per_box = int(0.9 * count / gt_boxes.shape[0]) - for i in range(gt_boxes.shape[0]): - gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[i] - h = gt_y2 - gt_y1 - w = gt_x2 - gt_x1 - # random boundaries - r_y1 = max(gt_y1 - h, 0) - r_y2 = min(gt_y2 + h, image_shape[0]) - r_x1 = max(gt_x1 - w, 0) - r_x2 = min(gt_x2 + w, image_shape[1]) - - # To avoid generating boxes with zero area, we generate double what - # we need and filter out the extra. If we get fewer valid boxes - # than we need, we loop and try again. 
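# [Editorial aside, not part of the removed file] The (dy, dx, log(dh), log(dw))
# target encoding computed at the end of build_rpn_targets above, worked through
# on one made-up anchor/ground-truth pair:
import numpy as np

anchor = np.array([0.0, 0.0, 10.0, 10.0])   # y1, x1, y2, x2
gt = np.array([2.0, 2.0, 12.0, 14.0])
a_h, a_w = anchor[2] - anchor[0], anchor[3] - anchor[1]
gt_h, gt_w = gt[2] - gt[0], gt[3] - gt[1]
dy = ((gt[0] + 0.5 * gt_h) - (anchor[0] + 0.5 * a_h)) / a_h    # 0.2
dx = ((gt[1] + 0.5 * gt_w) - (anchor[1] + 0.5 * a_w)) / a_w    # 0.3
dh, dw = np.log(gt_h / a_h), np.log(gt_w / a_w)                # 0.0 and ~0.182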
- while True: - y1y2 = np.random.randint(r_y1, r_y2, (rois_per_box * 2, 2)) - x1x2 = np.random.randint(r_x1, r_x2, (rois_per_box * 2, 2)) - # Filter out zero area boxes - threshold = 1 - y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= - threshold][:rois_per_box] - x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= - threshold][:rois_per_box] - if y1y2.shape[0] == rois_per_box and x1x2.shape[0] == rois_per_box: - break - - # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape - # into x1, y1, x2, y2 order - x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) - y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) - box_rois = np.hstack([y1, x1, y2, x2]) - rois[rois_per_box * i:rois_per_box * (i + 1)] = box_rois - - # Generate random ROIs anywhere in the image (10% of count) - remaining_count = count - (rois_per_box * gt_boxes.shape[0]) - # To avoid generating boxes with zero area, we generate double what - # we need and filter out the extra. If we get fewer valid boxes - # than we need, we loop and try again. - while True: - y1y2 = np.random.randint(0, image_shape[0], (remaining_count * 2, 2)) - x1x2 = np.random.randint(0, image_shape[1], (remaining_count * 2, 2)) - # Filter out zero area boxes - threshold = 1 - y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= - threshold][:remaining_count] - x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= - threshold][:remaining_count] - if y1y2.shape[0] == remaining_count and x1x2.shape[0] == remaining_count: - break - - # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape - # into x1, y1, x2, y2 order - x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) - y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) - global_rois = np.hstack([y1, x1, y2, x2]) - rois[-remaining_count:] = global_rois - return rois - - -def data_generator(dataset, config, shuffle=True, augment=True, random_rois=0, - batch_size=1, detection_targets=False): - """A generator that returns images and corresponding target class ids, - bounding box deltas, and masks. - - dataset: The Dataset object to pick data from - config: The model config object - shuffle: If True, shuffles the samples before every epoch - augment: If True, applies image augmentation to images (currently only - horizontal flips are supported) - random_rois: If > 0 then generate proposals to be used to train the - network classifier and mask heads. Useful if training - the Mask RCNN part without the RPN. - batch_size: How many images to return in each call - detection_targets: If True, generate detection targets (class IDs, bbox - deltas, and masks). Typically for debugging or visualizations because - in trainig detection targets are generated by DetectionTargetLayer. - - Returns a Python generator. Upon calling next() on it, the - generator returns two lists, inputs and outputs. The containtes - of the lists differs depending on the received arguments: - inputs list: - - images: [batch, H, W, C] - - image_meta: [batch, size of image meta] - - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral) - - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. - - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs - - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] - - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width - are those of the image unless use_mini_mask is True, in which - case they are defined in MINI_MASK_SHAPE. - - outputs list: Usually empty in regular training. 
But if detection_targets - is True then the outputs list contains target class_ids, bbox deltas, - and masks. - """ - b = 0 # batch item index - image_index = -1 - image_ids = np.copy(dataset.image_ids) - error_count = 0 - - # Anchors - # [anchor_count, (y1, x1, y2, x2)] - anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, - config.RPN_ANCHOR_RATIOS, - config.BACKBONE_SHAPES, - config.BACKBONE_STRIDES, - config.RPN_ANCHOR_STRIDE) - - # Keras requires a generator to run indefinately. - while True: - try: - # Increment index to pick next image. Shuffle if at the start of an epoch. - image_index = (image_index + 1) % len(image_ids) - if shuffle and image_index == 0: - np.random.shuffle(image_ids) - - # Get GT bounding boxes and masks for image. - image_id = image_ids[image_index] - image, image_meta, gt_class_ids, gt_boxes, gt_masks = \ - load_image_gt(dataset, config, image_id, augment=augment, - use_mini_mask=config.USE_MINI_MASK) - - # Skip images that have no instances. This can happen in cases - # where we train on a subset of classes and the image doesn't - # have any of the classes we care about. - if not np.any(gt_class_ids > 0): - continue - - # RPN Targets - rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors, - gt_class_ids, gt_boxes, config) - - # Mask R-CNN Targets - if random_rois: - rpn_rois = generate_random_rois( - image.shape, random_rois, gt_class_ids, gt_boxes) - if detection_targets: - rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask =\ - build_detection_targets( - rpn_rois, gt_class_ids, gt_boxes, gt_masks, config) - - # Init batch arrays - if b == 0: - batch_image_meta = np.zeros( - (batch_size,) + image_meta.shape, dtype=image_meta.dtype) - batch_rpn_match = np.zeros( - [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype) - batch_rpn_bbox = np.zeros( - [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype) - batch_images = np.zeros( - (batch_size,) + image.shape, dtype=np.float32) - batch_gt_class_ids = np.zeros( - (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32) - batch_gt_boxes = np.zeros( - (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32) - if config.USE_MINI_MASK: - batch_gt_masks = np.zeros((batch_size, config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1], - config.MAX_GT_INSTANCES)) - else: - batch_gt_masks = np.zeros( - (batch_size, image.shape[0], image.shape[1], config.MAX_GT_INSTANCES)) - if random_rois: - batch_rpn_rois = np.zeros( - (batch_size, rpn_rois.shape[0], 4), dtype=rpn_rois.dtype) - if detection_targets: - batch_rois = np.zeros( - (batch_size,) + rois.shape, dtype=rois.dtype) - batch_mrcnn_class_ids = np.zeros( - (batch_size,) + mrcnn_class_ids.shape, dtype=mrcnn_class_ids.dtype) - batch_mrcnn_bbox = np.zeros( - (batch_size,) + mrcnn_bbox.shape, dtype=mrcnn_bbox.dtype) - batch_mrcnn_mask = np.zeros( - (batch_size,) + mrcnn_mask.shape, dtype=mrcnn_mask.dtype) - - # If more instances than fits in the array, sub-sample from them. 
- if gt_boxes.shape[0] > config.MAX_GT_INSTANCES: - ids = np.random.choice( - np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False) - gt_class_ids = gt_class_ids[ids] - gt_boxes = gt_boxes[ids] - gt_masks = gt_masks[:, :, ids] - - # Add to batch - batch_image_meta[b] = image_meta - batch_rpn_match[b] = rpn_match[:, np.newaxis] - batch_rpn_bbox[b] = rpn_bbox - batch_images[b] = mold_image(image.astype(np.float32), config) - batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids - batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes - batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks - if random_rois: - batch_rpn_rois[b] = rpn_rois - if detection_targets: - batch_rois[b] = rois - batch_mrcnn_class_ids[b] = mrcnn_class_ids - batch_mrcnn_bbox[b] = mrcnn_bbox - batch_mrcnn_mask[b] = mrcnn_mask - b += 1 - - # Batch full? - if b >= batch_size: - inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox, - batch_gt_class_ids, batch_gt_boxes, batch_gt_masks] - outputs = [] - - if random_rois: - inputs.extend([batch_rpn_rois]) - if detection_targets: - inputs.extend([batch_rois]) - # Keras requires that output and targets have the same number of dimensions - batch_mrcnn_class_ids = np.expand_dims( - batch_mrcnn_class_ids, -1) - outputs.extend( - [batch_mrcnn_class_ids, batch_mrcnn_bbox, batch_mrcnn_mask]) - - yield inputs, outputs - - # start a new batch - b = 0 - except (GeneratorExit, KeyboardInterrupt): - raise - except: - # Log it and skip the image - logging.exception("Error processing image {}".format( - dataset.image_info[image_id])) - error_count += 1 - if error_count > 5: - raise - - -############################################################ -# MaskRCNN Class -############################################################ - -class MaskRCNN(): - """Encapsulates the Mask RCNN model functionality. - - The actual Keras model is in the keras_model property. - """ - - def __init__(self, mode, config, model_dir): - """ - mode: Either "training" or "inference" - config: A Sub-class of the Config class - model_dir: Directory to save training logs and trained weights - """ - assert mode in ['training', 'inference'] - session_config = tf.ConfigProto(intra_op_parallelism_threads=config.NUM_INTRA, - inter_op_parallelism_threads=config.NUM_INTER) - session = tf.Session(config=session_config) - K.set_session(session) - K.set_image_data_format('channels_last') - - self.mode = mode - self.config = config - self.model_dir = model_dir - self.set_log_dir() - self.keras_model = self.build(mode=mode, config=config) - - def build(self, mode, config): - """Build Mask R-CNN architecture. - input_shape: The shape of the input image. - mode: Either "training" or "inference". The inputs and - outputs of the model differ accordingly. - """ - assert mode in ['training', 'inference'] - - # Image size must be dividable by 2 multiple times - h, w = config.IMAGE_SHAPE[:2] - if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6): - raise Exception("Image size must be dividable by 2 at least 6 times " - "to avoid fractions when downscaling and upscaling." - "For example, use 256, 320, 384, 448, 512, ... etc. 
") - - # Inputs - input_image = KL.Input( - shape=config.IMAGE_SHAPE.tolist(), name="input_image") - input_image_meta = KL.Input(shape=[None], name="input_image_meta") - if mode == "training": - # RPN GT - input_rpn_match = KL.Input( - shape=[None, 1], name="input_rpn_match", dtype=tf.int32) - input_rpn_bbox = KL.Input( - shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32) - - # Detection GT (class IDs, bounding boxes, and masks) - # 1. GT Class IDs (zero padded) - input_gt_class_ids = KL.Input( - shape=[None], name="input_gt_class_ids", dtype=tf.int32) - # 2. GT Boxes in pixels (zero padded) - # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates - input_gt_boxes = KL.Input( - shape=[None, 4], name="input_gt_boxes", dtype=tf.float32) - # Normalize coordinates - h, w = K.shape(input_image)[1], K.shape(input_image)[2] - image_scale = K.cast(K.stack([h, w, h, w], axis=0), tf.float32) - gt_boxes = KL.Lambda(lambda x: x / image_scale)(input_gt_boxes) - # 3. GT Masks (zero padded) - # [batch, height, width, MAX_GT_INSTANCES] - if config.USE_MINI_MASK: - input_gt_masks = KL.Input( - shape=[config.MINI_MASK_SHAPE[0], - config.MINI_MASK_SHAPE[1], None], - name="input_gt_masks", dtype=bool) - else: - input_gt_masks = KL.Input( - shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None], - name="input_gt_masks", dtype=bool) - - # Build the shared convolutional layers. - # Bottom-up Layers - # Returns a list of the last layers of each stage, 5 in total. - # Don't create the thead (stage 5), so we pick the 4th item in the list. - _, C2, C3, C4, C5 = resnet_graph(input_image, "resnet101", stage5=True) - # Top-down Layers - # TODO: add assert to varify feature map sizes match what's in config - P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5) - P4 = KL.Add(name="fpn_p4add")([ - KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5), - KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)]) - P3 = KL.Add(name="fpn_p3add")([ - KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4), - KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)]) - P2 = KL.Add(name="fpn_p2add")([ - KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3), - KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)]) - # Attach 3x3 conv to all P layers to get the final feature maps. - P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2) - P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3) - P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4) - P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5) - # P6 is used for the 5th anchor scale in RPN. Generated by - # subsampling from P5 with stride of 2. - P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5) - - # Note that P6 is used in RPN, but not in the classifier heads. - rpn_feature_maps = [P2, P3, P4, P5, P6] - mrcnn_feature_maps = [P2, P3, P4, P5] - - # Generate Anchors - self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, - config.RPN_ANCHOR_RATIOS, - config.BACKBONE_SHAPES, - config.BACKBONE_STRIDES, - config.RPN_ANCHOR_STRIDE) - - # RPN Model - rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE, - len(config.RPN_ANCHOR_RATIOS), 256) - # Loop through pyramid layers - layer_outputs = [] # list of lists - for p in rpn_feature_maps: - layer_outputs.append(rpn([p])) - # Concatenate layer outputs - # Convert from list of lists of level outputs to list of lists - # of outputs across levels. - # e.g. 
[[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]] - output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"] - outputs = list(zip(*layer_outputs)) - outputs = [KL.Concatenate(axis=1, name=n)(list(o)) - for o, n in zip(outputs, output_names)] - - rpn_class_logits, rpn_class, rpn_bbox = outputs - - # Generate proposals - # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates - # and zero padded. - proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\ - else config.POST_NMS_ROIS_INFERENCE - rpn_rois = ProposalLayer(proposal_count=proposal_count, - nms_threshold=config.RPN_NMS_THRESHOLD, - name="ROI", - anchors=self.anchors, - config=config)([rpn_class, rpn_bbox]) - - if mode == "training": - # Class ID mask to mark class IDs supported by the dataset the image - # came from. - _, _, _, active_class_ids = KL.Lambda(lambda x: parse_image_meta_graph(x), - mask=[None, None, None, None])(input_image_meta) - - if not config.USE_RPN_ROIS: - # Ignore predicted ROIs and use ROIs provided as an input. - input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4], - name="input_roi", dtype=np.int32) - # Normalize coordinates to 0-1 range. - target_rois = KL.Lambda(lambda x: K.cast( - x, tf.float32) / image_scale[:4])(input_rois) - else: - target_rois = rpn_rois - - # Generate detection targets - # Subsamples proposals and generates target outputs for training - # Note that proposal class IDs, gt_boxes, and gt_masks are zero - # padded. Equally, returned rois and targets are zero padded. - rois, target_class_ids, target_bbox, target_mask =\ - DetectionTargetLayer(config, name="proposal_targets")([ - target_rois, input_gt_class_ids, gt_boxes, input_gt_masks]) - - # Network Heads - # TODO: verify that this handles zero padded ROIs - mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ - fpn_classifier_graph(rois, mrcnn_feature_maps, config.IMAGE_SHAPE, - config.POOL_SIZE, config.NUM_CLASSES) - - mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps, - config.IMAGE_SHAPE, - config.MASK_POOL_SIZE, - config.NUM_CLASSES) - - # TODO: clean up (use tf.identify if necessary) - output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois) - - # Losses - rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x), name="rpn_class_loss")( - [input_rpn_match, rpn_class_logits]) - rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")( - [input_rpn_bbox, input_rpn_match, rpn_bbox]) - class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")( - [target_class_ids, mrcnn_class_logits, active_class_ids]) - bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")( - [target_bbox, target_class_ids, mrcnn_bbox]) - mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")( - [target_mask, target_class_ids, mrcnn_mask]) - - # Model - inputs = [input_image, input_image_meta, - input_rpn_match, input_rpn_bbox, input_gt_class_ids, input_gt_boxes, input_gt_masks] - if not config.USE_RPN_ROIS: - inputs.append(input_rois) - outputs = [rpn_class_logits, rpn_class, rpn_bbox, - mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask, - rpn_rois, output_rois, - rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss] - model = KM.Model(inputs, outputs, name='mask_rcnn') - else: - # Network Heads - # Proposal classifier and BBox regressor heads - mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ - fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, 
config.IMAGE_SHAPE, - config.POOL_SIZE, config.NUM_CLASSES) - - # Detections - # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in image coordinates - detections = DetectionLayer(config, name="mrcnn_detection")( - [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta]) - - # Convert boxes to normalized coordinates - # TODO: let DetectionLayer return normalized coordinates to avoid - # unnecessary conversions - h, w = config.IMAGE_SHAPE[:2] - detection_boxes = KL.Lambda( - lambda x: x[..., :4] / np.array([h, w, h, w]))(detections) - - # Create masks for detections - mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps, - config.IMAGE_SHAPE, - config.MASK_POOL_SIZE, - config.NUM_CLASSES) - - model = KM.Model([input_image, input_image_meta], - [detections, mrcnn_class, mrcnn_bbox, - mrcnn_mask, rpn_rois, rpn_class, rpn_bbox], - name='mask_rcnn') - - # Add multi-GPU support. - if config.GPU_COUNT > 1: - from parallel_model import ParallelModel - model = ParallelModel(model, config.GPU_COUNT) - - return model - - def find_last(self): - """Finds the last checkpoint file of the last trained model in the - model directory. - Returns: - log_dir: The directory where events and weights are saved - checkpoint_path: the path to the last checkpoint file - """ - # Get directory names. Each directory corresponds to a model - dir_names = next(os.walk(self.model_dir))[1] - key = self.config.NAME.lower() - dir_names = filter(lambda f: f.startswith(key), dir_names) - dir_names = sorted(dir_names) - if not dir_names: - return None, None - # Pick last directory - dir_name = os.path.join(self.model_dir, dir_names[-1]) - # Find the last checkpoint - checkpoints = next(os.walk(dir_name))[2] - checkpoints = filter(lambda f: f.startswith("mask_rcnn"), checkpoints) - checkpoints = sorted(checkpoints) - if not checkpoints: - return dir_name, None - checkpoint = os.path.join(dir_name, checkpoints[-1]) - return dir_name, checkpoint - - def load_weights(self, filepath, by_name=False, exclude=None): - """Modified version of the correspoding Keras function with - the addition of multi-GPU support and the ability to exclude - some layers from loading. - exlude: list of layer names to excluce - """ - import h5py - try: - from keras.engine import saving - except ImportError: - # Keras before 2.2 used the 'topology' namespace. - from keras.engine import topology as saving - - if exclude: - by_name = True - - if h5py is None: - raise ImportError('`load_weights` requires h5py.') - f = h5py.File(filepath, mode='r') - if 'layer_names' not in f.attrs and 'model_weights' in f: - f = f['model_weights'] - - # In multi-GPU training, we wrap the model. Get layers - # of the inner model because they have the weights. - keras_model = self.keras_model - layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ - else keras_model.layers - - # Exclude some layers - if exclude: - layers = filter(lambda l: l.name not in exclude, layers) - - if by_name: - saving.load_weights_from_hdf5_group_by_name(f, layers) - else: - saving.load_weights_from_hdf5_group(f, layers) - if hasattr(f, 'close'): - f.close() - - # Update the log directory - self.set_log_dir(filepath) - - def get_imagenet_weights(self): - """Downloads ImageNet trained weights from Keras. - Returns path to weights file. 
- """ - from keras.utils.data_utils import get_file - TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/'\ - 'releases/download/v0.2/'\ - 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' - weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='a268eb855778b3df3c7506639542a6af') - return weights_path - - def compile(self, learning_rate, momentum): - """Gets the model ready for training. Adds losses, regularization, and - metrics. Then calls the Keras compile() function. - """ - # Optimizer object - optimizer = keras.optimizers.SGD(lr=learning_rate, momentum=momentum, - clipnorm=5.0) - # Add Losses - # First, clear previously set losses to avoid duplication - self.keras_model._losses = [] - self.keras_model._per_input_losses = {} - loss_names = ["rpn_class_loss", "rpn_bbox_loss", - "mrcnn_class_loss", "mrcnn_bbox_loss", "mrcnn_mask_loss"] - for name in loss_names: - layer = self.keras_model.get_layer(name) - if layer.output in self.keras_model.losses: - continue - self.keras_model.add_loss( - tf.reduce_mean(layer.output, keep_dims=True)) - - # Add L2 Regularization - # Skip gamma and beta weights of batch normalization layers. - reg_losses = [keras.regularizers.l2(self.config.WEIGHT_DECAY)(w) / tf.cast(tf.size(w), tf.float32) - for w in self.keras_model.trainable_weights - if 'gamma' not in w.name and 'beta' not in w.name] - self.keras_model.add_loss(tf.add_n(reg_losses)) - - # Compile - self.keras_model.compile(optimizer=optimizer, loss=[ - None] * len(self.keras_model.outputs)) - - # Add metrics for losses - for name in loss_names: - if name in self.keras_model.metrics_names: - continue - layer = self.keras_model.get_layer(name) - self.keras_model.metrics_names.append(name) - self.keras_model.metrics_tensors.append(tf.reduce_mean( - layer.output, keep_dims=True)) - - def set_trainable(self, layer_regex, keras_model=None, indent=0, verbose=1): - """Sets model layers as trainable if their names match - the given regular expression. - """ - # Print message on the first call (but not on recursive calls) - if verbose > 0 and keras_model is None: - log("Selecting layers to train") - - keras_model = keras_model or self.keras_model - - # In multi-GPU training, we wrap the model. Get layers - # of the inner model because they have the weights. - layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ - else keras_model.layers - - for layer in layers: - # Is the layer a model? - if layer.__class__.__name__ == 'Model': - print("In model: ", layer.name) - self.set_trainable( - layer_regex, keras_model=layer, indent=indent + 4) - continue - - if not layer.weights: - continue - # Is it trainable? - trainable = bool(re.fullmatch(layer_regex, layer.name)) - # Update layer. If layer is a container, update inner layer. - if layer.__class__.__name__ == 'TimeDistributed': - layer.layer.trainable = trainable - else: - layer.trainable = trainable - # Print trainble layer names - if trainable and verbose > 0: - log("{}{:20} ({})".format(" " * indent, layer.name, - layer.__class__.__name__)) - - def set_log_dir(self, model_path=None): - """Sets the model log directory and epoch counter. - - model_path: If None, or a format different from what this code uses - then set a new log directory and start epochs from 0. Otherwise, - extract the log directory and the epoch counter from the file - name. 
- """ - # Set date and epoch counter as if starting a new model - self.epoch = 0 - now = datetime.datetime.now() - - # If we have a model path with date and epochs use them - if model_path: - # Continue from we left of. Get epoch and date from the file name - # A sample model path might look like: - # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5 - regex = r".*/\w+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/mask\_rcnn\_\w+(\d{4})\.h5" - m = re.match(regex, model_path) - if m: - now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)), - int(m.group(4)), int(m.group(5))) - self.epoch = int(m.group(6)) + 1 - - # Directory for training logs - self.log_dir = os.path.join(self.model_dir, "{}{:%Y%m%dT%H%M}".format( - self.config.NAME.lower(), now)) - - # Path to save after each epoch. Include placeholders that get filled by Keras. - self.checkpoint_path = os.path.join(self.log_dir, "mask_rcnn_{}_*epoch*.h5".format( - self.config.NAME.lower())) - self.checkpoint_path = self.checkpoint_path.replace( - "*epoch*", "{epoch:04d}") - - def train(self, train_dataset, val_dataset, learning_rate, epochs, layers, warmup): - """Train the model. - train_dataset, val_dataset: Training and validation Dataset objects. - learning_rate: The learning rate to train with - epochs: Number of training epochs. Note that previous training epochs - are considered to be done alreay, so this actually determines - the epochs to train in total rather than in this particaular - call. - layers: Allows selecting wich layers to train. It can be: - - A regular expression to match layer names to train - - One of these predefined values: - heaads: The RPN, classifier and mask heads of the network - all: All the layers - 3+: Train Resnet stage 3 and up - 4+: Train Resnet stage 4 and up - 5+: Train Resnet stage 5 and up - """ - assert self.mode == "training", "Create model in training mode." - - # Pre-defined layer regular expressions - layer_regex = { - # all layers but the backbone - "heads": r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", - # From a specific Resnet stage and up - "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", - "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", - "5+": r"(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", - # All layers - "all": ".*", - } - if layers in layer_regex.keys(): - layers = layer_regex[layers] - - # Data generators - train_generator = data_generator(train_dataset, self.config, shuffle=True, - batch_size=self.config.BATCH_SIZE) - val_generator = data_generator(val_dataset, self.config, shuffle=True, - batch_size=self.config.BATCH_SIZE, - augment=False) - - # Callbacks - callbacks = [ - keras.callbacks.TensorBoard(log_dir=self.log_dir, - histogram_freq=0, write_graph=True, write_images=False), - keras.callbacks.ModelCheckpoint(self.checkpoint_path, - verbose=0, save_weights_only=True), - TimeHistory(warmup, self.config.BATCH_SIZE), - ] - - # Train - log("\nStarting at epoch {}. LR={}\n".format(self.epoch, learning_rate)) - log("Checkpoint Path: {}".format(self.checkpoint_path)) - self.set_trainable(layers) - self.compile(learning_rate, self.config.LEARNING_MOMENTUM) - - # Work-around for Windows: Keras fails on Windows when using - # multiprocessing workers. 
See discussion here: - # https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009 - if os.name is 'nt': - workers = 0 - else: - workers = max(self.config.BATCH_SIZE // 2, 2) - - self.keras_model.fit_generator( - train_generator, - initial_epoch=self.epoch, - epochs=epochs, - steps_per_epoch=self.config.STEPS_PER_EPOCH, - callbacks=callbacks, - validation_data=next(val_generator), - validation_steps=self.config.VALIDATION_STEPS, - max_queue_size=100, - workers=workers, - use_multiprocessing=True, - ) - self.epoch = max(self.epoch, epochs) - - def mold_inputs(self, images): - """Takes a list of images and modifies them to the format expected - as an input to the neural network. - images: List of image matricies [height,width,depth]. Images can have - different sizes. - - Returns 3 Numpy matricies: - molded_images: [N, h, w, 3]. Images resized and normalized. - image_metas: [N, length of meta data]. Details about each image. - windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the - original image (padding excluded). - """ - molded_images = [] - image_metas = [] - windows = [] - for image in images: - # Resize image to fit the model expected size - # TODO: move resizing to mold_image() - molded_image, window, scale, padding = utils.resize_image( - image, - min_dim=self.config.IMAGE_MIN_DIM, - max_dim=self.config.IMAGE_MAX_DIM, - padding=self.config.IMAGE_PADDING) - molded_image = mold_image(molded_image, self.config) - # Build image_meta - image_meta = compose_image_meta( - 0, image.shape, window, - np.zeros([self.config.NUM_CLASSES], dtype=np.int32)) - # Append - molded_images.append(molded_image) - windows.append(window) - image_metas.append(image_meta) - # Pack into arrays - molded_images = np.stack(molded_images) - image_metas = np.stack(image_metas) - windows = np.stack(windows) - return molded_images, image_metas, windows - - def unmold_detections(self, detections, mrcnn_mask, image_shape, window): - """Reformats the detections of one image from the format of the neural - network output to a format suitable for use in the rest of the - application. - - detections: [N, (y1, x1, y2, x2, class_id, score)] - mrcnn_mask: [N, height, width, num_classes] - image_shape: [height, width, depth] Original size of the image before resizing - window: [y1, x1, y2, x2] Box in the image where the real image is - excluding the padding. - - Returns: - boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels - class_ids: [N] Integer class IDs for each bounding box - scores: [N] Float probability scores of the class_id - masks: [height, width, num_instances] Instance masks - """ - # How many detections do we have? - # Detections array is padded with zeros. Find the first class_id == 0. - zero_ix = np.where(detections[:, 4] == 0)[0] - N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0] - - # Extract boxes, class_ids, scores, and class-specific masks - boxes = detections[:N, :4] - class_ids = detections[:N, 4].astype(np.int32) - scores = detections[:N, 5] - masks = mrcnn_mask[np.arange(N), :, :, class_ids] - - # Compute scale and shift to translate coordinates to image domain. 
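# --- Illustration (not part of the original module): a standalone NumPy-only
# sketch, with made-up numbers, of the scale/shift step performed below.
# Boxes predicted in the padded/resized "molded" image are mapped back to
# original-image pixels using the window returned by mold_inputs(). The helper
# name boxes_to_image_domain is illustration-only.
import numpy as np

def boxes_to_image_domain(boxes, window, image_shape):
    # boxes: [N, (y1, x1, y2, x2)] in molded-image pixels
    # window: (y1, x1, y2, x2) region of the molded image holding real pixels
    # image_shape: (height, width) of the original image
    h_scale = image_shape[0] / (window[2] - window[0])
    w_scale = image_shape[1] / (window[3] - window[1])
    scale = min(h_scale, w_scale)
    shift = np.array([window[0], window[1], window[0], window[1]])
    return ((boxes - shift) * scale).astype(np.int32)

# Example: a 1024x1024 molded image whose real content occupies rows 128..896
# because the original image was 600x800 before resizing and padding.
boxes = np.array([[200.0, 100.0, 400.0, 300.0]])
print(boxes_to_image_domain(boxes, (128, 0, 896, 1024), (600, 800)))  # [[ 56  78 212 234]]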
- h_scale = image_shape[0] / (window[2] - window[0]) - w_scale = image_shape[1] / (window[3] - window[1]) - scale = min(h_scale, w_scale) - shift = window[:2] # y, x - scales = np.array([scale, scale, scale, scale]) - shifts = np.array([shift[0], shift[1], shift[0], shift[1]]) - - # Translate bounding boxes to image domain - boxes = np.multiply(boxes - shifts, scales).astype(np.int32) - - # Filter out detections with zero area. Often only happens in early - # stages of training when the network weights are still a bit random. - exclude_ix = np.where( - (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0] - if exclude_ix.shape[0] > 0: - boxes = np.delete(boxes, exclude_ix, axis=0) - class_ids = np.delete(class_ids, exclude_ix, axis=0) - scores = np.delete(scores, exclude_ix, axis=0) - masks = np.delete(masks, exclude_ix, axis=0) - N = class_ids.shape[0] - - # Resize masks to original image size and set boundary threshold. - full_masks = [] - for i in range(N): - # Convert neural network mask to full size mask - full_mask = utils.unmold_mask(masks[i], boxes[i], image_shape) - full_masks.append(full_mask) - full_masks = np.stack(full_masks, axis=-1)\ - if full_masks else np.empty((0,) + masks.shape[1:3]) - - return boxes, class_ids, scores, full_masks - - def detect(self, images, verbose=0): - """Runs the detection pipeline. - - images: List of images, potentially of different sizes. - - Returns a list of dicts, one dict per image. The dict contains: - rois: [N, (y1, x1, y2, x2)] detection bounding boxes - class_ids: [N] int class IDs - scores: [N] float probability scores for the class IDs - masks: [H, W, N] instance binary masks - """ - assert self.mode == "inference", "Create model in inference mode." - assert len( - images) == self.config.BATCH_SIZE, "len(images) must be equal to BATCH_SIZE" - - if verbose: - log("Processing {} images".format(len(images))) - for image in images: - log("image", image) - # Mold inputs to format expected by the neural network - molded_images, image_metas, windows = self.mold_inputs(images) - if verbose: - log("molded_images", molded_images) - log("image_metas", image_metas) - # Run object detection - detections, mrcnn_class, mrcnn_bbox, mrcnn_mask, \ - rois, rpn_class, rpn_bbox =\ - self.keras_model.predict([molded_images, image_metas], verbose=0) - # Process detections - results = [] - for i, image in enumerate(images): - final_rois, final_class_ids, final_scores, final_masks =\ - self.unmold_detections(detections[i], mrcnn_mask[i], - image.shape, windows[i]) - results.append({ - "rois": final_rois, - "class_ids": final_class_ids, - "scores": final_scores, - "masks": final_masks, - }) - return results - - def ancestor(self, tensor, name, checked=None): - """Finds the ancestor of a TF tensor in the computation graph. - tensor: TensorFlow symbolic tensor. - name: Name of ancestor tensor to find - checked: For internal use. A list of tensors that were already - searched to avoid loops in traversing the graph. 
- """ - checked = checked if checked is not None else [] - # Put a limit on how deep we go to avoid very long loops - if len(checked) > 500: - return None - # Convert name to a regex and allow matching a number prefix - # because Keras adds them automatically - if isinstance(name, str): - name = re.compile(name.replace("/", r"(\_\d+)*/")) - - parents = tensor.op.inputs - for p in parents: - if p in checked: - continue - if bool(re.fullmatch(name, p.name)): - return p - checked.append(p) - a = self.ancestor(p, name, checked) - if a is not None: - return a - return None - - def find_trainable_layer(self, layer): - """If a layer is encapsulated by another layer, this function - digs through the encapsulation and returns the layer that holds - the weights. - """ - if layer.__class__.__name__ == 'TimeDistributed': - return self.find_trainable_layer(layer.layer) - return layer - - def get_trainable_layers(self): - """Returns a list of layers that have weights.""" - layers = [] - # Loop through all layers - for l in self.keras_model.layers: - # If layer is a wrapper, find inner trainable layer - l = self.find_trainable_layer(l) - # Include layer if it has weights - if l.get_weights(): - layers.append(l) - return layers - - def run_graph(self, images, outputs): - """Runs a sub-set of the computation graph that computes the given - outputs. - - outputs: List of tuples (name, tensor) to compute. The tensors are - symbolic TensorFlow tensors and the names are for easy tracking. - - Returns an ordered dict of results. Keys are the names received in the - input and values are Numpy arrays. - """ - model = self.keras_model - - # Organize desired outputs into an ordered dict - outputs = OrderedDict(outputs) - for o in outputs.values(): - assert o is not None - - # Build a Keras function to run parts of the computation graph - inputs = model.inputs - if model.uses_learning_phase and not isinstance(K.learning_phase(), int): - inputs += [K.learning_phase()] - kf = K.function(model.inputs, list(outputs.values())) - - # Run inference - molded_images, image_metas, windows = self.mold_inputs(images) - # TODO: support training mode? - # if TEST_MODE == "training": - # model_in = [molded_images, image_metas, - # target_rpn_match, target_rpn_bbox, - # gt_boxes, gt_masks] - # if not config.USE_RPN_ROIS: - # model_in.append(target_rois) - # if model.uses_learning_phase and not isinstance(K.learning_phase(), int): - # model_in.append(1.) - # outputs_np = kf(model_in) - # else: - - model_in = [molded_images, image_metas] - if model.uses_learning_phase and not isinstance(K.learning_phase(), int): - model_in.append(0.) - outputs_np = kf(model_in) - - # Pack the generated Numpy arrays into a a dict and log the results. - outputs_np = OrderedDict([(k, v) - for k, v in zip(outputs.keys(), outputs_np)]) - for k, v in outputs_np.items(): - log(k, v) - return outputs_np - - -############################################################ -# Data Formatting -############################################################ - -def compose_image_meta(image_id, image_shape, window, active_class_ids): - """Takes attributes of an image and puts them in one 1D array. - - image_id: An int ID of the image. Useful for debugging. - image_shape: [height, width, channels] - window: (y1, x1, y2, x2) in pixels. The area of the image where the real - image is (excluding the padding) - active_class_ids: List of class_ids available in the dataset from which - the image came. 
Useful if training on images from multiple datasets - where not all classes are present in all datasets. - """ - meta = np.array( - [image_id] + # size=1 - list(image_shape) + # size=3 - list(window) + # size=4 (y1, x1, y2, x2) in image cooredinates - list(active_class_ids) # size=num_classes - ) - return meta - - -def parse_image_meta_graph(meta): - """Parses a tensor that contains image attributes to its components. - See compose_image_meta() for more details. - - meta: [batch, meta length] where meta length depends on NUM_CLASSES - """ - image_id = meta[:, 0] - image_shape = meta[:, 1:4] - window = meta[:, 4:8] # (y1, x1, y2, x2) window of image in in pixels - active_class_ids = meta[:, 8:] - return [image_id, image_shape, window, active_class_ids] - - -def mold_image(images, config): - """Takes RGB images with 0-255 values and subtraces - the mean pixel and converts it to float. Expects image - colors in RGB order. - """ - return images.astype(np.float32) - config.MEAN_PIXEL - - -def unmold_image(normalized_images, config): - """Takes a image normalized with mold() and returns the original.""" - return (normalized_images + config.MEAN_PIXEL).astype(np.uint8) - - -############################################################ -# Miscellenous Graph Functions -############################################################ - -def trim_zeros_graph(boxes, name=None): - """Often boxes are represented with matricies of shape [N, 4] and - are padded with zeros. This removes zero boxes. - - boxes: [N, 4] matrix of boxes. - non_zeros: [N] a 1D boolean mask identifying the rows to keep - """ - non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool) - boxes = tf.boolean_mask(boxes, non_zeros, name=name) - return boxes, non_zeros - - -def batch_pack_graph(x, counts, num_rows): - """Picks different number of values from each row - in x depending on the values in counts. - """ - outputs = [] - for i in range(num_rows): - outputs.append(x[i, :counts[i]]) - return tf.concat(outputs, axis=0) diff --git a/models/image_segmentation/tensorflow/maskrcnn/utils.py b/models/image_segmentation/tensorflow/maskrcnn/utils.py deleted file mode 100644 index f85c4dbd3..000000000 --- a/models/image_segmentation/tensorflow/maskrcnn/utils.py +++ /dev/null @@ -1,749 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -""" -Mask R-CNN -Common utility functions and classes. - -Copyright (c) 2017 Matterport, Inc. 
-Licensed under the MIT License (see LICENSE for details) -Written by Waleed Abdulla -""" - -import sys -import os -import math -import random -import numpy as np -import tensorflow as tf -import scipy.misc -import skimage.color -import skimage.io -import urllib.request -import shutil - -# URL from which to download the latest COCO trained weights -COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" - - -############################################################ -# Bounding Boxes -############################################################ - -def extract_bboxes(mask): - """Compute bounding boxes from masks. - mask: [height, width, num_instances]. Mask pixels are either 1 or 0. - - Returns: bbox array [num_instances, (y1, x1, y2, x2)]. - """ - boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) - for i in range(mask.shape[-1]): - m = mask[:, :, i] - # Bounding box. - horizontal_indicies = np.where(np.any(m, axis=0))[0] - vertical_indicies = np.where(np.any(m, axis=1))[0] - if horizontal_indicies.shape[0]: - x1, x2 = horizontal_indicies[[0, -1]] - y1, y2 = vertical_indicies[[0, -1]] - # x2 and y2 should not be part of the box. Increment by 1. - x2 += 1 - y2 += 1 - else: - # No mask for this instance. Might happen due to - # resizing or cropping. Set bbox to zeros - x1, x2, y1, y2 = 0, 0, 0, 0 - boxes[i] = np.array([y1, x1, y2, x2]) - return boxes.astype(np.int32) - - -def compute_iou(box, boxes, box_area, boxes_area): - """Calculates IoU of the given box with the array of the given boxes. - box: 1D vector [y1, x1, y2, x2] - boxes: [boxes_count, (y1, x1, y2, x2)] - box_area: float. the area of 'box' - boxes_area: array of length boxes_count. - - Note: the areas are passed in rather than calculated here for - efficency. Calculate once in the caller to avoid duplicate work. - """ - # Calculate intersection areas - y1 = np.maximum(box[0], boxes[:, 0]) - y2 = np.minimum(box[2], boxes[:, 2]) - x1 = np.maximum(box[1], boxes[:, 1]) - x2 = np.minimum(box[3], boxes[:, 3]) - intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) - union = box_area + boxes_area[:] - intersection[:] - iou = intersection / union - return iou - - -def compute_overlaps(boxes1, boxes2): - """Computes IoU overlaps between two sets of boxes. - boxes1, boxes2: [N, (y1, x1, y2, x2)]. - - For better performance, pass the largest set first and the smaller second. - """ - # Areas of anchors and GT boxes - area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) - area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) - - # Compute overlaps to generate matrix [boxes1 count, boxes2 count] - # Each cell contains the IoU value. - overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) - for i in range(overlaps.shape[1]): - box2 = boxes2[i] - overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) - return overlaps - - -def compute_overlaps_masks(masks1, masks2): - '''Computes IoU overlaps between two sets of masks. 
- masks1, masks2: [Height, Width, instances] - ''' - # flatten masks - masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) - masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) - area1 = np.sum(masks1, axis=0) - area2 = np.sum(masks2, axis=0) - - # intersections and union - intersections = np.dot(masks1.T, masks2) - union = area1[:, None] + area2[None, :] - intersections - overlaps = intersections / union - - return overlaps - - -def non_max_suppression(boxes, scores, threshold): - """Performs non-maximum supression and returns indicies of kept boxes. - boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. - scores: 1-D array of box scores. - threshold: Float. IoU threshold to use for filtering. - """ - assert boxes.shape[0] > 0 - if boxes.dtype.kind != "f": - boxes = boxes.astype(np.float32) - - # Compute box areas - y1 = boxes[:, 0] - x1 = boxes[:, 1] - y2 = boxes[:, 2] - x2 = boxes[:, 3] - area = (y2 - y1) * (x2 - x1) - - # Get indicies of boxes sorted by scores (highest first) - ixs = scores.argsort()[::-1] - - pick = [] - while len(ixs) > 0: - # Pick top box and add its index to the list - i = ixs[0] - pick.append(i) - # Compute IoU of the picked box with the rest - iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) - # Identify boxes with IoU over the threshold. This - # returns indicies into ixs[1:], so add 1 to get - # indicies into ixs. - remove_ixs = np.where(iou > threshold)[0] + 1 - # Remove indicies of the picked and overlapped boxes. - ixs = np.delete(ixs, remove_ixs) - ixs = np.delete(ixs, 0) - return np.array(pick, dtype=np.int32) - - -def apply_box_deltas(boxes, deltas): - """Applies the given deltas to the given boxes. - boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. - deltas: [N, (dy, dx, log(dh), log(dw))] - """ - boxes = boxes.astype(np.float32) - # Convert to y, x, h, w - height = boxes[:, 2] - boxes[:, 0] - width = boxes[:, 3] - boxes[:, 1] - center_y = boxes[:, 0] + 0.5 * height - center_x = boxes[:, 1] + 0.5 * width - # Apply deltas - center_y += deltas[:, 0] * height - center_x += deltas[:, 1] * width - height *= np.exp(deltas[:, 2]) - width *= np.exp(deltas[:, 3]) - # Convert back to y1, x1, y2, x2 - y1 = center_y - 0.5 * height - x1 = center_x - 0.5 * width - y2 = y1 + height - x2 = x1 + width - return np.stack([y1, x1, y2, x2], axis=1) - - -def box_refinement_graph(box, gt_box): - """Compute refinement needed to transform box to gt_box. - box and gt_box are [N, (y1, x1, y2, x2)] - """ - box = tf.cast(box, tf.float32) - gt_box = tf.cast(gt_box, tf.float32) - - height = box[:, 2] - box[:, 0] - width = box[:, 3] - box[:, 1] - center_y = box[:, 0] + 0.5 * height - center_x = box[:, 1] + 0.5 * width - - gt_height = gt_box[:, 2] - gt_box[:, 0] - gt_width = gt_box[:, 3] - gt_box[:, 1] - gt_center_y = gt_box[:, 0] + 0.5 * gt_height - gt_center_x = gt_box[:, 1] + 0.5 * gt_width - - dy = (gt_center_y - center_y) / height - dx = (gt_center_x - center_x) / width - dh = tf.log(gt_height / height) - dw = tf.log(gt_width / width) - - result = tf.stack([dy, dx, dh, dw], axis=1) - return result - - -def box_refinement(box, gt_box): - """Compute refinement needed to transform box to gt_box. - box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is - assumed to be outside the box. 
- """ - box = box.astype(np.float32) - gt_box = gt_box.astype(np.float32) - - height = box[:, 2] - box[:, 0] - width = box[:, 3] - box[:, 1] - center_y = box[:, 0] + 0.5 * height - center_x = box[:, 1] + 0.5 * width - - gt_height = gt_box[:, 2] - gt_box[:, 0] - gt_width = gt_box[:, 3] - gt_box[:, 1] - gt_center_y = gt_box[:, 0] + 0.5 * gt_height - gt_center_x = gt_box[:, 1] + 0.5 * gt_width - - dy = (gt_center_y - center_y) / height - dx = (gt_center_x - center_x) / width - dh = np.log(gt_height / height) - dw = np.log(gt_width / width) - - return np.stack([dy, dx, dh, dw], axis=1) - - -############################################################ -# Dataset -############################################################ - -class Dataset(object): - """The base class for dataset classes. - To use it, create a new class that adds functions specific to the dataset - you want to use. For example: - - class CatsAndDogsDataset(Dataset): - def load_cats_and_dogs(self): - ... - def load_mask(self, image_id): - ... - def image_reference(self, image_id): - ... - - See COCODataset and ShapesDataset as examples. - """ - - def __init__(self, class_map=None): - self._image_ids = [] - self.image_info = [] - # Background is always the first class - self.class_info = [{"source": "", "id": 0, "name": "BG"}] - self.source_class_ids = {} - - def add_class(self, source, class_id, class_name): - assert "." not in source, "Source name cannot contain a dot" - # Does the class exist already? - for info in self.class_info: - if info['source'] == source and info["id"] == class_id: - # source.class_id combination already available, skip - return - # Add the class - self.class_info.append({ - "source": source, - "id": class_id, - "name": class_name, - }) - - def add_image(self, source, image_id, path, **kwargs): - image_info = { - "id": image_id, - "source": source, - "path": path, - } - image_info.update(kwargs) - self.image_info.append(image_info) - - def image_reference(self, image_id): - """Return a link to the image in its source Website or details about - the image that help looking it up or debugging it. - - Override for your dataset, but pass to this function - if you encounter images not in your dataset. - """ - return "" - - def prepare(self, class_map=None): - """Prepares the Dataset class for use. - - TODO: class map is not supported yet. When done, it should handle mapping - classes from different datasets to the same class ID. - """ - - def clean_name(name): - """Returns a shorter version of object names for cleaner display.""" - return ",".join(name.split(",")[:1]) - - # Build (or rebuild) everything else from the info dicts. 
- self.num_classes = len(self.class_info) - self.class_ids = np.arange(self.num_classes) - self.class_names = [clean_name(c["name"]) for c in self.class_info] - self.num_images = len(self.image_info) - self._image_ids = np.arange(self.num_images) - - self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id - for info, id in zip(self.class_info, self.class_ids)} - - # Map sources to class_ids they support - self.sources = list(set([i['source'] for i in self.class_info])) - self.source_class_ids = {} - # Loop over datasets - for source in self.sources: - self.source_class_ids[source] = [] - # Find classes that belong to this dataset - for i, info in enumerate(self.class_info): - # Include BG class in all datasets - if i == 0 or source == info['source']: - self.source_class_ids[source].append(i) - - def map_source_class_id(self, source_class_id): - """Takes a source class ID and returns the int class ID assigned to it. - - For example: - dataset.map_source_class_id("coco.12") -> 23 - """ - return self.class_from_source_map[source_class_id] - - def get_source_class_id(self, class_id, source): - """Map an internal class ID to the corresponding class ID in the source dataset.""" - info = self.class_info[class_id] - assert info['source'] == source - return info['id'] - - def append_data(self, class_info, image_info): - self.external_to_class_id = {} - for i, c in enumerate(self.class_info): - for ds, id in c["map"]: - self.external_to_class_id[ds + str(id)] = i - - # Map external image IDs to internal ones. - self.external_to_image_id = {} - for i, info in enumerate(self.image_info): - self.external_to_image_id[info["ds"] + str(info["id"])] = i - - @property - def image_ids(self): - return self._image_ids - - def source_image_link(self, image_id): - """Returns the path or URL to the image. - Override this to return a URL to the image if it's availble online for easy - debugging. - """ - return self.image_info[image_id]["path"] - - def load_image(self, image_id): - """Load the specified image and return a [H,W,3] Numpy array. - """ - # Load image - image = skimage.io.imread(self.image_info[image_id]['path']) - # If grayscale. Convert to RGB for consistency. - if image.ndim != 3: - image = skimage.color.gray2rgb(image) - return image - - def load_mask(self, image_id): - """Load instance masks for the given image. - - Different datasets use different ways to store masks. Override this - method to load instance masks and return them in the form of am - array of binary masks of shape [height, width, instances]. - - Returns: - masks: A bool array of shape [height, width, instance count] with - a binary mask per instance. - class_ids: a 1D array of class IDs of the instance masks. - """ - # Override this function to load a mask from your dataset. - # Otherwise, it returns an empty mask. - mask = np.empty([0, 0, 0]) - class_ids = np.empty([0], np.int32) - return mask, class_ids - - -def resize_image(image, min_dim=None, max_dim=None, padding=False): - """ - Resizes an image keeping the aspect ratio. - - min_dim: if provided, resizes the image such that it's smaller - dimension == min_dim - max_dim: if provided, ensures that the image longest side doesn't - exceed this value. - padding: If true, pads image with zeros so it's size is max_dim x max_dim - - Returns: - image: the resized image - window: (y1, x1, y2, x2). If max_dim is provided, padding might - be inserted in the returned image. 
If so, this window is the - coordinates of the image part of the full image (excluding - the padding). The x2, y2 pixels are not included. - scale: The scale factor used to resize the image - padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] - """ - # Default window (y1, x1, y2, x2) and default scale == 1. - h, w = image.shape[:2] - window = (0, 0, h, w) - scale = 1 - - # Scale? - if min_dim: - # Scale up but not down - scale = max(1, min_dim / min(h, w)) - # Does it exceed max dim? - if max_dim: - image_max = max(h, w) - if round(image_max * scale) > max_dim: - scale = max_dim / image_max - # Resize image and mask - if scale != 1: - image = scipy.misc.imresize( - image, (round(h * scale), round(w * scale))) - # Need padding? - if padding: - # Get new height and width - h, w = image.shape[:2] - top_pad = (max_dim - h) // 2 - bottom_pad = max_dim - h - top_pad - left_pad = (max_dim - w) // 2 - right_pad = max_dim - w - left_pad - padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] - image = np.pad(image, padding, mode='constant', constant_values=0) - window = (top_pad, left_pad, h + top_pad, w + left_pad) - return image, window, scale, padding - - -def resize_mask(mask, scale, padding): - """Resizes a mask using the given scale and padding. - Typically, you get the scale and padding from resize_image() to - ensure both, the image and the mask, are resized consistently. - - scale: mask scaling factor - padding: Padding to add to the mask in the form - [(top, bottom), (left, right), (0, 0)] - """ - h, w = mask.shape[:2] - mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) - mask = np.pad(mask, padding, mode='constant', constant_values=0) - return mask - - -def minimize_mask(bbox, mask, mini_shape): - """Resize masks to a smaller version to cut memory load. - Mini-masks can then resized back to image scale using expand_masks() - - See inspect_data.ipynb notebook for more details. - """ - mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) - for i in range(mask.shape[-1]): - m = mask[:, :, i] - y1, x1, y2, x2 = bbox[i][:4] - m = m[y1:y2, x1:x2] - if m.size == 0: - raise Exception("Invalid bounding box with area of zero") - m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear') - mini_mask[:, :, i] = np.where(m >= 128, 1, 0) - return mini_mask - - -def expand_mask(bbox, mini_mask, image_shape): - """Resizes mini masks back to image size. Reverses the change - of minimize_mask(). - - See inspect_data.ipynb notebook for more details. - """ - mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) - for i in range(mask.shape[-1]): - m = mini_mask[:, :, i] - y1, x1, y2, x2 = bbox[i][:4] - h = y2 - y1 - w = x2 - x1 - m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear') - mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0) - return mask - - -# TODO: Build and use this function to reduce code duplication -def mold_mask(mask, config): - pass - - -def unmold_mask(mask, bbox, image_shape): - """Converts a mask generated by the neural network into a format similar - to it's original shape. - mask: [height, width] of type float. A small, typically 28x28 mask. - bbox: [y1, x1, y2, x2]. The box to fit the mask in. - - Returns a binary mask with the same size as the original image. 
- """ - threshold = 0.5 - y1, x1, y2, x2 = bbox - mask = scipy.misc.imresize( - mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0 - mask = np.where(mask >= threshold, 1, 0).astype(np.uint8) - - # Put the mask in the right location. - full_mask = np.zeros(image_shape[:2], dtype=np.uint8) - full_mask[y1:y2, x1:x2] = mask - return full_mask - - -############################################################ -# Anchors -############################################################ - -def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): - """ - scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] - ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] - shape: [height, width] spatial shape of the feature map over which - to generate anchors. - feature_stride: Stride of the feature map relative to the image in pixels. - anchor_stride: Stride of anchors on the feature map. For example, if the - value is 2 then generate anchors for every other feature map pixel. - """ - # Get all combinations of scales and ratios - scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) - scales = scales.flatten() - ratios = ratios.flatten() - - # Enumerate heights and widths from scales and ratios - heights = scales / np.sqrt(ratios) - widths = scales * np.sqrt(ratios) - - # Enumerate shifts in feature space - shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride - shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride - shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) - - # Enumerate combinations of shifts, widths, and heights - box_widths, box_centers_x = np.meshgrid(widths, shifts_x) - box_heights, box_centers_y = np.meshgrid(heights, shifts_y) - - # Reshape to get a list of (y, x) and a list of (h, w) - box_centers = np.stack( - [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) - box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) - - # Convert to corner coordinates (y1, x1, y2, x2) - boxes = np.concatenate([box_centers - 0.5 * box_sizes, - box_centers + 0.5 * box_sizes], axis=1) - return boxes - - -def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, - anchor_stride): - """Generate anchors at different levels of a feature pyramid. Each scale - is associated with a level of the pyramid, but each ratio is used in - all levels of the pyramid. - - Returns: - anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted - with the same order of the given scales. So, anchors of scale[0] come - first, then anchors of scale[1], and so on. - """ - # Anchors - # [anchor_count, (y1, x1, y2, x2)] - anchors = [] - for i in range(len(scales)): - anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], - feature_strides[i], anchor_stride)) - return np.concatenate(anchors, axis=0) - - -############################################################ -# Miscellaneous -############################################################ - -def trim_zeros(x): - """It's common to have tensors larger than the available data and - pad with zeros. This function removes rows that are all zeros. - - x: [rows, columns]. - """ - assert len(x.shape) == 2 - return x[~np.all(x == 0, axis=1)] - - -def compute_ap(gt_boxes, gt_class_ids, gt_masks, - pred_boxes, pred_class_ids, pred_scores, pred_masks, - iou_threshold=0.5): - """Compute Average Precision at a set IoU threshold (default 0.5). 
- - Returns: - mAP: Mean Average Precision - precisions: List of precisions at different class score thresholds. - recalls: List of recall values at different class score thresholds. - overlaps: [pred_boxes, gt_boxes] IoU overlaps. - """ - # Trim zero padding and sort predictions by score from high to low - # TODO: cleaner to do zero unpadding upstream - gt_boxes = trim_zeros(gt_boxes) - gt_masks = gt_masks[..., :gt_boxes.shape[0]] - pred_boxes = trim_zeros(pred_boxes) - pred_scores = pred_scores[:pred_boxes.shape[0]] - indices = np.argsort(pred_scores)[::-1] - pred_boxes = pred_boxes[indices] - pred_class_ids = pred_class_ids[indices] - pred_scores = pred_scores[indices] - pred_masks = pred_masks[..., indices] - - # Compute IoU overlaps [pred_masks, gt_masks] - overlaps = compute_overlaps_masks(pred_masks, gt_masks) - - # Loop through ground truth boxes and find matching predictions - match_count = 0 - pred_match = np.zeros([pred_boxes.shape[0]]) - gt_match = np.zeros([gt_boxes.shape[0]]) - for i in range(len(pred_boxes)): - # Find best matching ground truth box - sorted_ixs = np.argsort(overlaps[i])[::-1] - for j in sorted_ixs: - # If ground truth box is already matched, go to next one - if gt_match[j] == 1: - continue - # If we reach IoU smaller than the threshold, end the loop - iou = overlaps[i, j] - if iou < iou_threshold: - break - # Do we have a match? - if pred_class_ids[i] == gt_class_ids[j]: - match_count += 1 - gt_match[j] = 1 - pred_match[i] = 1 - break - - # Compute precision and recall at each prediction box step - precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1) - recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match) - - # Pad with start and end values to simplify the math - precisions = np.concatenate([[0], precisions, [0]]) - recalls = np.concatenate([[0], recalls, [1]]) - - # Ensure precision values decrease but don't increase. This way, the - # precision value at each recall threshold is the maximum it can be - # for all following recall thresholds, as specified by the VOC paper. - for i in range(len(precisions) - 2, -1, -1): - precisions[i] = np.maximum(precisions[i], precisions[i + 1]) - - # Compute mean AP over recall range - indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 - mAP = np.sum((recalls[indices] - recalls[indices - 1]) * - precisions[indices]) - - return mAP, precisions, recalls, overlaps - - -def compute_recall(pred_boxes, gt_boxes, iou): - """Compute the recall at the given IoU threshold. It's an indication - of how many GT boxes were found by the given prediction boxes. - - pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates - gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates - """ - # Measure overlaps - overlaps = compute_overlaps(pred_boxes, gt_boxes) - iou_max = np.max(overlaps, axis=1) - iou_argmax = np.argmax(overlaps, axis=1) - positive_ids = np.where(iou_max >= iou)[0] - matched_gt_boxes = iou_argmax[positive_ids] - - recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] - return recall, positive_ids - - -# ## Batch Slicing -# Some custom layers support a batch size of 1 only, and require a lot of work -# to support batches greater than 1. This function slices an input tensor -# across the batch dimension and feeds batches of size 1. Effectively, -# an easy way to support batches > 1 quickly with little code modification. -# In the long run, it's more efficient to modify the code to support large -# batches and getting rid of this function. 
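# --- Illustration (not part of the original module): a tiny NumPy-only worked
# example of the AP integration performed in compute_ap() above: pad the
# precision/recall curves, enforce a non-increasing precision envelope, then
# sum precision over the recall steps where recall changes. Values are made up.
import numpy as np

pred_match = np.array([1, 0, 1, 1, 0])   # per-prediction hit/miss, sorted by score
num_gt = 4                               # number of ground-truth boxes

precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1)
recalls = np.cumsum(pred_match).astype(np.float32) / num_gt

precisions = np.concatenate([[0], precisions, [0]])
recalls = np.concatenate([[0], recalls, [1]])
for i in range(len(precisions) - 2, -1, -1):
    precisions[i] = np.maximum(precisions[i], precisions[i + 1])

indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
mAP = np.sum((recalls[indices] - recalls[indices - 1]) * precisions[indices])
print(mAP)  # 0.625 for this example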
Consider this a temporary solution -def batch_slice(inputs, graph_fn, batch_size, names=None): - """Splits inputs into slices and feeds each slice to a copy of the given - computation graph and then combines the results. It allows you to run a - graph on a batch of inputs even if the graph is written to support one - instance only. - - inputs: list of tensors. All must have the same first dimension length - graph_fn: A function that returns a TF tensor that's part of a graph. - batch_size: number of slices to divide the data into. - names: If provided, assigns names to the resulting tensors. - """ - if not isinstance(inputs, list): - inputs = [inputs] - - outputs = [] - for i in range(batch_size): - inputs_slice = [x[i] for x in inputs] - output_slice = graph_fn(*inputs_slice) - if not isinstance(output_slice, (tuple, list)): - output_slice = [output_slice] - outputs.append(output_slice) - # Change outputs from a list of slices where each is - # a list of outputs to a list of outputs and each has - # a list of slices - outputs = list(zip(*outputs)) - - if names is None: - names = [None] * len(outputs) - - result = [tf.stack(o, axis=0, name=n) - for o, n in zip(outputs, names)] - if len(result) == 1: - result = result[0] - - return result - - -def download_trained_weights(coco_model_path, verbose=1): - """Download COCO trained weights from Releases. - - coco_model_path: local path of COCO trained weights - """ - if verbose > 0: - print("Downloading pretrained model to " + coco_model_path + " ...") - with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: - shutil.copyfileobj(resp, out) - if verbose > 0: - print("... done downloading pretrained model!") diff --git a/models/language_translation/tensorflow/gnmt/__init__.py b/models/language_translation/tensorflow/gnmt/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/language_translation/tensorflow/gnmt/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/language_translation/tensorflow/gnmt/inference/__init__.py b/models/language_translation/tensorflow/gnmt/inference/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/.gitignore b/models/language_translation/tensorflow/gnmt/inference/fp32/.gitignore deleted file mode 100644 index f4b28323a..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -bazel-bin -bazel-genfiles -bazel-out -bazel-testlogs diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/__init__.py b/models/language_translation/tensorflow/gnmt/inference/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/attention_model.py b/models/language_translation/tensorflow/gnmt/inference/fp32/attention_model.py deleted file mode 100644 index adc16eb8b..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/attention_model.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Attention-based sequence-to-sequence model with dynamic RNN support.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -import model -import model_helper - -__all__ = ["AttentionModel"] - - -class AttentionModel(model.Model): - """Sequence-to-sequence dynamic model with attention. - - This class implements a multi-layer recurrent neural network as encoder, - and an attention-based decoder. This is the same as the model described in - (Luong et al., EMNLP'2015) paper: https://arxiv.org/pdf/1508.04025v5.pdf. - This class also allows to use GRU cells in addition to LSTM cells with - support for dropout. 
- """ - - def __init__(self, - hparams, - mode, - iterator, - source_vocab_table, - target_vocab_table, - reverse_target_vocab_table=None, - scope=None, - extra_args=None): - self.has_attention = hparams.attention_architecture and hparams.attention - - # Set attention_mechanism_fn - if self.has_attention: - if extra_args and extra_args.attention_mechanism_fn: - self.attention_mechanism_fn = extra_args.attention_mechanism_fn - else: - self.attention_mechanism_fn = create_attention_mechanism - - super(AttentionModel, self).__init__( - hparams=hparams, - mode=mode, - iterator=iterator, - source_vocab_table=source_vocab_table, - target_vocab_table=target_vocab_table, - reverse_target_vocab_table=reverse_target_vocab_table, - scope=scope, - extra_args=extra_args) - - def _prepare_beam_search_decoder_inputs( - self, beam_width, memory, source_sequence_length, encoder_state): - memory = tf.contrib.seq2seq.tile_batch( - memory, multiplier=beam_width) - source_sequence_length = tf.contrib.seq2seq.tile_batch( - source_sequence_length, multiplier=beam_width) - encoder_state = tf.contrib.seq2seq.tile_batch( - encoder_state, multiplier=beam_width) - batch_size = self.batch_size * beam_width - return memory, source_sequence_length, encoder_state, batch_size - - def _build_decoder_cell(self, hparams, encoder_outputs, encoder_state, - source_sequence_length): - """Build a RNN cell with attention mechanism that can be used by decoder.""" - # No Attention - if not self.has_attention: - return super(AttentionModel, self)._build_decoder_cell( - hparams, encoder_outputs, encoder_state, source_sequence_length) - elif hparams.attention_architecture != "standard": - raise ValueError( - "Unknown attention architecture %s" % hparams.attention_architecture) - - num_units = hparams.num_units - num_layers = self.num_decoder_layers - num_residual_layers = self.num_decoder_residual_layers - infer_mode = hparams.infer_mode - - dtype = tf.float32 - - # Ensure memory is batch-major - if self.time_major: - memory = tf.transpose(encoder_outputs, [1, 0, 2]) - else: - memory = encoder_outputs - - if (self.mode == tf.contrib.learn.ModeKeys.INFER and - infer_mode == "beam_search"): - memory, source_sequence_length, encoder_state, batch_size = ( - self._prepare_beam_search_decoder_inputs( - hparams.beam_width, memory, source_sequence_length, - encoder_state)) - else: - batch_size = self.batch_size - - # Attention - attention_mechanism = self.attention_mechanism_fn( - hparams.attention, num_units, memory, source_sequence_length, self.mode) - - cell = model_helper.create_rnn_cell( - unit_type=hparams.unit_type, - num_units=num_units, - num_layers=num_layers, - num_residual_layers=num_residual_layers, - forget_bias=hparams.forget_bias, - dropout=hparams.dropout, - num_gpus=self.num_gpus, - mode=self.mode, - single_cell_fn=self.single_cell_fn) - - # Only generate alignment in greedy INFER mode. - alignment_history = (self.mode == tf.contrib.learn.ModeKeys.INFER and - infer_mode != "beam_search") - cell = tf.contrib.seq2seq.AttentionWrapper( - cell, - attention_mechanism, - attention_layer_size=num_units, - alignment_history=alignment_history, - output_attention=hparams.output_attention, - name="attention") - - # TODO(thangluong): do we need num_layers, num_gpus? 
- cell = tf.contrib.rnn.DeviceWrapper(cell, - model_helper.get_device_str( - num_layers - 1, self.num_gpus)) - - if hparams.pass_hidden_state: - decoder_initial_state = cell.zero_state(batch_size, dtype).clone( - cell_state=encoder_state) - else: - decoder_initial_state = cell.zero_state(batch_size, dtype) - - return cell, decoder_initial_state - - def _get_infer_summary(self, hparams): - if not self.has_attention or hparams.infer_mode == "beam_search": - return tf.no_op() - return _create_attention_images_summary(self.final_context_state) - - -def create_attention_mechanism(attention_option, num_units, memory, - source_sequence_length, mode): - """Create attention mechanism based on the attention_option.""" - del mode # unused - - # Mechanism - if attention_option == "luong": - attention_mechanism = tf.contrib.seq2seq.LuongAttention( - num_units, memory, memory_sequence_length=source_sequence_length) - elif attention_option == "scaled_luong": - attention_mechanism = tf.contrib.seq2seq.LuongAttention( - num_units, - memory, - memory_sequence_length=source_sequence_length, - scale=True) - elif attention_option == "bahdanau": - attention_mechanism = tf.contrib.seq2seq.BahdanauAttention( - num_units, memory, memory_sequence_length=source_sequence_length) - elif attention_option == "normed_bahdanau": - attention_mechanism = tf.contrib.seq2seq.BahdanauAttention( - num_units, - memory, - memory_sequence_length=source_sequence_length, - normalize=True) - else: - raise ValueError("Unknown attention option %s" % attention_option) - - return attention_mechanism - - -def _create_attention_images_summary(final_context_state): - """create attention image and attention summary.""" - attention_images = (final_context_state.alignment_history.stack()) - # Reshape to (batch, src_seq_len, tgt_seq_len,1) - attention_images = tf.expand_dims( - tf.transpose(attention_images, [1, 2, 0]), -1) - # Scale to range [0, 255] - attention_images *= 255 - attention_summary = tf.summary.image("attention_images", attention_images) - return attention_summary diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/gnmt_model.py b/models/language_translation/tensorflow/gnmt/inference/fp32/gnmt_model.py deleted file mode 100644 index a7e357e2f..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/gnmt_model.py +++ /dev/null @@ -1,334 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""GNMT attention sequence-to-sequence model with dynamic RNN support.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -import attention_model -import model_helper -from utils import misc_utils as utils -from utils import vocab_utils - -__all__ = ["GNMTModel"] - - -class GNMTModel(attention_model.AttentionModel): - """Sequence-to-sequence dynamic model with GNMT attention architecture. - """ - - def __init__(self, - hparams, - mode, - iterator, - source_vocab_table, - target_vocab_table, - reverse_target_vocab_table=None, - scope=None, - extra_args=None): - self.is_gnmt_attention = ( - hparams.attention_architecture in ["gnmt", "gnmt_v2"]) - - super(GNMTModel, self).__init__( - hparams=hparams, - mode=mode, - iterator=iterator, - source_vocab_table=source_vocab_table, - target_vocab_table=target_vocab_table, - reverse_target_vocab_table=reverse_target_vocab_table, - scope=scope, - extra_args=extra_args) - - def _build_encoder(self, hparams): - """Build a GNMT encoder.""" - if hparams.encoder_type == "uni" or hparams.encoder_type == "bi": - return super(GNMTModel, self)._build_encoder(hparams) - - if hparams.encoder_type != "gnmt": - raise ValueError("Unknown encoder_type %s" % hparams.encoder_type) - - # Build GNMT encoder. - num_bi_layers = 1 - num_uni_layers = self.num_encoder_layers - num_bi_layers - utils.print_out("# Build a GNMT encoder") - utils.print_out(" num_bi_layers = %d" % num_bi_layers) - utils.print_out(" num_uni_layers = %d" % num_uni_layers) - - iterator = self.iterator - source = iterator.source - if self.time_major: - source = tf.transpose(source) - - with tf.variable_scope("encoder") as scope: - dtype = scope.dtype - - self.encoder_emb_inp = self.encoder_emb_lookup_fn( - self.embedding_encoder, source) - - # Execute _build_bidirectional_rnn from Model class - bi_encoder_outputs, bi_encoder_state = self._build_bidirectional_rnn( - inputs=self.encoder_emb_inp, - sequence_length=iterator.source_sequence_length, - dtype=dtype, - hparams=hparams, - num_bi_layers=num_bi_layers, - num_bi_residual_layers=0, # no residual connection - ) - - # Build unidirectional layers - if self.extract_encoder_layers: - encoder_state, encoder_outputs = self._build_individual_encoder_layers( - bi_encoder_outputs, num_uni_layers, dtype, hparams) - else: - encoder_state, encoder_outputs = self._build_all_encoder_layers( - bi_encoder_outputs, num_uni_layers, dtype, hparams) - - # Pass all encoder states to the decoder - # except the first bi-directional layer - encoder_state = (bi_encoder_state[1],) + ( - (encoder_state,) if num_uni_layers == 1 else encoder_state) - - return encoder_outputs, encoder_state - - def _build_all_encoder_layers(self, bi_encoder_outputs, - num_uni_layers, dtype, hparams): - """Build encoder layers all at once.""" - uni_cell = model_helper.create_rnn_cell( - unit_type=hparams.unit_type, - num_units=hparams.num_units, - num_layers=num_uni_layers, - num_residual_layers=self.num_encoder_residual_layers, - forget_bias=hparams.forget_bias, - dropout=hparams.dropout, - num_gpus=self.num_gpus, - base_gpu=1, - mode=self.mode, - single_cell_fn=self.single_cell_fn) - encoder_outputs, encoder_state = tf.nn.dynamic_rnn( - uni_cell, - bi_encoder_outputs, - dtype=dtype, - sequence_length=self.iterator.source_sequence_length, - time_major=self.time_major) - - # Use the top layer for now - 
self.encoder_state_list = [encoder_outputs] - - return encoder_state, encoder_outputs - - def _build_individual_encoder_layers(self, bi_encoder_outputs, - num_uni_layers, dtype, hparams): - """Run each of the encoder layer separately, not used in general seq2seq.""" - uni_cell_lists = model_helper._cell_list( - unit_type=hparams.unit_type, - num_units=hparams.num_units, - num_layers=num_uni_layers, - num_residual_layers=self.num_encoder_residual_layers, - forget_bias=hparams.forget_bias, - dropout=hparams.dropout, - num_gpus=self.num_gpus, - base_gpu=1, - mode=self.mode, - single_cell_fn=self.single_cell_fn) - - encoder_inp = bi_encoder_outputs - encoder_states = [] - self.encoder_state_list = [bi_encoder_outputs[:, :, :hparams.num_units], - bi_encoder_outputs[:, :, hparams.num_units:]] - with tf.variable_scope("rnn/multi_rnn_cell"): - for i, cell in enumerate(uni_cell_lists): - with tf.variable_scope("cell_%d" % i) as scope: - encoder_inp, encoder_state = tf.nn.dynamic_rnn( - cell, - encoder_inp, - dtype=dtype, - sequence_length=self.iterator.source_sequence_length, - time_major=self.time_major, - scope=scope) - encoder_states.append(encoder_state) - self.encoder_state_list.append(encoder_inp) - - encoder_state = tuple(encoder_states) - encoder_outputs = self.encoder_state_list[-1] - return encoder_state, encoder_outputs - - def _build_decoder_cell(self, hparams, encoder_outputs, encoder_state, - source_sequence_length): - """Build a RNN cell with GNMT attention architecture.""" - # Standard attention - if not self.is_gnmt_attention: - return super(GNMTModel, self)._build_decoder_cell( - hparams, encoder_outputs, encoder_state, source_sequence_length) - - # GNMT attention - attention_option = hparams.attention - attention_architecture = hparams.attention_architecture - num_units = hparams.num_units - infer_mode = hparams.infer_mode - - dtype = tf.float32 - - if self.time_major: - memory = tf.transpose(encoder_outputs, [1, 0, 2]) - else: - memory = encoder_outputs - - if (self.mode == tf.contrib.learn.ModeKeys.INFER and - infer_mode == "beam_search"): - memory, source_sequence_length, encoder_state, batch_size = ( - self._prepare_beam_search_decoder_inputs( - hparams.beam_width, memory, source_sequence_length, - encoder_state)) - else: - batch_size = self.batch_size - - attention_mechanism = self.attention_mechanism_fn( - attention_option, num_units, memory, source_sequence_length, self.mode) - - cell_list = model_helper._cell_list( # pylint: disable=protected-access - unit_type=hparams.unit_type, - num_units=num_units, - num_layers=self.num_decoder_layers, - num_residual_layers=self.num_decoder_residual_layers, - forget_bias=hparams.forget_bias, - dropout=hparams.dropout, - num_gpus=self.num_gpus, - mode=self.mode, - single_cell_fn=self.single_cell_fn, - residual_fn=gnmt_residual_fn - ) - - # Only wrap the bottom layer with the attention mechanism. - attention_cell = cell_list.pop(0) - - # Only generate alignment in greedy INFER mode. - alignment_history = (self.mode == tf.contrib.learn.ModeKeys.INFER and - infer_mode != "beam_search") - attention_cell = tf.contrib.seq2seq.AttentionWrapper( - attention_cell, - attention_mechanism, - attention_layer_size=None, # don't use attention layer. 
- output_attention=False, - alignment_history=alignment_history, - name="attention") - - if attention_architecture == "gnmt": - cell = GNMTAttentionMultiCell( - attention_cell, cell_list) - elif attention_architecture == "gnmt_v2": - cell = GNMTAttentionMultiCell( - attention_cell, cell_list, use_new_attention=True) - else: - raise ValueError( - "Unknown attention_architecture %s" % attention_architecture) - - if hparams.pass_hidden_state: - decoder_initial_state = tuple( - zs.clone(cell_state=es) - if isinstance(zs, tf.contrib.seq2seq.AttentionWrapperState) else es - for zs, es in zip( - cell.zero_state(batch_size, dtype), encoder_state)) - else: - decoder_initial_state = cell.zero_state(batch_size, dtype) - - return cell, decoder_initial_state - - def _get_infer_summary(self, hparams): - if hparams.infer_mode == "beam_search": - return tf.no_op() - elif self.is_gnmt_attention: - return attention_model._create_attention_images_summary( - self.final_context_state[0]) - else: - return super(GNMTModel, self)._get_infer_summary(hparams) - - -class GNMTAttentionMultiCell(tf.nn.rnn_cell.MultiRNNCell): - """A MultiCell with GNMT attention style.""" - - def __init__(self, attention_cell, cells, use_new_attention=False): - """Creates a GNMTAttentionMultiCell. - - Args: - attention_cell: An instance of AttentionWrapper. - cells: A list of RNNCell wrapped with AttentionInputWrapper. - use_new_attention: Whether to use the attention generated from current - step bottom layer's output. Default is False. - """ - cells = [attention_cell] + cells - self.use_new_attention = use_new_attention - super(GNMTAttentionMultiCell, self).__init__(cells, state_is_tuple=True) - - def __call__(self, inputs, state, scope=None): - """Run the cell with bottom layer's attention copied to all upper layers.""" - if not tf.contrib.framework.nest.is_sequence(state): - raise ValueError( - "Expected state to be a tuple of length %d, but received: %s" - % (len(self.state_size), state)) - - with tf.variable_scope(scope or "multi_rnn_cell"): - new_states = [] - - with tf.variable_scope("cell_0_attention"): - attention_cell = self._cells[0] - attention_state = state[0] - cur_inp, new_attention_state = attention_cell(inputs, attention_state) - new_states.append(new_attention_state) - - for i in range(1, len(self._cells)): - with tf.variable_scope("cell_%d" % i): - - cell = self._cells[i] - cur_state = state[i] - - if self.use_new_attention: - cur_inp = tf.concat([cur_inp, new_attention_state.attention], -1) - else: - cur_inp = tf.concat([cur_inp, attention_state.attention], -1) - - cur_inp, new_state = cell(cur_inp, cur_state) - new_states.append(new_state) - - return cur_inp, tuple(new_states) - - -def gnmt_residual_fn(inputs, outputs): - """Residual function that handles different inputs and outputs inner dims. - - Args: - inputs: cell inputs, this is actual inputs concatenated with the attention - vector. 
- outputs: cell outputs - - Returns: - outputs + actual inputs - """ - def split_input(inp, out): - out_dim = out.get_shape().as_list()[-1] - inp_dim = inp.get_shape().as_list()[-1] - return tf.split(inp, [out_dim, inp_dim - out_dim], axis=-1) - actual_inputs, _ = tf.contrib.framework.nest.map_structure( - split_input, inputs, outputs) - - def assert_shape_match(inp, out): - inp.get_shape().assert_is_compatible_with(out.get_shape()) - tf.contrib.framework.nest.assert_same_structure(actual_inputs, outputs) - tf.contrib.framework.nest.map_structure( - assert_shape_match, actual_inputs, outputs) - return tf.contrib.framework.nest.map_structure( - lambda inp, out: inp + out, actual_inputs, outputs) diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/inference.py b/models/language_translation/tensorflow/gnmt/inference/fp32/inference.py deleted file mode 100644 index c5cd35103..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/inference.py +++ /dev/null @@ -1,277 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""To perform inference on test set given a trained model.""" -from __future__ import print_function - -import codecs -import time - -import tensorflow as tf - -import attention_model -import gnmt_model -import model as nmt_model -import model_helper -from utils import misc_utils as utils -from utils import nmt_utils - -__all__ = ["load_data", "inference", - "single_worker_inference", "multi_worker_inference"] - - -def _decode_inference_indices(model, sess, output_infer, - output_infer_summary_prefix, - inference_indices, - tgt_eos, - subword_option): - """Decoding only a specific set of sentences.""" - utils.print_out(" decoding to output %s , num sents %d." % - (output_infer, len(inference_indices))) - start_time = time.time() - with codecs.getwriter("utf-8")( - tf.gfile.GFile(output_infer, mode="wb")) as trans_f: - trans_f.write("") # Write empty string to ensure file is created. 
- for decode_id in inference_indices: - nmt_outputs, infer_summary = model.decode(sess) - - # get text translation - assert nmt_outputs.shape[0] == 1 - translation = nmt_utils.get_translation( - nmt_outputs, - sent_id=0, - tgt_eos=tgt_eos, - subword_option=subword_option) - - if infer_summary is not None: # Attention models - image_file = output_infer_summary_prefix + str(decode_id) + ".png" - utils.print_out(" save attention image to %s*" % image_file) - image_summ = tf.Summary() - image_summ.ParseFromString(infer_summary) - with tf.gfile.GFile(image_file, mode="w") as img_f: - img_f.write(image_summ.value[0].image.encoded_image_string) - - trans_f.write("%s\n" % translation) - utils.print_out(translation + b"\n") - utils.print_time(" done", start_time) - - -def load_data(inference_input_file, hparams=None): - """Load inference data.""" - with codecs.getreader("utf-8")( - tf.gfile.GFile(inference_input_file, mode="rb")) as f: - inference_data = f.read().splitlines() - - if hparams and hparams.inference_indices: - inference_data = [inference_data[i] for i in hparams.inference_indices] - - return inference_data - - -def get_model_creator(hparams): - """Get the right model class depending on configuration.""" - if (hparams.encoder_type == "gnmt" or - hparams.attention_architecture in ["gnmt", "gnmt_v2"]): - model_creator = gnmt_model.GNMTModel - elif hparams.attention_architecture == "standard": - model_creator = attention_model.AttentionModel - elif not hparams.attention: - model_creator = nmt_model.Model - else: - raise ValueError("Unknown attention architecture %s" % - hparams.attention_architecture) - return model_creator - - -def start_sess_and_load_model(infer_model, ckpt_path): - """Start session and load model.""" - sess = tf.Session( - graph=infer_model.graph, config=utils.get_config_proto()) - with infer_model.graph.as_default(): - loaded_infer_model = model_helper.load_model( - infer_model.model, ckpt_path, sess, "infer") - return sess, loaded_infer_model - - -def inference(ckpt_path, - inference_input_file, - inference_output_file, - hparams, - num_workers=1, - jobid=0, - scope=None): - """Perform translation.""" - if hparams.inference_indices: - assert num_workers == 1 - - model_creator = get_model_creator(hparams) - infer_model = model_helper.create_infer_model(model_creator, hparams, scope) - sess, loaded_infer_model = start_sess_and_load_model(infer_model, ckpt_path) - - if num_workers == 1: - single_worker_inference( - sess, - infer_model, - loaded_infer_model, - inference_input_file, - inference_output_file, - hparams) - else: - multi_worker_inference( - sess, - infer_model, - loaded_infer_model, - inference_input_file, - inference_output_file, - hparams, - num_workers=num_workers, - jobid=jobid) - sess.close() - - -def single_worker_inference(sess, - infer_model, - loaded_infer_model, - inference_input_file, - inference_output_file, - hparams): - """Inference with a single worker.""" - output_infer = inference_output_file - - # Read data - infer_data = load_data(inference_input_file, hparams) - infer_data_feed = infer_data - - # sort the input file if no hparams.inference_indices is defined - index_pair = {} - new_input = [] - if hparams.inference_indices is None: - start_time = time.time() - input_length = [(len(line.split()), i) for i, line in enumerate(infer_data)] - sorted_input_bylens = sorted(input_length) - for ni, (_, oi) in enumerate(sorted_input_bylens): - new_input.append(infer_data[oi]) - index_pair[oi] = ni - infer_data_feed = new_input - - with 
infer_model.graph.as_default(): - sess.run( - infer_model.iterator.initializer, - feed_dict={ - infer_model.src_placeholder: infer_data_feed, - infer_model.batch_size_placeholder: hparams.infer_batch_size - }) - # Decode - utils.print_out("# Start decoding") - if hparams.inference_indices: - _decode_inference_indices( - loaded_infer_model, - sess, - output_infer=output_infer, - output_infer_summary_prefix=output_infer, - inference_indices=hparams.inference_indices, - tgt_eos=hparams.eos, - subword_option=hparams.subword_option) - else: - _, end_time, num_sentences = nmt_utils.decode_and_evaluate( - "infer", - loaded_infer_model, - sess, - output_infer, - ref_file=None, - metrics=hparams.metrics, - subword_option=hparams.subword_option, - beam_width=hparams.beam_width, - tgt_eos=hparams.eos, - num_translations_per_input=hparams.num_translations_per_input, - infer_mode=hparams.infer_mode, - index_pair=index_pair) - duration = end_time - start_time - if hparams.infer_batch_size is 1: - print(" The latency of the model is %.4f ms/sentences" % (1000 * duration / num_sentences)) - else: - print(" The throughput of the model is %.4f sentences/s" % (num_sentences / duration)) - - -def multi_worker_inference(sess, - infer_model, - loaded_infer_model, - inference_input_file, - inference_output_file, - hparams, - num_workers, - jobid): - """Inference using multiple workers.""" - assert num_workers > 1 - - final_output_infer = inference_output_file - output_infer = "%s_%d" % (inference_output_file, jobid) - output_infer_done = "%s_done_%d" % (inference_output_file, jobid) - - # Read data - infer_data = load_data(inference_input_file, hparams) - - # Split data to multiple workers - total_load = len(infer_data) - load_per_worker = int((total_load - 1) / num_workers) + 1 - start_position = jobid * load_per_worker - end_position = min(start_position + load_per_worker, total_load) - infer_data = infer_data[start_position:end_position] - - with infer_model.graph.as_default(): - sess.run(infer_model.iterator.initializer, - { - infer_model.src_placeholder: infer_data, - infer_model.batch_size_placeholder: hparams.infer_batch_size - }) - # Decode - utils.print_out("# Start decoding") - nmt_utils.decode_and_evaluate( - "infer", - loaded_infer_model, - sess, - output_infer, - ref_file=None, - metrics=hparams.metrics, - subword_option=hparams.subword_option, - beam_width=hparams.beam_width, - tgt_eos=hparams.eos, - num_translations_per_input=hparams.num_translations_per_input, - infer_mode=hparams.infer_mode) - - # Change file name to indicate the file writing is completed. - tf.gfile.Rename(output_infer, output_infer_done, overwrite=True) - - # Job 0 is responsible for the clean up. - if jobid != 0: - return - - # Now write all translations - with codecs.getwriter("utf-8")( - tf.gfile.GFile(final_output_infer, mode="wb")) as final_f: - for worker_id in range(num_workers): - worker_infer_done = "%s_done_%d" % (inference_output_file, worker_id) - while not tf.gfile.Exists(worker_infer_done): - utils.print_out(" waiting job %d to complete." 
% worker_id) - time.sleep(10) - - with codecs.getreader("utf-8")( - tf.gfile.GFile(worker_infer_done, mode="rb")) as f: - for translation in f: - final_f.write("%s" % translation) - - for worker_id in range(num_workers): - worker_infer_done = "%s_done_%d" % (inference_output_file, worker_id) - tf.gfile.Remove(worker_infer_done) diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/model.py b/models/language_translation/tensorflow/gnmt/inference/fp32/model.py deleted file mode 100644 index f4afb0b5a..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/model.py +++ /dev/null @@ -1,899 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - - -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Basic sequence-to-sequence model with dynamic RNN support.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc -import collections -import numpy as np - -import tensorflow as tf - -import model_helper -from utils import iterator_utils -from utils import misc_utils as utils -from utils import vocab_utils - -utils.check_tensorflow_version() - -__all__ = ["BaseModel", "Model"] - - -class TrainOutputTuple(collections.namedtuple( - "TrainOutputTuple", ("train_summary", "train_loss", "predict_count", - "global_step", "word_count", "batch_size", "grad_norm", - "learning_rate"))): - """To allow for flexibily in returing different outputs.""" - pass - - -class EvalOutputTuple(collections.namedtuple( - "EvalOutputTuple", ("eval_loss", "predict_count", "batch_size"))): - """To allow for flexibily in returing different outputs.""" - pass - - -class InferOutputTuple(collections.namedtuple( - "InferOutputTuple", ("infer_logits", "infer_summary", "sample_id", - "sample_words"))): - """To allow for flexibily in returing different outputs.""" - pass - - -class BaseModel(object): - """Sequence-to-sequence base class. - """ - - def __init__(self, - hparams, - mode, - iterator, - source_vocab_table, - target_vocab_table, - reverse_target_vocab_table=None, - scope=None, - extra_args=None): - """Create the model. - - Args: - hparams: Hyperparameter configurations. - mode: TRAIN | EVAL | INFER - iterator: Dataset Iterator that feeds data. - source_vocab_table: Lookup table mapping source words to ids. 
- target_vocab_table: Lookup table mapping target words to ids. - reverse_target_vocab_table: Lookup table mapping ids to target words. Only - required in INFER mode. Defaults to None. - scope: scope of the model. - extra_args: model_helper.ExtraArgs, for passing customizable functions. - - """ - # Set params - self._set_params_initializer(hparams, mode, iterator, - source_vocab_table, target_vocab_table, - scope, extra_args) - - # Not used in general seq2seq models; when True, ignore decoder & training - self.extract_encoder_layers = (hasattr(hparams, "extract_encoder_layers") - and hparams.extract_encoder_layers) - - # Train graph - res = self.build_graph(hparams, scope=scope) - if not self.extract_encoder_layers: - self._set_train_or_infer(res, reverse_target_vocab_table, hparams) - - # Saver - self.saver = tf.train.Saver( - tf.global_variables(), max_to_keep=hparams.num_keep_ckpts) - - def _set_params_initializer(self, - hparams, - mode, - iterator, - source_vocab_table, - target_vocab_table, - scope, - extra_args=None): - """Set various params for self and initialize.""" - assert isinstance(iterator, iterator_utils.BatchedInput) - self.iterator = iterator - self.mode = mode - self.src_vocab_table = source_vocab_table - self.tgt_vocab_table = target_vocab_table - - self.src_vocab_size = hparams.src_vocab_size - self.tgt_vocab_size = hparams.tgt_vocab_size - self.num_gpus = hparams.num_gpus - self.time_major = hparams.time_major - - if hparams.use_char_encode: - assert (not self.time_major), ("Can't use time major for" - " char-level inputs.") - - self.dtype = tf.float32 - self.num_sampled_softmax = hparams.num_sampled_softmax - - # extra_args: to make it flexible for adding external customizable code - self.single_cell_fn = None - if extra_args: - self.single_cell_fn = extra_args.single_cell_fn - - # Set num units - self.num_units = hparams.num_units - - # Set num layers - self.num_encoder_layers = hparams.num_encoder_layers - self.num_decoder_layers = hparams.num_decoder_layers - assert self.num_encoder_layers - assert self.num_decoder_layers - - # Set num residual layers - if hasattr(hparams, "num_residual_layers"): # compatible common_test_utils - self.num_encoder_residual_layers = hparams.num_residual_layers - self.num_decoder_residual_layers = hparams.num_residual_layers - else: - self.num_encoder_residual_layers = hparams.num_encoder_residual_layers - self.num_decoder_residual_layers = hparams.num_decoder_residual_layers - - # Batch size - self.batch_size = tf.size(self.iterator.source_sequence_length) - - # Global step - self.global_step = tf.Variable(0, trainable=False) - - # Initializer - self.random_seed = hparams.random_seed - initializer = model_helper.get_initializer( - hparams.init_op, self.random_seed, hparams.init_weight) - tf.get_variable_scope().set_initializer(initializer) - - # Embeddings - if extra_args and extra_args.encoder_emb_lookup_fn: - self.encoder_emb_lookup_fn = extra_args.encoder_emb_lookup_fn - else: - self.encoder_emb_lookup_fn = tf.nn.embedding_lookup - self.init_embeddings(hparams, scope) - - def _set_train_or_infer(self, res, reverse_target_vocab_table, hparams): - """Set up training and inference.""" - if self.mode == tf.contrib.learn.ModeKeys.TRAIN: - self.train_loss = res[1] - self.word_count = tf.reduce_sum( - self.iterator.source_sequence_length) + tf.reduce_sum( - self.iterator.target_sequence_length) - elif self.mode == tf.contrib.learn.ModeKeys.EVAL: - self.eval_loss = res[1] - elif self.mode == tf.contrib.learn.ModeKeys.INFER: - 
self.infer_logits, _, self.final_context_state, self.sample_id = res - self.sample_words = reverse_target_vocab_table.lookup( - tf.to_int64(self.sample_id)) - - if self.mode != tf.contrib.learn.ModeKeys.INFER: - # Count the number of predicted words for compute ppl. - self.predict_count = tf.reduce_sum( - self.iterator.target_sequence_length) - - params = tf.trainable_variables() - - # Gradients and SGD update operation for training the model. - # Arrange for the embedding vars to appear at the beginning. - if self.mode == tf.contrib.learn.ModeKeys.TRAIN: - self.learning_rate = tf.constant(hparams.learning_rate) - # warm-up - self.learning_rate = self._get_learning_rate_warmup(hparams) - # decay - self.learning_rate = self._get_learning_rate_decay(hparams) - - # Optimizer - if hparams.optimizer == "sgd": - opt = tf.train.GradientDescentOptimizer(self.learning_rate) - elif hparams.optimizer == "adam": - opt = tf.train.AdamOptimizer(self.learning_rate) - else: - raise ValueError("Unknown optimizer type %s" % hparams.optimizer) - - # Gradients - gradients = tf.gradients( - self.train_loss, - params, - colocate_gradients_with_ops=hparams.colocate_gradients_with_ops) - - clipped_grads, grad_norm_summary, grad_norm = model_helper.gradient_clip( - gradients, max_gradient_norm=hparams.max_gradient_norm) - self.grad_norm_summary = grad_norm_summary - self.grad_norm = grad_norm - - self.update = opt.apply_gradients( - zip(clipped_grads, params), global_step=self.global_step) - - # Summary - self.train_summary = self._get_train_summary() - elif self.mode == tf.contrib.learn.ModeKeys.INFER: - self.infer_summary = self._get_infer_summary(hparams) - - # Print trainable variables - utils.print_out("# Trainable variables") - utils.print_out("Format: <name>, <shape>, <(soft) device placement>") - for param in params: - utils.print_out(" %s, %s, %s" % (param.name, str(param.get_shape()), - param.op.device)) - - def _get_learning_rate_warmup(self, hparams): - """Get learning rate warmup.""" - warmup_steps = hparams.warmup_steps - warmup_scheme = hparams.warmup_scheme - utils.print_out(" learning_rate=%g, warmup_steps=%d, warmup_scheme=%s" % - (hparams.learning_rate, warmup_steps, warmup_scheme)) - - # Apply inverse decay if global steps less than warmup steps.
- # Inspired by https://arxiv.org/pdf/1706.03762.pdf (Section 5.3) - # When step < warmup_steps, - # learing_rate *= warmup_factor ** (warmup_steps - step) - if warmup_scheme == "t2t": - # 0.01^(1/warmup_steps): we start with a lr, 100 times smaller - warmup_factor = tf.exp(tf.log(0.01) / warmup_steps) - inv_decay = warmup_factor**( - tf.to_float(warmup_steps - self.global_step)) - else: - raise ValueError("Unknown warmup scheme %s" % warmup_scheme) - - return tf.cond( - self.global_step < hparams.warmup_steps, - lambda: inv_decay * self.learning_rate, - lambda: self.learning_rate, - name="learning_rate_warump_cond") - - def _get_decay_info(self, hparams): - """Return decay info based on decay_scheme.""" - if hparams.decay_scheme in ["luong5", "luong10", "luong234"]: - decay_factor = 0.5 - if hparams.decay_scheme == "luong5": - start_decay_step = int(hparams.num_train_steps / 2) - decay_times = 5 - elif hparams.decay_scheme == "luong10": - start_decay_step = int(hparams.num_train_steps / 2) - decay_times = 10 - elif hparams.decay_scheme == "luong234": - start_decay_step = int(hparams.num_train_steps * 2 / 3) - decay_times = 4 - remain_steps = hparams.num_train_steps - start_decay_step - decay_steps = int(remain_steps / decay_times) - elif not hparams.decay_scheme: # no decay - start_decay_step = hparams.num_train_steps - decay_steps = 0 - decay_factor = 1.0 - elif hparams.decay_scheme: - raise ValueError("Unknown decay scheme %s" % hparams.decay_scheme) - return start_decay_step, decay_steps, decay_factor - - def _get_learning_rate_decay(self, hparams): - """Get learning rate decay.""" - start_decay_step, decay_steps, decay_factor = self._get_decay_info(hparams) - utils.print_out(" decay_scheme=%s, start_decay_step=%d, decay_steps %d, " - "decay_factor %g" % (hparams.decay_scheme, - start_decay_step, - decay_steps, - decay_factor)) - - return tf.cond( - self.global_step < start_decay_step, - lambda: self.learning_rate, - lambda: tf.train.exponential_decay( - self.learning_rate, - (self.global_step - start_decay_step), - decay_steps, decay_factor, staircase=True), - name="learning_rate_decay_cond") - - def init_embeddings(self, hparams, scope): - """Init embeddings.""" - self.embedding_encoder, self.embedding_decoder = ( - model_helper.create_emb_for_encoder_and_decoder( - share_vocab=hparams.share_vocab, - src_vocab_size=self.src_vocab_size, - tgt_vocab_size=self.tgt_vocab_size, - src_embed_size=self.num_units, - tgt_embed_size=self.num_units, - num_enc_partitions=hparams.num_enc_emb_partitions, - num_dec_partitions=hparams.num_dec_emb_partitions, - src_vocab_file=hparams.src_vocab_file, - tgt_vocab_file=hparams.tgt_vocab_file, - src_embed_file=hparams.src_embed_file, - tgt_embed_file=hparams.tgt_embed_file, - use_char_encode=hparams.use_char_encode, - scope=scope,)) - - def _get_train_summary(self): - """Get train summary.""" - train_summary = tf.summary.merge( - [tf.summary.scalar("lr", self.learning_rate), - tf.summary.scalar("train_loss", self.train_loss)] + - self.grad_norm_summary) - return train_summary - - def train(self, sess): - """Execute train graph.""" - assert self.mode == tf.contrib.learn.ModeKeys.TRAIN - output_tuple = TrainOutputTuple(train_summary=self.train_summary, - train_loss=self.train_loss, - predict_count=self.predict_count, - global_step=self.global_step, - word_count=self.word_count, - batch_size=self.batch_size, - grad_norm=self.grad_norm, - learning_rate=self.learning_rate) - return sess.run([self.update, output_tuple]) - - def eval(self, sess): - """Execute 
eval graph.""" - assert self.mode == tf.contrib.learn.ModeKeys.EVAL - output_tuple = EvalOutputTuple(eval_loss=self.eval_loss, - predict_count=self.predict_count, - batch_size=self.batch_size) - return sess.run(output_tuple) - - def build_graph(self, hparams, scope=None): - """Subclass must implement this method. - - Creates a sequence-to-sequence model with dynamic RNN decoder API. - Args: - hparams: Hyperparameter configurations. - scope: VariableScope for the created subgraph; default "dynamic_seq2seq". - - Returns: - A tuple of the form (logits, loss_tuple, final_context_state, sample_id), - where: - logits: float32 Tensor [batch_size x num_decoder_symbols]. - loss: loss = the total loss / batch_size. - final_context_state: the final state of decoder RNN. - sample_id: sampling indices. - - Raises: - ValueError: if encoder_type differs from mono and bi, or - attention_option is not (luong | scaled_luong | - bahdanau | normed_bahdanau). - """ - utils.print_out("# Creating %s graph ..." % self.mode) - - # Projection - if not self.extract_encoder_layers: - with tf.variable_scope(scope or "build_network"): - with tf.variable_scope("decoder/output_projection"): - self.output_layer = tf.layers.Dense( - self.tgt_vocab_size, use_bias=False, name="output_projection") - - with tf.variable_scope(scope or "dynamic_seq2seq", dtype=self.dtype): - # Encoder - if hparams.language_model: # no encoder for language modeling - utils.print_out(" language modeling: no encoder") - self.encoder_outputs = None - encoder_state = None - else: - self.encoder_outputs, encoder_state = self._build_encoder(hparams) - - # Skip decoder if extracting only encoder layers - if self.extract_encoder_layers: - return - - # Decoder - logits, decoder_cell_outputs, sample_id, final_context_state = ( - self._build_decoder(self.encoder_outputs, encoder_state, hparams)) - - # Loss - if self.mode != tf.contrib.learn.ModeKeys.INFER: - with tf.device(model_helper.get_device_str(self.num_encoder_layers - 1, - self.num_gpus)): - loss = self._compute_loss(logits, decoder_cell_outputs) - else: - loss = tf.constant(0.0) - - return logits, loss, final_context_state, sample_id - - @abc.abstractmethod - def _build_encoder(self, hparams): - """Subclass must implement this. - - Build and run an RNN encoder. - - Args: - hparams: Hyperparameters configurations. - - Returns: - A tuple of encoder_outputs and encoder_state. 
- """ - pass - - def _build_encoder_cell(self, hparams, num_layers, num_residual_layers, - base_gpu=0): - """Build a multi-layer RNN cell that can be used by encoder.""" - - return model_helper.create_rnn_cell( - unit_type=hparams.unit_type, - num_units=self.num_units, - num_layers=num_layers, - num_residual_layers=num_residual_layers, - forget_bias=hparams.forget_bias, - dropout=hparams.dropout, - num_gpus=hparams.num_gpus, - mode=self.mode, - base_gpu=base_gpu, - single_cell_fn=self.single_cell_fn) - - def _get_infer_maximum_iterations(self, hparams, source_sequence_length): - """Maximum decoding steps at inference time.""" - if hparams.tgt_max_len_infer: - maximum_iterations = hparams.tgt_max_len_infer - utils.print_out(" decoding maximum_iterations %d" % maximum_iterations) - else: - # TODO(thangluong): add decoding_length_factor flag - decoding_length_factor = 2.0 - max_encoder_length = tf.reduce_max(source_sequence_length) - maximum_iterations = tf.to_int32(tf.round( - tf.to_float(max_encoder_length) * decoding_length_factor)) - return maximum_iterations - - def _build_decoder(self, encoder_outputs, encoder_state, hparams): - """Build and run a RNN decoder with a final projection layer. - - Args: - encoder_outputs: The outputs of encoder for every time step. - encoder_state: The final state of the encoder. - hparams: The Hyperparameters configurations. - - Returns: - A tuple of final logits and final decoder state: - logits: size [time, batch_size, vocab_size] when time_major=True. - """ - tgt_sos_id = tf.cast(self.tgt_vocab_table.lookup(tf.constant(hparams.sos)), - tf.int32) - tgt_eos_id = tf.cast(self.tgt_vocab_table.lookup(tf.constant(hparams.eos)), - tf.int32) - iterator = self.iterator - - # maximum_iteration: The maximum decoding steps. - maximum_iterations = self._get_infer_maximum_iterations( - hparams, iterator.source_sequence_length) - - # Decoder. - with tf.variable_scope("decoder") as decoder_scope: - cell, decoder_initial_state = self._build_decoder_cell( - hparams, encoder_outputs, encoder_state, - iterator.source_sequence_length) - - # Optional ops depends on which mode we are in and which loss function we - # are using. - logits = tf.no_op() - decoder_cell_outputs = None - - # Train or eval - if self.mode != tf.contrib.learn.ModeKeys.INFER: - # decoder_emp_inp: [max_time, batch_size, num_units] - target_input = iterator.target_input - if self.time_major: - target_input = tf.transpose(target_input) - decoder_emb_inp = tf.nn.embedding_lookup( - self.embedding_decoder, target_input) - - # Helper - helper = tf.contrib.seq2seq.TrainingHelper( - decoder_emb_inp, iterator.target_sequence_length, - time_major=self.time_major) - - # Decoder - my_decoder = tf.contrib.seq2seq.BasicDecoder( - cell, - helper, - decoder_initial_state,) - - # Dynamic decoding - outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode( - my_decoder, - output_time_major=self.time_major, - swap_memory=True, - scope=decoder_scope) - - sample_id = outputs.sample_id - - if self.num_sampled_softmax > 0: - # Note: this is required when using sampled_softmax_loss. - decoder_cell_outputs = outputs.rnn_output - - # Note: there's a subtle difference here between train and inference. - # We could have set output_layer when create my_decoder - # and shared more code between train and inference. - # We chose to apply the output_layer to all timesteps for speed: - # 10% improvements for small models & 20% for larger ones. - # If memory is a concern, we should apply output_layer per timestep. 
- num_layers = self.num_decoder_layers - num_gpus = self.num_gpus - device_id = num_layers if num_layers < num_gpus else (num_layers - 1) - # Colocate output layer with the last RNN cell if there is no extra GPU - # available. Otherwise, put last layer on a separate GPU. - with tf.device(model_helper.get_device_str(device_id, num_gpus)): - logits = self.output_layer(outputs.rnn_output) - - if self.num_sampled_softmax > 0: - logits = tf.no_op() # unused when using sampled softmax loss. - - # Inference - else: - infer_mode = hparams.infer_mode - start_tokens = tf.fill([self.batch_size], tgt_sos_id) - end_token = tgt_eos_id - utils.print_out( - " decoder: infer_mode=%sbeam_width=%d, length_penalty=%f" % ( - infer_mode, hparams.beam_width, hparams.length_penalty_weight)) - - if infer_mode == "beam_search": - beam_width = hparams.beam_width - length_penalty_weight = hparams.length_penalty_weight - - my_decoder = tf.contrib.seq2seq.BeamSearchDecoder( - cell=cell, - embedding=self.embedding_decoder, - start_tokens=start_tokens, - end_token=end_token, - initial_state=decoder_initial_state, - beam_width=beam_width, - output_layer=self.output_layer, - length_penalty_weight=length_penalty_weight) - elif infer_mode == "sample": - # Helper - sampling_temperature = hparams.sampling_temperature - assert sampling_temperature > 0.0, ( - "sampling_temperature must greater than 0.0 when using sample" - " decoder.") - helper = tf.contrib.seq2seq.SampleEmbeddingHelper( - self.embedding_decoder, start_tokens, end_token, - softmax_temperature=sampling_temperature, - seed=self.random_seed) - elif infer_mode == "greedy": - helper = tf.contrib.seq2seq.GreedyEmbeddingHelper( - self.embedding_decoder, start_tokens, end_token) - else: - raise ValueError("Unknown infer_mode '%s'", infer_mode) - - if infer_mode != "beam_search": - my_decoder = tf.contrib.seq2seq.BasicDecoder( - cell, - helper, - decoder_initial_state, - output_layer=self.output_layer # applied per timestep - ) - - # Dynamic decoding - outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode( - my_decoder, - maximum_iterations=maximum_iterations, - output_time_major=self.time_major, - swap_memory=True, - scope=decoder_scope) - - if infer_mode == "beam_search": - sample_id = outputs.predicted_ids - else: - logits = outputs.rnn_output - sample_id = outputs.sample_id - - return logits, decoder_cell_outputs, sample_id, final_context_state - - def get_max_time(self, tensor): - time_axis = 0 if self.time_major else 1 - return tensor.shape[time_axis].value or tf.shape(tensor)[time_axis] - - @abc.abstractmethod - def _build_decoder_cell(self, hparams, encoder_outputs, encoder_state, - source_sequence_length): - """Subclass must implement this. - - Args: - hparams: Hyperparameters configurations. - encoder_outputs: The outputs of encoder for every time step. - encoder_state: The final state of the encoder. - source_sequence_length: sequence length of encoder_outputs. - - Returns: - A tuple of a multi-layer RNN cell used by decoder and the intial state of - the decoder RNN. 
- """ - pass - - def _softmax_cross_entropy_loss( - self, logits, decoder_cell_outputs, labels): - """Compute softmax loss or sampled softmax loss.""" - if self.num_sampled_softmax > 0: - - is_sequence = (decoder_cell_outputs.shape.ndims == 3) - - if is_sequence: - labels = tf.reshape(labels, [-1, 1]) - inputs = tf.reshape(decoder_cell_outputs, [-1, self.num_units]) - - crossent = tf.nn.sampled_softmax_loss( - weights=tf.transpose(self.output_layer.kernel), - biases=self.output_layer.bias or tf.zeros([self.tgt_vocab_size]), - labels=labels, - inputs=inputs, - num_sampled=self.num_sampled_softmax, - num_classes=self.tgt_vocab_size, - partition_strategy="div", - seed=self.random_seed) - - if is_sequence: - if self.time_major: - crossent = tf.reshape(crossent, [-1, self.batch_size]) - else: - crossent = tf.reshape(crossent, [self.batch_size, -1]) - - else: - crossent = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - - return crossent - - def _compute_loss(self, logits, decoder_cell_outputs): - """Compute optimization loss.""" - target_output = self.iterator.target_output - if self.time_major: - target_output = tf.transpose(target_output) - max_time = self.get_max_time(target_output) - - crossent = self._softmax_cross_entropy_loss( - logits, decoder_cell_outputs, target_output) - - target_weights = tf.sequence_mask( - self.iterator.target_sequence_length, max_time, dtype=self.dtype) - if self.time_major: - target_weights = tf.transpose(target_weights) - - loss = tf.reduce_sum( - crossent * target_weights) / tf.to_float(self.batch_size) - return loss - - def _get_infer_summary(self, hparams): - del hparams - return tf.no_op() - - def infer(self, sess): - assert self.mode == tf.contrib.learn.ModeKeys.INFER - output_tuple = InferOutputTuple(infer_logits=self.infer_logits, - infer_summary=self.infer_summary, - sample_id=self.sample_id, - sample_words=self.sample_words) - return sess.run(output_tuple) - - def decode(self, sess): - """Decode a batch. - - Args: - sess: tensorflow session to use. - - Returns: - A tuple consiting of outputs, infer_summary. - outputs: of size [batch_size, time] - """ - output_tuple = self.infer(sess) - sample_words = output_tuple.sample_words - infer_summary = output_tuple.infer_summary - - # make sure outputs is of shape [batch_size, time] or [beam_width, - # batch_size, time] when using beam search. - if self.time_major: - sample_words = sample_words.transpose() - elif sample_words.ndim == 3: - # beam search output in [batch_size, time, beam_width] shape. - sample_words = sample_words.transpose([2, 0, 1]) - return sample_words, infer_summary - - def build_encoder_states(self, include_embeddings=False): - """Stack encoder states and return tensor [batch, length, layer, size].""" - assert self.mode == tf.contrib.learn.ModeKeys.INFER - if include_embeddings: - stack_state_list = tf.stack( - [self.encoder_emb_inp] + self.encoder_state_list, 2) - else: - stack_state_list = tf.stack(self.encoder_state_list, 2) - - # transform from [length, batch, ...] -> [batch, length, ...] - if self.time_major: - stack_state_list = tf.transpose(stack_state_list, [1, 0, 2, 3]) - - return stack_state_list - - -class Model(BaseModel): - """Sequence-to-sequence dynamic model. - - This class implements a multi-layer recurrent neural network as encoder, - and a multi-layer recurrent neural network decoder. - """ - - def _build_encoder_from_sequence(self, hparams, sequence, sequence_length): - """Build an encoder from a sequence. 
- - Args: - hparams: hyperparameters. - sequence: tensor with input sequence data. - sequence_length: tensor with length of the input sequence. - - Returns: - encoder_outputs: RNN encoder outputs. - encoder_state: RNN encoder state. - - Raises: - ValueError: if encoder_type is neither "uni" nor "bi". - """ - num_layers = self.num_encoder_layers - num_residual_layers = self.num_encoder_residual_layers - - if self.time_major: - sequence = tf.transpose(sequence) - - with tf.variable_scope("encoder") as scope: - dtype = scope.dtype - - self.encoder_emb_inp = self.encoder_emb_lookup_fn( - self.embedding_encoder, sequence) - - # Encoder_outputs: [max_time, batch_size, num_units] - if hparams.encoder_type == "uni": - utils.print_out(" num_layers = %d, num_residual_layers=%d" % - (num_layers, num_residual_layers)) - cell = self._build_encoder_cell(hparams, num_layers, - num_residual_layers) - - encoder_outputs, encoder_state = tf.nn.dynamic_rnn( - cell, - self.encoder_emb_inp, - dtype=dtype, - sequence_length=sequence_length, - time_major=self.time_major, - swap_memory=True) - elif hparams.encoder_type == "bi": - num_bi_layers = int(num_layers / 2) - num_bi_residual_layers = int(num_residual_layers / 2) - utils.print_out(" num_bi_layers = %d, num_bi_residual_layers=%d" % - (num_bi_layers, num_bi_residual_layers)) - - encoder_outputs, bi_encoder_state = ( - self._build_bidirectional_rnn( - inputs=self.encoder_emb_inp, - sequence_length=sequence_length, - dtype=dtype, - hparams=hparams, - num_bi_layers=num_bi_layers, - num_bi_residual_layers=num_bi_residual_layers)) - - if num_bi_layers == 1: - encoder_state = bi_encoder_state - else: - # alternatively concat forward and backward states - encoder_state = [] - for layer_id in range(num_bi_layers): - encoder_state.append(bi_encoder_state[0][layer_id]) # forward - encoder_state.append(bi_encoder_state[1][layer_id]) # backward - encoder_state = tuple(encoder_state) - else: - raise ValueError("Unknown encoder_type %s" % hparams.encoder_type) - - # Use the top layer for now - self.encoder_state_list = [encoder_outputs] - - return encoder_outputs, encoder_state - - def _build_encoder(self, hparams): - """Build encoder from source.""" - utils.print_out("# Build a basic encoder") - return self._build_encoder_from_sequence( - hparams, self.iterator.source, self.iterator.source_sequence_length) - - def _build_bidirectional_rnn(self, inputs, sequence_length, - dtype, hparams, - num_bi_layers, - num_bi_residual_layers, - base_gpu=0): - """Create and call biddirectional RNN cells. - - Args: - num_residual_layers: Number of residual layers from top to bottom. For - example, if `num_bi_layers=4` and `num_residual_layers=2`, the last 2 RNN - layers in each RNN cell will be wrapped with `ResidualWrapper`. - base_gpu: The gpu device id to use for the first forward RNN layer. The - i-th forward RNN layer will use `(base_gpu + i) % num_gpus` as its - device id. The `base_gpu` for backward RNN cell is `(base_gpu + - num_bi_layers)`. - - Returns: - The concatenated bidirectional output and the bidirectional RNN cell"s - state. 
- """ - # Construct forward and backward cells - fw_cell = self._build_encoder_cell(hparams, - num_bi_layers, - num_bi_residual_layers, - base_gpu=base_gpu) - bw_cell = self._build_encoder_cell(hparams, - num_bi_layers, - num_bi_residual_layers, - base_gpu=(base_gpu + num_bi_layers)) - - bi_outputs, bi_state = tf.nn.bidirectional_dynamic_rnn( - fw_cell, - bw_cell, - inputs, - dtype=dtype, - sequence_length=sequence_length, - time_major=self.time_major, - swap_memory=True) - - return tf.concat(bi_outputs, -1), bi_state - - def _build_decoder_cell(self, hparams, encoder_outputs, encoder_state, - source_sequence_length, base_gpu=0): - """Build an RNN cell that can be used by decoder.""" - # We only make use of encoder_outputs in attention-based models - if hparams.attention: - raise ValueError("BasicModel doesn't support attention.") - - cell = model_helper.create_rnn_cell( - unit_type=hparams.unit_type, - num_units=self.num_units, - num_layers=self.num_decoder_layers, - num_residual_layers=self.num_decoder_residual_layers, - forget_bias=hparams.forget_bias, - dropout=hparams.dropout, - num_gpus=self.num_gpus, - mode=self.mode, - single_cell_fn=self.single_cell_fn, - base_gpu=base_gpu - ) - - if hparams.language_model: - encoder_state = cell.zero_state(self.batch_size, self.dtype) - elif not hparams.pass_hidden_state: - raise ValueError("For non-attentional model, " - "pass_hidden_state needs to be set to True") - - # For beam search, we need to replicate encoder infos beam_width times - if (self.mode == tf.contrib.learn.ModeKeys.INFER and - hparams.infer_mode == "beam_search"): - decoder_initial_state = tf.contrib.seq2seq.tile_batch( - encoder_state, multiplier=hparams.beam_width) - else: - decoder_initial_state = encoder_state - - return cell, decoder_initial_state diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/model_helper.py b/models/language_translation/tensorflow/gnmt/inference/fp32/model_helper.py deleted file mode 100644 index f8f8eee23..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/model_helper.py +++ /dev/null @@ -1,664 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Utility functions for building models.""" -from __future__ import print_function - -import collections -import os -import time -import numpy as np -import six -import tensorflow as tf - -from tensorflow.python.ops import lookup_ops -from utils import iterator_utils -from utils import misc_utils as utils -from utils import vocab_utils - -__all__ = [ - "get_initializer", "get_device_str", "create_train_model", - "create_eval_model", "create_infer_model", - "create_emb_for_encoder_and_decoder", "create_rnn_cell", "gradient_clip", - "create_or_load_model", "load_model", "avg_checkpoints", - "compute_perplexity" -] - -# If a vocab size is greater than this value, put the embedding on cpu instead -VOCAB_SIZE_THRESHOLD_CPU = 50000 - - -def get_initializer(init_op, seed=None, init_weight=None): - """Create an initializer. init_weight is only for uniform.""" - if init_op == "uniform": - assert init_weight - return tf.random_uniform_initializer( - -init_weight, init_weight, seed=seed) - elif init_op == "glorot_normal": - return tf.keras.initializers.glorot_normal( - seed=seed) - elif init_op == "glorot_uniform": - return tf.keras.initializers.glorot_uniform( - seed=seed) - else: - raise ValueError("Unknown init_op %s" % init_op) - - -def get_device_str(device_id, num_gpus): - """Return a device string for multi-GPU setup.""" - if num_gpus == 0: - return "/cpu:0" - device_str_output = "/gpu:%d" % (device_id % num_gpus) - return device_str_output - - -class ExtraArgs(collections.namedtuple( - "ExtraArgs", ("single_cell_fn", "model_device_fn", - "attention_mechanism_fn", "encoder_emb_lookup_fn"))): - pass - - -class TrainModel( - collections.namedtuple("TrainModel", ("graph", "model", "iterator", - "skip_count_placeholder"))): - pass - - -def create_train_model( - model_creator, hparams, scope=None, num_workers=1, jobid=0, - extra_args=None): - """Create train graph, model, and iterator.""" - src_file = "%s.%s" % (hparams.train_prefix, hparams.src) - tgt_file = "%s.%s" % (hparams.train_prefix, hparams.tgt) - src_vocab_file = hparams.src_vocab_file - tgt_vocab_file = hparams.tgt_vocab_file - - graph = tf.Graph() - - with graph.as_default(), tf.container(scope or "train"): - src_vocab_table, tgt_vocab_table = vocab_utils.create_vocab_tables( - src_vocab_file, tgt_vocab_file, hparams.share_vocab) - - src_dataset = tf.data.TextLineDataset(tf.gfile.Glob(src_file)) - tgt_dataset = tf.data.TextLineDataset(tf.gfile.Glob(tgt_file)) - skip_count_placeholder = tf.placeholder(shape=(), dtype=tf.int64) - - iterator = iterator_utils.get_iterator( - src_dataset, - tgt_dataset, - src_vocab_table, - tgt_vocab_table, - batch_size=hparams.batch_size, - sos=hparams.sos, - eos=hparams.eos, - random_seed=hparams.random_seed, - num_buckets=hparams.num_buckets, - src_max_len=hparams.src_max_len, - tgt_max_len=hparams.tgt_max_len, - skip_count=skip_count_placeholder, - num_shards=num_workers, - shard_index=jobid, - use_char_encode=hparams.use_char_encode) - - # Note: One can set model_device_fn to - # `tf.train.replica_device_setter(ps_tasks)` for distributed training. 
- model_device_fn = None - if extra_args: - model_device_fn = extra_args.model_device_fn - with tf.device(model_device_fn): - model = model_creator( - hparams, - iterator=iterator, - mode=tf.contrib.learn.ModeKeys.TRAIN, - source_vocab_table=src_vocab_table, - target_vocab_table=tgt_vocab_table, - scope=scope, - extra_args=extra_args) - - return TrainModel( - graph=graph, - model=model, - iterator=iterator, - skip_count_placeholder=skip_count_placeholder) - - -class EvalModel( - collections.namedtuple("EvalModel", - ("graph", "model", "src_file_placeholder", - "tgt_file_placeholder", "iterator"))): - pass - - -def create_eval_model(model_creator, hparams, scope=None, extra_args=None): - """Create train graph, model, src/tgt file holders, and iterator.""" - src_vocab_file = hparams.src_vocab_file - tgt_vocab_file = hparams.tgt_vocab_file - graph = tf.Graph() - - with graph.as_default(), tf.container(scope or "eval"): - src_vocab_table, tgt_vocab_table = vocab_utils.create_vocab_tables( - src_vocab_file, tgt_vocab_file, hparams.share_vocab) - reverse_tgt_vocab_table = lookup_ops.index_to_string_table_from_file( - tgt_vocab_file, default_value=vocab_utils.UNK) - - src_file_placeholder = tf.placeholder(shape=(), dtype=tf.string) - tgt_file_placeholder = tf.placeholder(shape=(), dtype=tf.string) - src_dataset = tf.data.TextLineDataset(src_file_placeholder) - tgt_dataset = tf.data.TextLineDataset(tgt_file_placeholder) - iterator = iterator_utils.get_iterator( - src_dataset, - tgt_dataset, - src_vocab_table, - tgt_vocab_table, - hparams.batch_size, - sos=hparams.sos, - eos=hparams.eos, - random_seed=hparams.random_seed, - num_buckets=hparams.num_buckets, - src_max_len=hparams.src_max_len_infer, - tgt_max_len=hparams.tgt_max_len_infer, - use_char_encode=hparams.use_char_encode) - model = model_creator( - hparams, - iterator=iterator, - mode=tf.contrib.learn.ModeKeys.EVAL, - source_vocab_table=src_vocab_table, - target_vocab_table=tgt_vocab_table, - reverse_target_vocab_table=reverse_tgt_vocab_table, - scope=scope, - extra_args=extra_args) - return EvalModel( - graph=graph, - model=model, - src_file_placeholder=src_file_placeholder, - tgt_file_placeholder=tgt_file_placeholder, - iterator=iterator) - - -class InferModel( - collections.namedtuple("InferModel", - ("graph", "model", "src_placeholder", - "batch_size_placeholder", "iterator"))): - pass - - -def create_infer_model(model_creator, hparams, scope=None, extra_args=None): - """Create inference model.""" - graph = tf.Graph() - src_vocab_file = hparams.src_vocab_file - tgt_vocab_file = hparams.tgt_vocab_file - - with graph.as_default(), tf.container(scope or "infer"): - src_vocab_table, tgt_vocab_table = vocab_utils.create_vocab_tables( - src_vocab_file, tgt_vocab_file, hparams.share_vocab) - reverse_tgt_vocab_table = lookup_ops.index_to_string_table_from_file( - tgt_vocab_file, default_value=vocab_utils.UNK) - - src_placeholder = tf.placeholder(shape=[None], dtype=tf.string) - batch_size_placeholder = tf.placeholder(shape=[], dtype=tf.int64) - - src_dataset = tf.data.Dataset.from_tensor_slices( - src_placeholder) - iterator = iterator_utils.get_infer_iterator( - src_dataset, - src_vocab_table, - batch_size=batch_size_placeholder, - eos=hparams.eos, - src_max_len=hparams.src_max_len_infer, - use_char_encode=hparams.use_char_encode) - model = model_creator( - hparams, - iterator=iterator, - mode=tf.contrib.learn.ModeKeys.INFER, - source_vocab_table=src_vocab_table, - target_vocab_table=tgt_vocab_table, - 
reverse_target_vocab_table=reverse_tgt_vocab_table, - scope=scope, - extra_args=extra_args) - return InferModel( - graph=graph, - model=model, - src_placeholder=src_placeholder, - batch_size_placeholder=batch_size_placeholder, - iterator=iterator) - - -def _get_embed_device(vocab_size): - """Decide on which device to place an embed matrix given its vocab size.""" - if vocab_size > VOCAB_SIZE_THRESHOLD_CPU: - return "/cpu:0" - else: - return "/gpu:0" - - -def _create_pretrained_emb_from_txt( - vocab_file, embed_file, num_trainable_tokens=3, dtype=tf.float32, - scope=None): - """Load pretrained embedding from embed_file, and return an embedding matrix. - - Args: - embed_file: Path to a Glove formatted embedding txt file. - num_trainable_tokens: Make the first n tokens in the vocab file as trainable - variables. Default is 3, which is "<unk>", "<s>" and "</s>". - """ - vocab, _ = vocab_utils.load_vocab(vocab_file) - trainable_tokens = vocab[:num_trainable_tokens] - - utils.print_out("# Using pretrained embedding: %s." % embed_file) - utils.print_out(" with trainable tokens: ") - - emb_dict, emb_size = vocab_utils.load_embed_txt(embed_file) - for token in trainable_tokens: - utils.print_out(" %s" % token) - if token not in emb_dict: - emb_dict[token] = [0.0] * emb_size - - emb_mat = np.array( - [emb_dict[token] for token in vocab], dtype=dtype.as_numpy_dtype()) - emb_mat = tf.constant(emb_mat) - emb_mat_const = tf.slice(emb_mat, [num_trainable_tokens, 0], [-1, -1]) - with tf.variable_scope(scope or "pretrain_embeddings", dtype=dtype) as scope: - with tf.device(_get_embed_device(num_trainable_tokens)): - emb_mat_var = tf.get_variable( - "emb_mat_var", [num_trainable_tokens, emb_size]) - return tf.concat([emb_mat_var, emb_mat_const], 0) - - -def _create_or_load_embed(embed_name, vocab_file, embed_file, - vocab_size, embed_size, dtype): - """Create a new or load an existing embedding matrix.""" - if vocab_file and embed_file: - embedding = _create_pretrained_emb_from_txt(vocab_file, embed_file) - else: - with tf.device(_get_embed_device(vocab_size)): - embedding = tf.get_variable( - embed_name, [vocab_size, embed_size], dtype) - return embedding - - -def create_emb_for_encoder_and_decoder(share_vocab, - src_vocab_size, - tgt_vocab_size, - src_embed_size, - tgt_embed_size, - dtype=tf.float32, - num_enc_partitions=0, - num_dec_partitions=0, - src_vocab_file=None, - tgt_vocab_file=None, - src_embed_file=None, - tgt_embed_file=None, - use_char_encode=False, - scope=None): - """Create embedding matrix for both encoder and decoder. - - Args: - share_vocab: A boolean. Whether to share embedding matrix for both - encoder and decoder. - src_vocab_size: An integer. The source vocab size. - tgt_vocab_size: An integer. The target vocab size. - src_embed_size: An integer. The embedding dimension for the encoder's - embedding. - tgt_embed_size: An integer. The embedding dimension for the decoder's - embedding. - dtype: dtype of the embedding matrix. Default to float32. - num_enc_partitions: number of partitions used for the encoder's embedding - vars. - num_dec_partitions: number of partitions used for the decoder's embedding - vars. - scope: VariableScope for the created subgraph. Default to "embedding". - - Returns: - embedding_encoder: Encoder's embedding matrix. - embedding_decoder: Decoder's embedding matrix. - - Raises: - ValueError: if use share_vocab but source and target have different vocab - size.
- """ - if num_enc_partitions <= 1: - enc_partitioner = None - else: - # Note: num_partitions > 1 is required for distributed training due to - # embedding_lookup tries to colocate single partition-ed embedding variable - # with lookup ops. This may cause embedding variables being placed on worker - # jobs. - enc_partitioner = tf.fixed_size_partitioner(num_enc_partitions) - - if num_dec_partitions <= 1: - dec_partitioner = None - else: - # Note: num_partitions > 1 is required for distributed training due to - # embedding_lookup tries to colocate single partition-ed embedding variable - # with lookup ops. This may cause embedding variables being placed on worker - # jobs. - dec_partitioner = tf.fixed_size_partitioner(num_dec_partitions) - - if src_embed_file and enc_partitioner: - raise ValueError( - "Can't set num_enc_partitions > 1 when using pretrained encoder " - "embedding") - - if tgt_embed_file and dec_partitioner: - raise ValueError( - "Can't set num_dec_partitions > 1 when using pretrained decdoer " - "embedding") - - with tf.variable_scope( - scope or "embeddings", dtype=dtype, partitioner=enc_partitioner) as scope: - # Share embedding - if share_vocab: - if src_vocab_size != tgt_vocab_size: - raise ValueError("Share embedding but different src/tgt vocab sizes" - " %d vs. %d" % (src_vocab_size, tgt_vocab_size)) - assert src_embed_size == tgt_embed_size - utils.print_out("# Use the same embedding for source and target") - vocab_file = src_vocab_file or tgt_vocab_file - embed_file = src_embed_file or tgt_embed_file - - embedding_encoder = _create_or_load_embed( - "embedding_share", vocab_file, embed_file, - src_vocab_size, src_embed_size, dtype) - embedding_decoder = embedding_encoder - else: - if not use_char_encode: - with tf.variable_scope("encoder", partitioner=enc_partitioner): - embedding_encoder = _create_or_load_embed( - "embedding_encoder", src_vocab_file, src_embed_file, - src_vocab_size, src_embed_size, dtype) - else: - embedding_encoder = None - - with tf.variable_scope("decoder", partitioner=dec_partitioner): - embedding_decoder = _create_or_load_embed( - "embedding_decoder", tgt_vocab_file, tgt_embed_file, - tgt_vocab_size, tgt_embed_size, dtype) - - return embedding_encoder, embedding_decoder - - -def _single_cell(unit_type, num_units, forget_bias, dropout, mode, - residual_connection=False, device_str=None, residual_fn=None): - """Create an instance of a single RNN cell.""" - # dropout (= 1 - keep_prob) is set to 0 during eval and infer - dropout = dropout if mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0 - - # Cell Type - if unit_type == "lstm": - utils.print_out(" LSTM, forget_bias=%g" % forget_bias, new_line=False) - single_cell = tf.contrib.rnn.BasicLSTMCell( - num_units, - forget_bias=forget_bias) - elif unit_type == "gru": - utils.print_out(" GRU", new_line=False) - single_cell = tf.contrib.rnn.GRUCell(num_units) - elif unit_type == "layer_norm_lstm": - utils.print_out(" Layer Normalized LSTM, forget_bias=%g" % forget_bias, - new_line=False) - single_cell = tf.contrib.rnn.LayerNormBasicLSTMCell( - num_units, - forget_bias=forget_bias, - layer_norm=True) - elif unit_type == "nas": - utils.print_out(" NASCell", new_line=False) - single_cell = tf.contrib.rnn.NASCell(num_units) - else: - raise ValueError("Unknown unit type %s!" 
% unit_type) - - # Dropout (= 1 - keep_prob) - if dropout > 0.0: - single_cell = tf.contrib.rnn.DropoutWrapper( - cell=single_cell, input_keep_prob=(1.0 - dropout)) - utils.print_out(" %s, dropout=%g " % (type(single_cell).__name__, dropout), - new_line=False) - - # Residual - if residual_connection: - single_cell = tf.contrib.rnn.ResidualWrapper( - single_cell, residual_fn=residual_fn) - utils.print_out(" %s" % type(single_cell).__name__, new_line=False) - - # Device Wrapper - if device_str: - single_cell = tf.contrib.rnn.DeviceWrapper(single_cell, device_str) - utils.print_out(" %s, device=%s" % - (type(single_cell).__name__, device_str), new_line=False) - - return single_cell - - -def _cell_list(unit_type, num_units, num_layers, num_residual_layers, - forget_bias, dropout, mode, num_gpus, base_gpu=0, - single_cell_fn=None, residual_fn=None): - """Create a list of RNN cells.""" - if not single_cell_fn: - single_cell_fn = _single_cell - - # Multi-GPU - cell_list = [] - for i in range(num_layers): - utils.print_out(" cell %d" % i, new_line=False) - single_cell = single_cell_fn( - unit_type=unit_type, - num_units=num_units, - forget_bias=forget_bias, - dropout=dropout, - mode=mode, - residual_connection=(i >= num_layers - num_residual_layers), - device_str=get_device_str(i + base_gpu, num_gpus), - residual_fn=residual_fn - ) - utils.print_out("") - cell_list.append(single_cell) - - return cell_list - - -def create_rnn_cell(unit_type, num_units, num_layers, num_residual_layers, - forget_bias, dropout, mode, num_gpus, base_gpu=0, - single_cell_fn=None): - """Create multi-layer RNN cell. - - Args: - unit_type: string representing the unit type, i.e. "lstm". - num_units: the depth of each unit. - num_layers: number of cells. - num_residual_layers: Number of residual layers from top to bottom. For - example, if `num_layers=4` and `num_residual_layers=2`, the last 2 RNN - cells in the returned list will be wrapped with `ResidualWrapper`. - forget_bias: the initial forget bias of the RNNCell(s). - dropout: floating point value between 0.0 and 1.0: - the probability of dropout. this is ignored if `mode != TRAIN`. - mode: either tf.contrib.learn.TRAIN/EVAL/INFER - num_gpus: The number of gpus to use when performing round-robin - placement of layers. - base_gpu: The gpu device id to use for the first RNN cell in the - returned list. The i-th RNN cell will use `(base_gpu + i) % num_gpus` - as its device id. - single_cell_fn: allow for adding customized cell. - When not specified, we default to model_helper._single_cell - Returns: - An `RNNCell` instance. - """ - cell_list = _cell_list(unit_type=unit_type, - num_units=num_units, - num_layers=num_layers, - num_residual_layers=num_residual_layers, - forget_bias=forget_bias, - dropout=dropout, - mode=mode, - num_gpus=num_gpus, - base_gpu=base_gpu, - single_cell_fn=single_cell_fn) - - if len(cell_list) == 1: # Single layer. 
- return cell_list[0] - else: # Multi layers - return tf.contrib.rnn.MultiRNNCell(cell_list) - - -def gradient_clip(gradients, max_gradient_norm): - """Clipping gradients of a model.""" - clipped_gradients, gradient_norm = tf.clip_by_global_norm( - gradients, max_gradient_norm) - gradient_norm_summary = [tf.summary.scalar("grad_norm", gradient_norm)] - gradient_norm_summary.append( - tf.summary.scalar("clipped_gradient", tf.global_norm(clipped_gradients))) - - return clipped_gradients, gradient_norm_summary, gradient_norm - - -def print_variables_in_ckpt(ckpt_path): - """Print a list of variables in a checkpoint together with their shapes.""" - utils.print_out("# Variables in ckpt %s" % ckpt_path) - reader = tf.train.NewCheckpointReader(ckpt_path) - variable_map = reader.get_variable_to_shape_map() - for key in sorted(variable_map.keys()): - utils.print_out(" %s: %s" % (key, variable_map[key])) - - -def load_model(model, ckpt_path, session, name): - """Load model from a checkpoint.""" - start_time = time.time() - try: - model.saver.restore(session, ckpt_path) - except tf.errors.NotFoundError as e: - utils.print_out("Can't load checkpoint") - print_variables_in_ckpt(ckpt_path) - utils.print_out("%s" % str(e)) - - session.run(tf.tables_initializer()) - utils.print_out( - " loaded %s model parameters from %s, time %.2fs" % - (name, ckpt_path, time.time() - start_time)) - return model - - -def avg_checkpoints(model_dir, num_last_checkpoints, global_step, - global_step_name): - """Average the last N checkpoints in the model_dir.""" - checkpoint_state = tf.train.get_checkpoint_state(model_dir) - if not checkpoint_state: - utils.print_out("# No checkpoint file found in directory: %s" % model_dir) - return None - - # Checkpoints are ordered from oldest to newest. - checkpoints = ( - checkpoint_state.all_model_checkpoint_paths[-num_last_checkpoints:]) - - if len(checkpoints) < num_last_checkpoints: - utils.print_out( - "# Skipping averaging checkpoints because not enough checkpoints is " - "avaliable." - ) - return None - - avg_model_dir = os.path.join(model_dir, "avg_checkpoints") - if not tf.gfile.Exists(avg_model_dir): - utils.print_out( - "# Creating new directory %s for saving averaged checkpoints." % - avg_model_dir) - tf.gfile.MakeDirs(avg_model_dir) - - utils.print_out("# Reading and averaging variables in checkpoints:") - var_list = tf.contrib.framework.list_variables(checkpoints[0]) - var_values, var_dtypes = {}, {} - for (name, shape) in var_list: - if name != global_step_name: - var_values[name] = np.zeros(shape) - - for checkpoint in checkpoints: - utils.print_out(" %s" % checkpoint) - reader = tf.contrib.framework.load_checkpoint(checkpoint) - for name in var_values: - tensor = reader.get_tensor(name) - var_dtypes[name] = tensor.dtype - var_values[name] += tensor - - for name in var_values: - var_values[name] /= len(checkpoints) - - # Build a graph with same variables in the checkpoints, and save the averaged - # variables into the avg_model_dir. 
- with tf.Graph().as_default(): - tf_vars = [ - tf.get_variable(v, shape=var_values[v].shape, dtype=var_dtypes[name]) - for v in var_values - ] - - placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars] - assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)] - global_step_var = tf.Variable( - global_step, name=global_step_name, trainable=False) - saver = tf.train.Saver(tf.all_variables()) - - with tf.Session() as sess: - sess.run(tf.initialize_all_variables()) - for p, assign_op, (name, value) in zip(placeholders, assign_ops, - six.iteritems(var_values)): - sess.run(assign_op, {p: value}) - - # Use the built saver to save the averaged checkpoint. Only keep 1 - # checkpoint and the best checkpoint will be moved to avg_best_metric_dir. - saver.save( - sess, - os.path.join(avg_model_dir, "translate.ckpt")) - - return avg_model_dir - - -def create_or_load_model(model, model_dir, session, name): - """Create translation model and initialize or load parameters in session.""" - latest_ckpt = tf.train.latest_checkpoint(model_dir) - if latest_ckpt: - model = load_model(model, latest_ckpt, session, name) - else: - start_time = time.time() - session.run(tf.global_variables_initializer()) - session.run(tf.tables_initializer()) - utils.print_out(" created %s model with fresh parameters, time %.2fs" % - (name, time.time() - start_time)) - - global_step = model.global_step.eval(session=session) - return model, global_step - - -def compute_perplexity(model, sess, name): - """Compute perplexity of the output of the model. - - Args: - model: model for compute perplexity. - sess: tensorflow session to use. - name: name of the batch. - - Returns: - The perplexity of the eval outputs. - """ - total_loss = 0 - total_predict_count = 0 - start_time = time.time() - - while True: - try: - output_tuple = model.eval(sess) - total_loss += output_tuple.eval_loss * output_tuple.batch_size - total_predict_count += output_tuple.predict_count - except tf.errors.OutOfRangeError: - break - - perplexity = utils.safe_exp(total_loss / total_predict_count) - utils.print_time(" eval %s: perplexity %.2f" % (name, perplexity), - start_time) - return perplexity diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/nmt.py b/models/language_translation/tensorflow/gnmt/inference/fp32/nmt.py deleted file mode 100644 index 9e27764a3..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/nmt.py +++ /dev/null @@ -1,704 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
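# Illustrative sketch only (not part of the model sources above, names are made up):
# the checkpoint averaging performed by the deleted avg_checkpoints() helper reduces
# to a per-variable mean over the last N checkpoints, which the toy code below shows.
import numpy as np

def average_variable_values(per_checkpoint_values):
    # per_checkpoint_values: one {variable_name: np.ndarray} dict per checkpoint.
    num_ckpts = len(per_checkpoint_values)
    totals = {name: np.zeros_like(value, dtype=np.float64)
              for name, value in per_checkpoint_values[0].items()}
    for ckpt in per_checkpoint_values:
        for name, value in ckpt.items():
            totals[name] += value
    return {name: total / num_ckpts for name, total in totals.items()}

# Two toy "checkpoints" holding a single weight matrix:
print(average_variable_values([{"w": np.array([[1.0, 2.0]])},
                               {"w": np.array([[3.0, 4.0]])}]))  # {'w': array([[2., 3.]])}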
-# ============================================================================== - -"""TensorFlow NMT model implementation.""" -from __future__ import print_function - -import argparse -import os -import random -import sys - -# import matplotlib.image as mpimg -import numpy as np -import tensorflow as tf - -import inference -import train -from utils import evaluation_utils -from utils import misc_utils as utils -from utils import vocab_utils - -utils.check_tensorflow_version() - -FLAGS = None - -INFERENCE_KEYS = ["src_max_len_infer", "tgt_max_len_infer", "subword_option", - "infer_batch_size", "beam_width", - "length_penalty_weight", "sampling_temperature", - "num_translations_per_input", "infer_mode"] - - -def add_arguments(parser): - """Build ArgumentParser.""" - parser.register("type", "bool", lambda v: v.lower() == "true") - - # network - parser.add_argument("--num_units", type=int, default=32, help="Network size.") - parser.add_argument("--num_layers", type=int, default=2, - help="Network depth.") - parser.add_argument("--num_encoder_layers", type=int, default=None, - help="Encoder depth, equal to num_layers if None.") - parser.add_argument("--num_decoder_layers", type=int, default=None, - help="Decoder depth, equal to num_layers if None.") - parser.add_argument("--encoder_type", type=str, default="uni", help="""\ - uni | bi | gnmt. - For bi, we build num_encoder_layers/2 bi-directional layers. - For gnmt, we build 1 bi-directional layer, and (num_encoder_layers - 1) - uni-directional layers.\ - """) - parser.add_argument("--residual", type="bool", nargs="?", const=True, - default=False, - help="Whether to add residual connections.") - parser.add_argument("--time_major", type="bool", nargs="?", const=True, - default=True, - help="Whether to use time-major mode for dynamic RNN.") - parser.add_argument("--num_embeddings_partitions", type=int, default=0, - help="Number of partitions for embedding vars.") - - # attention mechanisms - parser.add_argument("--attention", type=str, default="", help="""\ - luong | scaled_luong | bahdanau | normed_bahdanau or set to "" for no - attention\ - """) - parser.add_argument( - "--attention_architecture", - type=str, - default="standard", - help="""\ - standard | gnmt | gnmt_v2. - standard: use top layer to compute attention. - gnmt: GNMT style of computing attention, use previous bottom layer to - compute attention. - gnmt_v2: similar to gnmt, but use current bottom layer to compute - attention.\ - """) - parser.add_argument( - "--output_attention", type="bool", nargs="?", const=True, - default=True, - help="""\ - Only used in standard attention_architecture. Whether use attention as - the cell output at each timestep. - .\ - """) - parser.add_argument( - "--pass_hidden_state", type="bool", nargs="?", const=True, - default=True, - help="""\ - Whether to pass encoder's hidden state to decoder when using an attention - based model.\ - """) - - # optimizer - parser.add_argument("--optimizer", type=str, default="sgd", help="sgd | adam") - parser.add_argument("--learning_rate", type=float, default=1.0, - help="Learning rate. Adam: 0.001 | 0.0001") - parser.add_argument("--warmup_steps", type=int, default=0, - help="How many steps we inverse-decay learning.") - parser.add_argument("--warmup_scheme", type=str, default="t2t", help="""\ - How to warmup learning rates. 
Options include: - t2t: Tensor2Tensor's way, start with lr 100 times smaller, then - exponentiate until the specified lr.\ - """) - parser.add_argument( - "--decay_scheme", type=str, default="", help="""\ - How we decay learning rate. Options include: - luong234: after 2/3 num train steps, we start halving the learning rate - for 4 times before finishing. - luong5: after 1/2 num train steps, we start halving the learning rate - for 5 times before finishing.\ - luong10: after 1/2 num train steps, we start halving the learning rate - for 10 times before finishing.\ - """) - - parser.add_argument( - "--num_train_steps", type=int, default=12000, help="Num steps to train.") - parser.add_argument("--colocate_gradients_with_ops", type="bool", nargs="?", - const=True, - default=True, - help=("Whether try colocating gradients with " - "corresponding op")) - - # initializer - parser.add_argument("--init_op", type=str, default="uniform", - help="uniform | glorot_normal | glorot_uniform") - parser.add_argument("--init_weight", type=float, default=0.1, - help=("for uniform init_op, initialize weights " - "between [-this, this].")) - - # data - parser.add_argument("--src", type=str, default=None, - help="Source suffix, e.g., en.") - parser.add_argument("--tgt", type=str, default=None, - help="Target suffix, e.g., de.") - parser.add_argument("--train_prefix", type=str, default=None, - help="Train prefix, expect files with src/tgt suffixes.") - parser.add_argument("--dev_prefix", type=str, default=None, - help="Dev prefix, expect files with src/tgt suffixes.") - parser.add_argument("--test_prefix", type=str, default=None, - help="Test prefix, expect files with src/tgt suffixes.") - parser.add_argument("--out_dir", type=str, default=None, - help="Store log/model files.") - - # Vocab - parser.add_argument("--vocab_prefix", type=str, default=None, help="""\ - Vocab prefix, expect files with src/tgt suffixes.\ - """) - parser.add_argument("--embed_prefix", type=str, default=None, help="""\ - Pretrained embedding prefix, expect files with src/tgt suffixes. - The embedding files should be Glove formated txt files.\ - """) - parser.add_argument("--sos", type=str, default="", - help="Start-of-sentence symbol.") - parser.add_argument("--eos", type=str, default="", - help="End-of-sentence symbol.") - parser.add_argument("--share_vocab", type="bool", nargs="?", const=True, - default=False, - help="""\ - Whether to use the source vocab and embeddings for both source and - target.\ - """) - parser.add_argument("--check_special_token", type="bool", default=True, - help="""\ - Whether check special sos, eos, unk tokens exist in the - vocab files.\ - """) - - # Sequence lengths - parser.add_argument("--src_max_len", type=int, default=50, - help="Max length of src sequences during training.") - parser.add_argument("--tgt_max_len", type=int, default=50, - help="Max length of tgt sequences during training.") - parser.add_argument("--src_max_len_infer", type=int, default=None, - help="Max length of src sequences during inference.") - parser.add_argument("--tgt_max_len_infer", type=int, default=None, - help="""\ - Max length of tgt sequences during inference. 
Also use to restrict the - maximum decoding length.\ - """) - - # Default settings works well (rarely need to change) - parser.add_argument("--unit_type", type=str, default="lstm", - help="lstm | gru | layer_norm_lstm | nas") - parser.add_argument("--forget_bias", type=float, default=1.0, - help="Forget bias for BasicLSTMCell.") - parser.add_argument("--dropout", type=float, default=0.2, - help="Dropout rate (not keep_prob)") - parser.add_argument("--max_gradient_norm", type=float, default=5.0, - help="Clip gradients to this norm.") - parser.add_argument("--batch_size", type=int, default=128, help="Batch size.") - - parser.add_argument("--steps_per_stats", type=int, default=100, - help=("How many training steps to do per stats logging." - "Save checkpoint every 10x steps_per_stats")) - parser.add_argument("--max_train", type=int, default=0, - help="Limit on the size of training data (0: no limit).") - parser.add_argument("--num_buckets", type=int, default=5, - help="Put data into similar-length buckets.") - parser.add_argument("--num_sampled_softmax", type=int, default=0, - help=("Use sampled_softmax_loss if > 0." - "Otherwise, use full softmax loss.")) - - # SPM - parser.add_argument("--subword_option", type=str, default="", - choices=["", "bpe", "spm"], - help="""\ - Set to bpe or spm to activate subword desegmentation.\ - """) - - # Experimental encoding feature. - parser.add_argument("--use_char_encode", type="bool", default=False, - help="""\ - Whether to split each word or bpe into character, and then - generate the word-level representation from the character - reprentation. - """) - - # Misc - parser.add_argument("--num_gpus", type=int, default=1, - help="Number of gpus in each worker.") - parser.add_argument("--log_device_placement", type="bool", nargs="?", - const=True, default=False, help="Debug GPU allocation.") - parser.add_argument("--metrics", type=str, default="bleu", - help=("Comma-separated list of evaluations " - "metrics (bleu,rouge,accuracy)")) - parser.add_argument("--steps_per_external_eval", type=int, default=None, - help="""\ - How many training steps to do per external evaluation. Automatically set - based on data if None.\ - """) - parser.add_argument("--scope", type=str, default=None, - help="scope to put variables under") - parser.add_argument("--hparams_path", type=str, default=None, - help=("Path to standard hparams json file that overrides" - "hparams values from FLAGS.")) - parser.add_argument("--random_seed", type=int, default=None, - help="Random seed (>0, set a specific seed).") - parser.add_argument("--override_loaded_hparams", type="bool", nargs="?", - const=True, default=False, - help="Override loaded hparams with values specified") - parser.add_argument("--num_keep_ckpts", type=int, default=5, - help="Max number of checkpoints to keep.") - parser.add_argument("--avg_ckpts", type="bool", nargs="?", - const=True, default=False, help=("""\ - Average the last N checkpoints for external evaluation. 
- N can be controlled by setting --num_keep_ckpts.\ - """)) - parser.add_argument("--language_model", type="bool", nargs="?", - const=True, default=False, - help="True to train a language model, ignoring encoder") - - # Inference - parser.add_argument("--ckpt", type=str, default="", - help="Checkpoint file to load a model for inference.") - parser.add_argument("--inference_input_file", type=str, default=None, - help="Set to the text to decode.") - parser.add_argument("--inference_list", type=str, default=None, - help=("A comma-separated list of sentence indices " - "(0-based) to decode.")) - parser.add_argument("--infer_batch_size", type=int, default=32, - help="Batch size for inference mode.") - parser.add_argument("--inference_output_file", type=str, default=None, - help="Output file to store decoding results.") - parser.add_argument("--inference_ref_file", type=str, default=None, - help=("""\ - Reference file to compute evaluation scores (if provided).\ - """)) - - # Advanced inference arguments - parser.add_argument("--infer_mode", type=str, default="greedy", - choices=["greedy", "sample", "beam_search"], - help="Which type of decoder to use during inference.") - parser.add_argument("--beam_width", type=int, default=0, - help=("""\ - beam width when using beam search decoder. If 0 (default), use standard - decoder with greedy helper.\ - """)) - parser.add_argument("--length_penalty_weight", type=float, default=0.0, - help="Length penalty for beam search.") - parser.add_argument("--sampling_temperature", type=float, - default=0.0, - help=("""\ - Softmax sampling temperature for inference decoding, 0.0 means greedy - decoding. This option is ignored when using beam search.\ - """)) - parser.add_argument("--num_translations_per_input", type=int, default=1, - help=("""\ - Number of translations generated for each sentence. 
This is only used for - inference.\ - """)) - - # Job info - parser.add_argument("--jobid", type=int, default=0, - help="Task id of the worker.") - parser.add_argument("--num_workers", type=int, default=1, - help="Number of workers (inference only).") - parser.add_argument("--num_inter_threads", type=int, default=0, - help="number of inter_op_parallelism_threads") - parser.add_argument("--num_intra_threads", type=int, default=0, - help="number of intra_op_parallelism_threads") - - -def create_hparams(flags): - """Create training hparams.""" - return tf.contrib.training.HParams( - # Data - src=flags.src, - tgt=flags.tgt, - train_prefix=flags.train_prefix, - dev_prefix=flags.dev_prefix, - test_prefix=flags.test_prefix, - vocab_prefix=flags.vocab_prefix, - embed_prefix=flags.embed_prefix, - out_dir=flags.out_dir, - - # Networks - num_units=flags.num_units, - num_encoder_layers=(flags.num_encoder_layers or flags.num_layers), - num_decoder_layers=(flags.num_decoder_layers or flags.num_layers), - dropout=flags.dropout, - unit_type=flags.unit_type, - encoder_type=flags.encoder_type, - residual=flags.residual, - time_major=flags.time_major, - num_embeddings_partitions=flags.num_embeddings_partitions, - - # Attention mechanisms - attention=flags.attention, - attention_architecture=flags.attention_architecture, - output_attention=flags.output_attention, - pass_hidden_state=flags.pass_hidden_state, - - # Train - optimizer=flags.optimizer, - num_train_steps=flags.num_train_steps, - batch_size=flags.batch_size, - init_op=flags.init_op, - init_weight=flags.init_weight, - max_gradient_norm=flags.max_gradient_norm, - learning_rate=flags.learning_rate, - warmup_steps=flags.warmup_steps, - warmup_scheme=flags.warmup_scheme, - decay_scheme=flags.decay_scheme, - colocate_gradients_with_ops=flags.colocate_gradients_with_ops, - num_sampled_softmax=flags.num_sampled_softmax, - - # Data constraints - num_buckets=flags.num_buckets, - max_train=flags.max_train, - src_max_len=flags.src_max_len, - tgt_max_len=flags.tgt_max_len, - - # Inference - src_max_len_infer=flags.src_max_len_infer, - tgt_max_len_infer=flags.tgt_max_len_infer, - infer_batch_size=flags.infer_batch_size, - - # Advanced inference arguments - infer_mode=flags.infer_mode, - beam_width=flags.beam_width, - length_penalty_weight=flags.length_penalty_weight, - sampling_temperature=flags.sampling_temperature, - num_translations_per_input=flags.num_translations_per_input, - - # Vocab - sos=flags.sos if flags.sos else vocab_utils.SOS, - eos=flags.eos if flags.eos else vocab_utils.EOS, - subword_option=flags.subword_option, - check_special_token=flags.check_special_token, - use_char_encode=flags.use_char_encode, - - # Misc - forget_bias=flags.forget_bias, - num_gpus=flags.num_gpus, - epoch_step=0, # record where we were within an epoch. 
- steps_per_stats=flags.steps_per_stats, - steps_per_external_eval=flags.steps_per_external_eval, - share_vocab=flags.share_vocab, - metrics=flags.metrics.split(","), - log_device_placement=flags.log_device_placement, - random_seed=flags.random_seed, - override_loaded_hparams=flags.override_loaded_hparams, - num_keep_ckpts=flags.num_keep_ckpts, - avg_ckpts=flags.avg_ckpts, - language_model=flags.language_model, - num_intra_threads=flags.num_intra_threads, - num_inter_threads=flags.num_inter_threads, - ) - - -def _add_argument(hparams, key, value, update=True): - """Add an argument to hparams; if exists, change the value if update==True.""" - if hasattr(hparams, key): - if update: - setattr(hparams, key, value) - else: - hparams.add_hparam(key, value) - - -def extend_hparams(hparams): - """Add new arguments to hparams.""" - # Sanity checks - if hparams.encoder_type == "bi" and hparams.num_encoder_layers % 2 != 0: - raise ValueError("For bi, num_encoder_layers %d should be even" % - hparams.num_encoder_layers) - if (hparams.attention_architecture in ["gnmt"] and - hparams.num_encoder_layers < 2): - raise ValueError("For gnmt attention architecture, " - "num_encoder_layers %d should be >= 2" % - hparams.num_encoder_layers) - if hparams.subword_option and hparams.subword_option not in ["spm", "bpe"]: - raise ValueError("subword option must be either spm, or bpe") - if hparams.infer_mode == "beam_search" and hparams.beam_width <= 0: - raise ValueError("beam_width must greater than 0 when using beam_search" - "decoder.") - if hparams.infer_mode == "sample" and hparams.sampling_temperature <= 0.0: - raise ValueError("sampling_temperature must greater than 0.0 when using" - "sample decoder.") - - # Different number of encoder / decoder layers - assert hparams.num_encoder_layers and hparams.num_decoder_layers - if hparams.num_encoder_layers != hparams.num_decoder_layers: - hparams.pass_hidden_state = False - utils.print_out("Num encoder layer %d is different from num decoder layer" - " %d, so set pass_hidden_state to False" % ( - hparams.num_encoder_layers, - hparams.num_decoder_layers)) - - # Set residual layers - num_encoder_residual_layers = 0 - num_decoder_residual_layers = 0 - if hparams.residual: - if hparams.num_encoder_layers > 1: - num_encoder_residual_layers = hparams.num_encoder_layers - 1 - if hparams.num_decoder_layers > 1: - num_decoder_residual_layers = hparams.num_decoder_layers - 1 - - if hparams.encoder_type == "gnmt": - # The first unidirectional layer (after the bi-directional layer) in - # the GNMT encoder can't have residual connection due to the input is - # the concatenation of fw_cell and bw_cell's outputs. - num_encoder_residual_layers = hparams.num_encoder_layers - 2 - - # Compatible for GNMT models - if hparams.num_encoder_layers == hparams.num_decoder_layers: - num_decoder_residual_layers = num_encoder_residual_layers - _add_argument(hparams, "num_encoder_residual_layers", - num_encoder_residual_layers) - _add_argument(hparams, "num_decoder_residual_layers", - num_decoder_residual_layers) - - # Language modeling - if getattr(hparams, "language_model", None): - hparams.attention = "" - hparams.attention_architecture = "" - hparams.pass_hidden_state = False - hparams.share_vocab = True - hparams.src = hparams.tgt - utils.print_out("For language modeling, we turn off attention and " - "pass_hidden_state; turn on share_vocab; set src to tgt.") - - # Vocab - # Get vocab file names first - if hparams.vocab_prefix: - src_vocab_file = hparams.vocab_prefix + "." 
+ hparams.src - tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt - else: - raise ValueError("hparams.vocab_prefix must be provided.") - - # Source vocab - check_special_token = getattr(hparams, "check_special_token", True) - src_vocab_size, src_vocab_file = vocab_utils.check_vocab( - src_vocab_file, - hparams.out_dir, - check_special_token=check_special_token, - sos=hparams.sos, - eos=hparams.eos, - unk=vocab_utils.UNK) - - # Target vocab - if hparams.share_vocab: - utils.print_out(" using source vocab for target") - tgt_vocab_file = src_vocab_file - tgt_vocab_size = src_vocab_size - else: - tgt_vocab_size, tgt_vocab_file = vocab_utils.check_vocab( - tgt_vocab_file, - hparams.out_dir, - check_special_token=check_special_token, - sos=hparams.sos, - eos=hparams.eos, - unk=vocab_utils.UNK) - _add_argument(hparams, "src_vocab_size", src_vocab_size) - _add_argument(hparams, "tgt_vocab_size", tgt_vocab_size) - _add_argument(hparams, "src_vocab_file", src_vocab_file) - _add_argument(hparams, "tgt_vocab_file", tgt_vocab_file) - - # Num embedding partitions - num_embeddings_partitions = getattr(hparams, "num_embeddings_partitions", 0) - _add_argument(hparams, "num_enc_emb_partitions", num_embeddings_partitions) - _add_argument(hparams, "num_dec_emb_partitions", num_embeddings_partitions) - - # Pretrained Embeddings - _add_argument(hparams, "src_embed_file", "") - _add_argument(hparams, "tgt_embed_file", "") - if getattr(hparams, "embed_prefix", None): - src_embed_file = hparams.embed_prefix + "." + hparams.src - tgt_embed_file = hparams.embed_prefix + "." + hparams.tgt - - if tf.gfile.Exists(src_embed_file): - utils.print_out(" src_embed_file %s exist" % src_embed_file) - hparams.src_embed_file = src_embed_file - - utils.print_out( - "For pretrained embeddings, set num_enc_emb_partitions to 1") - hparams.num_enc_emb_partitions = 1 - else: - utils.print_out(" src_embed_file %s doesn't exist" % src_embed_file) - - if tf.gfile.Exists(tgt_embed_file): - utils.print_out(" tgt_embed_file %s exist" % tgt_embed_file) - hparams.tgt_embed_file = tgt_embed_file - - utils.print_out( - "For pretrained embeddings, set num_dec_emb_partitions to 1") - hparams.num_dec_emb_partitions = 1 - else: - utils.print_out(" tgt_embed_file %s doesn't exist" % tgt_embed_file) - - # Evaluation - for metric in hparams.metrics: - best_metric_dir = os.path.join(hparams.out_dir, "best_" + metric) - tf.gfile.MakeDirs(best_metric_dir) - _add_argument(hparams, "best_" + metric, 0, update=False) - _add_argument(hparams, "best_" + metric + "_dir", best_metric_dir) - - if getattr(hparams, "avg_ckpts", None): - best_metric_dir = os.path.join(hparams.out_dir, "avg_best_" + metric) - tf.gfile.MakeDirs(best_metric_dir) - _add_argument(hparams, "avg_best_" + metric, 0, update=False) - _add_argument(hparams, "avg_best_" + metric + "_dir", best_metric_dir) - - return hparams - - -def ensure_compatible_hparams(hparams, default_hparams, hparams_path=""): - """Make sure the loaded hparams is compatible with new changes.""" - default_hparams = utils.maybe_parse_standard_hparams( - default_hparams, hparams_path) - - # Set num encoder/decoder layers (for old checkpoints) - if hasattr(hparams, "num_layers"): - if not hasattr(hparams, "num_encoder_layers"): - hparams.add_hparam("num_encoder_layers", hparams.num_layers) - if not hasattr(hparams, "num_decoder_layers"): - hparams.add_hparam("num_decoder_layers", hparams.num_layers) - - # For compatible reason, if there are new fields in default_hparams, - # we add them to the current hparams - 
default_config = default_hparams.values() - config = hparams.values() - for key in default_config: - if key not in config: - hparams.add_hparam(key, default_config[key]) - - # Update all hparams' keys if override_loaded_hparams=True - if getattr(default_hparams, "override_loaded_hparams", None): - overwritten_keys = default_config.keys() - else: - # For inference - overwritten_keys = INFERENCE_KEYS - - for key in overwritten_keys: - if getattr(hparams, key) != default_config[key]: - utils.print_out("# Updating hparams.%s: %s -> %s" % - (key, str(getattr(hparams, key)), - str(default_config[key]))) - setattr(hparams, key, default_config[key]) - return hparams - - -def create_or_load_hparams( - out_dir, default_hparams, hparams_path, save_hparams=True): - """Create hparams or load hparams from out_dir.""" - hparams = utils.load_hparams(out_dir) - if not hparams: - hparams = default_hparams - hparams = utils.maybe_parse_standard_hparams( - hparams, hparams_path) - else: - hparams = ensure_compatible_hparams(hparams, default_hparams, hparams_path) - hparams = extend_hparams(hparams) - - # Save HParams - if save_hparams: - utils.save_hparams(out_dir, hparams) - for metric in hparams.metrics: - utils.save_hparams(getattr(hparams, "best_" + metric + "_dir"), hparams) - - # Print HParams - utils.print_hparams(hparams) - return hparams - - -def run_main(flags, default_hparams, train_fn, inference_fn, target_session=""): - """Run main.""" - # Job - jobid = flags.jobid - num_workers = flags.num_workers - utils.print_out("# Job id %d" % jobid) - - # GPU device - utils.print_out( - "# Devices visible to TensorFlow: %s" % repr(tf.Session().list_devices())) - - # Random - random_seed = flags.random_seed - if random_seed is not None and random_seed > 0: - utils.print_out("# Set random seed to %d" % random_seed) - random.seed(random_seed + jobid) - np.random.seed(random_seed + jobid) - - # Model output directory - out_dir = flags.out_dir - if out_dir and not tf.gfile.Exists(out_dir): - utils.print_out("# Creating output directory %s ..." % out_dir) - tf.gfile.MakeDirs(out_dir) - - # Load hparams. 
- loaded_hparams = False - if flags.ckpt: # Try to load hparams from the same directory as ckpt - ckpt_dir = os.path.dirname(flags.ckpt) - ckpt_hparams_file = os.path.join(ckpt_dir, "hparams") - if tf.gfile.Exists(ckpt_hparams_file) or flags.hparams_path: - hparams = create_or_load_hparams( - ckpt_dir, default_hparams, flags.hparams_path, - save_hparams=False) - loaded_hparams = True - if not loaded_hparams: # Try to load from out_dir - assert out_dir - hparams = create_or_load_hparams( - out_dir, default_hparams, flags.hparams_path, - save_hparams=(jobid == 0)) - - # Train / Decode - if flags.inference_input_file: - # Inference output directory - trans_file = flags.inference_output_file - assert trans_file - trans_dir = os.path.dirname(trans_file) - if not tf.gfile.Exists(trans_dir): - tf.gfile.MakeDirs(trans_dir) - - # Inference indices - hparams.inference_indices = None - if flags.inference_list: - (hparams.inference_indices) = ( - [int(token) for token in flags.inference_list.split(",")]) - - # Inference - ckpt = flags.ckpt - if not ckpt: - ckpt = tf.train.latest_checkpoint(out_dir) - inference_fn(ckpt, flags.inference_input_file, - trans_file, hparams, num_workers, jobid) - - # Evaluation - ref_file = flags.inference_ref_file - if ref_file and tf.gfile.Exists(trans_file): - for metric in hparams.metrics: - score = evaluation_utils.evaluate( - ref_file, - trans_file, - metric, - hparams.subword_option) - utils.print_out(" %s: %.1f" % (metric, score)) - else: - # Train - train_fn(hparams, target_session=target_session) - - -def main(unused_argv): - default_hparams = create_hparams(FLAGS) - train_fn = train.train - inference_fn = inference.inference - run_main(FLAGS, default_hparams, train_fn, inference_fn) - - -if __name__ == "__main__": - nmt_parser = argparse.ArgumentParser() - add_arguments(nmt_parser) - FLAGS, unparsed = nmt_parser.parse_known_args() - tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/bleu.py b/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/bleu.py deleted file mode 100644 index f94a09cc9..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/bleu.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Python implementation of BLEU and smooth-BLEU. - -This module provides a Python implementation of BLEU and smooth-BLEU. -Smooth BLEU is computed following the method outlined in the paper: -Chin-Yew Lin, Franz Josef Och. ORANGE: a method for evaluating automatic -evaluation metrics for machine translation. COLING 2004. -""" - -import collections -import math - - -def _get_ngrams(segment, max_order): - """Extracts all n-grams upto a given maximum order from an input segment. - - Args: - segment: text segment from which n-grams will be extracted. 
- max_order: maximum length in tokens of the n-grams returned by this - methods. - - Returns: - The Counter containing all n-grams upto max_order in segment - with a count of how many times each n-gram occurred. - """ - ngram_counts = collections.Counter() - for order in range(1, max_order + 1): - for i in range(0, len(segment) - order + 1): - ngram = tuple(segment[i:i + order]) - ngram_counts[ngram] += 1 - return ngram_counts - - -def compute_bleu(reference_corpus, translation_corpus, max_order=4, - smooth=False): - """Computes BLEU score of translated segments against one or more references. - - Args: - reference_corpus: list of lists of references for each translation. Each - reference should be tokenized into a list of tokens. - translation_corpus: list of translations to score. Each translation - should be tokenized into a list of tokens. - max_order: Maximum n-gram order to use when computing BLEU score. - smooth: Whether or not to apply Lin et al. 2004 smoothing. - - Returns: - 3-Tuple with the BLEU score, n-gram precisions, geometric mean of n-gram - precisions and brevity penalty. - """ - matches_by_order = [0] * max_order - possible_matches_by_order = [0] * max_order - reference_length = 0 - translation_length = 0 - for (references, translation) in zip(reference_corpus, - translation_corpus): - reference_length += min(len(r) for r in references) - translation_length += len(translation) - - merged_ref_ngram_counts = collections.Counter() - for reference in references: - merged_ref_ngram_counts |= _get_ngrams(reference, max_order) - translation_ngram_counts = _get_ngrams(translation, max_order) - overlap = translation_ngram_counts & merged_ref_ngram_counts - for ngram in overlap: - matches_by_order[len(ngram) - 1] += overlap[ngram] - for order in range(1, max_order + 1): - possible_matches = len(translation) - order + 1 - if possible_matches > 0: - possible_matches_by_order[order - 1] += possible_matches - - precisions = [0] * max_order - for i in range(0, max_order): - if smooth: - precisions[i] = ((matches_by_order[i] + 1.) / - (possible_matches_by_order[i] + 1.)) - else: - if possible_matches_by_order[i] > 0: - precisions[i] = (float(matches_by_order[i]) / - possible_matches_by_order[i]) - else: - precisions[i] = 0.0 - - if min(precisions) > 0: - p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions) - geo_mean = math.exp(p_log_sum) - else: - geo_mean = 0 - - ratio = float(translation_length) / reference_length - - if ratio > 1.0: - bp = 1. - else: - bp = math.exp(1 - 1. / ratio) - - bleu = geo_mean * bp - - return (bleu, precisions, bp, ratio, translation_length, reference_length) diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/download_iwslt15.sh b/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/download_iwslt15.sh deleted file mode 100644 index 7bcda77f1..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/download_iwslt15.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh -# Download small-scale IWSLT15 Vietnames to English translation data for NMT -# model training. -# -# Usage: -# ./download_iwslt15.sh path-to-output-dir -# -# If output directory is not specified, "./iwslt15" will be used as the default -# output directory. -OUT_DIR="${1:-iwslt15}" -SITE_PREFIX="https://nlp.stanford.edu/projects/nmt/data" - -mkdir -v -p $OUT_DIR - -# Download iwslt15 small dataset from standford website. -echo "Download training dataset train.en and train.vi." 
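# Illustrative use of the compute_bleu() helper from the deleted bleu.py above,
# assuming it has been imported; the sentences are hypothetical and inputs must
# already be tokenized into lists of tokens.
references   = [[["the", "cat", "sat", "on", "the", "mat"]]]  # one list of references per translation
translations = [["the", "cat", "sat", "on", "the", "mat"]]
bleu, precisions, bp, ratio, trans_len, ref_len = compute_bleu(references, translations)
print(round(bleu, 2))  # 1.0 -- every 1..4-gram matches and the brevity penalty is 1.0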
-curl -o "$OUT_DIR/train.en" "$SITE_PREFIX/iwslt15.en-vi/train.en" -curl -o "$OUT_DIR/train.vi" "$SITE_PREFIX/iwslt15.en-vi/train.vi" - -echo "Download dev dataset tst2012.en and tst2012.vi." -curl -o "$OUT_DIR/tst2012.en" "$SITE_PREFIX/iwslt15.en-vi/tst2012.en" -curl -o "$OUT_DIR/tst2012.vi" "$SITE_PREFIX/iwslt15.en-vi/tst2012.vi" - -echo "Download test dataset tst2013.en and tst2013.vi." -curl -o "$OUT_DIR/tst2013.en" "$SITE_PREFIX/iwslt15.en-vi/tst2013.en" -curl -o "$OUT_DIR/tst2013.vi" "$SITE_PREFIX/iwslt15.en-vi/tst2013.vi" - -echo "Download vocab file vocab.en and vocab.vi." -curl -o "$OUT_DIR/vocab.en" "$SITE_PREFIX/iwslt15.en-vi/vocab.en" -curl -o "$OUT_DIR/vocab.vi" "$SITE_PREFIX/iwslt15.en-vi/vocab.vi" diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/rouge.py b/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/rouge.py deleted file mode 100644 index a7ed29f16..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/rouge.py +++ /dev/null @@ -1,352 +0,0 @@ -"""ROUGE metric implementation. - -Copy from tf_seq2seq/seq2seq/metrics/rouge.py. -This is a modified and slightly extended verison of -https://github.com/miso-belica/sumy/blob/dev/sumy/evaluation/rouge.py. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import itertools -import numpy as np - -# pylint: disable=C0103 - - -def _get_ngrams(n, text): - """Calcualtes n-grams. - - Args: - n: which n-grams to calculate - text: An array of tokens - - Returns: - A set of n-grams - """ - ngram_set = set() - text_length = len(text) - max_index_ngram_start = text_length - n - for i in range(max_index_ngram_start + 1): - ngram_set.add(tuple(text[i:i + n])) - return ngram_set - - -def _split_into_words(sentences): - """Splits multiple sentences into words and flattens the result""" - return list(itertools.chain(*[_.split(" ") for _ in sentences])) - - -def _get_word_ngrams(n, sentences): - """Calculates word n-grams for multiple sentences. - """ - assert len(sentences) > 0 - assert n > 0 - - words = _split_into_words(sentences) - return _get_ngrams(n, words) - - -def _len_lcs(x, y): - """ - Returns the length of the Longest Common Subsequence between sequences x - and y. - Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence - - Args: - x: sequence of words - y: sequence of words - - Returns - integer: Length of LCS between x and y - """ - table = _lcs(x, y) - n, m = len(x), len(y) - return table[n, m] - - -def _lcs(x, y): - """ - Computes the length of the longest common subsequence (lcs) between two - strings. The implementation below uses a DP programming algorithm and runs - in O(nm) time where n = len(x) and m = len(y). - Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence - - Args: - x: collection of words - y: collection of words - - Returns: - Table of dictionary of coord and len lcs - """ - n, m = len(x), len(y) - table = dict() - for i in range(n + 1): - for j in range(m + 1): - if i == 0 or j == 0: - table[i, j] = 0 - elif x[i - 1] == y[j - 1]: - table[i, j] = table[i - 1, j - 1] + 1 - else: - table[i, j] = max(table[i - 1, j], table[i, j - 1]) - return table - - -def _recon_lcs(x, y): - """ - Returns the Longest Subsequence between x and y. 
- Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence - - Args: - x: sequence of words - y: sequence of words - - Returns: - sequence: LCS of x and y - """ - i, j = len(x), len(y) - table = _lcs(x, y) - - def _recon(i, j): - """private recon calculation""" - if i == 0 or j == 0: - return [] - elif x[i - 1] == y[j - 1]: - return _recon(i - 1, j - 1) + [(x[i - 1], i)] - elif table[i - 1, j] > table[i, j - 1]: - return _recon(i - 1, j) - else: - return _recon(i, j - 1) - - recon_tuple = tuple(map(lambda x: x[0], _recon(i, j))) - return recon_tuple - - -def rouge_n(evaluated_sentences, reference_sentences, n=2): - """ - Computes ROUGE-N of two text collections of sentences. - Sourece: http://research.microsoft.com/en-us/um/people/cyl/download/ - papers/rouge-working-note-v1.3.1.pdf - - Args: - evaluated_sentences: The sentences that have been picked by the summarizer - reference_sentences: The sentences from the referene set - n: Size of ngram. Defaults to 2. - - Returns: - A tuple (f1, precision, recall) for ROUGE-N - - Raises: - ValueError: raises exception if a param has len <= 0 - """ - if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: - raise ValueError("Collections must contain at least 1 sentence.") - - evaluated_ngrams = _get_word_ngrams(n, evaluated_sentences) - reference_ngrams = _get_word_ngrams(n, reference_sentences) - reference_count = len(reference_ngrams) - evaluated_count = len(evaluated_ngrams) - - # Gets the overlapping ngrams between evaluated and reference - overlapping_ngrams = evaluated_ngrams.intersection(reference_ngrams) - overlapping_count = len(overlapping_ngrams) - - # Handle edge case. This isn't mathematically correct, but it's good enough - if evaluated_count == 0: - precision = 0.0 - else: - precision = overlapping_count / evaluated_count - - if reference_count == 0: - recall = 0.0 - else: - recall = overlapping_count / reference_count - - f1_score = 2.0 * ((precision * recall) / (precision + recall + 1e-8)) - - # return overlapping_count / reference_count - return f1_score, precision, recall - - -def _f_p_r_lcs(llcs, m, n): - """ - Computes the LCS-based F-measure score - Source: http://research.microsoft.com/en-us/um/people/cyl/download/papers/ - rouge-working-note-v1.3.1.pdf - - Args: - llcs: Length of LCS - m: number of words in reference summary - n: number of words in candidate summary - - Returns: - Float. LCS-based F-measure score - """ - r_lcs = llcs / m - p_lcs = llcs / n - beta = p_lcs / (r_lcs + 1e-12) - num = (1 + (beta**2)) * r_lcs * p_lcs - denom = r_lcs + ((beta**2) * p_lcs) - f_lcs = num / (denom + 1e-12) - return f_lcs, p_lcs, r_lcs - - -def rouge_l_sentence_level(evaluated_sentences, reference_sentences): - """ - Computes ROUGE-L (sentence level) of two text collections of sentences. 
- http://research.microsoft.com/en-us/um/people/cyl/download/papers/ - rouge-working-note-v1.3.1.pdf - - Calculated according to: - R_lcs = LCS(X,Y)/m - P_lcs = LCS(X,Y)/n - F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs) - - where: - X = reference summary - Y = Candidate summary - m = length of reference summary - n = length of candidate summary - - Args: - evaluated_sentences: The sentences that have been picked by the summarizer - reference_sentences: The sentences from the referene set - - Returns: - A float: F_lcs - - Raises: - ValueError: raises exception if a param has len <= 0 - """ - if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: - raise ValueError("Collections must contain at least 1 sentence.") - reference_words = _split_into_words(reference_sentences) - evaluated_words = _split_into_words(evaluated_sentences) - m = len(reference_words) - n = len(evaluated_words) - lcs = _len_lcs(evaluated_words, reference_words) - return _f_p_r_lcs(lcs, m, n) - - -def _union_lcs(evaluated_sentences, reference_sentence): - """ - Returns LCS_u(r_i, C) which is the LCS score of the union longest common - subsequence between reference sentence ri and candidate summary C. For example - if r_i= w1 w2 w3 w4 w5, and C contains two sentences: c1 = w1 w2 w6 w7 w8 and - c2 = w1 w3 w8 w9 w5, then the longest common subsequence of r_i and c1 is - "w1 w2" and the longest common subsequence of r_i and c2 is "w1 w3 w5". The - union longest common subsequence of r_i, c1, and c2 is "w1 w2 w3 w5" and - LCS_u(r_i, C) = 4/5. - - Args: - evaluated_sentences: The sentences that have been picked by the summarizer - reference_sentence: One of the sentences in the reference summaries - - Returns: - float: LCS_u(r_i, C) - - ValueError: - Raises exception if a param has len <= 0 - """ - if len(evaluated_sentences) <= 0: - raise ValueError("Collections must contain at least 1 sentence.") - - lcs_union = set() - reference_words = _split_into_words([reference_sentence]) - combined_lcs_length = 0 - for eval_s in evaluated_sentences: - evaluated_words = _split_into_words([eval_s]) - lcs = set(_recon_lcs(reference_words, evaluated_words)) - combined_lcs_length += len(lcs) - lcs_union = lcs_union.union(lcs) - - union_lcs_count = len(lcs_union) - union_lcs_value = union_lcs_count / combined_lcs_length - return union_lcs_value - - -def rouge_l_summary_level(evaluated_sentences, reference_sentences): - """ - Computes ROUGE-L (summary level) of two text collections of sentences. 
- http://research.microsoft.com/en-us/um/people/cyl/download/papers/ - rouge-working-note-v1.3.1.pdf - - Calculated according to: - R_lcs = SUM(1, u)[LCS(r_i,C)]/m - P_lcs = SUM(1, u)[LCS(r_i,C)]/n - F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs) - - where: - SUM(i,u) = SUM from i through u - u = number of sentences in reference summary - C = Candidate summary made up of v sentences - m = number of words in reference summary - n = number of words in candidate summary - - Args: - evaluated_sentences: The sentences that have been picked by the summarizer - reference_sentence: One of the sentences in the reference summaries - - Returns: - A float: F_lcs - - Raises: - ValueError: raises exception if a param has len <= 0 - """ - if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: - raise ValueError("Collections must contain at least 1 sentence.") - - # total number of words in reference sentences - m = len(_split_into_words(reference_sentences)) - - # total number of words in evaluated sentences - n = len(_split_into_words(evaluated_sentences)) - - union_lcs_sum_across_all_references = 0 - for ref_s in reference_sentences: - union_lcs_sum_across_all_references += _union_lcs(evaluated_sentences, - ref_s) - return _f_p_r_lcs(union_lcs_sum_across_all_references, m, n) - - -def rouge(hypotheses, references): - """Calculates average rouge scores for a list of hypotheses and - references""" - - # Filter out hyps that are of 0 length - # hyps_and_refs = zip(hypotheses, references) - # hyps_and_refs = [_ for _ in hyps_and_refs if len(_[0]) > 0] - # hypotheses, references = zip(*hyps_and_refs) - - # Calculate ROUGE-1 F1, precision, recall scores - rouge_1 = [ - rouge_n([hyp], [ref], 1) for hyp, ref in zip(hypotheses, references) - ] - rouge_1_f, rouge_1_p, rouge_1_r = map(np.mean, zip(*rouge_1)) - - # Calculate ROUGE-2 F1, precision, recall scores - rouge_2 = [ - rouge_n([hyp], [ref], 2) for hyp, ref in zip(hypotheses, references) - ] - rouge_2_f, rouge_2_p, rouge_2_r = map(np.mean, zip(*rouge_2)) - - # Calculate ROUGE-L F1, precision, recall scores - rouge_l = [ - rouge_l_sentence_level([hyp], [ref]) - for hyp, ref in zip(hypotheses, references) - ] - rouge_l_f, rouge_l_p, rouge_l_r = map(np.mean, zip(*rouge_l)) - - return { - "rouge_1/f_score": rouge_1_f, - "rouge_1/r_score": rouge_1_r, - "rouge_1/p_score": rouge_1_p, - "rouge_2/f_score": rouge_2_f, - "rouge_2/r_score": rouge_2_r, - "rouge_2/p_score": rouge_2_p, - "rouge_l/f_score": rouge_l_f, - "rouge_l/r_score": rouge_l_r, - "rouge_l/p_score": rouge_l_p, - } diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/wmt16_en_de.sh b/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/wmt16_en_de.sh deleted file mode 100644 index 9e9c4be96..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/scripts/wmt16_en_de.sh +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
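# Hand-worked example of the sentence-level ROUGE-L returned by the
# rouge_l_sentence_level() helper in the deleted rouge.py above, assuming it has
# been imported; the sentences are illustrative only.
#   hypothesis = ["the cat sat"], reference = ["the cat sat on the mat"]
#   LCS length = 3, m = 6 reference words, n = 3 hypothesis words, so
#   R_lcs = 3/6 = 0.5, P_lcs = 3/3 = 1.0, beta = P_lcs / R_lcs = 2.0,
#   F_lcs = (1 + beta^2) * R_lcs * P_lcs / (R_lcs + beta^2 * P_lcs) = 2.5 / 4.5 ~= 0.556
f_lcs, p_lcs, r_lcs = rouge_l_sentence_level(["the cat sat"], ["the cat sat on the mat"])
print(round(f_lcs, 3), p_lcs, r_lcs)  # 0.556 1.0 0.5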
- -set -e - -BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )" - -OUTPUT_DIR="${1:-wmt16_de_en}" -echo "Writing to ${OUTPUT_DIR}. To change this, set the OUTPUT_DIR environment variable." - -OUTPUT_DIR_DATA="${OUTPUT_DIR}/data" -mkdir -p $OUTPUT_DIR_DATA - -echo "Downloading Europarl v7. This may take a while..." -curl -o ${OUTPUT_DIR_DATA}/europarl-v7-de-en.tgz \ - http://www.statmt.org/europarl/v7/de-en.tgz - -echo "Downloading Common Crawl corpus. This may take a while..." -curl -o ${OUTPUT_DIR_DATA}/common-crawl.tgz \ - http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz - -echo "Downloading News Commentary v11. This may take a while..." -curl -o ${OUTPUT_DIR_DATA}/nc-v11.tgz \ - http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz - -echo "Downloading dev/test sets" -curl -o ${OUTPUT_DIR_DATA}/dev.tgz \ - http://data.statmt.org/wmt16/translation-task/dev.tgz -curl -o ${OUTPUT_DIR_DATA}/test.tgz \ - http://data.statmt.org/wmt16/translation-task/test.tgz - -# Extract everything -echo "Extracting all files..." -mkdir -p "${OUTPUT_DIR_DATA}/europarl-v7-de-en" -tar -xvzf "${OUTPUT_DIR_DATA}/europarl-v7-de-en.tgz" -C "${OUTPUT_DIR_DATA}/europarl-v7-de-en" -mkdir -p "${OUTPUT_DIR_DATA}/common-crawl" -tar -xvzf "${OUTPUT_DIR_DATA}/common-crawl.tgz" -C "${OUTPUT_DIR_DATA}/common-crawl" -mkdir -p "${OUTPUT_DIR_DATA}/nc-v11" -tar -xvzf "${OUTPUT_DIR_DATA}/nc-v11.tgz" -C "${OUTPUT_DIR_DATA}/nc-v11" -mkdir -p "${OUTPUT_DIR_DATA}/dev" -tar -xvzf "${OUTPUT_DIR_DATA}/dev.tgz" -C "${OUTPUT_DIR_DATA}/dev" -mkdir -p "${OUTPUT_DIR_DATA}/test" -tar -xvzf "${OUTPUT_DIR_DATA}/test.tgz" -C "${OUTPUT_DIR_DATA}/test" - -# Concatenate Training data -cat "${OUTPUT_DIR_DATA}/europarl-v7-de-en/europarl-v7.de-en.en" \ - "${OUTPUT_DIR_DATA}/common-crawl/commoncrawl.de-en.en" \ - "${OUTPUT_DIR_DATA}/nc-v11/training-parallel-nc-v11/news-commentary-v11.de-en.en" \ - > "${OUTPUT_DIR}/train.en" -wc -l "${OUTPUT_DIR}/train.en" - -cat "${OUTPUT_DIR_DATA}/europarl-v7-de-en/europarl-v7.de-en.de" \ - "${OUTPUT_DIR_DATA}/common-crawl/commoncrawl.de-en.de" \ - "${OUTPUT_DIR_DATA}/nc-v11/training-parallel-nc-v11/news-commentary-v11.de-en.de" \ - > "${OUTPUT_DIR}/train.de" -wc -l "${OUTPUT_DIR}/train.de" - -# Clone Moses -if [ ! 
-d "${OUTPUT_DIR}/mosesdecoder" ]; then - echo "Cloning moses for data processing" - git clone https://github.com/moses-smt/mosesdecoder.git "${OUTPUT_DIR}/mosesdecoder" -fi - -# Convert SGM files -# Convert newstest2014 data into raw text format -${OUTPUT_DIR}/mosesdecoder/scripts/ems/support/input-from-sgm.perl \ - < ${OUTPUT_DIR_DATA}/dev/dev/newstest2014-deen-src.de.sgm \ - > ${OUTPUT_DIR_DATA}/dev/dev/newstest2014.de -${OUTPUT_DIR}/mosesdecoder/scripts/ems/support/input-from-sgm.perl \ - < ${OUTPUT_DIR_DATA}/dev/dev/newstest2014-deen-ref.en.sgm \ - > ${OUTPUT_DIR_DATA}/dev/dev/newstest2014.en - -# Convert newstest2015 data into raw text format -${OUTPUT_DIR}/mosesdecoder/scripts/ems/support/input-from-sgm.perl \ - < ${OUTPUT_DIR_DATA}/dev/dev/newstest2015-deen-src.de.sgm \ - > ${OUTPUT_DIR_DATA}/dev/dev/newstest2015.de -${OUTPUT_DIR}/mosesdecoder/scripts/ems/support/input-from-sgm.perl \ - < ${OUTPUT_DIR_DATA}/dev/dev/newstest2015-deen-ref.en.sgm \ - > ${OUTPUT_DIR_DATA}/dev/dev/newstest2015.en - -# Convert newstest2016 data into raw text format -${OUTPUT_DIR}/mosesdecoder/scripts/ems/support/input-from-sgm.perl \ - < ${OUTPUT_DIR_DATA}/test/test/newstest2016-deen-src.de.sgm \ - > ${OUTPUT_DIR_DATA}/test/test/newstest2016.de -${OUTPUT_DIR}/mosesdecoder/scripts/ems/support/input-from-sgm.perl \ - < ${OUTPUT_DIR_DATA}/test/test/newstest2016-deen-ref.en.sgm \ - > ${OUTPUT_DIR_DATA}/test/test/newstest2016.en - -# Copy dev/test data to output dir -cp ${OUTPUT_DIR_DATA}/dev/dev/newstest20*.de ${OUTPUT_DIR} -cp ${OUTPUT_DIR_DATA}/dev/dev/newstest20*.en ${OUTPUT_DIR} -cp ${OUTPUT_DIR_DATA}/test/test/newstest20*.de ${OUTPUT_DIR} -cp ${OUTPUT_DIR_DATA}/test/test/newstest20*.en ${OUTPUT_DIR} - -# Tokenize data -for f in ${OUTPUT_DIR}/*.de; do - echo "Tokenizing $f..." - ${OUTPUT_DIR}/mosesdecoder/scripts/tokenizer/tokenizer.perl -q -l de -threads 8 < $f > ${f%.*}.tok.de -done - -for f in ${OUTPUT_DIR}/*.en; do - echo "Tokenizing $f..." - ${OUTPUT_DIR}/mosesdecoder/scripts/tokenizer/tokenizer.perl -q -l en -threads 8 < $f > ${f%.*}.tok.en -done - -# Clean train corpora -for f in ${OUTPUT_DIR}/train.tok.en; do - fbase=${f%.*} - echo "Cleaning ${fbase}..." - ${OUTPUT_DIR}/mosesdecoder/scripts/training/clean-corpus-n.perl $fbase de en "${fbase}.clean" 1 80 -done - -# Generate Subword Units (BPE) -# Clone Subword NMT -if [ ! -d "${OUTPUT_DIR}/subword-nmt" ]; then - git clone https://github.com/rsennrich/subword-nmt.git "${OUTPUT_DIR}/subword-nmt" -fi - -# Learn Shared BPE -for merge_ops in 32000; do - echo "Learning BPE with merge_ops=${merge_ops}. This may take a while..." - cat "${OUTPUT_DIR}/train.tok.clean.de" "${OUTPUT_DIR}/train.tok.clean.en" | \ - ${OUTPUT_DIR}/subword-nmt/learn_bpe.py -s $merge_ops > "${OUTPUT_DIR}/bpe.${merge_ops}" - - echo "Apply BPE with merge_ops=${merge_ops} to tokenized files..." 
- for lang in en de; do - for f in ${OUTPUT_DIR}/*.tok.${lang} ${OUTPUT_DIR}/*.tok.clean.${lang}; do - outfile="${f%.*}.bpe.${merge_ops}.${lang}" - ${OUTPUT_DIR}/subword-nmt/apply_bpe.py -c "${OUTPUT_DIR}/bpe.${merge_ops}" < $f > "${outfile}" - echo ${outfile} - done - done - - # Create vocabulary file for BPE - echo -e "\n\n" > "${OUTPUT_DIR}/vocab.bpe.${merge_ops}" - cat "${OUTPUT_DIR}/train.tok.clean.bpe.${merge_ops}.en" "${OUTPUT_DIR}/train.tok.clean.bpe.${merge_ops}.de" | \ - ${OUTPUT_DIR}/subword-nmt/get_vocab.py | cut -f1 -d ' ' >> "${OUTPUT_DIR}/vocab.bpe.${merge_ops}" - -done - -# Duplicate vocab file with language suffix -cp "${OUTPUT_DIR}/vocab.bpe.32000" "${OUTPUT_DIR}/vocab.bpe.32000.en" -cp "${OUTPUT_DIR}/vocab.bpe.32000" "${OUTPUT_DIR}/vocab.bpe.32000.de" - -echo "All done." diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/iwslt15.json b/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/iwslt15.json deleted file mode 100644 index 2b658eca1..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/iwslt15.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "attention": "scaled_luong", - "attention_architecture": "standard", - "batch_size": 128, - "colocate_gradients_with_ops": true, - "dropout": 0.2, - "encoder_type": "bi", - "eos": "", - "forget_bias": 1.0, - "infer_batch_size": 32, - "init_weight": 0.1, - "learning_rate": 1.0, - "max_gradient_norm": 5.0, - "metrics": ["bleu"], - "num_buckets": 5, - "num_encoder_layers": 2, - "num_decoder_layers": 2, - "num_train_steps": 12000, - "decay_scheme": "luong234", - "num_units": 512, - "optimizer": "sgd", - "residual": false, - "share_vocab": false, - "subword_option": "", - "sos": "", - "src_max_len": 50, - "src_max_len_infer": null, - "steps_per_external_eval": null, - "steps_per_stats": 100, - "tgt_max_len": 50, - "tgt_max_len_infer": null, - "time_major": true, - "unit_type": "lstm", - "beam_width": 10 -} diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/iwslt15_internal.json b/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/iwslt15_internal.json deleted file mode 100644 index 1f7509baa..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/iwslt15_internal.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "attention": "scaled_luong", - "attention_architecture": "standard", - "batch_size": 128, - "colocate_gradients_with_ops": true, - "dropout": 0.2, - "encoder_type": "bi", - "eos": "", - "forget_bias": 1.0, - "init_weight": 0.1, - "learning_rate": 1.0, - "max_gradient_norm": 5.0, - "metrics": ["bleu"], - "num_buckets": 5, - "num_encoder_layers": 2, - "num_decoder_layers": 2, - "decay_scheme": "luong234", - "num_units": 512, - "optimizer": "sgd", - "residual": false, - "share_vocab": false, - "subword_option": "", - "sos": "", - "src_max_len": 50, - "src_max_len_infer": null, - "steps_per_external_eval": null, - "steps_per_stats": 100, - "tgt_max_len": 50, - "tgt_max_len_infer": null, - "time_major": true, - "unit_type": "lstm", - "beam_width": 10 -} diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16.json b/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16.json deleted file mode 100644 index ba57dc5ef..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "attention": "normed_bahdanau", 
- "attention_architecture": "standard", - "batch_size": 128, - "colocate_gradients_with_ops": true, - "dropout": 0.2, - "encoder_type": "bi", - "eos": "", - "forget_bias": 1.0, - "infer_batch_size": 32, - "init_weight": 0.1, - "learning_rate": 1.0, - "max_gradient_norm": 5.0, - "metrics": ["bleu"], - "num_buckets": 5, - "num_encoder_layers": 4, - "num_decoder_layers": 4, - "num_train_steps": 340000, - "decay_scheme": "luong10", - "num_units": 1024, - "optimizer": "sgd", - "residual": false, - "share_vocab": false, - "subword_option": "bpe", - "sos": "", - "src_max_len": 50, - "src_max_len_infer": null, - "steps_per_external_eval": null, - "steps_per_stats": 100, - "tgt_max_len": 50, - "tgt_max_len_infer": null, - "time_major": true, - "unit_type": "lstm", - "beam_width": 10 -} diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_4_layer.json b/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_4_layer.json deleted file mode 100644 index 1274f3db0..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_4_layer.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "attention": "normed_bahdanau", - "attention_architecture": "gnmt_v2", - "batch_size": 128, - "colocate_gradients_with_ops": true, - "dropout": 0.2, - "encoder_type": "gnmt", - "eos": "", - "forget_bias": 1.0, - "infer_batch_size": 32, - "init_weight": 0.1, - "learning_rate": 1.0, - "max_gradient_norm": 5.0, - "metrics": ["bleu"], - "num_buckets": 5, - "num_encoder_layers": 4, - "num_decoder_layers": 4, - "num_train_steps": 340000, - "decay_scheme": "luong10", - "num_units": 1024, - "optimizer": "sgd", - "residual": true, - "share_vocab": false, - "subword_option": "bpe", - "sos": "", - "src_max_len": 50, - "src_max_len_infer": null, - "steps_per_external_eval": null, - "steps_per_stats": 100, - "tgt_max_len": 50, - "tgt_max_len_infer": null, - "time_major": true, - "unit_type": "lstm", - "beam_width": 10, - "length_penalty_weight": 1.0 -} diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json b/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json deleted file mode 100644 index c2a95c736..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "attention": "normed_bahdanau", - "attention_architecture": "gnmt_v2", - "batch_size": 128, - "colocate_gradients_with_ops": true, - "dropout": 0.2, - "encoder_type": "gnmt", - "eos": "", - "forget_bias": 1.0, - "init_weight": 0.1, - "learning_rate": 1.0, - "max_gradient_norm": 5.0, - "metrics": ["bleu"], - "num_buckets": 5, - "num_encoder_layers": 4, - "num_decoder_layers": 4, - "decay_scheme": "luong10", - "num_units": 1024, - "optimizer": "sgd", - "residual": true, - "share_vocab": false, - "subword_option": "bpe", - "sos": "", - "src_max_len": 50, - "src_max_len_infer": null, - "steps_per_external_eval": null, - "steps_per_stats": 100, - "tgt_max_len": 50, - "tgt_max_len_infer": null, - "time_major": true, - "unit_type": "lstm", - "beam_width": 10, - "length_penalty_weight": 1.0 -} diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_8_layer.json b/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_8_layer.json deleted file mode 100644 index f3b217b5d..000000000 --- 
a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_8_layer.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "attention": "normed_bahdanau", - "attention_architecture": "gnmt_v2", - "batch_size": 128, - "colocate_gradients_with_ops": true, - "dropout": 0.2, - "encoder_type": "gnmt", - "eos": "", - "forget_bias": 1.0, - "infer_batch_size": 32, - "init_weight": 0.1, - "learning_rate": 1.0, - "max_gradient_norm": 5.0, - "metrics": ["bleu"], - "num_buckets": 5, - "num_encoder_layers": 8, - "num_decoder_layers": 8, - "num_train_steps": 340000, - "decay_scheme": "luong10", - "num_units": 1024, - "optimizer": "sgd", - "residual": true, - "share_vocab": false, - "subword_option": "bpe", - "sos": "", - "src_max_len": 50, - "src_max_len_infer": null, - "steps_per_external_eval": null, - "steps_per_stats": 100, - "tgt_max_len": 50, - "tgt_max_len_infer": null, - "time_major": true, - "unit_type": "lstm", - "beam_width": 10, - "length_penalty_weight": 1.0 -} diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_8_layer_internal.json b/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_8_layer_internal.json deleted file mode 100644 index 51a984064..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_gnmt_8_layer_internal.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "attention": "normed_bahdanau", - "attention_architecture": "gnmt_v2", - "batch_size": 128, - "colocate_gradients_with_ops": true, - "dropout": 0.2, - "encoder_type": "gnmt", - "eos": "", - "forget_bias": 1.0, - "init_weight": 0.1, - "learning_rate": 1.0, - "max_gradient_norm": 5.0, - "metrics": ["bleu"], - "num_buckets": 5, - "num_encoder_layers": 8, - "num_decoder_layers": 8, - "num_train_steps": 340000, - "decay_scheme": "luong10", - "num_units": 1024, - "optimizer": "sgd", - "residual": true, - "share_vocab": false, - "subword_option": "bpe", - "sos": "", - "src_max_len": 50, - "src_max_len_infer": null, - "steps_per_external_eval": null, - "steps_per_stats": 100, - "tgt_max_len": 50, - "tgt_max_len_infer": null, - "time_major": true, - "unit_type": "lstm", - "beam_width": 10, - "length_penalty_weight": 1.0 -} diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_internal.json b/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_internal.json deleted file mode 100644 index 1f94008de..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/standard_hparams/wmt16_internal.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "attention": "normed_bahdanau", - "attention_architecture": "standard", - "batch_size": 128, - "colocate_gradients_with_ops": true, - "dropout": 0.2, - "encoder_type": "bi", - "eos": "", - "forget_bias": 1.0, - "init_weight": 0.1, - "learning_rate": 1.0, - "max_gradient_norm": 5.0, - "metrics": ["bleu"], - "num_buckets": 5, - "num_encoder_layers": 4, - "num_decoder_layers": 4, - "decay_scheme": "luong10", - "num_units": 1024, - "optimizer": "sgd", - "residual": false, - "share_vocab": false, - "subword_option": "bpe", - "sos": "", - "src_max_len": 50, - "src_max_len_infer": null, - "steps_per_external_eval": null, - "steps_per_stats": 100, - "tgt_max_len": 50, - "tgt_max_len_infer": null, - "time_major": true, - "unit_type": "lstm", - "beam_width": 10 -} diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/train.py 
b/models/language_translation/tensorflow/gnmt/inference/fp32/train.py deleted file mode 100644 index 14c1cc8d2..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/train.py +++ /dev/null @@ -1,750 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""For training NMT models.""" -from __future__ import print_function - -import math -import os -import random -import time - -import tensorflow as tf - -import attention_model -import gnmt_model -import inference -import model as nmt_model -import model_helper -from utils import misc_utils as utils -from utils import nmt_utils - -utils.check_tensorflow_version() - -__all__ = [ - "run_sample_decode", "run_internal_eval", "run_external_eval", - "run_avg_external_eval", "run_full_eval", "init_stats", "update_stats", - "print_step_info", "process_stats", "train", "get_model_creator", - "add_info_summaries", "get_best_results" -] - - -def run_sample_decode(infer_model, infer_sess, model_dir, hparams, - summary_writer, src_data, tgt_data): - """Sample decode a random sentence from src_data.""" - with infer_model.graph.as_default(): - loaded_infer_model, global_step = model_helper.create_or_load_model( - infer_model.model, model_dir, infer_sess, "infer") - - _sample_decode(loaded_infer_model, global_step, infer_sess, hparams, - infer_model.iterator, src_data, tgt_data, - infer_model.src_placeholder, - infer_model.batch_size_placeholder, summary_writer) - - -def run_internal_eval(eval_model, - eval_sess, - model_dir, - hparams, - summary_writer, - use_test_set=True, - dev_eval_iterator_feed_dict=None, - test_eval_iterator_feed_dict=None): - """Compute internal evaluation (perplexity) for both dev / test. - - Computes development and testing perplexities for given model. - - Args: - eval_model: Evaluation model for which to compute perplexities. - eval_sess: Evaluation TensorFlow session. - model_dir: Directory from which to load evaluation model from. - hparams: Model hyper-parameters. - summary_writer: Summary writer for logging metrics to TensorBoard. - use_test_set: Computes testing perplexity if true; does not otherwise. - Note that the development perplexity is always computed regardless of - value of this parameter. - dev_eval_iterator_feed_dict: Feed dictionary for a TensorFlow session. - Can be used to pass in additional inputs necessary for running the - development evaluation. - test_eval_iterator_feed_dict: Feed dictionary for a TensorFlow session. - Can be used to pass in additional inputs necessary for running the - testing evaluation. - Returns: - Pair containing development perplexity and testing perplexity, in this - order. 
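The deleted `run_internal_eval` above reports dev and test perplexity for the model being trained. As a minimal stand-alone sketch, perplexity is just the exponential of the average per-token cross-entropy loss, guarded against overflow the same way the deleted `misc_utils.safe_exp` is; the per-batch numbers below are invented for illustration.

```python
import math

def safe_exp(value):
    """Exponentiate, returning inf on overflow (mirrors the deleted misc_utils.safe_exp)."""
    try:
        return math.exp(value)
    except OverflowError:
        return float("inf")

def perplexity(total_loss, total_predict_count):
    """Perplexity = exp(summed cross-entropy loss / number of predicted target tokens)."""
    return safe_exp(total_loss / total_predict_count)

# Hypothetical per-batch (summed loss, predicted target token count) pairs.
batches = [(230.1, 64), (241.7, 71), (228.4, 66)]
loss = sum(l for l, _ in batches)
count = sum(c for _, c in batches)
print("dev ppl %.2f" % perplexity(loss, count))
```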
- """ - if dev_eval_iterator_feed_dict is None: - dev_eval_iterator_feed_dict = {} - if test_eval_iterator_feed_dict is None: - test_eval_iterator_feed_dict = {} - with eval_model.graph.as_default(): - loaded_eval_model, global_step = model_helper.create_or_load_model( - eval_model.model, model_dir, eval_sess, "eval") - - dev_src_file = "%s.%s" % (hparams.dev_prefix, hparams.src) - dev_tgt_file = "%s.%s" % (hparams.dev_prefix, hparams.tgt) - dev_eval_iterator_feed_dict[eval_model.src_file_placeholder] = dev_src_file - dev_eval_iterator_feed_dict[eval_model.tgt_file_placeholder] = dev_tgt_file - - dev_ppl = _internal_eval(loaded_eval_model, global_step, eval_sess, - eval_model.iterator, dev_eval_iterator_feed_dict, - summary_writer, "dev") - test_ppl = None - if use_test_set and hparams.test_prefix: - test_src_file = "%s.%s" % (hparams.test_prefix, hparams.src) - test_tgt_file = "%s.%s" % (hparams.test_prefix, hparams.tgt) - test_eval_iterator_feed_dict[ - eval_model.src_file_placeholder] = test_src_file - test_eval_iterator_feed_dict[ - eval_model.tgt_file_placeholder] = test_tgt_file - test_ppl = _internal_eval(loaded_eval_model, global_step, eval_sess, - eval_model.iterator, test_eval_iterator_feed_dict, - summary_writer, "test") - return dev_ppl, test_ppl - - -def run_external_eval(infer_model, - infer_sess, - model_dir, - hparams, - summary_writer, - save_best_dev=True, - use_test_set=True, - avg_ckpts=False, - dev_infer_iterator_feed_dict=None, - test_infer_iterator_feed_dict=None): - """Compute external evaluation for both dev / test. - - Computes development and testing external evaluation (e.g. bleu, rouge) for - given model. - - Args: - infer_model: Inference model for which to compute perplexities. - infer_sess: Inference TensorFlow session. - model_dir: Directory from which to load inference model from. - hparams: Model hyper-parameters. - summary_writer: Summary writer for logging metrics to TensorBoard. - use_test_set: Computes testing external evaluation if true; does not - otherwise. Note that the development external evaluation is always - computed regardless of value of this parameter. - dev_infer_iterator_feed_dict: Feed dictionary for a TensorFlow session. - Can be used to pass in additional inputs necessary for running the - development external evaluation. - test_infer_iterator_feed_dict: Feed dictionary for a TensorFlow session. - Can be used to pass in additional inputs necessary for running the - testing external evaluation. - Returns: - Triple containing development scores, testing scores and the TensorFlow - Variable for the global step number, in this order. 
- """ - if dev_infer_iterator_feed_dict is None: - dev_infer_iterator_feed_dict = {} - if test_infer_iterator_feed_dict is None: - test_infer_iterator_feed_dict = {} - with infer_model.graph.as_default(): - loaded_infer_model, global_step = model_helper.create_or_load_model( - infer_model.model, model_dir, infer_sess, "infer") - - dev_src_file = "%s.%s" % (hparams.dev_prefix, hparams.src) - dev_tgt_file = "%s.%s" % (hparams.dev_prefix, hparams.tgt) - dev_infer_iterator_feed_dict[ - infer_model.src_placeholder] = inference.load_data(dev_src_file) - dev_infer_iterator_feed_dict[ - infer_model.batch_size_placeholder] = hparams.infer_batch_size - dev_scores = _external_eval( - loaded_infer_model, - global_step, - infer_sess, - hparams, - infer_model.iterator, - dev_infer_iterator_feed_dict, - dev_tgt_file, - "dev", - summary_writer, - save_on_best=save_best_dev, - avg_ckpts=avg_ckpts) - - test_scores = None - if use_test_set and hparams.test_prefix: - test_src_file = "%s.%s" % (hparams.test_prefix, hparams.src) - test_tgt_file = "%s.%s" % (hparams.test_prefix, hparams.tgt) - test_infer_iterator_feed_dict[ - infer_model.src_placeholder] = inference.load_data(test_src_file) - test_infer_iterator_feed_dict[ - infer_model.batch_size_placeholder] = hparams.infer_batch_size - test_scores = _external_eval( - loaded_infer_model, - global_step, - infer_sess, - hparams, - infer_model.iterator, - test_infer_iterator_feed_dict, - test_tgt_file, - "test", - summary_writer, - save_on_best=False, - avg_ckpts=avg_ckpts) - return dev_scores, test_scores, global_step - - -def run_avg_external_eval(infer_model, infer_sess, model_dir, hparams, - summary_writer, global_step): - """Creates an averaged checkpoint and run external eval with it.""" - avg_dev_scores, avg_test_scores = None, None - if hparams.avg_ckpts: - # Convert VariableName:0 to VariableName. - global_step_name = infer_model.model.global_step.name.split(":")[0] - avg_model_dir = model_helper.avg_checkpoints( - model_dir, hparams.num_keep_ckpts, global_step, global_step_name) - - if avg_model_dir: - avg_dev_scores, avg_test_scores, _ = run_external_eval( - infer_model, - infer_sess, - avg_model_dir, - hparams, - summary_writer, - avg_ckpts=True) - - return avg_dev_scores, avg_test_scores - - -def run_internal_and_external_eval(model_dir, - infer_model, - infer_sess, - eval_model, - eval_sess, - hparams, - summary_writer, - avg_ckpts=False, - dev_eval_iterator_feed_dict=None, - test_eval_iterator_feed_dict=None, - dev_infer_iterator_feed_dict=None, - test_infer_iterator_feed_dict=None): - """Compute internal evaluation (perplexity) for both dev / test. - - Computes development and testing perplexities for given model. - - Args: - model_dir: Directory from which to load models from. - infer_model: Inference model for which to compute perplexities. - infer_sess: Inference TensorFlow session. - eval_model: Evaluation model for which to compute perplexities. - eval_sess: Evaluation TensorFlow session. - hparams: Model hyper-parameters. - summary_writer: Summary writer for logging metrics to TensorBoard. - avg_ckpts: Whether to compute average external evaluation scores. - dev_eval_iterator_feed_dict: Feed dictionary for a TensorFlow session. - Can be used to pass in additional inputs necessary for running the - internal development evaluation. - test_eval_iterator_feed_dict: Feed dictionary for a TensorFlow session. - Can be used to pass in additional inputs necessary for running the - internal testing evaluation. 
- dev_infer_iterator_feed_dict: Feed dictionary for a TensorFlow session. - Can be used to pass in additional inputs necessary for running the - external development evaluation. - test_infer_iterator_feed_dict: Feed dictionary for a TensorFlow session. - Can be used to pass in additional inputs necessary for running the - external testing evaluation. - Returns: - Triple containing results summary, global step Tensorflow Variable and - metrics in this order. - """ - dev_ppl, test_ppl = run_internal_eval( - eval_model, - eval_sess, - model_dir, - hparams, - summary_writer, - dev_eval_iterator_feed_dict=dev_eval_iterator_feed_dict, - test_eval_iterator_feed_dict=test_eval_iterator_feed_dict) - dev_scores, test_scores, global_step = run_external_eval( - infer_model, - infer_sess, - model_dir, - hparams, - summary_writer, - dev_infer_iterator_feed_dict=dev_infer_iterator_feed_dict, - test_infer_iterator_feed_dict=test_infer_iterator_feed_dict) - - metrics = { - "dev_ppl": dev_ppl, - "test_ppl": test_ppl, - "dev_scores": dev_scores, - "test_scores": test_scores, - } - - avg_dev_scores, avg_test_scores = None, None - if avg_ckpts: - avg_dev_scores, avg_test_scores = run_avg_external_eval( - infer_model, infer_sess, model_dir, hparams, summary_writer, - global_step) - metrics["avg_dev_scores"] = avg_dev_scores - metrics["avg_test_scores"] = avg_test_scores - - result_summary = _format_results("dev", dev_ppl, dev_scores, hparams.metrics) - if avg_dev_scores: - result_summary += ", " + _format_results("avg_dev", None, avg_dev_scores, - hparams.metrics) - if hparams.test_prefix: - result_summary += ", " + _format_results("test", test_ppl, test_scores, - hparams.metrics) - if avg_test_scores: - result_summary += ", " + _format_results("avg_test", None, - avg_test_scores, hparams.metrics) - - return result_summary, global_step, metrics - - -def run_full_eval(model_dir, - infer_model, - infer_sess, - eval_model, - eval_sess, - hparams, - summary_writer, - sample_src_data, - sample_tgt_data, - avg_ckpts=False): - """Wrapper for running sample_decode, internal_eval and external_eval. - - Args: - model_dir: Directory from which to load models from. - infer_model: Inference model for which to compute perplexities. - infer_sess: Inference TensorFlow session. - eval_model: Evaluation model for which to compute perplexities. - eval_sess: Evaluation TensorFlow session. - hparams: Model hyper-parameters. - summary_writer: Summary writer for logging metrics to TensorBoard. - sample_src_data: sample of source data for sample decoding. - sample_tgt_data: sample of target data for sample decoding. - avg_ckpts: Whether to compute average external evaluation scores. - Returns: - Triple containing results summary, global step Tensorflow Variable and - metrics in this order. 
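`run_internal_and_external_eval` above folds the perplexities and external scores into a metrics dict and a one-line results summary; the string assembly lives in `_format_results` further down in this same deleted file. A small self-contained sketch of that bookkeeping with made-up numbers:

```python
def format_results(name, ppl, scores, metrics):
    """Render e.g. 'dev ppl 5.83, dev bleu 24.1' (mirrors the deleted _format_results)."""
    parts = []
    if ppl:
        parts.append("%s ppl %.2f" % (name, ppl))
    if scores:
        parts.extend("%s %s %.1f" % (name, m, scores[m]) for m in metrics)
    return ", ".join(parts)

metrics = {"dev_ppl": 5.83, "test_ppl": 6.10,
           "dev_scores": {"bleu": 24.1}, "test_scores": {"bleu": 23.4}}
summary = ", ".join([
    format_results("dev", metrics["dev_ppl"], metrics["dev_scores"], ["bleu"]),
    format_results("test", metrics["test_ppl"], metrics["test_scores"], ["bleu"]),
])
print(summary)  # dev ppl 5.83, dev bleu 24.1, test ppl 6.10, test bleu 23.4
```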
- """ - run_sample_decode(infer_model, infer_sess, model_dir, hparams, summary_writer, - sample_src_data, sample_tgt_data) - return run_internal_and_external_eval(model_dir, infer_model, infer_sess, - eval_model, eval_sess, hparams, - summary_writer, avg_ckpts) - - -def init_stats(): - """Initialize statistics that we want to accumulate.""" - return {"step_time": 0.0, "train_loss": 0.0, - "predict_count": 0.0, # word count on the target side - "word_count": 0.0, # word counts for both source and target - "sequence_count": 0.0, # number of training examples processed - "grad_norm": 0.0} - - -def update_stats(stats, start_time, step_result): - """Update stats: write summary and accumulate statistics.""" - _, output_tuple = step_result - - # Update statistics - batch_size = output_tuple.batch_size - stats["step_time"] += time.time() - start_time - stats["train_loss"] += output_tuple.train_loss * batch_size - stats["grad_norm"] += output_tuple.grad_norm - stats["predict_count"] += output_tuple.predict_count - stats["word_count"] += output_tuple.word_count - stats["sequence_count"] += batch_size - - return (output_tuple.global_step, output_tuple.learning_rate, - output_tuple.train_summary) - - -def print_step_info(prefix, global_step, info, result_summary, log_f): - """Print all info at the current global step.""" - utils.print_out( - "%sstep %d lr %g step-time %.2fs wps %.2fK ppl %.2f gN %.2f %s, %s" % - (prefix, global_step, info["learning_rate"], info["avg_step_time"], - info["speed"], info["train_ppl"], info["avg_grad_norm"], result_summary, - time.ctime()), - log_f) - - -def add_info_summaries(summary_writer, global_step, info): - """Add stuffs in info to summaries.""" - excluded_list = ["learning_rate"] - for key in info: - if key not in excluded_list: - utils.add_summary(summary_writer, global_step, key, info[key]) - - -def process_stats(stats, info, global_step, steps_per_stats, log_f): - """Update info and check for overflow.""" - # Per-step info - info["avg_step_time"] = stats["step_time"] / steps_per_stats - info["avg_grad_norm"] = stats["grad_norm"] / steps_per_stats - info["avg_sequence_count"] = stats["sequence_count"] / steps_per_stats - info["speed"] = stats["word_count"] / (1000 * stats["step_time"]) - - # Per-predict info - info["train_ppl"] = ( - utils.safe_exp(stats["train_loss"] / stats["predict_count"])) - - # Check for overflow - is_overflow = False - train_ppl = info["train_ppl"] - if math.isnan(train_ppl) or math.isinf(train_ppl) or train_ppl > 1e20: - utils.print_out(" step %d overflow, stop early" % global_step, - log_f) - is_overflow = True - - return is_overflow - - -def before_train(loaded_train_model, train_model, train_sess, global_step, - hparams, log_f): - """Misc tasks to do before training.""" - stats = init_stats() - info = {"train_ppl": 0.0, "speed": 0.0, - "avg_step_time": 0.0, - "avg_grad_norm": 0.0, - "avg_sequence_count": 0.0, - "learning_rate": loaded_train_model.learning_rate.eval( - session=train_sess)} - start_train_time = time.time() - utils.print_out("# Start step %d, lr %g, %s" % - (global_step, info["learning_rate"], time.ctime()), log_f) - - # Initialize all of the iterators - skip_count = hparams.batch_size * hparams.epoch_step - utils.print_out("# Init train iterator, skipping %d elements" % skip_count) - train_sess.run( - train_model.iterator.initializer, - feed_dict={train_model.skip_count_placeholder: skip_count}) - - return stats, info, start_train_time - - -def get_model_creator(hparams): - """Get the right model class depending on 
configuration.""" - if (hparams.encoder_type == "gnmt" or - hparams.attention_architecture in ["gnmt", "gnmt_v2"]): - model_creator = gnmt_model.GNMTModel - elif hparams.attention_architecture == "standard": - model_creator = attention_model.AttentionModel - elif not hparams.attention: - model_creator = nmt_model.Model - else: - raise ValueError("Unknown attention architecture %s" % - hparams.attention_architecture) - return model_creator - - -def train(hparams, scope=None, target_session=""): - """Train a translation model.""" - log_device_placement = hparams.log_device_placement - out_dir = hparams.out_dir - num_train_steps = hparams.num_train_steps - steps_per_stats = hparams.steps_per_stats - steps_per_external_eval = hparams.steps_per_external_eval - steps_per_eval = 10 * steps_per_stats - avg_ckpts = hparams.avg_ckpts - - if not steps_per_external_eval: - steps_per_external_eval = 5 * steps_per_eval - - # Create model - model_creator = get_model_creator(hparams) - train_model = model_helper.create_train_model(model_creator, hparams, scope) - eval_model = model_helper.create_eval_model(model_creator, hparams, scope) - infer_model = model_helper.create_infer_model(model_creator, hparams, scope) - - # Preload data for sample decoding. - dev_src_file = "%s.%s" % (hparams.dev_prefix, hparams.src) - dev_tgt_file = "%s.%s" % (hparams.dev_prefix, hparams.tgt) - sample_src_data = inference.load_data(dev_src_file) - sample_tgt_data = inference.load_data(dev_tgt_file) - - summary_name = "train_log" - model_dir = hparams.out_dir - - # Log and output files - log_file = os.path.join(out_dir, "log_%d" % time.time()) - log_f = tf.gfile.GFile(log_file, mode="a") - utils.print_out("# log_file=%s" % log_file, log_f) - - # TensorFlow model - config_proto = utils.get_config_proto( - log_device_placement=log_device_placement, - num_intra_threads=hparams.num_intra_threads, - num_inter_threads=hparams.num_inter_threads) - train_sess = tf.Session( - target=target_session, config=config_proto, graph=train_model.graph) - eval_sess = tf.Session( - target=target_session, config=config_proto, graph=eval_model.graph) - infer_sess = tf.Session( - target=target_session, config=config_proto, graph=infer_model.graph) - - with train_model.graph.as_default(): - loaded_train_model, global_step = model_helper.create_or_load_model( - train_model.model, model_dir, train_sess, "train") - - # Summary writer - summary_writer = tf.summary.FileWriter( - os.path.join(out_dir, summary_name), train_model.graph) - - # First evaluation - run_full_eval( - model_dir, infer_model, infer_sess, - eval_model, eval_sess, hparams, - summary_writer, sample_src_data, - sample_tgt_data, avg_ckpts) - - last_stats_step = global_step - last_eval_step = global_step - last_external_eval_step = global_step - - # This is the training loop. - stats, info, start_train_time = before_train( - loaded_train_model, train_model, train_sess, global_step, hparams, log_f) - while global_step < num_train_steps: - # Run a step - start_time = time.time() - try: - step_result = loaded_train_model.train(train_sess) - hparams.epoch_step += 1 - except tf.errors.OutOfRangeError: - # Finished going through the training dataset. Go to next epoch. - hparams.epoch_step = 0 - utils.print_out( - "# Finished an epoch, step %d. 
Perform external evaluation" % - global_step) - run_sample_decode(infer_model, infer_sess, model_dir, hparams, - summary_writer, sample_src_data, sample_tgt_data) - run_external_eval(infer_model, infer_sess, model_dir, hparams, - summary_writer) - - if avg_ckpts: - run_avg_external_eval(infer_model, infer_sess, model_dir, hparams, - summary_writer, global_step) - - train_sess.run( - train_model.iterator.initializer, - feed_dict={train_model.skip_count_placeholder: 0}) - continue - - # Process step_result, accumulate stats, and write summary - global_step, info["learning_rate"], step_summary = update_stats( - stats, start_time, step_result) - summary_writer.add_summary(step_summary, global_step) - - # Once in a while, we print statistics. - if global_step - last_stats_step >= steps_per_stats: - last_stats_step = global_step - is_overflow = process_stats( - stats, info, global_step, steps_per_stats, log_f) - print_step_info(" ", global_step, info, get_best_results(hparams), - log_f) - if is_overflow: - break - - # Reset statistics - stats = init_stats() - - if global_step - last_eval_step >= steps_per_eval: - last_eval_step = global_step - utils.print_out("# Save eval, global step %d" % global_step) - add_info_summaries(summary_writer, global_step, info) - - # Save checkpoint - loaded_train_model.saver.save( - train_sess, - os.path.join(out_dir, "translate.ckpt"), - global_step=global_step) - - # Evaluate on dev/test - run_sample_decode(infer_model, infer_sess, - model_dir, hparams, summary_writer, sample_src_data, - sample_tgt_data) - run_internal_eval( - eval_model, eval_sess, model_dir, hparams, summary_writer) - - if global_step - last_external_eval_step >= steps_per_external_eval: - last_external_eval_step = global_step - - # Save checkpoint - loaded_train_model.saver.save( - train_sess, - os.path.join(out_dir, "translate.ckpt"), - global_step=global_step) - run_sample_decode(infer_model, infer_sess, - model_dir, hparams, summary_writer, sample_src_data, - sample_tgt_data) - run_external_eval( - infer_model, infer_sess, model_dir, - hparams, summary_writer) - - if avg_ckpts: - run_avg_external_eval(infer_model, infer_sess, model_dir, hparams, - summary_writer, global_step) - - # Done training - loaded_train_model.saver.save( - train_sess, - os.path.join(out_dir, "translate.ckpt"), - global_step=global_step) - - (result_summary, _, final_eval_metrics) = ( - run_full_eval( - model_dir, infer_model, infer_sess, eval_model, eval_sess, hparams, - summary_writer, sample_src_data, sample_tgt_data, avg_ckpts)) - print_step_info("# Final, ", global_step, info, result_summary, log_f) - utils.print_time("# Done training!", start_train_time) - - summary_writer.close() - - utils.print_out("# Start evaluating saved best models.") - for metric in hparams.metrics: - best_model_dir = getattr(hparams, "best_" + metric + "_dir") - summary_writer = tf.summary.FileWriter( - os.path.join(best_model_dir, summary_name), infer_model.graph) - result_summary, best_global_step, _ = run_full_eval( - best_model_dir, infer_model, infer_sess, eval_model, eval_sess, hparams, - summary_writer, sample_src_data, sample_tgt_data) - print_step_info("# Best %s, " % metric, best_global_step, info, - result_summary, log_f) - summary_writer.close() - - if avg_ckpts: - best_model_dir = getattr(hparams, "avg_best_" + metric + "_dir") - summary_writer = tf.summary.FileWriter( - os.path.join(best_model_dir, summary_name), infer_model.graph) - result_summary, best_global_step, _ = run_full_eval( - best_model_dir, infer_model, 
infer_sess, eval_model, eval_sess, - hparams, summary_writer, sample_src_data, sample_tgt_data) - print_step_info("# Averaged Best %s, " % metric, best_global_step, info, - result_summary, log_f) - summary_writer.close() - - return final_eval_metrics, global_step - - -def _format_results(name, ppl, scores, metrics): - """Format results.""" - result_str = "" - if ppl: - result_str = "%s ppl %.2f" % (name, ppl) - if scores: - for metric in metrics: - if result_str: - result_str += ", %s %s %.1f" % (name, metric, scores[metric]) - else: - result_str = "%s %s %.1f" % (name, metric, scores[metric]) - return result_str - - -def get_best_results(hparams): - """Summary of the current best results.""" - tokens = [] - for metric in hparams.metrics: - tokens.append("%s %.2f" % (metric, getattr(hparams, "best_" + metric))) - return ", ".join(tokens) - - -def _internal_eval(model, global_step, sess, iterator, iterator_feed_dict, - summary_writer, label): - """Computing perplexity.""" - sess.run(iterator.initializer, feed_dict=iterator_feed_dict) - ppl = model_helper.compute_perplexity(model, sess, label) - utils.add_summary(summary_writer, global_step, "%s_ppl" % label, ppl) - return ppl - - -def _sample_decode(model, global_step, sess, hparams, iterator, src_data, - tgt_data, iterator_src_placeholder, - iterator_batch_size_placeholder, summary_writer): - """Pick a sentence and decode.""" - decode_id = random.randint(0, len(src_data) - 1) - utils.print_out(" # %d" % decode_id) - - iterator_feed_dict = { - iterator_src_placeholder: [src_data[decode_id]], - iterator_batch_size_placeholder: 1, - } - sess.run(iterator.initializer, feed_dict=iterator_feed_dict) - - nmt_outputs, attention_summary = model.decode(sess) - - if hparams.infer_mode == "beam_search": - # get the top translation. 
- nmt_outputs = nmt_outputs[0] - - translation = nmt_utils.get_translation( - nmt_outputs, - sent_id=0, - tgt_eos=hparams.eos, - subword_option=hparams.subword_option) - utils.print_out(" src: %s" % src_data[decode_id]) - utils.print_out(" ref: %s" % tgt_data[decode_id]) - utils.print_out(b" nmt: " + translation) - - # Summary - if attention_summary is not None: - summary_writer.add_summary(attention_summary, global_step) - - -def _external_eval(model, global_step, sess, hparams, iterator, - iterator_feed_dict, tgt_file, label, summary_writer, - save_on_best, avg_ckpts=False): - """External evaluation such as BLEU and ROUGE scores.""" - out_dir = hparams.out_dir - decode = global_step > 0 - - if avg_ckpts: - label = "avg_" + label - - if decode: - utils.print_out("# External evaluation, global step %d" % global_step) - - sess.run(iterator.initializer, feed_dict=iterator_feed_dict) - - output = os.path.join(out_dir, "output_%s" % label) - scores, _, _ = nmt_utils.decode_and_evaluate( - label, - model, - sess, - output, - ref_file=tgt_file, - metrics=hparams.metrics, - subword_option=hparams.subword_option, - beam_width=hparams.beam_width, - tgt_eos=hparams.eos, - decode=decode, - infer_mode=hparams.infer_mode) - # Save on best metrics - if decode: - for metric in hparams.metrics: - if avg_ckpts: - best_metric_label = "avg_best_" + metric - else: - best_metric_label = "best_" + metric - - utils.add_summary(summary_writer, global_step, "%s_%s" % (label, metric), - scores[metric]) - # metric: larger is better - if save_on_best and scores[metric] > getattr(hparams, best_metric_label): - setattr(hparams, best_metric_label, scores[metric]) - model.saver.save( - sess, - os.path.join( - getattr(hparams, best_metric_label + "_dir"), "translate.ckpt"), - global_step=model.global_step) - utils.save_hparams(out_dir, hparams) - return scores diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/__init__.py b/models/language_translation/tensorflow/gnmt/inference/fp32/utils/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/evaluation_utils.py b/models/language_translation/tensorflow/gnmt/inference/fp32/utils/evaluation_utils.py deleted file mode 100644 index b779159de..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/evaluation_utils.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utility for evaluating various tasks, e.g., translation & summarization.""" -import codecs -import os -import re -import subprocess -import shlex - -import tensorflow as tf - -from fp32.scripts import bleu -from fp32.scripts import rouge - - -__all__ = ["evaluate"] - - -def evaluate(ref_file, trans_file, metric, subword_option=None): - """Pick a metric and evaluate depending on task.""" - # BLEU scores for translation task - if metric.lower() == "bleu": - evaluation_score = _bleu(ref_file, trans_file, - subword_option=subword_option) - # ROUGE scores for summarization tasks - elif metric.lower() == "rouge": - evaluation_score = _rouge(ref_file, trans_file, - subword_option=subword_option) - elif metric.lower() == "accuracy": - evaluation_score = _accuracy(ref_file, trans_file) - elif metric.lower() == "word_accuracy": - evaluation_score = _word_accuracy(ref_file, trans_file) - else: - raise ValueError("Unknown metric %s" % metric) - - return evaluation_score - - -def _clean(sentence, subword_option): - """Clean and handle BPE or SPM outputs.""" - sentence = sentence.strip() - - # BPE - if subword_option == "bpe": - sentence = re.sub("@@ ", "", sentence) - - # SPM - elif subword_option == "spm": - sentence = u"".join(sentence.split()).replace(u"\u2581", u" ").lstrip() - - return sentence - - -# Follow //transconsole/localization/machine_translation/metrics/bleu_calc.py -def _bleu(ref_file, trans_file, subword_option=None): - """Compute BLEU scores and handling BPE.""" - max_order = 4 - smooth = False - - ref_files = [ref_file] - reference_text = [] - for reference_filename in ref_files: - with codecs.getreader("utf-8")( - tf.gfile.GFile(reference_filename, "rb")) as fh: - reference_text.append(fh.readlines()) - - per_segment_references = [] - for references in zip(*reference_text): - reference_list = [] - for reference in references: - reference = _clean(reference, subword_option) - reference_list.append(reference.split(" ")) - per_segment_references.append(reference_list) - - translations = [] - with codecs.getreader("utf-8")(tf.gfile.GFile(trans_file, "rb")) as fh: - for line in fh: - line = _clean(line, subword_option=None) - translations.append(line.split(" ")) - - # bleu_score, precisions, bp, ratio, translation_length, reference_length - bleu_score, _, _, _, _, _ = bleu.compute_bleu( - per_segment_references, translations, max_order, smooth) - return 100 * bleu_score - - -def _rouge(ref_file, summarization_file, subword_option=None): - """Compute ROUGE scores and handling BPE.""" - - references = [] - with codecs.getreader("utf-8")(tf.gfile.GFile(ref_file, "rb")) as fh: - for line in fh: - references.append(_clean(line, subword_option)) - - hypotheses = [] - with codecs.getreader("utf-8")( - tf.gfile.GFile(summarization_file, "rb")) as fh: - for line in fh: - hypotheses.append(_clean(line, subword_option=None)) - - rouge_score_map = rouge.rouge(hypotheses, references) - return 100 * rouge_score_map["rouge_l/f_score"] - - -def _accuracy(label_file, pred_file): - """Compute accuracy, each 
line contains a label.""" - - with codecs.getreader("utf-8")(tf.gfile.GFile(label_file, "rb")) as label_fh: - with codecs.getreader("utf-8")(tf.gfile.GFile(pred_file, "rb")) as pred_fh: - count = 0.0 - match = 0.0 - for label in label_fh: - label = label.strip() - pred = pred_fh.readline().strip() - if label == pred: - match += 1 - count += 1 - return 100 * match / count - - -def _word_accuracy(label_file, pred_file): - """Compute accuracy on per word basis.""" - - with codecs.getreader("utf-8")(tf.gfile.GFile(label_file, "r")) as label_fh: - with codecs.getreader("utf-8")(tf.gfile.GFile(pred_file, "r")) as pred_fh: - total_acc, total_count = 0., 0. - for sentence in label_fh: - labels = sentence.strip().split(" ") - preds = pred_fh.readline().strip().split(" ") - match = 0.0 - for pos in range(min(len(labels), len(preds))): - label = labels[pos] - pred = preds[pos] - if label == pred: - match += 1 - total_acc += 100 * match / max(len(labels), len(preds)) - total_count += 1 - return total_acc / total_count - - -def _moses_bleu(multi_bleu_script, tgt_test, trans_file, subword_option=None): - """Compute BLEU scores using Moses multi-bleu.perl script.""" - - # TODO(thangluong): perform rewrite using python - # BPE - if subword_option == "bpe": - debpe_tgt_test = tgt_test + ".debpe" - if not os.path.exists(debpe_tgt_test): - subprocess.call(shlex.split("cp %s %s" % (tgt_test, debpe_tgt_test)), shell=False) - subprocess.call(shlex.split("sed s/@@ //g %s" % (debpe_tgt_test)), - shell=False) - tgt_test = debpe_tgt_test - elif subword_option == "spm": - despm_tgt_test = tgt_test + ".despm" - if not os.path.exists(despm_tgt_test): - subprocess.call("cp %s %s" % (tgt_test, despm_tgt_test)) - subprocess.call("sed s/ //g %s" % (despm_tgt_test)) - subprocess.call(u"sed s/^\u2581/g %s" % (despm_tgt_test)) - subprocess.call(u"sed s/\u2581/ /g %s" % (despm_tgt_test)) - tgt_test = despm_tgt_test - cmd = "%s %s < %s" % (multi_bleu_script, tgt_test, trans_file) - - # subprocess - bleu_output = subprocess.check_output(shlex.split(cmd), shell=False) - - # extract BLEU score - m = re.search("BLEU = (.+?),", bleu_output) - bleu_score = float(m.group(1)) - - return bleu_score diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/iterator_utils.py b/models/language_translation/tensorflow/gnmt/inference/fp32/utils/iterator_utils.py deleted file mode 100644 index 91b7f70d7..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/iterator_utils.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""For loading data into NMT models.""" -from __future__ import print_function - -import collections - -import tensorflow as tf - -from fp32.utils import vocab_utils - - -__all__ = ["BatchedInput", "get_iterator", "get_infer_iterator"] - - -# NOTE(ebrevdo): When we subclass this, instances' __dict__ becomes empty. 
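Besides BLEU and ROUGE, the deleted evaluation utilities above include simple sentence and per-word accuracy metrics. A pure-Python sketch of the per-word variant on in-memory token lists (the deleted `_word_accuracy` reads a label file and a prediction file instead); the sample sentences are invented.

```python
def word_accuracy(label_sents, pred_sents):
    """Average per-sentence percentage of positions where label and prediction agree."""
    total_acc = 0.0
    for labels, preds in zip(label_sents, pred_sents):
        match = sum(1 for l, p in zip(labels, preds) if l == p)
        total_acc += 100.0 * match / max(len(labels), len(preds))
    return total_acc / len(label_sents)

labels = [["ein", "guter", "Tag"], ["hallo", "welt"]]
preds = [["ein", "schlechter", "Tag"], ["hallo", "welt"]]
print("word accuracy %.2f" % word_accuracy(labels, preds))  # 83.33
```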
-class BatchedInput( - collections.namedtuple("BatchedInput", - ("initializer", "source", "target_input", - "target_output", "source_sequence_length", - "target_sequence_length"))): - pass - - -def get_infer_iterator(src_dataset, - src_vocab_table, - batch_size, - eos, - src_max_len=None, - use_char_encode=False): - if use_char_encode: - src_eos_id = vocab_utils.EOS_CHAR_ID - else: - src_eos_id = tf.cast(src_vocab_table.lookup(tf.constant(eos)), tf.int32) - src_dataset = src_dataset.map(lambda src: tf.string_split([src]).values) - - if src_max_len: - src_dataset = src_dataset.map(lambda src: src[:src_max_len]) - - if use_char_encode: - # Convert the word strings to character ids - src_dataset = src_dataset.map( - lambda src: tf.reshape(vocab_utils.tokens_to_bytes(src), [-1])) - else: - # Convert the word strings to ids - src_dataset = src_dataset.map( - lambda src: tf.cast(src_vocab_table.lookup(src), tf.int32)) - - # Add in the word counts. - if use_char_encode: - src_dataset = src_dataset.map( - lambda src: (src, - tf.to_int32( - tf.size(src) / vocab_utils.DEFAULT_CHAR_MAXLEN))) - else: - src_dataset = src_dataset.map(lambda src: (src, tf.size(src))) - - def batching_func(x): - return x.padded_batch( - batch_size, - # The entry is the source line rows; - # this has unknown-length vectors. The last entry is - # the source row size; this is a scalar. - padded_shapes=( - tf.TensorShape([None]), # src - tf.TensorShape([])), # src_len - # Pad the source sequences with eos tokens. - # (Though notice we don't generally need to do this since - # later on we will be masking out calculations past the true sequence. - padding_values=( - src_eos_id, # src - 0)) # src_len -- unused - - batched_dataset = batching_func(src_dataset) - batched_iter = batched_dataset.make_initializable_iterator() - (src_ids, src_seq_len) = batched_iter.get_next() - return BatchedInput( - initializer=batched_iter.initializer, - source=src_ids, - target_input=None, - target_output=None, - source_sequence_length=src_seq_len, - target_sequence_length=None) - - -def get_iterator(src_dataset, - tgt_dataset, - src_vocab_table, - tgt_vocab_table, - batch_size, - sos, - eos, - random_seed, - num_buckets, - src_max_len=None, - tgt_max_len=None, - num_parallel_calls=4, - output_buffer_size=None, - skip_count=None, - num_shards=1, - shard_index=0, - reshuffle_each_iteration=True, - use_char_encode=False): - if not output_buffer_size: - output_buffer_size = batch_size * 1000 - - if use_char_encode: - src_eos_id = vocab_utils.EOS_CHAR_ID - else: - src_eos_id = tf.cast(src_vocab_table.lookup(tf.constant(eos)), tf.int32) - - tgt_sos_id = tf.cast(tgt_vocab_table.lookup(tf.constant(sos)), tf.int32) - tgt_eos_id = tf.cast(tgt_vocab_table.lookup(tf.constant(eos)), tf.int32) - - src_tgt_dataset = tf.data.Dataset.zip((src_dataset, tgt_dataset)) - - src_tgt_dataset = src_tgt_dataset.shard(num_shards, shard_index) - if skip_count is not None: - src_tgt_dataset = src_tgt_dataset.skip(skip_count) - - src_tgt_dataset = src_tgt_dataset.shuffle( - output_buffer_size, random_seed, reshuffle_each_iteration) - - src_tgt_dataset = src_tgt_dataset.map( - lambda src, tgt: ( - tf.string_split([src]).values, tf.string_split([tgt]).values), - num_parallel_calls=num_parallel_calls).prefetch(output_buffer_size) - - # Filter zero length input sequences. 
- src_tgt_dataset = src_tgt_dataset.filter( - lambda src, tgt: tf.logical_and(tf.size(src) > 0, tf.size(tgt) > 0)) - - if src_max_len: - src_tgt_dataset = src_tgt_dataset.map( - lambda src, tgt: (src[:src_max_len], tgt), - num_parallel_calls=num_parallel_calls).prefetch(output_buffer_size) - if tgt_max_len: - src_tgt_dataset = src_tgt_dataset.map( - lambda src, tgt: (src, tgt[:tgt_max_len]), - num_parallel_calls=num_parallel_calls).prefetch(output_buffer_size) - - # Convert the word strings to ids. Word strings that are not in the - # vocab get the lookup table's default_value integer. - if use_char_encode: - src_tgt_dataset = src_tgt_dataset.map( - lambda src, tgt: (tf.reshape(vocab_utils.tokens_to_bytes(src), [-1]), - tf.cast(tgt_vocab_table.lookup(tgt), tf.int32)), - num_parallel_calls=num_parallel_calls) - else: - src_tgt_dataset = src_tgt_dataset.map( - lambda src, tgt: (tf.cast(src_vocab_table.lookup(src), tf.int32), - tf.cast(tgt_vocab_table.lookup(tgt), tf.int32)), - num_parallel_calls=num_parallel_calls) - - src_tgt_dataset = src_tgt_dataset.prefetch(output_buffer_size) - # Create a tgt_input prefixed with and a tgt_output suffixed with . - src_tgt_dataset = src_tgt_dataset.map( - lambda src, tgt: (src, - tf.concat(([tgt_sos_id], tgt), 0), - tf.concat((tgt, [tgt_eos_id]), 0)), - num_parallel_calls=num_parallel_calls).prefetch(output_buffer_size) - # Add in sequence lengths. - if use_char_encode: - src_tgt_dataset = src_tgt_dataset.map( - lambda src, tgt_in, tgt_out: ( - src, tgt_in, tgt_out, - tf.to_int32(tf.size(src) / vocab_utils.DEFAULT_CHAR_MAXLEN), - tf.size(tgt_in)), - num_parallel_calls=num_parallel_calls) - else: - src_tgt_dataset = src_tgt_dataset.map( - lambda src, tgt_in, tgt_out: ( - src, tgt_in, tgt_out, tf.size(src), tf.size(tgt_in)), - num_parallel_calls=num_parallel_calls) - - src_tgt_dataset = src_tgt_dataset.prefetch(output_buffer_size) - - # Bucket by source sequence length (buckets for lengths 0-9, 10-19, ...) - def batching_func(x): - return x.padded_batch( - batch_size, - # The first three entries are the source and target line rows; - # these have unknown-length vectors. The last two entries are - # the source and target row sizes; these are scalars. - padded_shapes=( - tf.TensorShape([None]), # src - tf.TensorShape([None]), # tgt_input - tf.TensorShape([None]), # tgt_output - tf.TensorShape([]), # src_len - tf.TensorShape([])), # tgt_len - # Pad the source and target sequences with eos tokens. - # (Though notice we don't generally need to do this since - # later on we will be masking out calculations past the true sequence. - padding_values=( - src_eos_id, # src - tgt_eos_id, # tgt_input - tgt_eos_id, # tgt_output - 0, # src_len -- unused - 0)) # tgt_len -- unused - - if num_buckets > 1: - - def key_func(unused_1, unused_2, unused_3, src_len, tgt_len): - # Calculate bucket_width by maximum source sequence length. - # Pairs with length [0, bucket_width) go to bucket 0, length - # [bucket_width, 2 * bucket_width) go to bucket 1, etc. Pairs with length - # over ((num_bucket-1) * bucket_width) words all go into the last bucket. - if src_max_len: - bucket_width = (src_max_len + num_buckets - 1) // num_buckets - else: - bucket_width = 10 - - # Bucket sentence pairs by the length of their source sentence and target - # sentence. 
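`get_iterator` above pads every batch to its longest sequence and, when `num_buckets > 1`, groups sentence pairs of similar length into buckets so that little padding is wasted. A pure-Python sketch of the same bucket assignment and padding rules (bucket width derived from `src_max_len`, padding with the EOS id); the ids and lengths below are made up.

```python
def bucket_id(src_len, tgt_len, src_max_len=50, num_buckets=5):
    """Pairs of length [0, w) go to bucket 0, [w, 2w) to bucket 1, ..., capped at num_buckets."""
    bucket_width = (src_max_len + num_buckets - 1) // num_buckets
    return min(num_buckets, max(src_len // bucket_width, tgt_len // bucket_width))

def pad_batch(sequences, eos_id=2):
    """Pad every sequence in the batch with eos_id up to the longest one."""
    max_len = max(len(s) for s in sequences)
    return [s + [eos_id] * (max_len - len(s)) for s in sequences]

batch = [[4, 9, 17], [4, 25, 31, 8, 12], [7, 5]]
print(bucket_id(src_len=3, tgt_len=5))  # 0  (both lengths fall in [0, 10))
print(pad_batch(batch))                 # every row padded to length 5 with id 2
```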
- bucket_id = tf.maximum(src_len // bucket_width, tgt_len // bucket_width) - return tf.to_int64(tf.minimum(num_buckets, bucket_id)) - - def reduce_func(unused_key, windowed_data): - return batching_func(windowed_data) - - batched_dataset = src_tgt_dataset.apply( - tf.contrib.data.group_by_window( - key_func=key_func, reduce_func=reduce_func, window_size=batch_size)) - - else: - batched_dataset = batching_func(src_tgt_dataset) - batched_iter = batched_dataset.make_initializable_iterator() - (src_ids, tgt_input_ids, tgt_output_ids, src_seq_len, - tgt_seq_len) = (batched_iter.get_next()) - return BatchedInput( - initializer=batched_iter.initializer, - source=src_ids, - target_input=tgt_input_ids, - target_output=tgt_output_ids, - source_sequence_length=src_seq_len, - target_sequence_length=tgt_seq_len) diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/misc_utils.py b/models/language_translation/tensorflow/gnmt/inference/fp32/utils/misc_utils.py deleted file mode 100644 index 5f3c18ba9..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/misc_utils.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Generally useful utility functions.""" -from __future__ import print_function - -import codecs -import collections -import json -import math -import os -import sys -import time -from distutils import version - -import numpy as np -from distutils.version import LooseVersion, StrictVersion -import tensorflow as tf - - -def check_tensorflow_version(): - min_tf_version = "1.4.0-dev20171024" - if (version.LooseVersion(tf.__version__) < - version.LooseVersion(min_tf_version)): - raise EnvironmentError("Tensorflow version must >= %s" % min_tf_version) - - -def safe_exp(value): - """Exponentiation with catching of overflow error.""" - try: - ans = math.exp(value) - except OverflowError: - ans = float("inf") - return ans - - -def print_time(s, start_time): - """Take a start time, print elapsed duration, and return a new time.""" - print("%s, time %ds, %s." 
% (s, (time.time() - start_time), time.ctime())) - sys.stdout.flush() - return time.time() - - -def print_out(s, f=None, new_line=True): - """Similar to print but with support to flush and output to a file.""" - if isinstance(s, bytes): - s = s.decode("utf-8") - - if f: - f.write(s.encode("utf-8")) - if new_line: - f.write(b"\n") - - # stdout - out_s = s.encode("utf-8") - if not isinstance(out_s, str): - out_s = out_s.decode("utf-8") - print(out_s, end="", file=sys.stdout) - - if new_line: - sys.stdout.write("\n") - sys.stdout.flush() - - -def print_hparams(hparams, skip_patterns=None, header=None): - """Print hparams, can skip keys based on pattern.""" - if header: - print_out("%s" % header) - values = hparams.values() - for key in sorted(values.keys()): - if not skip_patterns or all( - [skip_pattern not in key for skip_pattern in skip_patterns]): - print_out(" %s=%s" % (key, str(values[key]))) - - -def load_hparams(model_dir): - """Load hparams from an existing model directory.""" - hparams_file = os.path.join(model_dir, "hparams") - if tf.gfile.Exists(hparams_file): - print_out("# Loading hparams from %s" % hparams_file) - with codecs.getreader("utf-8")(tf.gfile.GFile(hparams_file, "rb")) as f: - try: - hparams_values = json.load(f) - hparams = tf.contrib.training.HParams(**hparams_values) - except ValueError: - print_out(" can't load hparams file") - return None - return hparams - else: - return None - - -def maybe_parse_standard_hparams(hparams, hparams_path): - """Override hparams values with existing standard hparams config.""" - if hparams_path and tf.gfile.Exists(hparams_path): - print_out("# Loading standard hparams from %s" % hparams_path) - with codecs.getreader("utf-8")(tf.gfile.GFile(hparams_path, "rb")) as f: - hparams.parse_json(f.read()) - return hparams - - -def save_hparams(out_dir, hparams): - """Save hparams.""" - hparams_file = os.path.join(out_dir, "hparams") - print_out(" saving hparams to %s" % hparams_file) - with codecs.getwriter("utf-8")(tf.gfile.GFile(hparams_file, "wb")) as f: - f.write(hparams.to_json(indent=4, sort_keys=True)) - - -def debug_tensor(s, msg=None, summarize=10): - """Print the shape and value of a tensor at test time. Return a new tensor.""" - if not msg: - msg = s.name - return tf.Print(s, [tf.shape(s), s], msg + " ", summarize=summarize) - - -def add_summary(summary_writer, global_step, tag, value): - """Add a new summary to the current summary_writer. - Useful to log things that are not part of the training graph, e.g., tag=BLEU. 
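`save_hparams` / `load_hparams` above persist the hyper-parameters as JSON in the output directory, and `maybe_parse_standard_hparams` lets one of the standard JSON files (such as the deleted `wmt16_gnmt_4_layer.json`) override them. A minimal sketch of the same round-trip using a plain dict and the `json` module rather than the `tf.contrib.training.HParams` wrapper the deleted code uses; the path and values are illustrative.

```python
import json
import os

def save_hparams(out_dir, hparams):
    with open(os.path.join(out_dir, "hparams"), "w") as f:
        json.dump(hparams, f, indent=4, sort_keys=True)

def load_hparams(out_dir, standard_hparams_path=None):
    with open(os.path.join(out_dir, "hparams")) as f:
        hparams = json.load(f)
    # A standard hparams file (wmt16_gnmt_4_layer.json style) overrides stored values.
    if standard_hparams_path and os.path.exists(standard_hparams_path):
        with open(standard_hparams_path) as f:
            hparams.update(json.load(f))
    return hparams

hparams = {"num_units": 1024, "num_encoder_layers": 4, "beam_width": 10}
save_hparams("/tmp", hparams)
print(load_hparams("/tmp")["num_units"])  # 1024
```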
- """ - summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) - summary_writer.add_summary(summary, global_step) - - -def get_config_proto(log_device_placement=False, allow_soft_placement=True, - num_intra_threads=0, num_inter_threads=0): - # GPU options: - # https://www.tensorflow.org/versions/r0.10/how_tos/using_gpu/index.html - config_proto = tf.ConfigProto( - log_device_placement=log_device_placement, - allow_soft_placement=allow_soft_placement) - config_proto.gpu_options.allow_growth = True - - # CPU threads options - if num_intra_threads: - config_proto.intra_op_parallelism_threads = num_intra_threads - if num_inter_threads: - config_proto.inter_op_parallelism_threads = num_inter_threads - - return config_proto - - -def format_text(words): - """Convert a sequence words into sentence.""" - if (not hasattr(words, "__len__") and # for numpy array - not isinstance(words, collections.Iterable)): - words = [words] - return b" ".join(words) - - -def format_bpe_text(symbols, delimiter=b"@@"): - """Convert a sequence of bpe words into sentence.""" - words = [] - word = b"" - if isinstance(symbols, str): - symbols = symbols.encode() - delimiter_len = len(delimiter) - for symbol in symbols: - if len(symbol) >= delimiter_len and symbol[-delimiter_len:] == delimiter: - word += symbol[:-delimiter_len] - else: # end of a word - word += symbol - words.append(word) - word = b"" - return b" ".join(words) - - -def format_spm_text(symbols): - """Decode a text in SPM (https://github.com/google/sentencepiece) format.""" - return u"".join(format_text(symbols).decode("utf-8").split()).replace( - u"\u2581", u" ").strip().encode("utf-8") diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/nmt_utils.py b/models/language_translation/tensorflow/gnmt/inference/fp32/utils/nmt_utils.py deleted file mode 100644 index cf9974756..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/nmt_utils.py +++ /dev/null @@ -1,144 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - - -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Utility functions specifically for NMT.""" -from __future__ import print_function - -import codecs -import time -import numpy as np -import tensorflow as tf - -from fp32.utils import evaluation_utils -from fp32.utils import misc_utils as utils - -__all__ = ["decode_and_evaluate", "get_translation"] - - -def decode_and_evaluate(name, - model, - sess, - trans_file, - ref_file, - metrics, - subword_option, - beam_width, - tgt_eos, - num_translations_per_input=1, - decode=True, - infer_mode="greedy", - index_pair=[]): - """Decode a test set and compute a score according to the evaluation task.""" - # Decode - end_time = None - num_sentences = None - if decode: - utils.print_out(" decoding to output %s" % trans_file) - - start_time = time.time() - num_sentences = 0 - with codecs.getwriter("utf-8")( - tf.gfile.GFile(trans_file, mode="wb")) as trans_f: - trans_f.write("") # Write empty string to ensure file is created. - - if infer_mode == "greedy": - num_translations_per_input = 1 - elif infer_mode == "beam_search": - num_translations_per_input = min(num_translations_per_input, beam_width) - translation = [] - while True: - try: - nmt_outputs, _ = model.decode(sess) - if infer_mode != "beam_search": - nmt_outputs = np.expand_dims(nmt_outputs, 0) - - batch_size = nmt_outputs.shape[1] - num_sentences += batch_size - - for sent_id in range(batch_size): - for beam_id in range(num_translations_per_input): - translation.append(get_translation( - nmt_outputs[beam_id], - sent_id, - tgt_eos=tgt_eos, - subword_option=subword_option)) - except tf.errors.OutOfRangeError: - end_time = time.time() - utils.print_time( - " done, num sentences %d, num translations per input %d" % - (num_sentences, num_translations_per_input), start_time) - break - if len(index_pair) is 0: - for sentence in translation: - trans_f.write((sentence + b"\n").decode("utf-8")) - else: - for i in index_pair: - trans_f.write((translation[index_pair[i]] + b"\n").decode("utf-8")) - - # Evaluation - evaluation_scores = {} - if ref_file and tf.gfile.Exists(trans_file): - for metric in metrics: - score = evaluation_utils.evaluate( - ref_file, - trans_file, - metric, - subword_option=subword_option) - evaluation_scores[metric] = score - utils.print_out(" %s %s: %.1f" % (metric, name, score)) - - return evaluation_scores, end_time, num_sentences - - -def get_translation(nmt_outputs, sent_id, tgt_eos, subword_option): - """Given batch decoding outputs, select a sentence and turn to text.""" - if tgt_eos: - tgt_eos = tgt_eos.encode("utf-8") - # Select a sentence - output = nmt_outputs[sent_id, :].tolist() - - # If there is an eos symbol in outputs, cut them at that point. 
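`get_translation` above turns a decoded id sequence into text by cutting at the first EOS symbol and then undoing the subword segmentation (BPE `@@ ` markers or SPM pieces). A small sketch of the BPE path on plain Python strings, assuming `</s>` as the EOS token; the deleted code works on byte strings via `format_bpe_text` in `misc_utils`.

```python
import re

def postprocess(tokens, eos="</s>"):
    """Cut the output at the first EOS token and merge BPE subwords ('@@ ' markers)."""
    if eos in tokens:
        tokens = tokens[:tokens.index(eos)]
    text = " ".join(tokens)
    return re.sub(r"@@ ", "", text)  # "Welt@@ meister" -> "Weltmeister"

output = ["das", "ist", "der", "Welt@@", "meister", "</s>", "<pad>"]
print(postprocess(output))  # das ist der Weltmeister
```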
- if tgt_eos and tgt_eos in output: - output = output[:output.index(tgt_eos)] - - if subword_option == "bpe": # BPE - translation = utils.format_bpe_text(output) - elif subword_option == "spm": # SPM - translation = utils.format_spm_text(output) - else: - translation = utils.format_text(output) - - return translation diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/standard_hparams_utils.py b/models/language_translation/tensorflow/gnmt/inference/fp32/utils/standard_hparams_utils.py deleted file mode 100644 index 55d6f76d2..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/standard_hparams_utils.py +++ /dev/null @@ -1,132 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - - -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""standard hparams utils.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -def create_standard_hparams(): - return tf.contrib.training.HParams( - # Data - src="", - tgt="", - train_prefix="", - dev_prefix="", - test_prefix="", - vocab_prefix="", - embed_prefix="", - out_dir="", - - # Networks - num_units=512, - num_encoder_layers=2, - num_decoder_layers=2, - dropout=0.2, - unit_type="lstm", - encoder_type="bi", - residual=False, - time_major=True, - num_embeddings_partitions=0, - num_enc_emb_partitions=0, - num_dec_emb_partitions=0, - - # Attention mechanisms - attention="scaled_luong", - attention_architecture="standard", - output_attention=True, - pass_hidden_state=True, - - # Train - optimizer="sgd", - batch_size=128, - init_op="uniform", - init_weight=0.1, - max_gradient_norm=5.0, - learning_rate=1.0, - warmup_steps=0, - warmup_scheme="t2t", - decay_scheme="luong234", - colocate_gradients_with_ops=True, - num_train_steps=12000, - num_sampled_softmax=0, - - # Data constraints - num_buckets=5, - max_train=0, - src_max_len=50, - tgt_max_len=50, - src_max_len_infer=0, - tgt_max_len_infer=0, - - # Data format - sos="", - eos="", - subword_option="", - use_char_encode=False, - check_special_token=True, - - # Misc - forget_bias=1.0, - num_gpus=1, - epoch_step=0, # record where we were within an epoch. 
- steps_per_stats=100, - steps_per_external_eval=0, - share_vocab=False, - metrics=["bleu"], - log_device_placement=False, - random_seed=None, - # only enable beam search during inference when beam_width > 0. - beam_width=0, - length_penalty_weight=0.0, - override_loaded_hparams=True, - num_keep_ckpts=5, - avg_ckpts=False, - - # For inference - inference_indices=None, - infer_batch_size=32, - sampling_temperature=0.0, - num_translations_per_input=1, - infer_mode="greedy", - - # Language model - language_model=False, - ) diff --git a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/vocab_utils.py b/models/language_translation/tensorflow/gnmt/inference/fp32/utils/vocab_utils.py deleted file mode 100644 index 59d909b50..000000000 --- a/models/language_translation/tensorflow/gnmt/inference/fp32/utils/vocab_utils.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utility to handle vocabularies.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import codecs -import os -import tensorflow as tf - -from tensorflow.python.ops import lookup_ops - -from fp32.utils import misc_utils as utils - -# word level special token -UNK = "" -SOS = "" -EOS = "" -UNK_ID = 0 - -# char ids 0-255 come from utf-8 encoding bytes -# assign 256-300 to special chars -BOS_CHAR_ID = 256 # -EOS_CHAR_ID = 257 # -BOW_CHAR_ID = 258 # -EOW_CHAR_ID = 259 # -PAD_CHAR_ID = 260 # - -DEFAULT_CHAR_MAXLEN = 50 # max number of chars for each word. - - -def _string_to_bytes(text, max_length): - """Given string and length, convert to byte seq of at most max_length. - - This process mimics docqa/elmo's preprocessing: - https://github.com/allenai/document-qa/blob/master/docqa/elmo/data.py - - Note that we make use of BOS_CHAR_ID and EOS_CHAR_ID in iterator_utils.py & - our usage differs from docqa/elmo. - - Args: - text: tf.string tensor of shape [] - max_length: max number of chars for each word. - - Returns: - A tf.int32 tensor of the byte encoded text. - """ - byte_ids = tf.to_int32(tf.decode_raw(text, tf.uint8)) - byte_ids = byte_ids[:max_length - 2] - padding = tf.fill([max_length - tf.shape(byte_ids)[0] - 2], PAD_CHAR_ID) - byte_ids = tf.concat( - [[BOW_CHAR_ID], byte_ids, [EOW_CHAR_ID], padding], axis=0) - tf.logging.info(byte_ids) - - byte_ids = tf.reshape(byte_ids, [max_length]) - tf.logging.info(byte_ids.get_shape().as_list()) - return byte_ids + 1 - - -def tokens_to_bytes(tokens): - """Given a sequence of strings, map to sequence of bytes. - - Args: - tokens: A tf.string tensor - - Returns: - A tensor of shape words.shape + [bytes_per_word] containing byte versions - of each word. 
- """ - bytes_per_word = DEFAULT_CHAR_MAXLEN - with tf.device("/cpu:0"): - tf.assert_rank(tokens, 1) - shape = tf.shape(tokens) - tf.logging.info(tokens) - tokens_flat = tf.reshape(tokens, [-1]) - as_bytes_flat = tf.map_fn( - fn=lambda x: _string_to_bytes(x, max_length=bytes_per_word), - elems=tokens_flat, - dtype=tf.int32, - back_prop=False) - tf.logging.info(as_bytes_flat) - as_bytes = tf.reshape(as_bytes_flat, [shape[0], bytes_per_word]) - return as_bytes - - -def load_vocab(vocab_file): - vocab = [] - with codecs.getreader("utf-8")(tf.gfile.GFile(vocab_file, "rb")) as f: - vocab_size = 0 - for word in f: - vocab_size += 1 - vocab.append(word.strip()) - return vocab, vocab_size - - -def check_vocab(vocab_file, out_dir, check_special_token=True, sos=None, - eos=None, unk=None): - """Check if vocab_file doesn't exist, create from corpus_file.""" - if tf.gfile.Exists(vocab_file): - utils.print_out("# Vocab file %s exists" % vocab_file) - vocab, vocab_size = load_vocab(vocab_file) - if check_special_token: - # Verify if the vocab starts with unk, sos, eos - # If not, prepend those tokens & generate a new vocab file - if not unk: - unk = UNK - if not sos: - sos = SOS - if not eos: - eos = EOS - assert len(vocab) >= 3 - if vocab[0] != unk or vocab[1] != sos or vocab[2] != eos: - utils.print_out("The first 3 vocab words [%s, %s, %s]" - " are not [%s, %s, %s]" % - (vocab[0], vocab[1], vocab[2], unk, sos, eos)) - vocab = [unk, sos, eos] + vocab - vocab_size += 3 - new_vocab_file = os.path.join(out_dir, os.path.basename(vocab_file)) - with codecs.getwriter("utf-8")( - tf.gfile.GFile(new_vocab_file, "wb")) as f: - for word in vocab: - f.write("%s\n" % word) - vocab_file = new_vocab_file - else: - raise ValueError("vocab_file '%s' does not exist." % vocab_file) - - vocab_size = len(vocab) - return vocab_size, vocab_file - - -def create_vocab_tables(src_vocab_file, tgt_vocab_file, share_vocab): - """Creates vocab tables for src_vocab_file and tgt_vocab_file.""" - src_vocab_table = lookup_ops.index_table_from_file( - src_vocab_file, default_value=UNK_ID) - if share_vocab: - tgt_vocab_table = src_vocab_table - else: - tgt_vocab_table = lookup_ops.index_table_from_file( - tgt_vocab_file, default_value=UNK_ID) - return src_vocab_table, tgt_vocab_table - - -def load_embed_txt(embed_file): - """Load embed_file into a python dictionary. - - Note: the embed_file should be a Glove/word2vec formatted txt file. Assuming - Here is an exampe assuming embed_size=5: - - the -0.071549 0.093459 0.023738 -0.090339 0.056123 - to 0.57346 0.5417 -0.23477 -0.3624 0.4037 - and 0.20327 0.47348 0.050877 0.002103 0.060547 - - For word2vec format, the first line will be: . - - Args: - embed_file: file path to the embedding file. - Returns: - a dictionary that maps word to vector, and the size of embedding dimensions. - """ - emb_dict = dict() - emb_size = None - - is_first_line = True - with codecs.getreader("utf-8")(tf.gfile.GFile(embed_file, "rb")) as f: - for line in f: - tokens = line.rstrip().split(" ") - if is_first_line: - is_first_line = False - if len(tokens) == 2: # header line - emb_size = int(tokens[1]) - continue - word = tokens[0] - vec = list(map(float, tokens[1:])) - emb_dict[word] = vec - if emb_size: - if emb_size != len(vec): - utils.print_out( - "Ignoring %s since embeding size is inconsistent." 
% word) - del emb_dict[word] - else: - emb_size = len(vec) - return emb_dict, emb_size diff --git a/models/language_translation/tensorflow/gnmt/training/__init__.py b/models/language_translation/tensorflow/gnmt/training/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/language_translation/tensorflow/gnmt/training/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/language_translation/tensorflow/gnmt/training/fp32/multi_instances.patch b/models/language_translation/tensorflow/gnmt/training/fp32/multi_instances.patch deleted file mode 100644 index 4e9a40b79..000000000 --- a/models/language_translation/tensorflow/gnmt/training/fp32/multi_instances.patch +++ /dev/null @@ -1,909 +0,0 @@ -diff --git a/nmt/inference.py b/nmt/inference.py -index 2cbef07..b9e08f3 100644 ---- a/nmt/inference.py -+++ b/nmt/inference.py -@@ -98,7 +98,10 @@ def get_model_creator(hparams): - def start_sess_and_load_model(infer_model, ckpt_path): - """Start session and load model.""" - sess = tf.Session( -- graph=infer_model.graph, config=utils.get_config_proto()) -+ graph=infer_model.graph, config=utils.get_config_proto( -+ num_intra_threads=hparams.num_intra_threads, -+ num_inter_threads=hparams.num_inter_threads -+ )) - with infer_model.graph.as_default(): - loaded_infer_model = model_helper.load_model( - infer_model.model, ckpt_path, sess, "infer") -@@ -152,12 +155,25 @@ def single_worker_inference(sess, - - # Read data - infer_data = load_data(inference_input_file, hparams) -+ infer_data_feed = infer_data -+ -+ #sort the input file if no hparams.inference_indices is defined -+ index_pair = {} -+ new_input =[] -+ if hparams.inference_indices is None: -+ start_time = time.time() -+ input_length = [(len(line.split()), i) for i, line in enumerate(infer_data)] -+ sorted_input_bylens = sorted(input_length) -+ for ni, (_, oi) in enumerate(sorted_input_bylens): -+ new_input.append(infer_data[oi]) -+ index_pair[oi] = ni -+ infer_data_feed = new_input - - with infer_model.graph.as_default(): - sess.run( - infer_model.iterator.initializer, - feed_dict={ -- infer_model.src_placeholder: infer_data, -+ infer_model.src_placeholder: infer_data_feed, - infer_model.batch_size_placeholder: hparams.infer_batch_size - }) - # Decode -@@ -172,7 +188,7 @@ def single_worker_inference(sess, - tgt_eos=hparams.eos, - subword_option=hparams.subword_option) - else: -- nmt_utils.decode_and_evaluate( -+ _, end_time, num_sentences = nmt_utils.decode_and_evaluate( - "infer", - loaded_infer_model, - sess, -@@ -183,8 +199,13 @@ def single_worker_inference(sess, - beam_width=hparams.beam_width, - tgt_eos=hparams.eos, - num_translations_per_input=hparams.num_translations_per_input, -- infer_mode=hparams.infer_mode) -- -+ infer_mode=hparams.infer_mode, -+ index_pair=index_pair) -+ duration = end_time - start_time -+ if hparams.infer_batch_size is 1: -+ print(" The latency of the model is %.4f 
ms/sentences" % (1000*duration /num_sentences )) -+ else: -+ print(" The throughput of the model is %.4f sentences/s" % (num_sentences / duration)) - - def multi_worker_inference(sess, - infer_model, -diff --git a/nmt/model.py b/nmt/model.py -index e0c4f4e..ed1c15d 100644 ---- a/nmt/model.py -+++ b/nmt/model.py -@@ -23,6 +23,7 @@ import collections - import numpy as np - - import tensorflow as tf -+import horovod.tensorflow as hvd - - from . import model_helper - from .utils import iterator_utils -@@ -206,11 +207,21 @@ class BaseModel(object): - else: - raise ValueError("Unknown optimizer type %s" % hparams.optimizer) - -+ # Add Horovod Distributed Optimizer -+ opt = hvd.DistributedOptimizer(opt) -+ - # Gradients -- gradients = tf.gradients( -+ #gradients = tf.gradients( -+ # self.train_loss, -+ # params, -+ # colocate_gradients_with_ops=hparams.colocate_gradients_with_ops) -+ -+ # Horovod compute_gradients -+ # Allreduce the gradients before returning them -+ gradients, variables = zip(*opt.compute_gradients( - self.train_loss, - params, -- colocate_gradients_with_ops=hparams.colocate_gradients_with_ops) -+ colocate_gradients_with_ops=hparams.colocate_gradients_with_ops)) - - clipped_grads, grad_norm_summary, grad_norm = model_helper.gradient_clip( - gradients, max_gradient_norm=hparams.max_gradient_norm) -@@ -394,9 +405,13 @@ class BaseModel(object): - - ## Loss - if self.mode != tf.contrib.learn.ModeKeys.INFER: -- with tf.device(model_helper.get_device_str(self.num_encoder_layers - 1, -- self.num_gpus)): -- loss = self._compute_loss(logits, decoder_cell_outputs) -+ #with tf.device(model_helper.get_device_str(self.num_encoder_layers - 1, -+ # self.num_gpus)): -+ # loss = self._compute_loss(logits, decoder_cell_outputs) -+ -+ # Horovod -+ loss = self._compute_loss(logits, decoder_cell_outputs) -+ - else: - loss = tf.constant(0.0) - -@@ -649,7 +664,9 @@ class BaseModel(object): - target_output = self.iterator.target_output - if self.time_major: - target_output = tf.transpose(target_output) -- max_time = self.get_max_time(target_output) -+ #max_time = self.get_max_time(target_output) -+ max_time = tf.reduce_max(self.iterator.target_sequence_length) -+ target_output = tf.slice(target_output, [0, 0], [max_time, -1]) - - crossent = self._softmax_cross_entropy_loss( - logits, decoder_cell_outputs, target_output) -diff --git a/nmt/model_helper.py b/nmt/model_helper.py -index 65e1114..7e05591 100644 ---- a/nmt/model_helper.py -+++ b/nmt/model_helper.py -@@ -115,16 +115,16 @@ def create_train_model( - # Note: One can set model_device_fn to - # `tf.train.replica_device_setter(ps_tasks)` for distributed training. 
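# Illustrative sketch, not part of the patch: the model.py hunk above wraps the optimizer
# in hvd.DistributedOptimizer so per-rank gradients are allreduce-averaged before they are
# applied. The toy variable and loss below are assumptions; they stand in for the NMT
# parameters and training loss built elsewhere in the model.
import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()                                      # one training process per rank
x = tf.Variable(1.0)                            # toy parameter
loss = tf.square(x)                             # toy loss
opt = tf.train.GradientDescentOptimizer(0.1)
opt = hvd.DistributedOptimizer(opt)             # averages gradients across all ranks
grads_and_vars = opt.compute_gradients(loss)    # same pattern as the patched BaseModel
train_op = opt.apply_gradients(grads_and_vars)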
- model_device_fn = None -- if extra_args: model_device_fn = extra_args.model_device_fn -- with tf.device(model_device_fn): -- model = model_creator( -- hparams, -- iterator=iterator, -- mode=tf.contrib.learn.ModeKeys.TRAIN, -- source_vocab_table=src_vocab_table, -- target_vocab_table=tgt_vocab_table, -- scope=scope, -- extra_args=extra_args) -+ #if extra_args: model_device_fn = extra_args.model_device_fn -+ #with tf.device(model_device_fn): -+ model = model_creator( -+ hparams, -+ iterator=iterator, -+ mode=tf.contrib.learn.ModeKeys.TRAIN, -+ source_vocab_table=src_vocab_table, -+ target_vocab_table=tgt_vocab_table, -+ scope=scope, -+ extra_args=extra_args) - - return TrainModel( - graph=graph, -@@ -236,10 +236,15 @@ def create_infer_model(model_creator, hparams, scope=None, extra_args=None): - - def _get_embed_device(vocab_size): - """Decide on which device to place an embed matrix given its vocab size.""" -+ ''' - if vocab_size > VOCAB_SIZE_THRESHOLD_CPU: - return "/cpu:0" - else: - return "/gpu:0" -+ ''' -+ # Horovod -+ # All device are CPU for horovod CPU training -+ return "/cpu:0" - - - def _create_pretrained_emb_from_txt( -@@ -269,9 +274,10 @@ def _create_pretrained_emb_from_txt( - emb_mat = tf.constant(emb_mat) - emb_mat_const = tf.slice(emb_mat, [num_trainable_tokens, 0], [-1, -1]) - with tf.variable_scope(scope or "pretrain_embeddings", dtype=dtype) as scope: -- with tf.device(_get_embed_device(num_trainable_tokens)): -- emb_mat_var = tf.get_variable( -- "emb_mat_var", [num_trainable_tokens, emb_size]) -+ #with tf.device(_get_embed_device(num_trainable_tokens)): -+ # Horovod -+ emb_mat_var = tf.get_variable( -+ "emb_mat_var", [num_trainable_tokens, emb_size]) - return tf.concat([emb_mat_var, emb_mat_const], 0) - - -@@ -281,9 +287,10 @@ def _create_or_load_embed(embed_name, vocab_file, embed_file, - if vocab_file and embed_file: - embedding = _create_pretrained_emb_from_txt(vocab_file, embed_file) - else: -- with tf.device(_get_embed_device(vocab_size)): -- embedding = tf.get_variable( -- embed_name, [vocab_size, embed_size], dtype) -+ #with tf.device(_get_embed_device(vocab_size)): -+ #Horovod -+ embedding = tf.get_variable( -+ embed_name, [vocab_size, embed_size], dtype) - return embedding - - -@@ -545,6 +552,23 @@ def load_model(model, ckpt_path, session, name): - return model - - -+# Horovod -+def horovod_load_model(model, ckpt_path, session, name): -+ """Load model from a checkpoint.""" -+ start_time = time.time() -+ try: -+ model.saver.restore(session, ckpt_path) -+ except tf.errors.NotFoundError as e: -+ utils.print_out("Can't load checkpoint") -+ print_variables_in_ckpt(ckpt_path) -+ utils.print_out("%s" % str(e)) -+ -+ utils.print_out( -+ " loaded %s model parameters from %s, time %.2fs" % -+ (name, ckpt_path, time.time() - start_time)) -+ return model -+ -+ - def avg_checkpoints(model_dir, num_last_checkpoints, global_step, - global_step_name): - """Average the last N checkpoints in the model_dir.""" -@@ -634,6 +658,17 @@ def create_or_load_model(model, model_dir, session, name): - return model, global_step - - -+# Horovod -+def horovod_create_or_load_model(model, model_dir, session, name): -+ """Create translation model and initialize or load parameters in session.""" -+ latest_ckpt = tf.train.latest_checkpoint(model_dir) -+ if latest_ckpt: -+ model = horovod_load_model(model, latest_ckpt, session, name) -+ -+ global_step = model.global_step.eval(session=session) -+ return model, global_step -+ -+ - def compute_perplexity(model, sess, name): - """Compute perplexity of 
the output of the model. - -diff --git a/nmt/nmt.py b/nmt/nmt.py -index f5823d8..57fdfd3 100644 ---- a/nmt/nmt.py -+++ b/nmt/nmt.py -@@ -227,7 +227,10 @@ def add_arguments(parser): - """) - - # Misc -- parser.add_argument("--num_gpus", type=int, default=1, -+ #parser.add_argument("--num_gpus", type=int, default=1, -+ -+ # Horovod -+ parser.add_argument("--num_gpus", type=int, default=0, - help="Number of gpus in each worker.") - parser.add_argument("--log_device_placement", type="bool", nargs="?", - const=True, default=False, help="Debug GPU allocation.") -@@ -653,6 +656,14 @@ def run_main(flags, default_hparams, train_fn, inference_fn, target_session=""): - out_dir, default_hparams, flags.hparams_path, - save_hparams=(jobid == 0)) - -+ # GPU device -+ #config_proto = utils.get_config_proto( -+ # allow_soft_placement=True, -+ # num_intra_threads=hparams.num_intra_threads, -+ # num_inter_threads=hparams.num_inter_threads) -+ #utils.print_out( -+ # "# Devices visible to TensorFlow: %s" % repr(tf.Session(config=config_proto).list_devices())) -+ - ## Train / Decode - if flags.inference_input_file: - # Inference output directory -@@ -686,7 +697,9 @@ def run_main(flags, default_hparams, train_fn, inference_fn, target_session=""): - utils.print_out(" %s: %.1f" % (metric, score)) - else: - # Train -- train_fn(hparams, target_session=target_session) -+ # train_fn(hparams, target_session=target_session) -+ # Horovod -+ train_fn(hparams, flags, target_session=target_session) - - - def main(unused_argv): -diff --git a/nmt/standard_hparams/wmt16_gnmt_4_layer_multi_instances.json b/nmt/standard_hparams/wmt16_gnmt_4_layer_multi_instances.json -new file mode 100644 -index 0000000..f03ed11 ---- /dev/null -+++ b/nmt/standard_hparams/wmt16_gnmt_4_layer_multi_instances.json -@@ -0,0 +1,34 @@ -+{ -+ "attention": "normed_bahdanau", -+ "attention_architecture": "gnmt_v2", -+ "batch_size": 512, -+ "colocate_gradients_with_ops": true, -+ "dropout": 0.2, -+ "encoder_type": "gnmt", -+ "eos": "", -+ "forget_bias": 1.0, -+ "init_weight": 0.1, -+ "learning_rate": 0.5, -+ "max_gradient_norm": 5.0, -+ "metrics": ["bleu"], -+ "num_buckets": 5, -+ "num_encoder_layers": 4, -+ "num_decoder_layers": 4, -+ "decay_scheme": "luong234", -+ "num_units": 1024, -+ "optimizer": "sgd", -+ "residual": true, -+ "share_vocab": false, -+ "subword_option": "bpe", -+ "sos": "", -+ "src_max_len": 50, -+ "src_max_len_infer": null, -+ "steps_per_external_eval": null, -+ "steps_per_stats": 100, -+ "tgt_max_len": 50, -+ "tgt_max_len_infer": null, -+ "time_major": true, -+ "unit_type": "lstm", -+ "beam_width": 10, -+ "length_penalty_weight": 1.0 -+} -diff --git a/nmt/train.py b/nmt/train.py -index 1f06148..5777938 100644 ---- a/nmt/train.py -+++ b/nmt/train.py -@@ -21,6 +21,8 @@ import random - import time - - import tensorflow as tf -+# Horovod -+import horovod.tensorflow as hvd - - from . import attention_model - from . 
import gnmt_model -@@ -356,13 +358,16 @@ def update_stats(stats, start_time, step_result): - - # Update statistics - batch_size = output_tuple.batch_size -- stats["step_time"] += time.time() - start_time -+ step_time = time.time() - start_time -+ stats["step_time"] += step_time - stats["train_loss"] += output_tuple.train_loss * batch_size - stats["grad_norm"] += output_tuple.grad_norm - stats["predict_count"] += output_tuple.predict_count - stats["word_count"] += output_tuple.word_count - stats["sequence_count"] += batch_size - -+ if os.environ.get('GNMT_DEBUG_LOAD_BALANCE', None): print("Rank %2d: Train Step #%-6d batch_size: %4d, word_count: %6d, predict_count: %6d, loss = %10g, grad_norm: %10g, time: %7.3f sec wps: %7.3fK\n" % (hvd.rank(), output_tuple.global_step, batch_size, output_tuple.word_count, output_tuple.predict_count, output_tuple.train_loss, output_tuple.grad_norm, step_time, output_tuple.word_count/(step_time*1000.0)), end='') -+ - return (output_tuple.global_step, output_tuple.learning_rate, - output_tuple.train_summary) - -@@ -370,11 +375,11 @@ def update_stats(stats, start_time, step_result): - def print_step_info(prefix, global_step, info, result_summary, log_f): - """Print all info at the current global step.""" - utils.print_out( -- "%sstep %d lr %g step-time %.2fs wps %.2fK ppl %.2f gN %.2f %s, %s" % -+ "%sstep %d lr %g step-time %.2fs wps %.2fK ppl %.2f gN %.2f %s, %s rank %s\n" % - (prefix, global_step, info["learning_rate"], info["avg_step_time"], - info["speed"], info["train_ppl"], info["avg_grad_norm"], result_summary, -- time.ctime()), -- log_f) -+ time.ctime(), hvd.rank()), -+ log_f, new_line=False) - - - def add_info_summaries(summary_writer, global_step, info): -@@ -447,7 +452,11 @@ def get_model_creator(hparams): - return model_creator - - --def train(hparams, scope=None, target_session=""): -+#def train(hparams, scope=None, target_session=""): -+def train(hparams, flags, scope=None, target_session=""): -+ # Horovod -+ hvd.init() -+ - """Train a translation model.""" - log_device_placement = hparams.log_device_placement - out_dir = hparams.out_dir -@@ -462,7 +471,7 @@ def train(hparams, scope=None, target_session=""): - - # Create model - model_creator = get_model_creator(hparams) -- train_model = model_helper.create_train_model(model_creator, hparams, scope) -+ train_model = model_helper.create_train_model(model_creator, hparams, scope, num_workers=hvd.size(), jobid=hvd.rank()) - eval_model = model_helper.create_eval_model(model_creator, hparams, scope) - infer_model = model_helper.create_infer_model(model_creator, hparams, scope) - -@@ -485,155 +494,168 @@ def train(hparams, scope=None, target_session=""): - log_device_placement=log_device_placement, - num_intra_threads=hparams.num_intra_threads, - num_inter_threads=hparams.num_inter_threads) -- train_sess = tf.Session( -- target=target_session, config=config_proto, graph=train_model.graph) -- eval_sess = tf.Session( -- target=target_session, config=config_proto, graph=eval_model.graph) -- infer_sess = tf.Session( -- target=target_session, config=config_proto, graph=infer_model.graph) - - with train_model.graph.as_default(): -- loaded_train_model, global_step = model_helper.create_or_load_model( -- train_model.model, model_dir, train_sess, "train") -- -- # Summary writer -- summary_writer = tf.summary.FileWriter( -- os.path.join(out_dir, summary_name), train_model.graph) -- -- # First evaluation -- run_full_eval( -- model_dir, infer_model, infer_sess, -- eval_model, eval_sess, hparams, -- summary_writer, 
sample_src_data, -- sample_tgt_data, avg_ckpts) -- -- last_stats_step = global_step -- last_eval_step = global_step -- last_external_eval_step = global_step -- -- # This is the training loop. -- stats, info, start_train_time = before_train( -- loaded_train_model, train_model, train_sess, global_step, hparams, log_f) -- while global_step < num_train_steps: -- ### Run a step ### -- start_time = time.time() -- try: -- step_result = loaded_train_model.train(train_sess) -- hparams.epoch_step += 1 -- except tf.errors.OutOfRangeError: -- # Finished going through the training dataset. Go to next epoch. -- hparams.epoch_step = 0 -- utils.print_out( -- "# Finished an epoch, step %d. Perform external evaluation" % -- global_step) -- run_sample_decode(infer_model, infer_sess, model_dir, hparams, -- summary_writer, sample_src_data, sample_tgt_data) -- run_external_eval(infer_model, infer_sess, model_dir, hparams, -- summary_writer) -+ hooks=[hvd.BroadcastGlobalVariablesHook(0)] -+ with tf.train.MonitoredTrainingSession( -+ #checkpoint_dir=flags.out_dir, -+ hooks=hooks, config=config_proto) as train_sess: - -- if avg_ckpts: -- run_avg_external_eval(infer_model, infer_sess, model_dir, hparams, -- summary_writer, global_step) -+ eval_sess = tf.Session( -+ target=target_session, config=config_proto, graph=eval_model.graph) -+ infer_sess = tf.Session( -+ target=target_session, config=config_proto, graph=infer_model.graph) - -- train_sess.run( -- train_model.iterator.initializer, -- feed_dict={train_model.skip_count_placeholder: 0}) -- continue -+ loaded_train_model, global_step = model_helper.horovod_create_or_load_model( -+ train_model.model, model_dir, train_sess, "train") - -- # Process step_result, accumulate stats, and write summary -- global_step, info["learning_rate"], step_summary = update_stats( -- stats, start_time, step_result) -- summary_writer.add_summary(step_summary, global_step) -+ # Summary writer -+ summary_writer = tf.summary.FileWriter( -+ os.path.join(out_dir, summary_name), train_model.graph) -+ -+ ''' -+ # First evaluation -+ # Rank 0 do evaluation -+ if hvd.rank() == 0: -+ run_full_eval( -+ model_dir, infer_model, infer_sess, -+ eval_model, eval_sess, hparams, -+ summary_writer, sample_src_data, -+ sample_tgt_data, avg_ckpts) -+ ''' - -- # Once in a while, we print statistics. 
-- if global_step - last_stats_step >= steps_per_stats: - last_stats_step = global_step -- is_overflow = process_stats( -- stats, info, global_step, steps_per_stats, log_f) -- print_step_info(" ", global_step, info, get_best_results(hparams), -- log_f) -- if is_overflow: -- break -- -- # Reset statistics -- stats = init_stats() -- -- if global_step - last_eval_step >= steps_per_eval: - last_eval_step = global_step -- utils.print_out("# Save eval, global step %d" % global_step) -- add_info_summaries(summary_writer, global_step, info) -- -- # Save checkpoint -- loaded_train_model.saver.save( -- train_sess, -- os.path.join(out_dir, "translate.ckpt"), -- global_step=global_step) -- -- # Evaluate on dev/test -- run_sample_decode(infer_model, infer_sess, -- model_dir, hparams, summary_writer, sample_src_data, -- sample_tgt_data) -- run_internal_eval( -- eval_model, eval_sess, model_dir, hparams, summary_writer) -- -- if global_step - last_external_eval_step >= steps_per_external_eval: - last_external_eval_step = global_step - -- # Save checkpoint -- loaded_train_model.saver.save( -- train_sess, -- os.path.join(out_dir, "translate.ckpt"), -- global_step=global_step) -- run_sample_decode(infer_model, infer_sess, -- model_dir, hparams, summary_writer, sample_src_data, -- sample_tgt_data) -- run_external_eval( -- infer_model, infer_sess, model_dir, -- hparams, summary_writer) -- -- if avg_ckpts: -- run_avg_external_eval(infer_model, infer_sess, model_dir, hparams, -- summary_writer, global_step) -- -- # Done training -- loaded_train_model.saver.save( -- train_sess, -- os.path.join(out_dir, "translate.ckpt"), -- global_step=global_step) -- -- (result_summary, _, final_eval_metrics) = ( -- run_full_eval( -- model_dir, infer_model, infer_sess, eval_model, eval_sess, hparams, -- summary_writer, sample_src_data, sample_tgt_data, avg_ckpts)) -- print_step_info("# Final, ", global_step, info, result_summary, log_f) -- utils.print_time("# Done training!", start_train_time) -- -- summary_writer.close() -- -- utils.print_out("# Start evaluating saved best models.") -- for metric in hparams.metrics: -- best_model_dir = getattr(hparams, "best_" + metric + "_dir") -- summary_writer = tf.summary.FileWriter( -- os.path.join(best_model_dir, summary_name), infer_model.graph) -- result_summary, best_global_step, _ = run_full_eval( -- best_model_dir, infer_model, infer_sess, eval_model, eval_sess, hparams, -- summary_writer, sample_src_data, sample_tgt_data) -- print_step_info("# Best %s, " % metric, best_global_step, info, -- result_summary, log_f) -- summary_writer.close() -- -- if avg_ckpts: -- best_model_dir = getattr(hparams, "avg_best_" + metric + "_dir") -- summary_writer = tf.summary.FileWriter( -- os.path.join(best_model_dir, summary_name), infer_model.graph) -- result_summary, best_global_step, _ = run_full_eval( -- best_model_dir, infer_model, infer_sess, eval_model, eval_sess, -- hparams, summary_writer, sample_src_data, sample_tgt_data) -- print_step_info("# Averaged Best %s, " % metric, best_global_step, info, -- result_summary, log_f) -- summary_writer.close() -- -- return final_eval_metrics, global_step -+ # This is the training loop. 
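# Illustrative sketch, not part of the patch: the rewritten loop above runs under a
# tf.train.MonitoredTrainingSession with hvd.BroadcastGlobalVariablesHook(0), so rank 0
# broadcasts the initial variables and only rank 0 saves checkpoints or runs evaluation.
# Everything below (toy loss, step counts) is an assumption standing in for the real
# train_op, config and bookkeeping used by train().
import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
x = tf.Variable(1.0)
train_op = hvd.DistributedOptimizer(
    tf.train.GradientDescentOptimizer(0.1)).minimize(tf.square(x))
hooks = [hvd.BroadcastGlobalVariablesHook(0)]   # sync initial weights from rank 0
with tf.train.MonitoredTrainingSession(hooks=hooks, config=tf.ConfigProto()) as sess:
    for step in range(100):
        sess.run(train_op)
        if hvd.rank() == 0 and (step + 1) % 50 == 0:
            pass                                # master rank: save checkpoint / evaluate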
-+ stats, info, start_train_time = before_train( -+ loaded_train_model, train_model, train_sess, global_step, hparams, log_f) -+ while global_step < num_train_steps: -+ ### Run a step ### -+ start_time = time.time() -+ try: -+ step_result = loaded_train_model.train(train_sess) -+ hparams.epoch_step += 1 -+ except tf.errors.OutOfRangeError: -+ # Finished going through the training dataset. Go to next epoch. -+ hparams.epoch_step = 0 -+ utils.print_out( -+ "# Finished an epoch, step %d. Perform external evaluation" % -+ global_step) -+ run_sample_decode(infer_model, infer_sess, model_dir, hparams, -+ summary_writer, sample_src_data, sample_tgt_data) -+ run_external_eval(infer_model, infer_sess, model_dir, hparams, -+ summary_writer) -+ -+ if avg_ckpts: -+ run_avg_external_eval(infer_model, infer_sess, model_dir, hparams, -+ summary_writer, global_step) -+ -+ train_sess.run( -+ train_model.iterator.initializer, -+ feed_dict={train_model.skip_count_placeholder: 0}) -+ continue -+ -+ # Process step_result, accumulate stats, and write summary -+ global_step, info["learning_rate"], step_summary = update_stats( -+ stats, start_time, step_result) -+ summary_writer.add_summary(step_summary, global_step) -+ -+ # Once in a while, we print statistics. -+ if global_step - last_stats_step >= steps_per_stats: -+ last_stats_step = global_step -+ is_overflow = process_stats( -+ stats, info, global_step, steps_per_stats, log_f) -+ print_step_info(" ", global_step, info, get_best_results(hparams), -+ log_f) -+ if is_overflow: -+ break -+ -+ # Reset statistics -+ stats = init_stats() -+ -+ # Horovod -+ # Only master will do it -+ #if hvd.rank() == 0: -+ if global_step - last_eval_step >= steps_per_eval: -+ last_eval_step = global_step -+ utils.print_out("# Save eval, global step %d" % global_step) -+ add_info_summaries(summary_writer, global_step, info) -+ -+ # Save checkpoint -+ loaded_train_model.saver.save( -+ train_sess._sess._sess._sess._sess, -+ os.path.join(out_dir, "translate.ckpt"), -+ global_step=global_step) -+ -+ # Evaluate on dev/test -+ run_sample_decode(infer_model, infer_sess, -+ model_dir, hparams, summary_writer, sample_src_data, -+ sample_tgt_data) -+ run_internal_eval( -+ eval_model, eval_sess, model_dir, hparams, summary_writer) -+ -+ if global_step - last_external_eval_step >= steps_per_external_eval: -+ last_external_eval_step = global_step -+ -+ # Save checkpoint -+ loaded_train_model.saver.save( -+ train_sess._sess._sess._sess._sess, -+ os.path.join(out_dir, "translate.ckpt"), -+ global_step=global_step) -+ run_sample_decode(infer_model, infer_sess, -+ model_dir, hparams, summary_writer, sample_src_data, -+ sample_tgt_data) -+ run_external_eval( -+ infer_model, infer_sess, model_dir, -+ hparams, summary_writer) -+ -+ if avg_ckpts: -+ run_avg_external_eval(infer_model, infer_sess, model_dir, hparams, -+ summary_writer, global_step) -+ -+ # Done training -+ # Now out of the training loop. 
Doing the rest of full and best evaluation -+ # by only rank 0 -+ if hvd.rank() == 0: -+ loaded_train_model.saver.save( -+ train_sess._sess._sess._sess._sess, -+ os.path.join(out_dir, "translate.ckpt"), -+ global_step=global_step) -+ -+ (result_summary, _, final_eval_metrics) = ( -+ run_full_eval( -+ model_dir, infer_model, infer_sess, eval_model, eval_sess, hparams, -+ summary_writer, sample_src_data, sample_tgt_data, avg_ckpts)) -+ print_step_info("# Final, ", global_step, info, result_summary, log_f) -+ utils.print_time("# Done training!", start_train_time) -+ -+ summary_writer.close() -+ -+ utils.print_out("# Start evaluating saved best models.") -+ for metric in hparams.metrics: -+ best_model_dir = getattr(hparams, "best_" + metric + "_dir") -+ summary_writer = tf.summary.FileWriter( -+ os.path.join(best_model_dir, summary_name), infer_model.graph) -+ result_summary, best_global_step, _ = run_full_eval( -+ best_model_dir, infer_model, infer_sess, eval_model, eval_sess, hparams, -+ summary_writer, sample_src_data, sample_tgt_data) -+ print_step_info("# Best %s, " % metric, best_global_step, info, -+ result_summary, log_f) -+ summary_writer.close() -+ -+ if avg_ckpts: -+ best_model_dir = getattr(hparams, "avg_best_" + metric + "_dir") -+ summary_writer = tf.summary.FileWriter( -+ os.path.join(best_model_dir, summary_name), infer_model.graph) -+ result_summary, best_global_step, _ = run_full_eval( -+ best_model_dir, infer_model, infer_sess, eval_model, eval_sess, -+ hparams, summary_writer, sample_src_data, sample_tgt_data) -+ print_step_info("# Averaged Best %s, " % metric, best_global_step, info, -+ result_summary, log_f) -+ summary_writer.close() -+ return final_eval_metrics, global_step - - - def _format_results(name, ppl, scores, metrics): -@@ -716,7 +738,7 @@ def _external_eval(model, global_step, sess, hparams, iterator, - sess.run(iterator.initializer, feed_dict=iterator_feed_dict) - - output = os.path.join(out_dir, "output_%s" % label) -- scores = nmt_utils.decode_and_evaluate( -+ scores,_,_ = nmt_utils.decode_and_evaluate( - label, - model, - sess, -diff --git a/nmt/utils/iterator_utils.py b/nmt/utils/iterator_utils.py -index 31efb11..c08ff22 100644 ---- a/nmt/utils/iterator_utils.py -+++ b/nmt/utils/iterator_utils.py -@@ -16,8 +16,11 @@ - from __future__ import print_function - - import collections -+import os - - import tensorflow as tf -+# Horovod -+import horovod.tensorflow as hvd - - from ..utils import vocab_utils - -@@ -30,7 +33,7 @@ class BatchedInput( - collections.namedtuple("BatchedInput", - ("initializer", "source", "target_input", - "target_output", "source_sequence_length", -- "target_sequence_length"))): -+ "target_sequence_length", "global_batch_size"))): - pass - - -@@ -86,13 +89,16 @@ def get_infer_iterator(src_dataset, - batched_dataset = batching_func(src_dataset) - batched_iter = batched_dataset.make_initializable_iterator() - (src_ids, src_seq_len) = batched_iter.get_next() -+ global_batch_size = tf.shape(src_seq_len)[0] -+ if os.environ.get('GNMT_DEBUG_LOAD_BALANCE', None): src_seq_len = tf.Print(src_seq_len, [tf.reduce_max(src_seq_len), global_batch_size], "Infer [Max_Src_len][Batch_Size] = ") - return BatchedInput( - initializer=batched_iter.initializer, - source=src_ids, - target_input=None, - target_output=None, - source_sequence_length=src_seq_len, -- target_sequence_length=None) -+ target_sequence_length=None, -+ global_batch_size=global_batch_size) - - - def get_iterator(src_dataset, -@@ -126,10 +132,15 @@ def get_iterator(src_dataset, - - 
src_tgt_dataset = tf.data.Dataset.zip((src_dataset, tgt_dataset)) - -- src_tgt_dataset = src_tgt_dataset.shard(num_shards, shard_index) - if skip_count is not None: - src_tgt_dataset = src_tgt_dataset.skip(skip_count) - -+ # Horovod requires same random_seed across all ranks -+ if num_shards > 1: -+ if not random_seed: random_seed = 1 -+ -+ print("# num_shards = {} and shard_index = {}, using random_seed = {}".format(num_shards, shard_index, random_seed)) -+ - src_tgt_dataset = src_tgt_dataset.shuffle( - output_buffer_size, random_seed, reshuffle_each_iteration) - -@@ -187,10 +198,12 @@ def get_iterator(src_dataset, - - src_tgt_dataset = src_tgt_dataset.prefetch(output_buffer_size) - -+ global_batch_size = batch_size * num_shards -+ - # Bucket by source sequence length (buckets for lengths 0-9, 10-19, ...) - def batching_func(x): - return x.padded_batch( -- batch_size, -+ batch_size * num_shards, - # The first three entries are the source and target line rows; - # these have unknown-length vectors. The last two entries are - # the source and target row sizes; these are scalars. -@@ -232,17 +245,40 @@ def get_iterator(src_dataset, - - batched_dataset = src_tgt_dataset.apply( - tf.contrib.data.group_by_window( -- key_func=key_func, reduce_func=reduce_func, window_size=batch_size)) -+ key_func=key_func, reduce_func=reduce_func, window_size=batch_size*num_shards)) - - else: - batched_dataset = batching_func(src_tgt_dataset) -+ -+ # Add global batch_size -+ batched_dataset = batched_dataset.map( -+ lambda src, tgt_in, tgt_out, src_len, tgt_len: (src, tgt_in, tgt_out, src_len, tgt_len, tf.shape(src)[0])) -+ if num_shards > 1: -+ def slice_func(src, tgt_in, tgt_out, src_len, tgt_len, global_batch): -+ -+ start = (shard_index * global_batch) // num_shards -+ end = ((shard_index + 1) * global_batch) // num_shards -+ size = end - start -+ return (tf.slice(src, [start, 0], [size, -1]), -+ tf.slice(tgt_in, [start, 0], [size, -1]), -+ tf.slice(tgt_out, [start, 0], [size, -1]), -+ tf.slice(src_len, [start], [size]), -+ tf.slice(tgt_len, [start], [size]), -+ global_batch) -+ -+ batched_dataset = batched_dataset.filter( -+ lambda src, tgt_in, tgt_out, src_len, tgt_len, global_batch: tf.math.greater_equal(global_batch, num_shards)) -+ batched_dataset = batched_dataset.map(slice_func, num_parallel_calls=num_parallel_calls).prefetch(output_buffer_size) -+ - batched_iter = batched_dataset.make_initializable_iterator() - (src_ids, tgt_input_ids, tgt_output_ids, src_seq_len, -- tgt_seq_len) = (batched_iter.get_next()) -+ tgt_seq_len, global_batch_size) = (batched_iter.get_next()) -+ if os.environ.get('GNMT_DEBUG_LOAD_BALANCE', None): src_seq_len = tf.Print(src_seq_len, [tf.reduce_max(src_seq_len), tf.reduce_max(tgt_seq_len)], "Rank %2d: Max_seq_len = " % hvd.rank()) - return BatchedInput( - initializer=batched_iter.initializer, - source=src_ids, - target_input=tgt_input_ids, - target_output=tgt_output_ids, - source_sequence_length=src_seq_len, -- target_sequence_length=tgt_seq_len) -+ target_sequence_length=tgt_seq_len, -+ global_batch_size=global_batch_size) -diff --git a/nmt/utils/misc_utils.py b/nmt/utils/misc_utils.py -index 63dc5a6..09f4727 100644 ---- a/nmt/utils/misc_utils.py -+++ b/nmt/utils/misc_utils.py -@@ -26,6 +26,7 @@ import time - from distutils import version - - import numpy as np -+from distutils.version import LooseVersion, StrictVersion - import tensorflow as tf - - -diff --git a/nmt/utils/nmt_utils.py b/nmt/utils/nmt_utils.py -index 2115de9..11164d2 100644 ---- a/nmt/utils/nmt_utils.py -+++ 
b/nmt/utils/nmt_utils.py -@@ -38,9 +38,12 @@ def decode_and_evaluate(name, - tgt_eos, - num_translations_per_input=1, - decode=True, -- infer_mode="greedy"): -+ infer_mode="greedy", -+ index_pair=[]): - """Decode a test set and compute a score according to the evaluation task.""" - # Decode -+ end_time = None -+ num_sentences = None - if decode: - utils.print_out(" decoding to output %s" % trans_file) - -@@ -54,7 +57,7 @@ def decode_and_evaluate(name, - num_translations_per_input = 1 - elif infer_mode == "beam_search": - num_translations_per_input = min(num_translations_per_input, beam_width) -- -+ translation = [] - while True: - try: - nmt_outputs, _ = model.decode(sess) -@@ -66,17 +69,23 @@ def decode_and_evaluate(name, - - for sent_id in range(batch_size): - for beam_id in range(num_translations_per_input): -- translation = get_translation( -+ translation.append(get_translation( - nmt_outputs[beam_id], - sent_id, - tgt_eos=tgt_eos, -- subword_option=subword_option) -- trans_f.write((translation + b"\n").decode("utf-8")) -+ subword_option=subword_option)) - except tf.errors.OutOfRangeError: -+ end_time = time.time() - utils.print_time( - " done, num sentences %d, num translations per input %d" % - (num_sentences, num_translations_per_input), start_time) - break -+ if len(index_pair) is 0: -+ for sentence in translation: -+ trans_f.write((sentence + b"\n").decode("utf-8")) -+ else: -+ for i in index_pair: -+ trans_f.write((translation[index_pair[i]] + b"\n").decode("utf-8")) - - # Evaluation - evaluation_scores = {} -@@ -90,7 +99,7 @@ def decode_and_evaluate(name, - evaluation_scores[metric] = score - utils.print_out(" %s %s: %.1f" % (metric, name, score)) - -- return evaluation_scores -+ return evaluation_scores, end_time, num_sentences - - - def get_translation(nmt_outputs, sent_id, tgt_eos, subword_option): diff --git a/models/language_translation/tensorflow/gnmt/training/requirements.txt b/models/language_translation/tensorflow/gnmt/training/requirements.txt deleted file mode 100644 index d28827329..000000000 --- a/models/language_translation/tensorflow/gnmt/training/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -horovod==0.15.1 diff --git a/models/language_translation/tensorflow/mlperf_gnmt/__init__.py b/models/language_translation/tensorflow/mlperf_gnmt/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/language_translation/tensorflow/mlperf_gnmt/fp32/__init__.py b/models/language_translation/tensorflow/mlperf_gnmt/fp32/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/language_translation/tensorflow/mlperf_gnmt/fp32/bleu.py b/models/language_translation/tensorflow/mlperf_gnmt/fp32/bleu.py new file mode 100644 index 000000000..25aa2f93e --- /dev/null +++ b/models/language_translation/tensorflow/mlperf_gnmt/fp32/bleu.py @@ -0,0 +1,172 @@ +# Copyright 2017 Google Inc. All Rights Reserved. +# Modifications copyright (C) 2019 MLPerf Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Python implementation of BLEU, smooth-BLEU and Running BLEU. + +@note The most common usage case is to invoke the function compute_bleu + +This module provides a Python implementation of BLEU and smooth-BLEU. +Smooth BLEU is computed following the method outlined in the paper: +Chin-Yew Lin, Franz Josef Och. ORANGE: a method for evaluating automatic +evaluation metrics for machine translation. COLING 2004. +""" + +import collections +import math + +## +# @brief Class to compute running BLEU scores +# @detail BLEU scores can be computed in a non-linear way, +# or without having access to the full translated corpus in time. +class RunningBLEUScorer: + + def __init__(self, max_order=4, smooth=False): + self.max_order = max_order + self.smooth = smooth + self.reset() + + ## + # @brief Reset all variables (none of the previus sentences will be taken into account) + def reset(self): + self.matches_by_order = [0] * self.max_order + self.possible_matches_by_order = [0] * self.max_order + self.reference_length = 0 + self.translation_length = 0 + + ## + # @brief Add a single sentence + # @param reference list of words for a reference sentence + # @param translation list of words for its corresponding translated sentence + # @post Updates internal structures to take this sentence's translation + # result into account in final BLEU score + def add_sentence(self, reference, translation): + self.add_sentence_with_multiple_refs([reference], translation) + + ## + # @brief Add a single reference, with potentially multiple references + # @param reference list of list of words for a reference sentence + # @note That we could have multiple sentences serving as a reference + # @param translation (single) list of words for its corresponding translated sentence + # @post Updates internal structures to take this sentence's translation + # result into account in final BLEU score + def add_sentence_with_multiple_refs(self, references, translation): + self.reference_length += min(len(r) for r in references) + self.translation_length += len(translation) + + merged_ref_ngram_counts = collections.Counter() + for reference in references: + merged_ref_ngram_counts |= self._get_ngrams(reference) + + translation_ngram_counts = self._get_ngrams(translation) + + new_matches_by_order, new_possible_matches_by_order = self._get_ngram_match_values(merged_ref_ngram_counts, translation_ngram_counts, len(translation)) + + for i in range(self.max_order): + self.matches_by_order[i] += new_matches_by_order[i] + self.possible_matches_by_order[i] += new_possible_matches_by_order[i] + + ## + # @brief Calculate final BLEU score + def calc_BLEU_score(self): + precisions = [0] * self.max_order + for i in range(0, self.max_order): + if self.smooth: + precisions[i] = ((self.matches_by_order[i] + 1.) / + (self.possible_matches_by_order[i] + 1.)) + else: + if self.possible_matches_by_order[i] > 0: + precisions[i] = (float(self.matches_by_order[i]) / + self.possible_matches_by_order[i]) + else: + precisions[i] = 0.0 + + if min(precisions) > 0: + p_log_sum = sum((1. / self.max_order) * math.log(p) for p in precisions) + geo_mean = math.exp(p_log_sum) + else: + geo_mean = 0 + + ratio = float(self.translation_length) / self.reference_length + + if ratio > 1.0: + bp = 1. + else: + bp = math.exp(1 - 1. 
/ ratio) + + bleu = geo_mean * bp + + return (bleu, precisions, bp, ratio, self.translation_length, self.reference_length) + + ## + # @brief Internal function to compute matching percentages for different order ngrams + def _get_ngram_match_values(self, ref_ngram_counts, translation_ngram_counts, translation_length): + new_matches_by_order = [0] * self.max_order + new_possible_matches_by_order = [0] * self.max_order + + overlap = translation_ngram_counts & ref_ngram_counts + for ngram in overlap: + new_matches_by_order[len(ngram)-1] += overlap[ngram] + for order in range(1, self.max_order+1): + possible_matches = translation_length - order + 1 + new_possible_matches_by_order[order-1] = max(0, possible_matches) + + return (new_matches_by_order, new_possible_matches_by_order) + + def _get_ngrams(self, segment): + """Internal function to extract all n-grams upto a given maximum order from an input segment. + + Args: + segment: text segment from which n-grams will be extracted. + + Returns: + The Counter containing all n-grams upto max_order in segment + with a count of how many times each n-gram occurred. + """ + ngram_counts = collections.Counter() + for order in range(1, self.max_order + 1): + for i in range(0, len(segment) - order + 1): + ngram = tuple(segment[i:i+order]) + ngram_counts[ngram] += 1 + return ngram_counts + +def compute_bleu(reference_corpus, translation_corpus, max_order=4, + smooth=False): + """Computes BLEU score of translated segments against one or more references. + This is the most common usage when calculating BLEU scores. + + Args: + reference_corpus: list of lists of references for each translation. Each + reference should be tokenized into a list of tokens. + reference_corpus[i][j][k] represents the k'th word of the i'th sentence + for the j'th reference text + translation_corpus: list of translated sentences to score. Each sentence + should be tokenized into a list of tokens. + translation_corpus[i][j] represents the j'th word for the i'th sentence + max_order: Maximum n-gram order to use when computing BLEU score. + smooth: Whether or not to apply Lin et al. 2004 smoothing. + + Returns: + 3-Tuple with the BLEU score, n-gram precisions, geometric mean of n-gram + precisions and brevity penalty. + """ + runningBLEU = RunningBLEUScorer(max_order=max_order, smooth=smooth) + + + for (references, translation) in zip(reference_corpus, + translation_corpus): + runningBLEU.add_sentence_with_multiple_refs(references, translation) + + return runningBLEU.calc_BLEU_score() \ No newline at end of file diff --git a/models/language_translation/tensorflow/mlperf_gnmt/fp32/evaluation_utils.py b/models/language_translation/tensorflow/mlperf_gnmt/fp32/evaluation_utils.py new file mode 100644 index 000000000..e5eb5fd93 --- /dev/null +++ b/models/language_translation/tensorflow/mlperf_gnmt/fp32/evaluation_utils.py @@ -0,0 +1,183 @@ +# Copyright 2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
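# Illustrative sketch, not part of the patch: the bleu.py module added above can be driven
# directly on pre-tokenized text. compute_bleu() feeds each (references, translation) pair
# into a RunningBLEUScorer and returns calc_BLEU_score(), whose first element is the
# corpus-level BLEU. The import path and the sentences below are assumptions for this sketch.
from bleu import compute_bleu

# reference_corpus[i][j] is the j-th reference (a token list) for the i-th translated sentence
references = [[["the", "cat", "sat", "on", "the", "mat"]]]
translations = [["the", "cat", "sat", "on", "mat"]]

result = compute_bleu(references, translations, max_order=4, smooth=True)
print("BLEU = %.4f" % result[0])   # remaining elements: n-gram precisions, brevity penalty, ...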
+# ============================================================================== + +"""Utility for evaluating various tasks, e.g., translation & summarization.""" +import codecs +import os +import re +import subprocess +import shlex + +import tensorflow as tf + +import bleu +import rouge + + +__all__ = ["evaluate"] + + +def evaluate(ref_file, trans_file, metric, subword_option=None): + """Pick a metric and evaluate depending on task.""" + # BLEU scores for translation task + if metric.lower() == "bleu": + evaluation_score = _bleu(ref_file, trans_file, + subword_option=subword_option) + # ROUGE scores for summarization tasks + elif metric.lower() == "rouge": + evaluation_score = _rouge(ref_file, trans_file, + subword_option=subword_option) + elif metric.lower() == "accuracy": + evaluation_score = _accuracy(ref_file, trans_file) + elif metric.lower() == "word_accuracy": + evaluation_score = _word_accuracy(ref_file, trans_file) + else: + raise ValueError("Unknown metric %s" % metric) + + return evaluation_score + + +def _clean(sentence, subword_option): + """Clean and handle BPE or SPM outputs.""" + sentence = sentence.strip() + + # BPE + if subword_option == "bpe": + sentence = re.sub("@@ ", "", sentence) + + # SPM + elif subword_option == "spm": + sentence = u"".join(sentence.split()).replace(u"\u2581", u" ").lstrip() + + return sentence + + +# Follow //transconsole/localization/machine_translation/metrics/bleu_calc.py +def _bleu(ref_file, trans_file, subword_option=None): + """Compute BLEU scores and handling BPE.""" + max_order = 4 + smooth = False + + ref_files = [ref_file] + reference_text = [] + for reference_filename in ref_files: + with codecs.getreader("utf-8")( + tf.io.gfile.GFile(reference_filename, "rb")) as fh: + reference_text.append(fh.readlines()) + + per_segment_references = [] + for references in zip(*reference_text): + reference_list = [] + for reference in references: + reference = _clean(reference, subword_option) + reference_list.append(reference.split(" ")) + per_segment_references.append(reference_list) + + translations = [] + with codecs.getreader("utf-8")(tf.io.gfile.GFile(trans_file, "rb")) as fh: + for line in fh: + line = _clean(line, subword_option=None) + translations.append(line.split(" ")) + + # bleu_score, precisions, bp, ratio, translation_length, reference_length + bleu_score, _, _, _, _, _ = bleu.compute_bleu( + per_segment_references, translations, max_order, smooth) + return 100 * bleu_score + + +def _rouge(ref_file, summarization_file, subword_option=None): + """Compute ROUGE scores and handling BPE.""" + + references = [] + with codecs.getreader("utf-8")(tf.io.gfile.GFile(ref_file, "rb")) as fh: + for line in fh: + references.append(_clean(line, subword_option)) + + hypotheses = [] + with codecs.getreader("utf-8")( + tf.io.gfile.GFile(summarization_file, "rb")) as fh: + for line in fh: + hypotheses.append(_clean(line, subword_option=None)) + + rouge_score_map = rouge.rouge(hypotheses, references) + return 100 * rouge_score_map["rouge_l/f_score"] + + +def _accuracy(label_file, pred_file): + """Compute accuracy, each line contains a label.""" + + with codecs.getreader("utf-8")(tf.io.gfile.GFile(label_file, "rb")) as label_fh: + with codecs.getreader("utf-8")(tf.io.gfile.GFile(pred_file, "rb")) as pred_fh: + count = 0.0 + match = 0.0 + for label in label_fh: + label = label.strip() + pred = pred_fh.readline().strip() + if label == pred: + match += 1 + count += 1 + return 100 * match / count + + +def _word_accuracy(label_file, pred_file): + """Compute 
accuracy on per word basis.""" + + with codecs.getreader("utf-8")(tf.io.gfile.GFile(label_file, "r")) as label_fh: + with codecs.getreader("utf-8")(tf.io.gfile.GFile(pred_file, "r")) as pred_fh: + total_acc, total_count = 0., 0. + for sentence in label_fh: + labels = sentence.strip().split(" ") + preds = pred_fh.readline().strip().split(" ") + match = 0.0 + for pos in range(min(len(labels), len(preds))): + label = labels[pos] + pred = preds[pos] + if label == pred: + match += 1 + total_acc += 100 * match / max(len(labels), len(preds)) + total_count += 1 + return total_acc / total_count + + +def _moses_bleu(multi_bleu_script, tgt_test, trans_file, subword_option=None): + """Compute BLEU scores using Moses multi-bleu.perl script.""" + + # TODO(thangluong): perform rewrite using python + # BPE + if subword_option == "bpe": + debpe_tgt_test = tgt_test + ".debpe" + if not os.path.exists(debpe_tgt_test): + subprocess.call(shlex.split("cp %s %s" % (tgt_test, debpe_tgt_test)), shell=False) + subprocess.call(shlex.split("sed s/@@ //g %s" % (debpe_tgt_test)), + shell=False) + tgt_test = debpe_tgt_test + elif subword_option == "spm": + despm_tgt_test = tgt_test + ".despm" + if not os.path.exists(despm_tgt_test): + subprocess.call("cp %s %s" % (tgt_test, despm_tgt_test)) + subprocess.call("sed s/ //g %s" % (despm_tgt_test)) + subprocess.call(u"sed s/^\u2581/g %s" % (despm_tgt_test)) + subprocess.call(u"sed s/\u2581/ /g %s" % (despm_tgt_test)) + tgt_test = despm_tgt_test + cmd = "%s %s < %s" % (multi_bleu_script, tgt_test, trans_file) + + # subprocess + bleu_output = subprocess.check_output(shlex.split(cmd), shell=False) + + # extract BLEU score + m = re.search("BLEU = (.+?),", bleu_output) + bleu_score = float(m.group(1)) + + return bleu_score diff --git a/models/language_translation/tensorflow/mlperf_gnmt/fp32/hparam.py b/models/language_translation/tensorflow/mlperf_gnmt/fp32/hparam.py new file mode 100644 index 000000000..e98acfa00 --- /dev/null +++ b/models/language_translation/tensorflow/mlperf_gnmt/fp32/hparam.py @@ -0,0 +1,536 @@ +import json +import numbers +import six + +from tensorflow.python.util import compat + +def _parse_fail(name, var_type, value, values): + """Helper function for raising a value error for bad assignment.""" + raise ValueError( + 'Could not parse hparam \'%s\' of type \'%s\' with value \'%s\' in %s' % + (name, var_type.__name__, value, values)) + + +def _reuse_fail(name, values): + """Helper function for raising a value error for reuse of name.""" + raise ValueError('Multiple assignments to variable \'%s\' in %s' % (name, + values)) + + +def _process_scalar_value(name, parse_fn, var_type, m_dict, values, + results_dictionary): + """Update results_dictionary with a scalar value. + Used to update the results_dictionary to be returned by parse_values when + encountering a clause with a scalar RHS (e.g. "s=5" or "arr[0]=5".) + Mutates results_dictionary. + Args: + name: Name of variable in assignment ("s" or "arr"). + parse_fn: Function for parsing the actual value. + var_type: Type of named variable. + m_dict: Dictionary constructed from regex parsing. + m_dict['val']: RHS value (scalar) + m_dict['index']: List index value (or None) + values: Full expression being parsed + results_dictionary: The dictionary being updated for return by the parsing + function. + Raises: + ValueError: If the name has already been used. 
+ """ + try: + parsed_value = parse_fn(m_dict['val']) + except ValueError: + _parse_fail(name, var_type, m_dict['val'], values) + + # If no index is provided + if not m_dict['index']: + if name in results_dictionary: + _reuse_fail(name, values) + results_dictionary[name] = parsed_value + else: + if name in results_dictionary: + # The name has already been used as a scalar, then it + # will be in this dictionary and map to a non-dictionary. + if not isinstance(results_dictionary.get(name), dict): + _reuse_fail(name, values) + else: + results_dictionary[name] = {} + + index = int(m_dict['index']) + # Make sure the index position hasn't already been assigned a value. + if index in results_dictionary[name]: + _reuse_fail('{}[{}]'.format(name, index), values) + results_dictionary[name][index] = parsed_value + + +def _process_list_value(name, parse_fn, var_type, m_dict, values, + results_dictionary): + """Update results_dictionary from a list of values. + Used to update results_dictionary to be returned by parse_values when + encountering a clause with a list RHS (e.g. "arr=[1,2,3]".) + Mutates results_dictionary. + Args: + name: Name of variable in assignment ("arr"). + parse_fn: Function for parsing individual values. + var_type: Type of named variable. + m_dict: Dictionary constructed from regex parsing. + m_dict['val']: RHS value (scalar) + values: Full expression being parsed + results_dictionary: The dictionary being updated for return by the parsing + function. + Raises: + ValueError: If the name has an index or the values cannot be parsed. + """ + if m_dict['index'] is not None: + raise ValueError('Assignment of a list to a list index.') + elements = filter(None, re.split('[ ,]', m_dict['vals'])) + # Make sure the name hasn't already been assigned a value + if name in results_dictionary: + raise _reuse_fail(name, values) + try: + results_dictionary[name] = [parse_fn(e) for e in elements] + except ValueError: + _parse_fail(name, var_type, m_dict['vals'], values) + + +def _cast_to_type_if_compatible(name, param_type, value): + """Cast hparam to the provided type, if compatible. + Args: + name: Name of the hparam to be cast. + param_type: The type of the hparam. + value: The value to be cast, if compatible. + Returns: + The result of casting `value` to `param_type`. + Raises: + ValueError: If the type of `value` is not compatible with param_type. + * If `param_type` is a string type, but `value` is not. + * If `param_type` is a boolean, but `value` is not, or vice versa. + * If `param_type` is an integer type, but `value` is not. + * If `param_type` is a float type, but `value` is not a numeric type. + """ + fail_msg = ( + "Could not cast hparam '%s' of type '%s' from value %r" % + (name, param_type, value)) + + # If `value` is already of type `param_type`, return it directly. + # `isinstance` is too weak (e.g. isinstance(True, int) == True). + if type(value) == param_type: # pylint: disable=unidiomatic-typecheck + return value + + # Some callers use None, for which we can't do any casting/checking. :( + if issubclass(param_type, type(None)): + return value + + # Avoid converting a non-string type to a string. + if (issubclass(param_type, (six.string_types, six.binary_type)) and + not isinstance(value, (six.string_types, six.binary_type))): + raise ValueError(fail_msg) + + # Avoid converting a number or string type to a boolean or vice versa. 
+ if issubclass(param_type, bool) != isinstance(value, bool): + raise ValueError(fail_msg) + + # Avoid converting float to an integer (the reverse is fine). + if (issubclass(param_type, numbers.Integral) and + not isinstance(value, numbers.Integral)): + raise ValueError(fail_msg) + + # Avoid converting a non-numeric type to a numeric type. + if (issubclass(param_type, numbers.Number) and + not isinstance(value, numbers.Number)): + raise ValueError(fail_msg) + + return param_type(value) + + +def parse_values(values, type_map, ignore_unknown=False): + """Parses hyperparameter values from a string into a python map. + `values` is a string containing comma-separated `name=value` pairs. + For each pair, the value of the hyperparameter named `name` is set to + `value`. + If a hyperparameter name appears multiple times in `values`, a ValueError + is raised (e.g. 'a=1,a=2', 'a[1]=1,a[1]=2'). + If a hyperparameter name in both an index assignment and scalar assignment, + a ValueError is raised. (e.g. 'a=[1,2,3],a[0] = 1'). + The hyperparameter name may contain '.' symbols, which will result in an + attribute name that is only accessible through the getattr and setattr + functions. (And must be first explicit added through add_hparam.) + WARNING: Use of '.' in your variable names is allowed, but is not well + supported and not recommended. + The `value` in `name=value` must follows the syntax according to the + type of the parameter: + * Scalar integer: A Python-parsable integer point value. E.g.: 1, + 100, -12. + * Scalar float: A Python-parsable floating point value. E.g.: 1.0, + -.54e89. + * Boolean: Either true or false. + * Scalar string: A non-empty sequence of characters, excluding comma, + spaces, and square brackets. E.g.: foo, bar_1. + * List: A comma separated list of scalar values of the parameter type + enclosed in square brackets. E.g.: [1,2,3], [1.0,1e-12], [high,low]. + When index assignment is used, the corresponding type_map key should be the + list name. E.g. for "arr[1]=0" the type_map must have the key "arr" (not + "arr[1]"). + Args: + values: String. Comma separated list of `name=value` pairs where + 'value' must follow the syntax described above. + type_map: A dictionary mapping hyperparameter names to types. Note every + parameter name in values must be a key in type_map. The values must + conform to the types indicated, where a value V is said to conform to a + type T if either V has type T, or V is a list of elements of type T. + Hence, for a multidimensional parameter 'x' taking float values, + 'x=[0.1,0.2]' will parse successfully if type_map['x'] = float. + ignore_unknown: Bool. Whether values that are missing a type in type_map + should be ignored. If set to True, a ValueError will not be raised for + unknown hyperparameter type. + Returns: + A python map mapping each name to either: + * A scalar value. + * A list of scalar values. + * A dictionary mapping index numbers to scalar values. + (e.g. "x=5,L=[1,2],arr[1]=3" results in {'x':5,'L':[1,2],'arr':{1:3}}") + Raises: + ValueError: If there is a problem with input. + * If `values` cannot be parsed. + * If a list is assigned to a list index (e.g. 'a[1] = [1,2,3]'). + * If the same rvalue is assigned two different values (e.g. 
'a=1,a=2', + 'a[1]=1,a[1]=2', or 'a=1,a=[1]') + """ + results_dictionary = {} + pos = 0 + while pos < len(values): + m = PARAM_RE.match(values, pos) + if not m: + raise ValueError('Malformed hyperparameter value: %s' % values[pos:]) + # Check that there is a comma between parameters and move past it. + pos = m.end() + # Parse the values. + m_dict = m.groupdict() + name = m_dict['name'] + if name not in type_map: + if ignore_unknown: + continue + raise ValueError('Unknown hyperparameter type for %s' % name) + type_ = type_map[name] + + # Set up correct parsing function (depending on whether type_ is a bool) + if type_ == bool: + + def parse_bool(value): + if value in ['true', 'True']: + return True + elif value in ['false', 'False']: + return False + else: + try: + return bool(int(value)) + except ValueError: + _parse_fail(name, type_, value, values) + + parse = parse_bool + else: + parse = type_ + + # If a singe value is provided + if m_dict['val'] is not None: + _process_scalar_value(name, parse, type_, m_dict, values, + results_dictionary) + + # If the assigned value is a list: + elif m_dict['vals'] is not None: + _process_list_value(name, parse, type_, m_dict, values, + results_dictionary) + + else: # Not assigned a list or value + _parse_fail(name, type_, '', values) + + return results_dictionary + + +class HParams(object): + + _HAS_DYNAMIC_ATTRIBUTES = True # Required for pytype checks. + + def __init__(self, hparam_def=None, model_structure=None, **kwargs): + self._hparam_types = {} + self._model_structure = model_structure + if hparam_def: + self._init_from_proto(hparam_def) + if kwargs: + raise ValueError('hparam_def and initialization values are ' + 'mutually exclusive') + else: + for name, value in six.iteritems(kwargs): + self.add_hparam(name, value) + + def _init_from_proto(self, hparam_def): + """Creates a new HParams from `HParamDef` protocol buffer. + Args: + hparam_def: `HParamDef` protocol buffer. + """ + assert isinstance(hparam_def, hparam_pb2.HParamDef) + for name, value in hparam_def.hparam.items(): + kind = value.WhichOneof('kind') + if kind.endswith('_value'): + # Single value. + if kind.startswith('int64'): + # Setting attribute value to be 'int' to ensure the type is compatible + # with both Python2 and Python3. + self.add_hparam(name, int(getattr(value, kind))) + elif kind.startswith('bytes'): + # Setting attribute value to be 'str' to ensure the type is compatible + # with both Python2 and Python3. UTF-8 encoding is assumed. + self.add_hparam(name, compat.as_str(getattr(value, kind))) + else: + self.add_hparam(name, getattr(value, kind)) + else: + # List of values. + if kind.startswith('int64'): + # Setting attribute value to be 'int' to ensure the type is compatible + # with both Python2 and Python3. + self.add_hparam(name, [int(v) for v in getattr(value, kind).value]) + elif kind.startswith('bytes'): + # Setting attribute value to be 'str' to ensure the type is compatible + # with both Python2 and Python3. UTF-8 encoding is assumed. + self.add_hparam( + name, [compat.as_str(v) for v in getattr(value, kind).value]) + else: + self.add_hparam(name, [v for v in getattr(value, kind).value]) + + def add_hparam(self, name, value): + """Adds {name, value} pair to hyperparameters. + Args: + name: Name of the hyperparameter. + value: Value of the hyperparameter. Can be one of the following types: + int, float, string, int list, float list, or string list. + Raises: + ValueError: if one of the arguments is invalid. 
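+    Example (illustrative usage; `hp` is a hypothetical instance):
+      hp = HParams(learning_rate=0.1)
+      hp.add_hparam('hidden_units', [128, 256])
+      # hp.hidden_units is now [128, 256] and appears in hp.values().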
+ """ + # Keys in kwargs are unique, but 'name' could the name of a pre-existing + # attribute of this object. In that case we refuse to use it as a + # hyperparameter name. + if getattr(self, name, None) is not None: + raise ValueError('Hyperparameter name is reserved: %s' % name) + if isinstance(value, (list, tuple)): + if not value: + raise ValueError( + 'Multi-valued hyperparameters cannot be empty: %s' % name) + self._hparam_types[name] = (type(value[0]), True) + else: + self._hparam_types[name] = (type(value), False) + setattr(self, name, value) + + def set_hparam(self, name, value): + """Set the value of an existing hyperparameter. + This function verifies that the type of the value matches the type of the + existing hyperparameter. + Args: + name: Name of the hyperparameter. + value: New value of the hyperparameter. + Raises: + KeyError: If the hyperparameter doesn't exist. + ValueError: If there is a type mismatch. + """ + param_type, is_list = self._hparam_types[name] + if isinstance(value, list): + if not is_list: + raise ValueError( + 'Must not pass a list for single-valued parameter: %s' % name) + setattr(self, name, [ + _cast_to_type_if_compatible(name, param_type, v) for v in value]) + else: + if is_list: + raise ValueError( + 'Must pass a list for multi-valued parameter: %s.' % name) + setattr(self, name, _cast_to_type_if_compatible(name, param_type, value)) + + def del_hparam(self, name): + """Removes the hyperparameter with key 'name'. + Does nothing if it isn't present. + Args: + name: Name of the hyperparameter. + """ + if hasattr(self, name): + delattr(self, name) + del self._hparam_types[name] + + def parse(self, values): + """Override existing hyperparameter values, parsing new values from a string. + See parse_values for more detail on the allowed format for values. + Args: + values: String. Comma separated list of `name=value` pairs where 'value' + must follow the syntax described above. + Returns: + The `HParams` instance. + Raises: + ValueError: If `values` cannot be parsed or a hyperparameter in `values` + doesn't exist. + """ + type_map = {} + for name, t in self._hparam_types.items(): + param_type, _ = t + type_map[name] = param_type + + values_map = parse_values(values, type_map) + return self.override_from_dict(values_map) + + def override_from_dict(self, values_dict): + """Override existing hyperparameter values, parsing new values from a dictionary. + Args: + values_dict: Dictionary of name:value pairs. + Returns: + The `HParams` instance. + Raises: + KeyError: If a hyperparameter in `values_dict` doesn't exist. + ValueError: If `values_dict` cannot be parsed. + """ + for name, value in values_dict.items(): + self.set_hparam(name, value) + return self + + def set_model_structure(self, model_structure): + self._model_structure = model_structure + + def get_model_structure(self): + return self._model_structure + + def to_json(self, indent=None, separators=None, sort_keys=False): + """Serializes the hyperparameters into JSON. + Args: + indent: If a non-negative integer, JSON array elements and object members + will be pretty-printed with that indent level. An indent level of 0, or + negative, will only insert newlines. `None` (the default) selects the + most compact representation. + separators: Optional `(item_separator, key_separator)` tuple. Default is + `(', ', ': ')`. + sort_keys: If `True`, the output dictionaries will be sorted by key. + Returns: + A JSON string. 
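+    Example (illustrative; exact key order may vary by Python version):
+      HParams(optimizer='adam', lr=0.01).to_json()
+      # -> '{"optimizer": "adam", "lr": 0.01}'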
+ """ + return json.dumps( + self.values(), + indent=indent, + separators=separators, + sort_keys=sort_keys) + + def parse_json(self, values_json): + """Override existing hyperparameter values, parsing new values from a json object. + Args: + values_json: String containing a json object of name:value pairs. + Returns: + The `HParams` instance. + Raises: + KeyError: If a hyperparameter in `values_json` doesn't exist. + ValueError: If `values_json` cannot be parsed. + """ + values_map = json.loads(values_json) + return self.override_from_dict(values_map) + + def values(self): + """Return the hyperparameter values as a Python dictionary. + Returns: + A dictionary with hyperparameter names as keys. The values are the + hyperparameter values. + """ + return {n: getattr(self, n) for n in self._hparam_types.keys()} + + def get(self, key, default=None): + """Returns the value of `key` if it exists, else `default`.""" + if key in self._hparam_types: + # Ensure that default is compatible with the parameter type. + if default is not None: + param_type, is_param_list = self._hparam_types[key] + type_str = 'list<%s>' % param_type if is_param_list else str(param_type) + fail_msg = ("Hparam '%s' of type '%s' is incompatible with " + 'default=%s' % (key, type_str, default)) + + is_default_list = isinstance(default, list) + if is_param_list != is_default_list: + raise ValueError(fail_msg) + + try: + if is_default_list: + for value in default: + _cast_to_type_if_compatible(key, param_type, value) + else: + _cast_to_type_if_compatible(key, param_type, default) + except ValueError as e: + raise ValueError('%s. %s' % (fail_msg, e)) + + return getattr(self, key) + + return default + + def __contains__(self, key): + return key in self._hparam_types + + def __str__(self): + hpdict = self.values() + output_list = ['{}={}'.format(key, hpdict[key]) for key in hpdict] + return ','.join(output_list) + + def __repr__(self): + strval = str(sorted(self.values().items())) + return '%s(%s)' % (type(self).__name__, strval) + + @staticmethod + def _get_kind_name(param_type, is_list): + """Returns the field name given parameter type and is_list. + Args: + param_type: Data type of the hparam. + is_list: Whether this is a list. + Returns: + A string representation of the field name. + Raises: + ValueError: If parameter type is not recognized. + """ + if issubclass(param_type, bool): + # This check must happen before issubclass(param_type, six.integer_types), + # since Python considers bool to be a subclass of int. + typename = 'bool' + elif issubclass(param_type, six.integer_types): + # Setting 'int' and 'long' types to be 'int64' to ensure the type is + # compatible with both Python2 and Python3. + typename = 'int64' + elif issubclass(param_type, (six.string_types, six.binary_type)): + # Setting 'string' and 'bytes' types to be 'bytes' to ensure the type is + # compatible with both Python2 and Python3. + typename = 'bytes' + elif issubclass(param_type, float): + typename = 'float' + else: + raise ValueError('Unsupported parameter type: %s' % str(param_type)) + + suffix = 'list' if is_list else 'value' + return '_'.join([typename, suffix]) + + def to_proto(self, export_scope=None): # pylint: disable=unused-argument + """Converts a `HParams` object to a `HParamDef` protocol buffer. + Args: + export_scope: Optional `string`. Name scope to remove. + Returns: + A `HParamDef` protocol buffer. + """ + hparam_proto = hparam_pb2.HParamDef() + for name in self._hparam_types: + # Parse the values. 
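+      # Each entry of _hparam_types maps name -> (python_type, is_list);
+      # _get_kind_name() turns that pair into the HParamDef oneof field to
+      # populate, e.g. (int, True) -> 'int64_list', (str, False) -> 'bytes_value'.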
+ param_type, is_list = self._hparam_types.get(name, (None, None)) + kind = HParams._get_kind_name(param_type, is_list) + + if is_list: + if kind.startswith('bytes'): + v_list = [compat.as_bytes(v) for v in getattr(self, name)] + else: + v_list = [v for v in getattr(self, name)] + getattr(hparam_proto.hparam[name], kind).value.extend(v_list) + else: + v = getattr(self, name) + if kind.startswith('bytes'): + v = compat.as_bytes(getattr(self, name)) + setattr(hparam_proto.hparam[name], kind, v) + + return hparam_proto + + @staticmethod + def from_proto(hparam_def, import_scope=None): # pylint: disable=unused-argument + return HParams(hparam_def=hparam_def) + diff --git a/models/language_translation/tensorflow/mlperf_gnmt/fp32/misc_utils.py b/models/language_translation/tensorflow/mlperf_gnmt/fp32/misc_utils.py new file mode 100644 index 000000000..3d33b9b29 --- /dev/null +++ b/models/language_translation/tensorflow/mlperf_gnmt/fp32/misc_utils.py @@ -0,0 +1,182 @@ +# Copyright 2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Generally useful utility functions.""" +from __future__ import print_function + +import codecs +import collections +import json +import math +import os +import sys +import time +from distutils import version + +import numpy as np +import tensorflow as tf + +import hparam + +def check_tensorflow_version(): + min_tf_version = "1.4.0-dev20171024" + if (version.LooseVersion(tf.__version__) < + version.LooseVersion(min_tf_version)): + raise EnvironmentError("Tensorflow version must >= %s" % min_tf_version) + + +def safe_exp(value): + """Exponentiation with catching of overflow error.""" + try: + ans = math.exp(value) + except OverflowError: + ans = float("inf") + return ans + + +def print_time(s, start_time): + """Take a start time, print elapsed duration, and return a new time.""" + print("%s, time %ds, %s." 
% (s, (time.time() - start_time), time.ctime())) + sys.stdout.flush() + return time.time() + + +def print_out(s, f=None, new_line=True): + """Similar to print but with support to flush and output to a file.""" + if isinstance(s, bytes): + s = s.decode("utf-8") + + if f: + f.write(s.encode("utf-8")) + if new_line: + f.write(b"\n") + + # stdout + out_s = s.encode("utf-8") + if not isinstance(out_s, str): + out_s = out_s.decode("utf-8") + print(out_s, end="", file=sys.stdout) + + if new_line: + sys.stdout.write("\n") + sys.stdout.flush() + + +def print_hparams(hparams, skip_patterns=None, header=None): + """Print hparams, can skip keys based on pattern.""" + if header: print_out("%s" % header) + values = hparams.values() + for key in sorted(values.keys()): + if not skip_patterns or all( + [skip_pattern not in key for skip_pattern in skip_patterns]): + print_out(" %s=%s" % (key, str(values[key]))) + + +def load_hparams(model_dir): + """Load hparams from an existing model directory.""" + hparams_file = os.path.join(model_dir, "hparams") + if tf.io.gfile.exists(hparams_file): + print_out("# Loading hparams from %s" % hparams_file) + with codecs.getreader("utf-8")(tf.io.gfile.GFile(hparams_file, "rb")) as f: + try: + hparams_values = json.load(f) + hparams = hparams.HParams(**hparams_values) + except ValueError: + print_out(" can't load hparams file") + return None + return hparams + else: + return None + + +def maybe_parse_standard_hparams(hparams, hparams_path): + """Override hparams values with existing standard hparams config.""" + if hparams_path and tf.io.gfile.exists(hparams_path): + print_out("# Loading standard hparams from %s" % hparams_path) + with codecs.getreader("utf-8")(tf.io.gfile.GFile(hparams_path, "rb")) as f: + hparams.parse_json(f.read()) + return hparams + + +def save_hparams(out_dir, hparams): + """Save hparams.""" + hparams_file = os.path.join(out_dir, "hparams") + print_out(" saving hparams to %s" % hparams_file) + with codecs.getwriter("utf-8")(tf.io.gfile.GFile(hparams_file, "wb")) as f: + f.write(hparams.to_json(indent=4, sort_keys=True)) + + +def debug_tensor(s, msg=None, summarize=10): + """Print the shape and value of a tensor at test time. Return a new tensor.""" + if not msg: + msg = s.name + return tf.compat.v1.Print(s, [tf.shape(input=s), s], msg + " ", summarize=summarize) + + +def add_summary(summary_writer, global_step, tag, value): + """Add a new summary to the current summary_writer. + Useful to log things that are not part of the training graph, e.g., tag=BLEU. 
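+  Example (illustrative; `summary_writer` is e.g. a tf.compat.v1.summary.FileWriter):
+    add_summary(summary_writer, global_step=1000, tag="BLEU", value=27.3)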
+ """ + summary = tf.compat.v1.Summary(value=[tf.compat.v1.Summary.Value(tag=tag, simple_value=value)]) + summary_writer.add_summary(summary, global_step) + + +def get_config_proto(log_device_placement=False, allow_soft_placement=True, + num_intra_threads=0, num_inter_threads=0): + # GPU options: + # https://www.tensorflow.org/versions/r0.10/how_tos/using_gpu/index.html + config_proto = tf.compat.v1.ConfigProto( + log_device_placement=log_device_placement, + allow_soft_placement=allow_soft_placement) + config_proto.gpu_options.allow_growth = True + + # CPU threads options + if num_intra_threads: + config_proto.intra_op_parallelism_threads = num_intra_threads + if num_inter_threads: + config_proto.inter_op_parallelism_threads = num_inter_threads + + return config_proto + + +def format_text(words): + """Convert a sequence words into sentence.""" + if (not hasattr(words, "__len__") and # for numpy array + not isinstance(words, collections.Iterable)): + words = [words] + return b" ".join(words) + + +def format_bpe_text(symbols, delimiter=b"@@"): + """Convert a sequence of bpe words into sentence.""" + words = [] + word = b"" + if isinstance(symbols, str): + symbols = symbols.encode() + delimiter_len = len(delimiter) + for symbol in symbols: + if len(symbol) >= delimiter_len and symbol[-delimiter_len:] == delimiter: + word += symbol[:-delimiter_len] + else: # end of a word + word += symbol + words.append(word) + word = b"" + return b" ".join(words) + + +def format_spm_text(symbols): + """Decode a text in SPM (https://github.com/google/sentencepiece) format.""" + return u"".join(format_text(symbols).decode("utf-8").split()).replace( + u"\u2581", u" ").strip().encode("utf-8") diff --git a/models/language_translation/tensorflow/mlperf_gnmt/fp32/nmt_utils.py b/models/language_translation/tensorflow/mlperf_gnmt/fp32/nmt_utils.py new file mode 100644 index 000000000..543110e7f --- /dev/null +++ b/models/language_translation/tensorflow/mlperf_gnmt/fp32/nmt_utils.py @@ -0,0 +1,111 @@ +# Copyright 2017 Google Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Utility functions specifically for NMT.""" +from __future__ import print_function + +import codecs +import time +import numpy as np +import tensorflow as tf +import logging as log +import evaluation_utils +import misc_utils as utils + + +__all__ = ["decode_and_evaluate", "get_translation"] + + +def decode_and_evaluate(mode, sess, out_tensor, trans_file, ref_file, + metric='bleu', beam_width=10, + num_translations_per_input=1, iterations=1): + """Decode a test set and compute a score according to the evaluation task.""" + utils.print_out(" Decoding to output %s" % trans_file) + + with codecs.getwriter("utf-8")( + tf.io.gfile.GFile(trans_file, mode="wb")) as trans_f: + trans_f.write("") # Write empty string to ensure file is created. 
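+    # The loop below runs one untimed warm-up batch first, then repeatedly
+    # evaluates the output tensor until the input iterator raises
+    # OutOfRangeError, timing only the sess.run() calls; in 'accuracy' mode
+    # the decoded translations are also written to trans_f for BLEU scoring.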
+ num_translations_per_input = min(num_translations_per_input, beam_width) + + print(" Running inference with beam_width %g, num translations per input %d. " \ + % (beam_width, num_translations_per_input)) + print(" Total iterations count %d." % iterations) + + # Warmup for the first batch to take out the very first runtime + # session overhead. + nmt_outputs = sess.run(out_tensor) # time x batch_size x beam_width + nmt_outputs = nmt_outputs.transpose() # beam_width x batch_size x time + batch_size = nmt_outputs.shape[1] + for sent_id in range(batch_size): + translation = get_translation(nmt_outputs[0], sent_id, + tgt_eos='') + if mode == 'accuracy': + trans_f.write((translation + b"\n").decode("utf-8")) + + # prediction time is the time for the model prediction only + # overall time is the time for data pre-processing and data post-processing + prediction_times = list() + overall_start = time.time() + num_sentences = 0 + n = 0 + while n < iterations: + n += 1 + while True: + try: + start = time.time() + nmt_outputs = sess.run(out_tensor) # time x batch_size x beam_width + nmt_outputs = nmt_outputs.transpose() # beam_width x batch_size x time + prediction_times.append(time.time() - start) + batch_size = nmt_outputs.shape[1] + num_sentences += batch_size + for sent_id in range(batch_size): + for beam_id in range(num_translations_per_input): + translation = get_translation(nmt_outputs[beam_id], sent_id, + tgt_eos='') + if mode == 'accuracy': + trans_f.write((translation + b"\n").decode("utf-8")) + + except tf.errors.OutOfRangeError: + utils.print_time( + " Done, num sentences %d, num translations per input %d" % + (num_sentences, num_translations_per_input), overall_start) + break + + overall_time = (time.time() - overall_start) + print("\nAverage Prediction Latency: {:.5f} sec per batch.".format( + sum(prediction_times)/float(len(prediction_times)))) + print("Overall Latency: {:.5f} sec for the entire test " + "dataset.".format(overall_time/float(iterations))) + print("Overall Throughput : {:.3f} sentences per sec.".format( + num_sentences/float(overall_time))) + + # Evaluation + if mode == 'accuracy': + if ref_file and tf.io.gfile.exists(trans_file): + score = evaluation_utils.evaluate(ref_file, trans_file, metric) + utils.print_out(" Accuracy metric %s: %.1f" % (metric, score)) + + +def get_translation(nmt_outputs, sent_id, tgt_eos): + """Given batch decoding outputs, select a sentence and turn to text.""" + if tgt_eos: tgt_eos = tgt_eos.encode("utf-8") + # Select a sentence + output = nmt_outputs[sent_id, :].tolist() + + # If there is an eos symbol in outputs, cut them at that point. + if tgt_eos and tgt_eos in output: + output = output[:output.index(tgt_eos)] + translation = utils.format_text(output) + + return translation diff --git a/models/language_translation/tensorflow/mlperf_gnmt/fp32/rouge.py b/models/language_translation/tensorflow/mlperf_gnmt/fp32/rouge.py new file mode 100644 index 000000000..e0269b9c1 --- /dev/null +++ b/models/language_translation/tensorflow/mlperf_gnmt/fp32/rouge.py @@ -0,0 +1,352 @@ +"""ROUGE metric implementation. + +Copy from tf_seq2seq/seq2seq/metrics/rouge.py. +This is a modified and slightly extended verison of +https://github.com/miso-belica/sumy/blob/dev/sumy/evaluation/rouge.py. 
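+
+Example (illustrative):
+  rouge(["the cat sat on the mat"], ["the cat was on the mat"])
+  # -> dict with keys such as "rouge_1/f_score", "rouge_2/f_score" and
+  #    "rouge_l/f_score", plus the matching precision/recall entries.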
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import itertools +import numpy as np + +#pylint: disable=C0103 + + +def _get_ngrams(n, text): + """Calcualtes n-grams. + + Args: + n: which n-grams to calculate + text: An array of tokens + + Returns: + A set of n-grams + """ + ngram_set = set() + text_length = len(text) + max_index_ngram_start = text_length - n + for i in range(max_index_ngram_start + 1): + ngram_set.add(tuple(text[i:i + n])) + return ngram_set + + +def _split_into_words(sentences): + """Splits multiple sentences into words and flattens the result""" + return list(itertools.chain(*[_.split(" ") for _ in sentences])) + + +def _get_word_ngrams(n, sentences): + """Calculates word n-grams for multiple sentences. + """ + assert len(sentences) > 0 + assert n > 0 + + words = _split_into_words(sentences) + return _get_ngrams(n, words) + + +def _len_lcs(x, y): + """ + Returns the length of the Longest Common Subsequence between sequences x + and y. + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: sequence of words + y: sequence of words + + Returns + integer: Length of LCS between x and y + """ + table = _lcs(x, y) + n, m = len(x), len(y) + return table[n, m] + + +def _lcs(x, y): + """ + Computes the length of the longest common subsequence (lcs) between two + strings. The implementation below uses a DP programming algorithm and runs + in O(nm) time where n = len(x) and m = len(y). + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: collection of words + y: collection of words + + Returns: + Table of dictionary of coord and len lcs + """ + n, m = len(x), len(y) + table = dict() + for i in range(n + 1): + for j in range(m + 1): + if i == 0 or j == 0: + table[i, j] = 0 + elif x[i - 1] == y[j - 1]: + table[i, j] = table[i - 1, j - 1] + 1 + else: + table[i, j] = max(table[i - 1, j], table[i, j - 1]) + return table + + +def _recon_lcs(x, y): + """ + Returns the Longest Subsequence between x and y. + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: sequence of words + y: sequence of words + + Returns: + sequence: LCS of x and y + """ + i, j = len(x), len(y) + table = _lcs(x, y) + + def _recon(i, j): + """private recon calculation""" + if i == 0 or j == 0: + return [] + elif x[i - 1] == y[j - 1]: + return _recon(i - 1, j - 1) + [(x[i - 1], i)] + elif table[i - 1, j] > table[i, j - 1]: + return _recon(i - 1, j) + else: + return _recon(i, j - 1) + + recon_tuple = tuple(map(lambda x: x[0], _recon(i, j))) + return recon_tuple + + +def rouge_n(evaluated_sentences, reference_sentences, n=2): + """ + Computes ROUGE-N of two text collections of sentences. + Sourece: http://research.microsoft.com/en-us/um/people/cyl/download/ + papers/rouge-working-note-v1.3.1.pdf + + Args: + evaluated_sentences: The sentences that have been picked by the summarizer + reference_sentences: The sentences from the referene set + n: Size of ngram. Defaults to 2. 
+ + Returns: + A tuple (f1, precision, recall) for ROUGE-N + + Raises: + ValueError: raises exception if a param has len <= 0 + """ + if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: + raise ValueError("Collections must contain at least 1 sentence.") + + evaluated_ngrams = _get_word_ngrams(n, evaluated_sentences) + reference_ngrams = _get_word_ngrams(n, reference_sentences) + reference_count = len(reference_ngrams) + evaluated_count = len(evaluated_ngrams) + + # Gets the overlapping ngrams between evaluated and reference + overlapping_ngrams = evaluated_ngrams.intersection(reference_ngrams) + overlapping_count = len(overlapping_ngrams) + + # Handle edge case. This isn't mathematically correct, but it's good enough + if evaluated_count == 0: + precision = 0.0 + else: + precision = overlapping_count / evaluated_count + + if reference_count == 0: + recall = 0.0 + else: + recall = overlapping_count / reference_count + + f1_score = 2.0 * ((precision * recall) / (precision + recall + 1e-8)) + + # return overlapping_count / reference_count + return f1_score, precision, recall + + +def _f_p_r_lcs(llcs, m, n): + """ + Computes the LCS-based F-measure score + Source: http://research.microsoft.com/en-us/um/people/cyl/download/papers/ + rouge-working-note-v1.3.1.pdf + + Args: + llcs: Length of LCS + m: number of words in reference summary + n: number of words in candidate summary + + Returns: + Float. LCS-based F-measure score + """ + r_lcs = llcs / m + p_lcs = llcs / n + beta = p_lcs / (r_lcs + 1e-12) + num = (1 + (beta**2)) * r_lcs * p_lcs + denom = r_lcs + ((beta**2) * p_lcs) + f_lcs = num / (denom + 1e-12) + return f_lcs, p_lcs, r_lcs + + +def rouge_l_sentence_level(evaluated_sentences, reference_sentences): + """ + Computes ROUGE-L (sentence level) of two text collections of sentences. + http://research.microsoft.com/en-us/um/people/cyl/download/papers/ + rouge-working-note-v1.3.1.pdf + + Calculated according to: + R_lcs = LCS(X,Y)/m + P_lcs = LCS(X,Y)/n + F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs) + + where: + X = reference summary + Y = Candidate summary + m = length of reference summary + n = length of candidate summary + + Args: + evaluated_sentences: The sentences that have been picked by the summarizer + reference_sentences: The sentences from the referene set + + Returns: + A float: F_lcs + + Raises: + ValueError: raises exception if a param has len <= 0 + """ + if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: + raise ValueError("Collections must contain at least 1 sentence.") + reference_words = _split_into_words(reference_sentences) + evaluated_words = _split_into_words(evaluated_sentences) + m = len(reference_words) + n = len(evaluated_words) + lcs = _len_lcs(evaluated_words, reference_words) + return _f_p_r_lcs(lcs, m, n) + + +def _union_lcs(evaluated_sentences, reference_sentence): + """ + Returns LCS_u(r_i, C) which is the LCS score of the union longest common + subsequence between reference sentence ri and candidate summary C. For example + if r_i= w1 w2 w3 w4 w5, and C contains two sentences: c1 = w1 w2 w6 w7 w8 and + c2 = w1 w3 w8 w9 w5, then the longest common subsequence of r_i and c1 is + "w1 w2" and the longest common subsequence of r_i and c2 is "w1 w3 w5". The + union longest common subsequence of r_i, c1, and c2 is "w1 w2 w3 w5" and + LCS_u(r_i, C) = 4/5. 
+ + Args: + evaluated_sentences: The sentences that have been picked by the summarizer + reference_sentence: One of the sentences in the reference summaries + + Returns: + float: LCS_u(r_i, C) + + ValueError: + Raises exception if a param has len <= 0 + """ + if len(evaluated_sentences) <= 0: + raise ValueError("Collections must contain at least 1 sentence.") + + lcs_union = set() + reference_words = _split_into_words([reference_sentence]) + combined_lcs_length = 0 + for eval_s in evaluated_sentences: + evaluated_words = _split_into_words([eval_s]) + lcs = set(_recon_lcs(reference_words, evaluated_words)) + combined_lcs_length += len(lcs) + lcs_union = lcs_union.union(lcs) + + union_lcs_count = len(lcs_union) + union_lcs_value = union_lcs_count / combined_lcs_length + return union_lcs_value + + +def rouge_l_summary_level(evaluated_sentences, reference_sentences): + """ + Computes ROUGE-L (summary level) of two text collections of sentences. + http://research.microsoft.com/en-us/um/people/cyl/download/papers/ + rouge-working-note-v1.3.1.pdf + + Calculated according to: + R_lcs = SUM(1, u)[LCS(r_i,C)]/m + P_lcs = SUM(1, u)[LCS(r_i,C)]/n + F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs) + + where: + SUM(i,u) = SUM from i through u + u = number of sentences in reference summary + C = Candidate summary made up of v sentences + m = number of words in reference summary + n = number of words in candidate summary + + Args: + evaluated_sentences: The sentences that have been picked by the summarizer + reference_sentence: One of the sentences in the reference summaries + + Returns: + A float: F_lcs + + Raises: + ValueError: raises exception if a param has len <= 0 + """ + if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: + raise ValueError("Collections must contain at least 1 sentence.") + + # total number of words in reference sentences + m = len(_split_into_words(reference_sentences)) + + # total number of words in evaluated sentences + n = len(_split_into_words(evaluated_sentences)) + + union_lcs_sum_across_all_references = 0 + for ref_s in reference_sentences: + union_lcs_sum_across_all_references += _union_lcs(evaluated_sentences, + ref_s) + return _f_p_r_lcs(union_lcs_sum_across_all_references, m, n) + + +def rouge(hypotheses, references): + """Calculates average rouge scores for a list of hypotheses and + references""" + + # Filter out hyps that are of 0 length + # hyps_and_refs = zip(hypotheses, references) + # hyps_and_refs = [_ for _ in hyps_and_refs if len(_[0]) > 0] + # hypotheses, references = zip(*hyps_and_refs) + + # Calculate ROUGE-1 F1, precision, recall scores + rouge_1 = [ + rouge_n([hyp], [ref], 1) for hyp, ref in zip(hypotheses, references) + ] + rouge_1_f, rouge_1_p, rouge_1_r = map(np.mean, zip(*rouge_1)) + + # Calculate ROUGE-2 F1, precision, recall scores + rouge_2 = [ + rouge_n([hyp], [ref], 2) for hyp, ref in zip(hypotheses, references) + ] + rouge_2_f, rouge_2_p, rouge_2_r = map(np.mean, zip(*rouge_2)) + + # Calculate ROUGE-L F1, precision, recall scores + rouge_l = [ + rouge_l_sentence_level([hyp], [ref]) + for hyp, ref in zip(hypotheses, references) + ] + rouge_l_f, rouge_l_p, rouge_l_r = map(np.mean, zip(*rouge_l)) + + return { + "rouge_1/f_score": rouge_1_f, + "rouge_1/r_score": rouge_1_r, + "rouge_1/p_score": rouge_1_p, + "rouge_2/f_score": rouge_2_f, + "rouge_2/r_score": rouge_2_r, + "rouge_2/p_score": rouge_2_p, + "rouge_l/f_score": rouge_l_f, + "rouge_l/r_score": rouge_l_r, + "rouge_l/p_score": rouge_l_p, + } diff --git 
a/models/language_translation/tensorflow/mlperf_gnmt/fp32/run_inference.py b/models/language_translation/tensorflow/mlperf_gnmt/fp32/run_inference.py new file mode 100644 index 000000000..051d24c6b --- /dev/null +++ b/models/language_translation/tensorflow/mlperf_gnmt/fp32/run_inference.py @@ -0,0 +1,120 @@ +import codecs +import argparse +import os +import tensorflow as tf +from tensorflow.core.framework import graph_pb2 +from tensorflow.python.framework import importer + +import misc_utils as utils +from nmt_utils import decode_and_evaluate + +from tensorflow_addons import seq2seq + +parser = argparse.ArgumentParser() +parser.add_argument("--in_graph", type=str, required=True, + help="Specify the frozen inference graph in pb format.") +parser.add_argument("--batch_size", type=int, default=32, + help="Specify inference batch size.") +parser.add_argument("--num_inter_threads", type=int, default=0, + help="Specify number of inter-op threads.") +parser.add_argument("--num_intra_threads", type=int, default=0, + help="Specify number of intra-op threads.") +parser.add_argument("--src_vocab_file", type=str, required=True, + help="Specify source vocab file.") +parser.add_argument("--tgt_vocab_file", type=str, required=True, + help="Specify target vocabulary file.") +parser.add_argument("--inference_input_file", type=str, required=True, + help="Specify input file to be translated.") +parser.add_argument("--inference_output_file", type=str, default=None, + help="Specify output file for resulting translation.") +parser.add_argument("--inference_ref_file", type=str, required=True, + help="Specify reference output file.") +parser.add_argument("--run", type=str, default="accuracy", + help="Specify either 'accuracy' for BLEU metric or " + "'performance' for latency and throughput.") +args = parser.parse_args() + +out_dir = os.path.join(os.getcwd(), 'output') +tf.io.gfile.makedirs(out_dir) + +if args.inference_output_file: + inference_output_file = args.inference_output_file +else: + inference_output_file = os.path.join(out_dir, 'gnmt-out') + +def read_source_sentences(inference_input_file): + """Load inference data.""" + with codecs.getreader("utf-8")( + tf.io.gfile.GFile(inference_input_file, mode="rb")) as f: + inference_data = f.read().splitlines() + return inference_data + +def create_new_vocab_file(vocab_file): + """Creates a new vocabulary file prepending three new tokens: + (1) for unknown tag, (2) for start of sentence tag, and (3) for end of + sentence tag.""" + vocab = [] + with codecs.getreader("utf-8")(tf.io.gfile.GFile(vocab_file, "rb")) as f: + vocab_size = 0 + for word in f: + vocab_size += 1 + vocab.append(word.strip()) + + if tf.io.gfile.exists(vocab_file): + utils.print_out("# Vocab file %s exists" % vocab_file) + assert len(vocab) >= 3 + (unk, sos, eos) = ("", "", "") + if vocab[0] != unk or vocab[1] != sos or vocab[2] != eos: + utils.print_out("The first 3 vocab words [%s, %s, %s]" + " are not [%s, %s, %s]" % + (vocab[0], vocab[1], vocab[2], unk, sos, eos)) + vocab = [unk, sos, eos] + vocab + vocab_size += 3 + new_vocab_file = os.path.join(out_dir, os.path.basename(vocab_file)) + with codecs.getwriter("utf-8")( + tf.io.gfile.GFile(new_vocab_file, "wb")) as f: + for word in vocab: + f.write("%s\n" % word) + vocab_file = new_vocab_file + else: + raise ValueError("vocab_file '%s' does not exist." 
% vocab_file) + return vocab_file + +if __name__ == "__main__": + graph_def = graph_pb2.GraphDef() + with tf.io.gfile.GFile(args.in_graph, "rb") as f: + data = f.read() + graph_def.ParseFromString(data) + graph = tf.Graph() + with graph.as_default(): + importer.import_graph_def(graph_def, input_map={}, name="") + # Get input and output and tensors/ops for inference. + src_vocab_placeholder = graph.get_tensor_by_name('source_vocab_file:0') + tgt_vocab_placeholder = graph.get_tensor_by_name('target_vocab_file:0') + src_data_placeholder = graph.get_tensor_by_name('source_data:0') + batch_size_placeholder = graph.get_tensor_by_name('batch_size:0') + + tables_initializer = graph.get_operation_by_name('init_all_tables') + iterator_initilizer = graph.get_operation_by_name('MakeIterator') + sample_words_tensor = graph.get_tensor_by_name('hash_table_Lookup_1/LookupTableFindV2:0') + + # Create a session with imported graph. + config_proto = tf.compat.v1.ConfigProto(allow_soft_placement=True, + intra_op_parallelism_threads = args.num_intra_threads, + inter_op_parallelism_threads = args.num_inter_threads) + sess = tf.compat.v1.Session(graph=graph, config=config_proto) + + # Read source data. + src_data = read_source_sentences(args.inference_input_file) + + # Initialize vocabulary tables and source data iterator. + sess.run(tables_initializer, feed_dict={ + src_vocab_placeholder: create_new_vocab_file(args.src_vocab_file), + tgt_vocab_placeholder: create_new_vocab_file(args.tgt_vocab_file)}) + sess.run(iterator_initilizer, feed_dict={ + src_data_placeholder: src_data, + batch_size_placeholder: args.batch_size}) + + # Decode + decode_and_evaluate(args.run, sess, sample_words_tensor, inference_output_file, + args.inference_ref_file) diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/compute_bleu.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/compute_bleu.py new file mode 100644 index 000000000..bc1f52252 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/compute_bleu.py @@ -0,0 +1,141 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Script to compute official BLEU score. 
+ +Source: +https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re +import sys +import unicodedata + +# pylint: disable=g-bad-import-order +import six +from absl import app as absl_app +from absl import flags +import tensorflow as tf +# pylint: enable=g-bad-import-order + +from utils import metrics +from utils import tokenizer +from official.utils.flags import core as flags_core + + +class UnicodeRegex(object): + """Ad-hoc hack to recognize all punctuation and symbols.""" + + def __init__(self): + punctuation = self.property_chars("P") + self.nondigit_punct_re = re.compile(r"([^\d])([" + punctuation + r"])") + self.punct_nondigit_re = re.compile(r"([" + punctuation + r"])([^\d])") + self.symbol_re = re.compile("([" + self.property_chars("S") + "])") + + def property_chars(self, prefix): + return "".join(six.unichr(x) for x in range(sys.maxunicode) + if unicodedata.category(six.unichr(x)).startswith(prefix)) + + +uregex = UnicodeRegex() + + +def bleu_tokenize(string): + r"""Tokenize a string following the official BLEU implementation. + + See https://github.com/moses-smt/mosesdecoder/' + 'blob/master/scripts/generic/mteval-v14.pl#L954-L983 + In our case, the input string is expected to be just one line + and no HTML entities de-escaping is needed. + So we just tokenize on punctuation and symbols, + except when a punctuation is preceded and followed by a digit + (e.g. a comma/dot as a thousand/decimal separator). + + Note that a numer (e.g. a year) followed by a dot at the end of sentence + is NOT tokenized, + i.e. the dot stays with the number because `s/(\p{P})(\P{N})/ $1 $2/g` + does not match this case (unless we add a space after each sentence). + However, this error is already in the original mteval-v14.pl + and we want to be consistent with it. + + Args: + string: the input string + + Returns: + a list of tokens + """ + string = uregex.nondigit_punct_re.sub(r"\1 \2 ", string) + string = uregex.punct_nondigit_re.sub(r" \1 \2", string) + string = uregex.symbol_re.sub(r" \1 ", string) + return string.split() + + +def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False): + """Compute BLEU for two files (reference and hypothesis translation).""" + ref_lines = tokenizer.native_to_unicode( + tf.io.gfile.GFile(ref_filename).read()).strip().splitlines() + hyp_lines = tokenizer.native_to_unicode( + tf.io.gfile.GFile(hyp_filename).read()).strip().splitlines() + + if len(ref_lines) != len(hyp_lines): + raise ValueError("Reference and translation files have different number of " + "lines. 
If training only a few steps (100-200), the " + "translation may be empty.") + if not case_sensitive: + ref_lines = [x.lower() for x in ref_lines] + hyp_lines = [x.lower() for x in hyp_lines] + ref_tokens = [bleu_tokenize(x) for x in ref_lines] + hyp_tokens = [bleu_tokenize(x) for x in hyp_lines] + return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100 + + +def main(unused_argv): + if FLAGS.bleu_variant in ("both", "uncased"): + score = bleu_wrapper(FLAGS.reference, FLAGS.translation, False) + tf.compat.v1.logging.info("Case-insensitive results: %f" % score) + + if FLAGS.bleu_variant in ("both", "cased"): + score = bleu_wrapper(FLAGS.reference, FLAGS.translation, True) + tf.compat.v1.logging.info("Case-sensitive results: %f" % score) + + +def define_compute_bleu_flags(): + """Add flags for computing BLEU score.""" + flags.DEFINE_string( + name="translation", default=None, + help=flags_core.help_wrap("File containing translated text.")) + flags.mark_flag_as_required("translation") + + flags.DEFINE_string( + name="reference", default=None, + help=flags_core.help_wrap("File containing reference translation.")) + flags.mark_flag_as_required("reference") + + flags.DEFINE_enum( + name="bleu_variant", short_name="bv", default="both", + enum_values=["both", "uncased", "cased"], case_sensitive=False, + help=flags_core.help_wrap( + "Specify one or more BLEU variants to calculate. Variants: \"cased\"" + ", \"uncased\", or \"both\".")) + + +if __name__ == "__main__": + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) + define_compute_bleu_flags() + FLAGS = flags.FLAGS + absl_app.run(main) diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/infer_ab.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/infer_ab.py new file mode 100644 index 000000000..fa680046f --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/infer_ab.py @@ -0,0 +1,232 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +import tensorflow as tf +from tensorflow.python.framework import ops +from google.protobuf import text_format + +from tensorflow.core.framework import graph_pb2 +import numpy as np +from utils import tokenizer +from utils.tokenizer import Subtokenizer + +from tensorflow.python.platform import flags as flags_lib +from tensorflow.python.platform import app +import time +import pandas as pd +from timeit import default_timer as timer + +flags = flags_lib +FLAGS = flags.FLAGS + +flags.DEFINE_string("in_graph", 'fp32_graphdef.pb', + """TensorFlow 'GraphDef' file for FP32 to load.""") +flags.DEFINE_bool("input_binary", True, + """Whether the input files are in binary format.""") +flags.DEFINE_string( + "vocab_file", "vocab.ende.32768", + "Path to subtoken vocabulary file.") +flags.DEFINE_string( + "file", "newstest2014.en", + """File saved to an output file.""") +flags.DEFINE_string( + "file_out", "translate.txt", + """If --file flag is specified, save translation to this file.""") +flags.DEFINE_integer("batch_size", 64, + """The validation batch size""") +flags.DEFINE_integer("num_inter", 1, + """Number of sentences to exclude from validation file.""") +flags.DEFINE_integer("num_intra", 40, + """Number of sentences to exclude from validation file.""") +flags.DEFINE_integer("num_batches", 0, + """Number of batches of sentences to run inference for""") +flags.DEFINE_bool("sort_input_sentences", None, + """Sort the sequence of sentences in validation file. + Sorting improves batch processing time""") + +def input_generator_ts(): + """Read and sort lines based on token count from the file + sorted by decreasing length based on token sorting. + Args: + filename: String name of file to read inputs from. + Returns: + Sorted list of inputs, and dictionary mapping original index->sorted index + of each element. + """ + with tf.io.gfile.GFile(FLAGS.file) as f: + records = f.read().split("\n") + inputs = [record.strip() for record in records] + if not inputs[-1]: + inputs.pop() + + subtokenizer = Subtokenizer(FLAGS.vocab_file) + + batch = [] + token_lens=[] + for i, line in enumerate(inputs): + enc = subtokenizer.encode(line, add_eos=True) + token_lens.append((i, len(enc))) + + sorted_by_token_input_lens = sorted(token_lens, key=lambda x: x[1], reverse=True) + + #print('sorted_by_token_input_lens:{}'.format(sorted_by_token_input_lens)) + + sorted_inputs = [None] * len(sorted_by_token_input_lens) + sorted_keys = [0] * len(sorted_by_token_input_lens) + + for i, (index, _) in enumerate(sorted_by_token_input_lens): + sorted_inputs[i] = inputs[index] + sorted_keys[index] = i + enc=subtokenizer.encode(sorted_inputs[i], add_eos=True) + batch.append(enc) + + return batch,sorted_keys + +def input_generator_ws(): + """Read and sort lines from the file sorted by decreasing length based on word counts. + Args: + filename: String name of file to read inputs from. + Returns: + Sorted list of inputs, and dictionary mapping original index->sorted index + of each element. 
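+  Example (illustrative): for inputs ["x", "a b c"], the returned batch holds
+  the subtoken encodings of ["a b c", "x"] (longest sentence first) and
+  sorted_keys == [1, 0], i.e. sorted_keys[original_index] is that sentence's
+  position in the sorted batch.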
+ """ + with tf.io.gfile.GFile(FLAGS.file) as f: + records = f.read().split("\n") + inputs = [record.strip() for record in records] + if not inputs[-1]: + inputs.pop() + + batch = [] + + subtokenizer = Subtokenizer(FLAGS.vocab_file) + + input_lens = [(i, len(line.split())) for i, line in enumerate(inputs)] + sorted_input_lens = sorted(input_lens, key=lambda x: x[1], reverse=True) + + sorted_inputs = [None] * len(sorted_input_lens) + sorted_keys = [0] * len(sorted_input_lens) + for i, (index, _) in enumerate(sorted_input_lens): + sorted_inputs[i] = inputs[index] + sorted_keys[index] = i + enc=subtokenizer.encode(sorted_inputs[i], add_eos=True) + batch.append(enc) + return batch,sorted_keys + +def _trim_and_decode(ids): + """Trim EOS and PAD tokens from ids, and decode to return a string.""" + subtokenizer = Subtokenizer(FLAGS.vocab_file) + try: + index = list(ids).index(tokenizer.EOS_ID) + return subtokenizer.decode(ids[:index]) + except ValueError: # No EOS found in sequence + return subtokenizer.decode(ids) + +def main(unused_args): + + graph_def = graph_pb2.GraphDef() + graph_file=FLAGS.in_graph + + + start=timer() + with open(graph_file, "rb") as f: + if FLAGS.input_binary: + graph_def.ParseFromString(f.read()) + else: + text_format.Merge(f.read(), graph_def) + end=timer() + graph_parse_time = end-start + print("Graph parsed in %f s" % (end-start)) + + start=timer() + with tf.Graph().as_default() as graph: + y = tf.import_graph_def(graph_def,return_elements=["model/Transformer/strided_slice_19:0"], name='') + end=timer() + print("import_graph_def took %fs" % (end-start)) + + start=timer() + batches,sorted_keys = input_generator_ts() + end=timer() + sort_time = end-start + print("tokenizer took %f s" % (sort_time)) + + DATASET_SIZE=len(batches) + print("Translating {} sentences from English to German.".format(DATASET_SIZE)) + + session_config = tf.compat.v1.ConfigProto( + inter_op_parallelism_threads=FLAGS.num_inter, + intra_op_parallelism_threads=FLAGS.num_intra) + + with tf.compat.v1.Session(config=session_config, graph=graph) as sess: + + run_options = tf.compat.v1.RunOptions(trace_level=tf.compat.v1.RunOptions.FULL_TRACE) + run_metadata = tf.compat.v1.RunMetadata() + + translations = [] + + batch = [] + batch_num=0 + line_count=0 + + total_start_time = time.time() + inference_time = 0.0 + + for i, line in enumerate(batches): + batch.append(line) + duration = 0.0 + if i % FLAGS.batch_size == 0: + line_count += 1 + batch_num = (i // FLAGS.batch_size) + 1 + start_time = time.time() + dec_tensor = sess.run(y, feed_dict={'input_tensor:0': pd.DataFrame(batch).fillna(0).values.astype(np.int32)}) + duration = time.time() - start_time + translations.append(dec_tensor) + #print('Batch inferencing time:%s for batch size:%d and batch:%d' % (duration, FLAGS.batch_size, batch_num)) + batch = [] + elif i % (len(batches) - 1) == 0: + batch_num = (i // FLAGS.batch_size) + 1 + start_time = time.time() + dec_tensor = sess.run(y, feed_dict={'input_tensor:0': pd.DataFrame(batch).fillna(0).values.astype(np.int32)}) + duration = time.time() - start_time + translations.append(dec_tensor) + #print('Batch inferencing time:%s for batch size:%d and batch:%d' % (duration, FLAGS.batch_size, batch_num)) + batch = [] + inference_time += duration + + inference_time += graph_parse_time + inference_time += sort_time + print('Total inferencing time:%s' %(inference_time)) + print('Throughput:{} sentences/second'.format((DATASET_SIZE)/inference_time)) + + translation_count = 0 + + decoded_translations=[] + for i,tr in 
enumerate(translations): + for j,itr in enumerate(tr): + for k,otr in enumerate(itr): + translation_count += 1 + decoded_translations.append(_trim_and_decode(otr)) + + print('Total number of sentences translated:%d' % (translation_count)) + + with tf.io.gfile.GFile(FLAGS.file_out, "w") as f: + for i in sorted_keys: + f.write("%s\n" % decoded_translations[i]) + +if __name__ == "__main__": + app.run() diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/__init__.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/README.md b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/README.md new file mode 100644 index 000000000..18160f780 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/README.md @@ -0,0 +1,97 @@ +# Adding Abseil (absl) flags quickstart +## Defining a flag +absl flag definitions are similar to argparse, although they are defined on a global namespace. + +For instance defining a string flag looks like: +```$xslt +from absl import flags +flags.DEFINE_string( + name="my_flag", + default="a_sensible_default", + help="Here is what this flag does." +) +``` + +All three arguments are required, but default may be `None`. A common optional argument is +short_name for defining abreviations. Certain `DEFINE_*` methods will have other required arguments. +For instance `DEFINE_enum` requires the `enum_values` argument to be specified. + +## Key Flags +absl has the concept of a key flag. Any flag defined in `__main__` is considered a key flag by +default. Key flags are displayed in `--help`, others only appear in `--helpfull`. In order to +handle key flags that are defined outside the module in question, absl provides the +`flags.adopt_module_key_flags()` method. This adds the key flags of a different module to one's own +key flags. For example: +```$xslt +File: flag_source.py +--------------------------------------- + +from absl import flags +flags.DEFINE_string(name="my_flag", default="abc", help="a flag.") +``` + +```$xslt +File: my_module.py +--------------------------------------- + +from absl import app as absl_app +from absl import flags + +import flag_source + +flags.adopt_module_key_flags(flag_source) + +def main(_): + pass + +absl_app.run(main, [__file__, "-h"] +``` + +when `my_module.py` is run it will show the help text for `my_flag`. Because not all flags defined +in a file are equally important, `official/utils/flags/core.py` (generally imported as flags_core) +provides an abstraction for handling key flag declaration in an easy way through the +`register_key_flags_in_core()` function, which allows a module to make a single +`adopt_key_flags(flags_core)` call when using the util flag declaration functions. + +## Validators +Often the constraints on a flag are complicated. absl provides the validator decorator to allow +one to mark a function as a flag validation function. Suppose we want users to provide a flag +which is a palindrome. 
+ +```$xslt +from absl import flags + +flags.DEFINE_string(name="pal_flag", short_name="pf", default="", help="Give me a palindrome") + +@flags.validator("pal_flag") +def _check_pal(provided_pal_flag): + return provided_pal_flag == provided_pal_flag[::-1] + +``` + +Validators take the form that returning True (truthy) passes, and all others +(False, None, exception) fail. + +## Testing +To test using absl, simply declare flags in the setupClass method of TensorFlow's TestCase. + +```$xslt +from absl import flags +import tensorflow as tf + +def define_flags(): + flags.DEFINE_string(name="test_flag", default="abc", help="an example flag") + + +class BaseTester(unittest.TestCase): + + @classmethod + def setUpClass(cls): + super(BaseTester, cls).setUpClass() + define_flags() + + def test_trivial(self): + flags_core.parse_flags([__file__, "test_flag", "def"]) + self.AssertEqual(flags.FLAGS.test_flag, "def") + +``` diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/__init__.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_base.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_base.py new file mode 100644 index 000000000..b9007dc30 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_base.py @@ -0,0 +1,163 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Flags which will be nearly universal across models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import flags +import tensorflow as tf + +from official.utils.flags._conventions import help_wrap +from official.utils.logs import hooks_helper + + +def define_base(data_dir=True, model_dir=True, clean=False, train_epochs=False, + epochs_between_evals=False, stop_threshold=False, + batch_size=True, num_gpu=False, hooks=False, export_dir=False, + distribution_strategy=False, run_eagerly=False): + """Register base flags. + + Args: + data_dir: Create a flag for specifying the input data directory. + model_dir: Create a flag for specifying the model file directory. + clean: Create a flag for removing the model_dir. + train_epochs: Create a flag to specify the number of training epochs. + epochs_between_evals: Create a flag to specify the frequency of testing. + stop_threshold: Create a flag to specify a threshold accuracy or other + eval metric which should trigger the end of training. + batch_size: Create a flag to specify the batch size. + num_gpu: Create a flag to specify the number of GPUs used. 
+ hooks: Create a flag to specify hooks for logging. + export_dir: Create a flag to specify where a SavedModel should be exported. + distribution_strategy: Create a flag to specify which Distribution Strategy + to use. + run_eagerly: Create a flag to specify to run eagerly op by op. + Returns: + A list of flags for core.py to marks as key flags. + """ + key_flags = [] + + if data_dir: + flags.DEFINE_string( + name="data_dir", short_name="dd", default="/tmp", + help=help_wrap("The location of the input data.")) + key_flags.append("data_dir") + + if model_dir: + flags.DEFINE_string( + name="model_dir", short_name="md", default="/tmp", + help=help_wrap("The location of the model checkpoint files.")) + key_flags.append("model_dir") + + if clean: + flags.DEFINE_boolean( + name="clean", default=False, + help=help_wrap("If set, model_dir will be removed if it exists.")) + key_flags.append("clean") + + if train_epochs: + flags.DEFINE_integer( + name="train_epochs", short_name="te", default=1, + help=help_wrap("The number of epochs used to train.")) + key_flags.append("train_epochs") + + if epochs_between_evals: + flags.DEFINE_integer( + name="epochs_between_evals", short_name="ebe", default=1, + help=help_wrap("The number of training epochs to run between " + "evaluations.")) + key_flags.append("epochs_between_evals") + + if stop_threshold: + flags.DEFINE_float( + name="stop_threshold", short_name="st", + default=None, + help=help_wrap("If passed, training will stop at the earlier of " + "train_epochs and when the evaluation metric is " + "greater than or equal to stop_threshold.")) + + if batch_size: + flags.DEFINE_integer( + name="batch_size", short_name="bs", default=32, + help=help_wrap("Batch size for training and evaluation. When using " + "multiple gpus, this is the global batch size for " + "all devices. For example, if the batch size is 32 " + "and there are 4 GPUs, each GPU will get 8 examples on " + "each step.")) + key_flags.append("batch_size") + + if num_gpu: + flags.DEFINE_integer( + name="num_gpus", short_name="ng", + default=1, + help=help_wrap( + "How many GPUs to use at each worker with the " + "DistributionStrategies API. The default is 1.")) + + if run_eagerly: + flags.DEFINE_boolean( + name="run_eagerly", default=False, + help="Run the model op by op without building a model function.") + + if hooks: + # Construct a pretty summary of hooks. + hook_list_str = ( + u"\ufeff Hook:\n" + u"\n".join([u"\ufeff {}".format(key) for key + in hooks_helper.HOOKS])) + flags.DEFINE_list( + name="hooks", short_name="hk", default="LoggingTensorHook", + help=help_wrap( + u"A list of (case insensitive) strings to specify the names of " + u"training hooks.\n{}\n\ufeff Example: `--hooks ProfilerHook," + u"ExamplesPerSecondHook`\n See official.utils.logs.hooks_helper " + u"for details.".format(hook_list_str)) + ) + key_flags.append("hooks") + + if export_dir: + flags.DEFINE_string( + name="export_dir", short_name="ed", default=None, + help=help_wrap("If set, a SavedModel serialization of the model will " + "be exported to this directory at the end of training. " + "See the README for more details and relevant links.") + ) + key_flags.append("export_dir") + + if distribution_strategy: + flags.DEFINE_string( + name="distribution_strategy", short_name="ds", default="mirrored", + help=help_wrap("The Distribution Strategy to use for training. " + "Accepted values are 'off', 'one_device', " + "'mirrored', 'parameter_server', 'collective', " + "case insensitive. 
'off' means not to use " + "Distribution Strategy; 'default' means to choose " + "from `MirroredStrategy` or `OneDeviceStrategy` " + "according to the number of GPUs.") + ) + + + return key_flags + + +def get_num_gpus(flags_obj): + """Treat num_gpus=-1 as 'use all'.""" + if flags_obj.num_gpus != -1: + return flags_obj.num_gpus + + from tensorflow.python.client import device_lib # pylint: disable=g-import-not-at-top + local_device_protos = device_lib.list_local_devices() + return sum([1 for d in local_device_protos if d.device_type == "GPU"]) diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_benchmark.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_benchmark.py new file mode 100644 index 000000000..2a20ed997 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_benchmark.py @@ -0,0 +1,105 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Flags for benchmarking models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import flags + +from official.utils.flags._conventions import help_wrap + + +def define_benchmark(benchmark_log_dir=True, bigquery_uploader=True): + """Register benchmarking flags. + + Args: + benchmark_log_dir: Create a flag to specify location for benchmark logging. + bigquery_uploader: Create flags for uploading results to BigQuery. + + Returns: + A list of flags for core.py to marks as key flags. + """ + + key_flags = [] + + flags.DEFINE_enum( + name="benchmark_logger_type", default="BaseBenchmarkLogger", + enum_values=["BaseBenchmarkLogger", "BenchmarkFileLogger", + "BenchmarkBigQueryLogger"], + help=help_wrap("The type of benchmark logger to use. Defaults to using " + "BaseBenchmarkLogger which logs to STDOUT. Different " + "loggers will require other flags to be able to work.")) + flags.DEFINE_string( + name="benchmark_test_id", short_name="bti", default=None, + help=help_wrap("The unique test ID of the benchmark run. It could be the " + "combination of key parameters. It is hardware " + "independent and could be used compare the performance " + "between different test runs. This flag is designed for " + "human consumption, and does not have any impact within " + "the system.")) + + flags.DEFINE_integer( + name='log_steps', default=100, + help='For every log_steps, we log the timing information such as ' + 'examples per second. 
Besides, for every log_steps, we store the ' + 'timestamp of a batch end.') + + if benchmark_log_dir: + flags.DEFINE_string( + name="benchmark_log_dir", short_name="bld", default=None, + help=help_wrap("The location of the benchmark logging.") + ) + + if bigquery_uploader: + flags.DEFINE_string( + name="gcp_project", short_name="gp", default=None, + help=help_wrap( + "The GCP project name where the benchmark will be uploaded.")) + + flags.DEFINE_string( + name="bigquery_data_set", short_name="bds", default="test_benchmark", + help=help_wrap( + "The Bigquery dataset name where the benchmark will be uploaded.")) + + flags.DEFINE_string( + name="bigquery_run_table", short_name="brt", default="benchmark_run", + help=help_wrap("The Bigquery table name where the benchmark run " + "information will be uploaded.")) + + flags.DEFINE_string( + name="bigquery_run_status_table", short_name="brst", + default="benchmark_run_status", + help=help_wrap("The Bigquery table name where the benchmark run " + "status information will be uploaded.")) + + flags.DEFINE_string( + name="bigquery_metric_table", short_name="bmt", + default="benchmark_metric", + help=help_wrap("The Bigquery table name where the benchmark metric " + "information will be uploaded.")) + + @flags.multi_flags_validator( + ["benchmark_logger_type", "benchmark_log_dir"], + message="--benchmark_logger_type=BenchmarkFileLogger will require " + "--benchmark_log_dir being set") + def _check_benchmark_log_dir(flags_dict): + benchmark_logger_type = flags_dict["benchmark_logger_type"] + if benchmark_logger_type == "BenchmarkFileLogger": + return flags_dict["benchmark_log_dir"] + return True + + return key_flags diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_conventions.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_conventions.py new file mode 100644 index 000000000..81ad21b0c --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_conventions.py @@ -0,0 +1,54 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Central location for shared argparse convention definitions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import codecs +import functools + +from absl import app as absl_app +from absl import flags + + +# This codifies help string conventions and makes it easy to update them if +# necessary. Currently the only major effect is that help bodies start on the +# line after flags are listed. All flag definitions should wrap the text bodies +# with help wrap when calling DEFINE_*. 
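+#
+# For example, the flag definitions added in _base.py in this same patch use it
+# as (illustrative excerpt):
+#
+#   flags.DEFINE_string(
+#       name="data_dir", short_name="dd", default="/tmp",
+#       help=help_wrap("The location of the input data."))
+#
+# so that the wrapped help body starts on the line after the flag listing.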
+_help_wrap = functools.partial(flags.text_wrap, length=80, indent="", + firstline_indent="\n") + + +# Pretty formatting causes issues when utf-8 is not installed on a system. +def _stdout_utf8(): + try: + codecs.lookup("utf-8") + except LookupError: + return False + return sys.stdout.encoding == "UTF-8" + + +if _stdout_utf8(): + help_wrap = _help_wrap +else: + def help_wrap(text, *args, **kwargs): + return _help_wrap(text, *args, **kwargs).replace(u"\ufeff", u"") + + +# Replace None with h to also allow -h +absl_app.HelpshortFlag.SHORT_NAME = "h" diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_device.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_device.py new file mode 100644 index 000000000..edaf2f9a1 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_device.py @@ -0,0 +1,85 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Flags for managing compute devices. Currently only contains TPU flags.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import flags +import tensorflow as tf + +from official.utils.flags._conventions import help_wrap + + +def require_cloud_storage(flag_names): + """Register a validator to check directory flags. + Args: + flag_names: An iterable of strings containing the names of flags to be + checked. + """ + msg = "TPU requires GCS path for {}".format(", ".join(flag_names)) + @flags.multi_flags_validator(["tpu"] + flag_names, message=msg) + def _path_check(flag_values): # pylint: disable=missing-docstring + if flag_values["tpu"] is None: + return True + + valid_flags = True + for key in flag_names: + if not flag_values[key].startswith("gs://"): + tf.compat.v1.logging.error("{} must be a GCS path.".format(key)) + valid_flags = False + + return valid_flags + + +def define_device(tpu=True): + """Register device specific flags. + Args: + tpu: Create flags to specify TPU operation. + Returns: + A list of flags for core.py to marks as key flags. + """ + + key_flags = [] + + if tpu: + flags.DEFINE_string( + name="tpu", default=None, + help=help_wrap( + "The Cloud TPU to use for training. This should be either the name " + "used when creating the Cloud TPU, or a " + "grpc://ip.address.of.tpu:8470 url. Passing `local` will use the" + "CPU of the local instance instead. (Good for debugging.)")) + key_flags.append("tpu") + + flags.DEFINE_string( + name="tpu_zone", default=None, + help=help_wrap( + "[Optional] GCE zone where the Cloud TPU is located in. 
If not " + "specified, we will attempt to automatically detect the GCE " + "project from metadata.")) + + flags.DEFINE_string( + name="tpu_gcp_project", default=None, + help=help_wrap( + "[Optional] Project name for the Cloud TPU-enabled project. If not " + "specified, we will attempt to automatically detect the GCE " + "project from metadata.")) + + flags.DEFINE_integer(name="num_tpu_shards", default=8, + help=help_wrap("Number of shards (TPU chips).")) + + return key_flags diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_distribution.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_distribution.py new file mode 100644 index 000000000..ca331bf24 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_distribution.py @@ -0,0 +1,54 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Flags related to distributed execution.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import flags +import tensorflow as tf + +from official.utils.flags._conventions import help_wrap + + +def define_distribution(worker_hosts=True, task_index=True): + """Register distributed execution flags. + + Args: + worker_hosts: Create a flag for specifying comma-separated list of workers. + task_index: Create a flag for specifying index of task. + + Returns: + A list of flags for core.py to marks as key flags. + """ + key_flags = [] + + if worker_hosts: + flags.DEFINE_string( + name='worker_hosts', default=None, + help=help_wrap( + 'Comma-separated list of worker ip:port pairs for running ' + 'multi-worker models with DistributionStrategy. The user would ' + 'start the program on each host with identical value for this ' + 'flag.')) + + if task_index: + flags.DEFINE_integer( + name='task_index', default=-1, + help=help_wrap('If multi-worker training, the task_index of this ' + 'worker.')) + + return key_flags diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_misc.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_misc.py new file mode 100644 index 000000000..c6fa24b5a --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_misc.py @@ -0,0 +1,50 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Misc flags.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import flags + +from official.utils.flags._conventions import help_wrap + + +def define_image(data_format=True): + """Register image specific flags. + + Args: + data_format: Create a flag to specify image axis convention. + + Returns: + A list of flags for core.py to marks as key flags. + """ + + key_flags = [] + + if data_format: + flags.DEFINE_enum( + name="data_format", short_name="df", default=None, + enum_values=["channels_first", "channels_last"], + help=help_wrap( + "A flag to override the data format used in the model. " + "channels_first provides a performance boost on GPU but is not " + "always compatible with CPU. If left unspecified, the data format " + "will be chosen automatically based on whether TensorFlow was " + "built for CPU or GPU.")) + key_flags.append("data_format") + + return key_flags diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_performance.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_performance.py new file mode 100644 index 000000000..45b1d5bd9 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/_performance.py @@ -0,0 +1,299 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Register flags for optimizing performance.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import multiprocessing + +from absl import flags # pylint: disable=g-bad-import-order +import tensorflow as tf # pylint: disable=g-bad-import-order + +from official.utils.flags._conventions import help_wrap + + +# Map string to TensorFlow dtype +DTYPE_MAP = { + "fp16": tf.float16, + "bf16": tf.bfloat16, + "fp32": tf.float32, +} + + +def get_tf_dtype(flags_obj): + if getattr(flags_obj, "fp16_implementation", None) == "graph_rewrite": + # If the graph_rewrite is used, we build the graph with fp32, and let the + # graph rewrite change ops to fp16. 
+ return tf.float32 + return DTYPE_MAP[flags_obj.dtype] + + +def get_loss_scale(flags_obj, default_for_fp16): + if flags_obj.loss_scale == "dynamic": + return flags_obj.loss_scale + elif flags_obj.loss_scale is not None: + return float(flags_obj.loss_scale) + elif flags_obj.dtype == "fp32": + return 1 # No loss scaling is needed for fp32 + else: + assert flags_obj.dtype == "fp16" + return default_for_fp16 + + +def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, + synthetic_data=False, max_train_steps=False, dtype=False, + all_reduce_alg=False, num_packs=False, + tf_gpu_thread_mode=False, + datasets_num_private_threads=False, + datasets_num_parallel_batches=False, + dynamic_loss_scale=False, fp16_implementation=False, + loss_scale=False, + tf_data_experimental_slack=False, enable_xla=False, + force_v2_in_keras_compile=False, + training_dataset_cache=False): + """Register flags for specifying performance tuning arguments. + + Args: + num_parallel_calls: Create a flag to specify parallelism of data loading. + inter_op: Create a flag to allow specification of inter op threads. + intra_op: Create a flag to allow specification of intra op threads. + synthetic_data: Create a flag to allow the use of synthetic data. + max_train_steps: Create a flags to allow specification of maximum number + of training steps + dtype: Create flags for specifying dtype. + all_reduce_alg: If set forces a specific algorithm for multi-gpu. + num_packs: If set provides number of packs for MirroredStrategy's cross + device ops. + tf_gpu_thread_mode: gpu_private triggers us of private thread pool. + datasets_num_private_threads: Number of private threads for datasets. + datasets_num_parallel_batches: Determines how many batches to process in + parallel when using map and batch from tf.data. + dynamic_loss_scale: Allow the "loss_scale" flag to take on the value + "dynamic". Only valid if `dtype` is True. + fp16_implementation: Create fp16_implementation flag. + loss_scale: Controls the loss scaling, normally for mixed-precision + training. Can only be turned on if dtype is also True. + tf_data_experimental_slack: Determines whether to enable tf.data's + `experimental_slack` option. + enable_xla: Determines if XLA (auto clustering) is turned on. + force_v2_in_keras_compile: Forces the use of run_distribued path even if not + using a `strategy`. This is not the same as + `tf.distribute.OneDeviceStrategy` + training_dataset_cache: Whether to cache the training dataset on workers. + Typically used to improve training performance when training data is in + remote storage and can fit into worker memory. + + Returns: + A list of flags for core.py to marks as key flags. + """ + + key_flags = [] + if num_parallel_calls: + flags.DEFINE_integer( + name="num_parallel_calls", short_name="npc", + default=multiprocessing.cpu_count(), + help=help_wrap("The number of records that are processed in parallel " + "during input processing. This can be optimized per " + "data set but for generally homogeneous data sets, " + "should be approximately the number of available CPU " + "cores. (default behavior)")) + + if inter_op: + flags.DEFINE_integer( + name="inter_op_parallelism_threads", short_name="inter", default=0, + help=help_wrap("Number of inter_op_parallelism_threads to use for CPU. 
" + "See TensorFlow config.proto for details.") + ) + + if intra_op: + flags.DEFINE_integer( + name="intra_op_parallelism_threads", short_name="intra", default=0, + help=help_wrap("Number of intra_op_parallelism_threads to use for CPU. " + "See TensorFlow config.proto for details.")) + + if synthetic_data: + flags.DEFINE_bool( + name="use_synthetic_data", short_name="synth", default=False, + help=help_wrap( + "If set, use fake data (zeroes) instead of a real dataset. " + "This mode is useful for performance debugging, as it removes " + "input processing steps, but will not learn anything.")) + + if max_train_steps: + flags.DEFINE_integer( + name="max_train_steps", short_name="mts", default=None, help=help_wrap( + "The model will stop training if the global_step reaches this " + "value. If not set, training will run until the specified number " + "of epochs have run as usual. It is generally recommended to set " + "--train_epochs=1 when using this flag." + )) + + if dtype: + flags.DEFINE_enum( + name="dtype", short_name="dt", default="fp32", + enum_values=DTYPE_MAP.keys(), + help=help_wrap("The TensorFlow datatype used for calculations. " + "Variables may be cast to a higher precision on a " + "case-by-case basis for numerical stability.")) + + loss_scale_help_text = ( + "The amount to scale the loss by when the model is run. {}. Before " + "gradients are computed, the loss is multiplied by the loss scale, " + "making all gradients loss_scale times larger. To adjust for this, " + "gradients are divided by the loss scale before being applied to " + "variables. This is mathematically equivalent to training without " + "a loss scale, but the loss scale helps avoid some intermediate " + "gradients from underflowing to zero. If not provided the default " + "for fp16 is 128 and 1 for all other dtypes.{}" + ) + if dynamic_loss_scale: + loss_scale_help_text = loss_scale_help_text.format( + "This can be an int/float or the string 'dynamic'", + " The string 'dynamic' can be used to dynamically determine the " + "optimal loss scale during training, but currently this " + "significantly slows down performance") + loss_scale_validation_msg = ("loss_scale should be a positive int/float " + "or the string 'dynamic'.") + else: + loss_scale_help_text = loss_scale_help_text.format( + "This must be an int/float", "") + loss_scale_validation_msg = "loss_scale should be a positive int/float." + if loss_scale: + flags.DEFINE_string( + name="loss_scale", short_name="ls", default=None, + help=help_wrap(loss_scale_help_text)) + + @flags.validator(flag_name="loss_scale", + message=loss_scale_validation_msg) + def _check_loss_scale(loss_scale): # pylint: disable=unused-variable + """Validator to check the loss scale flag is valid.""" + if loss_scale is None: + return True # null case is handled in get_loss_scale() + + if loss_scale == "dynamic" and dynamic_loss_scale: + return True + + try: + loss_scale = float(loss_scale) + except ValueError: + return False + + return loss_scale > 0 + + if fp16_implementation: + flags.DEFINE_enum( + name="fp16_implementation", default="keras", + enum_values=("keras', 'graph_rewrite"), + help=help_wrap( + "When --dtype=fp16, how fp16 should be implemented. This has no " + "impact on correctness. 'keras' uses the " + "tf.keras.mixed_precision API. 
'graph_rewrite' uses the " + "tf.train.experimental.enable_mixed_precision_graph_rewrite " + "API.")) + + @flags.multi_flags_validator(["fp16_implementation", "dtype", + "loss_scale"]) + def _check_fp16_implementation(flags_dict): + """Validator to check fp16_implementation flag is valid.""" + if (flags_dict["fp16_implementation"] == "graph_rewrite" and + flags_dict["dtype"] != "fp16"): + raise flags.ValidationError("--fp16_implementation should not be " + "specified unless --dtype=fp16") + return True + + if all_reduce_alg: + flags.DEFINE_string( + name="all_reduce_alg", short_name="ara", default=None, + help=help_wrap("Defines the algorithm to use for performing all-reduce." + "When specified with MirroredStrategy for single " + "worker, this controls " + "tf.contrib.distribute.AllReduceCrossTowerOps. When " + "specified with MultiWorkerMirroredStrategy, this " + "controls " + "tf.distribute.experimental.CollectiveCommunication; " + "valid options are `ring` and `nccl`.")) + + if num_packs: + flags.DEFINE_integer( + name="num_packs", default=1, + help=help_wrap("Sets `num_packs` in the cross device ops used in " + "MirroredStrategy. For details, see " + "tf.distribute.NcclAllReduce.")) + + if tf_gpu_thread_mode: + flags.DEFINE_string( + name="tf_gpu_thread_mode", short_name="gt_mode", default=None, + help=help_wrap( + "Whether and how the GPU device uses its own threadpool.") + ) + + flags.DEFINE_integer( + name="per_gpu_thread_count", short_name="pgtc", default=0, + help=help_wrap( + "The number of threads to use for GPU. Only valid when " + "tf_gpu_thread_mode is not global.") + ) + + if datasets_num_private_threads: + flags.DEFINE_integer( + name="datasets_num_private_threads", + default=None, + help=help_wrap( + "Number of threads for a private threadpool created for all" + "datasets computation..") + ) + + if datasets_num_parallel_batches: + flags.DEFINE_integer( + name="datasets_num_parallel_batches", + default=None, + help=help_wrap( + "Determines how many batches to process in parallel when using " + "map and batch from tf.data.") + ) + + if training_dataset_cache: + flags.DEFINE_boolean( + name="training_dataset_cache", + default=False, + help=help_wrap( + "Determines whether to cache the training dataset on workers. " + "Typically used to improve training performance when training " + "data is in remote storage and can fit into worker memory.") + ) + + if tf_data_experimental_slack: + flags.DEFINE_boolean( + name="tf_data_experimental_slack", + default=False, + help=help_wrap( + "Whether to enable tf.data's `experimental_slack` option.") + ) + + if enable_xla: + flags.DEFINE_boolean( + name="enable_xla", default=False, + help="Whether to enable XLA auto jit compilation") + + if force_v2_in_keras_compile: + flags.DEFINE_boolean( + name="force_v2_in_keras_compile", default=None, + help="Forces the use of run_distribued path even if not" + "using a `strategy`. This is not the same as" + "`tf.distribute.OneDeviceStrategy`") + + return key_flags diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/core.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/core.py new file mode 100644 index 000000000..2b07ed8f2 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/core.py @@ -0,0 +1,132 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Public interface for flag definition. + +See _example.py for detailed instructions on defining flags. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +from six.moves import shlex_quote + +from absl import app as absl_app +from absl import flags + +from official.utils.flags import _base +from official.utils.flags import _benchmark +from official.utils.flags import _conventions +from official.utils.flags import _device +from official.utils.flags import _distribution +from official.utils.flags import _misc +from official.utils.flags import _performance + + +def set_defaults(**kwargs): + for key, value in kwargs.items(): + flags.FLAGS.set_default(name=key, value=value) + + +def parse_flags(argv=None): + """Reset flags and reparse. Currently only used in testing.""" + flags.FLAGS.unparse_flags() + absl_app.parse_flags_with_usage(argv or sys.argv) + + +def register_key_flags_in_core(f): + """Defines a function in core.py, and registers its key flags. + + absl uses the location of a flags.declare_key_flag() to determine the context + in which a flag is key. By making all declares in core, this allows model + main functions to call flags.adopt_module_key_flags() on core and correctly + chain key flags. + + Args: + f: The function to be wrapped + + Returns: + The "core-defined" version of the input function. + """ + + def core_fn(*args, **kwargs): + key_flags = f(*args, **kwargs) + [flags.declare_key_flag(fl) for fl in key_flags] # pylint: disable=expression-not-assigned + return core_fn + + +define_base = register_key_flags_in_core(_base.define_base) +# We have define_base_eager for compatibility, since it used to be a separate +# function from define_base. 
+define_base_eager = define_base +define_benchmark = register_key_flags_in_core(_benchmark.define_benchmark) +define_device = register_key_flags_in_core(_device.define_device) +define_image = register_key_flags_in_core(_misc.define_image) +define_performance = register_key_flags_in_core(_performance.define_performance) +define_distribution = register_key_flags_in_core( + _distribution.define_distribution) + + +help_wrap = _conventions.help_wrap + + +get_num_gpus = _base.get_num_gpus +get_tf_dtype = _performance.get_tf_dtype +get_loss_scale = _performance.get_loss_scale +DTYPE_MAP = _performance.DTYPE_MAP +require_cloud_storage = _device.require_cloud_storage + +def _get_nondefault_flags_as_dict(): + """Returns the nondefault flags as a dict from flag name to value.""" + nondefault_flags = {} + for flag_name in flags.FLAGS: + flag_value = getattr(flags.FLAGS, flag_name) + if (flag_name != flags.FLAGS[flag_name].short_name and + flag_value != flags.FLAGS[flag_name].default): + nondefault_flags[flag_name] = flag_value + return nondefault_flags + + +def get_nondefault_flags_as_str(): + """Returns flags as a string that can be passed as command line arguments. + + E.g., returns: "--batch_size=256 --use_synthetic_data" for the following code + block: + + ``` + flags.FLAGS.batch_size = 256 + flags.FLAGS.use_synthetic_data = True + print(get_nondefault_flags_as_str()) + ``` + + Only flags with nondefault values are returned, as passing default flags as + command line arguments has no effect. + + Returns: + A string with the flags, that can be passed as command line arguments to a + program to use the flags. + """ + nondefault_flags = _get_nondefault_flags_as_dict() + flag_strings = [] + for name, value in sorted(nondefault_flags.items()): + if isinstance(value, bool): + flag_str = '--{}'.format(name) if value else '--no{}'.format(name) + elif isinstance(value, list): + flag_str = '--{}={}'.format(name, ','.join(value)) + else: + flag_str = '--{}={}'.format(name, value) + flag_strings.append(flag_str) + return ' '.join(shlex_quote(flag_str) for flag_str in flag_strings) diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/flags_test.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/flags_test.py new file mode 100644 index 000000000..e11a16422 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/flags_test.py @@ -0,0 +1,162 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import unittest + +from absl import flags +import tensorflow as tf + +from official.utils.flags import core as flags_core # pylint: disable=g-bad-import-order + + +def define_flags(): + flags_core.define_base(clean=True, num_gpu=False, stop_threshold=True, + hooks=True, train_epochs=True, + epochs_between_evals=True) + flags_core.define_performance( + num_parallel_calls=True, inter_op=True, intra_op=True, + dynamic_loss_scale=True, loss_scale=True, synthetic_data=True, + dtype=True) + flags_core.define_image() + flags_core.define_benchmark() + + +class BaseTester(unittest.TestCase): + + @classmethod + def setUpClass(cls): + super(BaseTester, cls).setUpClass() + define_flags() + + def test_default_setting(self): + """Test to ensure fields exist and defaults can be set. + """ + + defaults = dict( + data_dir="dfgasf", + model_dir="dfsdkjgbs", + train_epochs=534, + epochs_between_evals=15, + batch_size=256, + hooks=["LoggingTensorHook"], + num_parallel_calls=18, + inter_op_parallelism_threads=5, + intra_op_parallelism_threads=10, + data_format="channels_first" + ) + + flags_core.set_defaults(**defaults) + flags_core.parse_flags() + + for key, value in defaults.items(): + assert flags.FLAGS.get_flag_value(name=key, default=None) == value + + def test_benchmark_setting(self): + defaults = dict( + hooks=["LoggingMetricHook"], + benchmark_log_dir="/tmp/12345", + gcp_project="project_abc", + ) + + flags_core.set_defaults(**defaults) + flags_core.parse_flags() + + for key, value in defaults.items(): + assert flags.FLAGS.get_flag_value(name=key, default=None) == value + + def test_booleans(self): + """Test to ensure boolean flags trigger as expected. + """ + + flags_core.parse_flags([__file__, "--use_synthetic_data"]) + + assert flags.FLAGS.use_synthetic_data + + def test_parse_dtype_info(self): + flags_core.parse_flags([__file__, "--dtype", "fp16"]) + self.assertEqual(flags_core.get_tf_dtype(flags.FLAGS), tf.float16) + self.assertEqual(flags_core.get_loss_scale(flags.FLAGS, + default_for_fp16=2), 2) + + flags_core.parse_flags( + [__file__, "--dtype", "fp16", "--loss_scale", "5"]) + self.assertEqual(flags_core.get_loss_scale(flags.FLAGS, + default_for_fp16=2), 5) + + flags_core.parse_flags( + [__file__, "--dtype", "fp16", "--loss_scale", "dynamic"]) + self.assertEqual(flags_core.get_loss_scale(flags.FLAGS, + default_for_fp16=2), "dynamic") + + flags_core.parse_flags([__file__, "--dtype", "fp32"]) + self.assertEqual(flags_core.get_tf_dtype(flags.FLAGS), tf.float32) + self.assertEqual(flags_core.get_loss_scale(flags.FLAGS, + default_for_fp16=2), 1) + + flags_core.parse_flags([__file__, "--dtype", "fp32", "--loss_scale", "5"]) + self.assertEqual(flags_core.get_loss_scale(flags.FLAGS, + default_for_fp16=2), 5) + + + with self.assertRaises(SystemExit): + flags_core.parse_flags([__file__, "--dtype", "int8"]) + + with self.assertRaises(SystemExit): + flags_core.parse_flags([__file__, "--dtype", "fp16", + "--loss_scale", "abc"]) + + def test_get_nondefault_flags_as_str(self): + defaults = dict( + clean=True, + data_dir="abc", + hooks=["LoggingTensorHook"], + stop_threshold=1.5, + use_synthetic_data=False + ) + flags_core.set_defaults(**defaults) + flags_core.parse_flags() + + expected_flags = "" + self.assertEqual(flags_core.get_nondefault_flags_as_str(), expected_flags) + + flags.FLAGS.clean = False + expected_flags += "--noclean" + self.assertEqual(flags_core.get_nondefault_flags_as_str(), expected_flags) + + 
flags.FLAGS.data_dir = "xyz" + expected_flags += " --data_dir=xyz" + self.assertEqual(flags_core.get_nondefault_flags_as_str(), expected_flags) + + flags.FLAGS.hooks = ["aaa", "bbb", "ccc"] + expected_flags += " --hooks=aaa,bbb,ccc" + self.assertEqual(flags_core.get_nondefault_flags_as_str(), expected_flags) + + flags.FLAGS.stop_threshold = 3. + expected_flags += " --stop_threshold=3.0" + self.assertEqual(flags_core.get_nondefault_flags_as_str(), expected_flags) + + flags.FLAGS.use_synthetic_data = True + expected_flags += " --use_synthetic_data" + self.assertEqual(flags_core.get_nondefault_flags_as_str(), expected_flags) + + # Assert that explicit setting a flag to its default value does not cause it + # to appear in the string + flags.FLAGS.use_synthetic_data = False + expected_flags = expected_flags[:-len(" --use_synthetic_data")] + self.assertEqual(flags_core.get_nondefault_flags_as_str(), expected_flags) + + +if __name__ == "__main__": + unittest.main() diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/guidelines.md b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/guidelines.md new file mode 100644 index 000000000..db963aabe --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/flags/guidelines.md @@ -0,0 +1,65 @@ +# Using flags in official models + +1. **All common flags must be incorporated in the models.** + + Common flags (i.e. batch_size, model_dir, etc.) are provided by various flag definition functions, + and channeled through `official.utils.flags.core`. For instance to define common supervised + learning parameters one could use the following code: + + ```$xslt + from absl import app as absl_app + from absl import flags + + from official.utils.flags import core as flags_core + + + def define_flags(): + flags_core.define_base() + flags.adopt_key_flags(flags_core) + + + def main(_): + flags_obj = flags.FLAGS + print(flags_obj) + + + if __name__ == "__main__" + absl_app.run(main) + ``` +2. **Validate flag values.** + + See the [Validators](#validators) section for implementation details. + + Validators in the official model repo should not access the file system, such as verifying + that files exist, due to the strict ordering requirements. + +3. **Flag values should not be mutated.** + + Instead of mutating flag values, use getter functions to return the desired values. An example + getter function is `get_tf_dtype` function below: + + ``` + # Map string to TensorFlow dtype + DTYPE_MAP = { + "fp16": tf.float16, + "fp32": tf.float32, + } + + def get_tf_dtype(flags_obj): + if getattr(flags_obj, "fp16_implementation", None) == "graph_rewrite": + # If the graph_rewrite is used, we build the graph with fp32, and let the + # graph rewrite change ops to fp16. + return tf.float32 + return DTYPE_MAP[flags_obj.dtype] + + + def main(_): + flags_obj = flags.FLAGS() + + # Do not mutate flags_obj + # if flags_obj.fp16_implementation == "graph_rewrite": + # flags_obj.dtype = "float32" # Don't do this + + print(get_tf_dtype(flags_obj)) + ... 
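+
+  # get_loss_scale(flags_obj, default_for_fp16) in _performance.py follows the
+  # same getter pattern: it reads flags_obj.loss_scale and flags_obj.dtype and
+  # returns the scale to use without ever writing back to the flags object.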
+ ``` \ No newline at end of file diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/hyperparams_flags.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/hyperparams_flags.py new file mode 100644 index 000000000..961d6150e --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/hyperparams_flags.py @@ -0,0 +1,119 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Common flags for importing hyperparameters.""" + +from __future__ import absolute_import +from __future__ import division +# from __future__ import google_type_annotations +from __future__ import print_function + +from absl import flags +from official.utils.flags import core as flags_core + +FLAGS = flags.FLAGS + + +def define_common_hparams_flags(): + """Define the common flags across models.""" + + flags.DEFINE_string( + 'model_dir', + default=None, + help=('The directory where the model and training/evaluation summaries' + 'are stored.')) + + flags.DEFINE_integer( + 'train_batch_size', default=None, help='Batch size for training.') + + flags.DEFINE_integer( + 'eval_batch_size', default=None, help='Batch size for evaluation.') + + flags.DEFINE_string( + 'precision', + default=None, + help=('Precision to use; one of: {bfloat16, float32}')) + + flags.DEFINE_string( + 'config_file', + default=None, + help=('A YAML file which specifies overrides. Note that this file can be ' + 'used as an override template to override the default parameters ' + 'specified in Python. If the same parameter is specified in both ' + '`--config_file` and `--params_override`, the one in ' + '`--params_override` will be used finally.')) + + flags.DEFINE_string( + 'params_override', + default=None, + help=('a YAML/JSON string or a YAML file which specifies additional ' + 'overrides over the default parameters and those specified in ' + '`--config_file`. Note that this is supposed to be used only to ' + 'override the model parameters, but not the parameters like TPU ' + 'specific flags. One canonical use case of `--config_file` and ' + '`--params_override` is users first define a template config file ' + 'using `--config_file`, then use `--params_override` to adjust the ' + 'minimal set of tuning parameters, for example setting up different' + ' `train_batch_size`. ' + 'The final override order of parameters: default_model_params --> ' + 'params from config_file --> params in params_override.' 
+ 'See also the help message of `--config_file`.')) + flags.DEFINE_integer('save_checkpoint_freq', None, + 'Number of steps to save checkpoint.') + + +def initialize_common_flags(): + """Define the common flags across models.""" + define_common_hparams_flags() + + flags_core.define_device(tpu=True) + flags_core.define_base( + num_gpu=True, model_dir=False, data_dir=False, batch_size=False) + flags_core.define_distribution(worker_hosts=True, task_index=True) + flags_core.define_performance(all_reduce_alg=True, num_packs=True) + + # Reset the default value of num_gpus to zero. + FLAGS.num_gpus = 0 + + flags.DEFINE_string( + 'strategy_type', 'mirrored', 'Type of distribute strategy.' + 'One of mirrored, tpu and multiworker.') + + +def strategy_flags_dict(): + """Returns TPU and/or GPU related flags in a dictionary.""" + return { + # TPUStrategy related flags. + 'tpu': FLAGS.tpu, + # MultiWorkerMirroredStrategy related flags. + 'all_reduce_alg': FLAGS.all_reduce_alg, + 'worker_hosts': FLAGS.worker_hosts, + 'task_index': FLAGS.task_index, + # MirroredStrategy and OneDeviceStrategy + 'num_gpus': FLAGS.num_gpus, + 'num_packs': FLAGS.num_packs, + } + + +def hparam_flags_dict(): + """Returns model params related flags in a dictionary.""" + return { + 'data_dir': FLAGS.data_dir, + 'model_dir': FLAGS.model_dir, + 'train_batch_size': FLAGS.train_batch_size, + 'eval_batch_size': FLAGS.eval_batch_size, + 'precision': FLAGS.precision, + 'config_file': FLAGS.config_file, + 'params_override': FLAGS.params_override, + } diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/__init__.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/cloud_lib.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/cloud_lib.py new file mode 100644 index 000000000..a2d9bd3db --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/cloud_lib.py @@ -0,0 +1,34 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Utilities that interact with cloud service. +""" + +import requests + +GCP_METADATA_URL = "http://metadata/computeMetadata/v1/instance/hostname" +GCP_METADATA_HEADER = {"Metadata-Flavor": "Google"} + + +def on_gcp(): + """Detect whether the current running environment is on GCP.""" + try: + # Timeout in 5 seconds, in case the test environment has connectivity issue. + # There is not default timeout, which means it might block forever. 
+ response = requests.get( + GCP_METADATA_URL, headers=GCP_METADATA_HEADER, timeout=5) + return response.status_code == 200 + except requests.exceptions.RequestException: + return False diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/cloud_lib_test.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/cloud_lib_test.py new file mode 100644 index 000000000..901576d24 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/cloud_lib_test.py @@ -0,0 +1,48 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for cloud_lib.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import unittest + +import mock +import requests + +from official.utils.logs import cloud_lib + + +class CloudLibTest(unittest.TestCase): + + @mock.patch("requests.get") + def test_on_gcp(self, mock_requests_get): + mock_response = mock.MagicMock() + mock_requests_get.return_value = mock_response + mock_response.status_code = 200 + + self.assertEqual(cloud_lib.on_gcp(), True) + + @mock.patch("requests.get") + def test_not_on_gcp(self, mock_requests_get): + mock_requests_get.side_effect = requests.exceptions.ConnectionError() + + self.assertEqual(cloud_lib.on_gcp(), False) + + +if __name__ == "__main__": + unittest.main() diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/guidelines.md b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/guidelines.md new file mode 100644 index 000000000..408c3cd58 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/guidelines.md @@ -0,0 +1,58 @@ +# Logging in official models + +This library adds logging functions that print or save tensor values. Official models should define all common hooks +(using hooks helper) and a benchmark logger. + +1. **Training Hooks** + + Hooks are a TensorFlow concept that define specific actions at certain points of the execution. We use them to obtain and log + tensor values during training. + + hooks_helper.py provides an easy way to create common hooks. The following hooks are currently defined: + * LoggingTensorHook: Logs tensor values + * ProfilerHook: Writes a timeline json that can be loaded into chrome://tracing. + * ExamplesPerSecondHook: Logs the number of examples processed per second. + * LoggingMetricHook: Similar to LoggingTensorHook, except that the tensors are logged in a format defined by our data + anaylsis pipeline. + + +2. **Benchmarks** + + The benchmark logger provides useful functions for logging environment information, and evaluation results. 
+ The module also contains a context which is used to update the status of the run. + +Example usage: + +``` +from absl import app as absl_app + +from official.utils.logs import hooks_helper +from official.utils.logs import logger + +def model_main(flags_obj): + estimator = ... + + benchmark_logger = logger.get_benchmark_logger() + benchmark_logger.log_run_info(...) + + train_hooks = hooks_helper.get_train_hooks(...) + + for epoch in range(10): + estimator.train(..., hooks=train_hooks) + eval_results = estimator.evaluate(...) + + # Log a dictionary of metrics + benchmark_logger.log_evaluation_result(eval_results) + + # Log an individual metric + benchmark_logger.log_metric(...) + + +def main(_): + with logger.benchmark_context(flags.FLAGS): + model_main(flags.FLAGS) + +if __name__ == "__main__": + # define flags + absl_app.run(main) +``` diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks.py new file mode 100644 index 000000000..64743b77f --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks.py @@ -0,0 +1,130 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Hook that counts examples per second every N steps or seconds.""" + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from official.utils.logs import logger + + +class ExamplesPerSecondHook(tf.estimator.SessionRunHook): + """Hook to print out examples per second. + + Total time is tracked and then divided by the total number of steps + to get the average step time and then batch_size is used to determine + the running average of examples per second. The examples per second for the + most recent interval is also logged. + """ + + def __init__(self, + batch_size, + every_n_steps=None, + every_n_secs=None, + warm_steps=0, + metric_logger=None): + """Initializer for ExamplesPerSecondHook. + + Args: + batch_size: Total batch size across all workers used to calculate + examples/second from global time. + every_n_steps: Log stats every n steps. + every_n_secs: Log stats every n seconds. Exactly one of the + `every_n_steps` or `every_n_secs` should be set. + warm_steps: The number of steps to be skipped before logging and running + average calculation. warm_steps steps refers to global steps across all + workers, not on each worker + metric_logger: instance of `BenchmarkLogger`, the benchmark logger that + hook should use to write the log. If None, BaseBenchmarkLogger will + be used. + + Raises: + ValueError: if neither `every_n_steps` or `every_n_secs` is set, or + both are set. 
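+
+    Example (illustrative, assuming an `estimator` and `input_fn` exist):
+      hook = ExamplesPerSecondHook(batch_size=128, every_n_steps=100)
+      estimator.train(input_fn=input_fn, hooks=[hook])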
+ """ + + if (every_n_steps is None) == (every_n_secs is None): + raise ValueError("exactly one of every_n_steps" + " and every_n_secs should be provided.") + + self._logger = metric_logger or logger.BaseBenchmarkLogger() + + self._timer = tf.estimator.SecondOrStepTimer( + every_steps=every_n_steps, every_secs=every_n_secs) + + self._step_train_time = 0 + self._total_steps = 0 + self._batch_size = batch_size + self._warm_steps = warm_steps + # List of examples per second logged every_n_steps. + self.current_examples_per_sec_list = [] + + def begin(self): + """Called once before using the session to check global step.""" + self._global_step_tensor = tf.compat.v1.train.get_global_step() + if self._global_step_tensor is None: + raise RuntimeError( + "Global step should be created to use StepCounterHook.") + + def before_run(self, run_context): # pylint: disable=unused-argument + """Called before each call to run(). + + Args: + run_context: A SessionRunContext object. + + Returns: + A SessionRunArgs object or None if never triggered. + """ + return tf.estimator.SessionRunArgs(self._global_step_tensor) + + def after_run(self, run_context, run_values): # pylint: disable=unused-argument + """Called after each call to run(). + + Args: + run_context: A SessionRunContext object. + run_values: A SessionRunValues object. + """ + global_step = run_values.results + + if self._timer.should_trigger_for_step( + global_step) and global_step > self._warm_steps: + elapsed_time, elapsed_steps = self._timer.update_last_triggered_step( + global_step) + if elapsed_time is not None: + self._step_train_time += elapsed_time + self._total_steps += elapsed_steps + + # average examples per second is based on the total (accumulative) + # training steps and training time so far + average_examples_per_sec = self._batch_size * ( + self._total_steps / self._step_train_time) + # current examples per second is based on the elapsed training steps + # and training time per batch + current_examples_per_sec = self._batch_size * ( + elapsed_steps / elapsed_time) + # Logs entries to be read from hook during or after run. + self.current_examples_per_sec_list.append(current_examples_per_sec) + self._logger.log_metric( + "average_examples_per_sec", average_examples_per_sec, + global_step=global_step) + + self._logger.log_metric( + "current_examples_per_sec", current_examples_per_sec, + global_step=global_step) diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_helper.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_helper.py new file mode 100644 index 000000000..50a380d9e --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_helper.py @@ -0,0 +1,172 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Hooks helper to return a list of TensorFlow hooks for training by name. + +More hooks can be added to this set. To add a new hook, 1) add the new hook to +the registry in HOOKS, 2) add a corresponding function that parses out necessary +parameters. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from official.utils.logs import hooks +from official.utils.logs import logger +from official.utils.logs import metric_hook + +_TENSORS_TO_LOG = dict((x, x) for x in ['learning_rate', + 'cross_entropy', + 'train_accuracy']) + + +def get_train_hooks(name_list, use_tpu=False, **kwargs): + """Factory for getting a list of TensorFlow hooks for training by name. + + Args: + name_list: a list of strings to name desired hook classes. Allowed: + LoggingTensorHook, ProfilerHook, ExamplesPerSecondHook, which are defined + as keys in HOOKS + use_tpu: Boolean of whether computation occurs on a TPU. This will disable + hooks altogether. + **kwargs: a dictionary of arguments to the hooks. + + Returns: + list of instantiated hooks, ready to be used in a classifier.train call. + + Raises: + ValueError: if an unrecognized name is passed. + """ + + if not name_list: + return [] + + if use_tpu: + tf.compat.v1.logging.warning('hooks_helper received name_list `{}`, but a ' + 'TPU is specified. No hooks will be used.' + .format(name_list)) + return [] + + train_hooks = [] + for name in name_list: + hook_name = HOOKS.get(name.strip().lower()) + if hook_name is None: + raise ValueError('Unrecognized training hook requested: {}'.format(name)) + else: + train_hooks.append(hook_name(**kwargs)) + + return train_hooks + + +def get_logging_tensor_hook(every_n_iter=100, tensors_to_log=None, **kwargs): # pylint: disable=unused-argument + """Function to get LoggingTensorHook. + + Args: + every_n_iter: `int`, print the values of `tensors` once every N local + steps taken on the current worker. + tensors_to_log: List of tensor names or dictionary mapping labels to tensor + names. If not set, log _TENSORS_TO_LOG by default. + **kwargs: a dictionary of arguments to LoggingTensorHook. + + Returns: + Returns a LoggingTensorHook with a standard set of tensors that will be + printed to stdout. + """ + if tensors_to_log is None: + tensors_to_log = _TENSORS_TO_LOG + + return tf.estimator.LoggingTensorHook( + tensors=tensors_to_log, + every_n_iter=every_n_iter) + + +def get_profiler_hook(model_dir, save_steps=1000, **kwargs): # pylint: disable=unused-argument + """Function to get ProfilerHook. + + Args: + model_dir: The directory to save the profile traces to. + save_steps: `int`, print profile traces every N steps. + **kwargs: a dictionary of arguments to ProfilerHook. + + Returns: + Returns a ProfilerHook that writes out timelines that can be loaded into + profiling tools like chrome://tracing. + """ + return tf.estimator.ProfilerHook(save_steps=save_steps, output_dir=model_dir) + + +def get_examples_per_second_hook(every_n_steps=100, + batch_size=128, + warm_steps=5, + **kwargs): # pylint: disable=unused-argument + """Function to get ExamplesPerSecondHook. + + Args: + every_n_steps: `int`, print current and average examples per second every + N steps. + batch_size: `int`, total batch size used to calculate examples/second from + global time. + warm_steps: skip this number of steps before logging and running average. 
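A concrete sketch of the name-based lookup that `get_train_hooks` performs against the `HOOKS` registry; the directory and batch size are arbitrary values chosen for illustration:

```
from official.utils.logs import hooks_helper

# Hook names are matched case-insensitively against the HOOKS registry.
train_hooks = hooks_helper.get_train_hooks(
    ['LoggingTensorHook', 'ExamplesPerSecondHook'],
    model_dir='/tmp/model',   # arbitrary; forwarded to every hook factory
    batch_size=256)           # picked up by get_examples_per_second_hook

# An unrecognized name raises ValueError; use_tpu=True returns an empty list.
```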
+ **kwargs: a dictionary of arguments to ExamplesPerSecondHook. + + Returns: + Returns a ProfilerHook that writes out timelines that can be loaded into + profiling tools like chrome://tracing. + """ + return hooks.ExamplesPerSecondHook( + batch_size=batch_size, every_n_steps=every_n_steps, + warm_steps=warm_steps, metric_logger=logger.get_benchmark_logger()) + + +def get_logging_metric_hook(tensors_to_log=None, + every_n_secs=600, + **kwargs): # pylint: disable=unused-argument + """Function to get LoggingMetricHook. + + Args: + tensors_to_log: List of tensor names or dictionary mapping labels to tensor + names. If not set, log _TENSORS_TO_LOG by default. + every_n_secs: `int`, the frequency for logging the metric. Default to every + 10 mins. + **kwargs: a dictionary of arguments. + + Returns: + Returns a LoggingMetricHook that saves tensor values in a JSON format. + """ + if tensors_to_log is None: + tensors_to_log = _TENSORS_TO_LOG + return metric_hook.LoggingMetricHook( + tensors=tensors_to_log, + metric_logger=logger.get_benchmark_logger(), + every_n_secs=every_n_secs) + + +def get_step_counter_hook(**kwargs): + """Function to get StepCounterHook.""" + del kwargs + return tf.estimator.StepCounterHook() + + +# A dictionary to map one hook name and its corresponding function +HOOKS = { + 'loggingtensorhook': get_logging_tensor_hook, + 'profilerhook': get_profiler_hook, + 'examplespersecondhook': get_examples_per_second_hook, + 'loggingmetrichook': get_logging_metric_hook, + 'stepcounterhook': get_step_counter_hook +} diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_helper_test.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_helper_test.py new file mode 100644 index 000000000..693311b55 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_helper_test.py @@ -0,0 +1,73 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for hooks_helper.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import unittest + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from official.utils.logs import hooks_helper +from official.utils.misc import keras_utils + + +class BaseTest(unittest.TestCase): + + def setUp(self): + super(BaseTest, self).setUp() + if keras_utils.is_v2_0: + tf.compat.v1.disable_eager_execution() + + def test_raise_in_non_list_names(self): + with self.assertRaises(ValueError): + hooks_helper.get_train_hooks( + 'LoggingTensorHook, ProfilerHook', model_dir="", batch_size=256) + + def test_raise_in_invalid_names(self): + invalid_names = ['StepCounterHook', 'StopAtStepHook'] + with self.assertRaises(ValueError): + hooks_helper.get_train_hooks(invalid_names, model_dir="", batch_size=256) + + def validate_train_hook_name(self, + test_hook_name, + expected_hook_name, + **kwargs): + returned_hook = hooks_helper.get_train_hooks( + [test_hook_name], model_dir="", **kwargs) + self.assertEqual(len(returned_hook), 1) + self.assertIsInstance(returned_hook[0], tf.estimator.SessionRunHook) + self.assertEqual(returned_hook[0].__class__.__name__.lower(), + expected_hook_name) + + def test_get_train_hooks_logging_tensor_hook(self): + self.validate_train_hook_name('LoggingTensorHook', 'loggingtensorhook') + + def test_get_train_hooks_profiler_hook(self): + self.validate_train_hook_name('ProfilerHook', 'profilerhook') + + def test_get_train_hooks_examples_per_second_hook(self): + self.validate_train_hook_name('ExamplesPerSecondHook', + 'examplespersecondhook') + + def test_get_logging_metric_hook(self): + test_hook_name = 'LoggingMetricHook' + self.validate_train_hook_name(test_hook_name, 'loggingmetrichook') + +if __name__ == '__main__': + tf.test.main() diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_test.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_test.py new file mode 100644 index 000000000..70697797d --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/hooks_test.py @@ -0,0 +1,158 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for hooks.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from official.utils.logs import hooks +from official.utils.testing import mock_lib + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG) + + +class ExamplesPerSecondHookTest(tf.test.TestCase): + """Tests for the ExamplesPerSecondHook. 
+ + In the test, we explicitly run global_step tensor after train_op in order to + keep the global_step value and the train_op (which increase the glboal_step + by 1) consistent. This is to correct the discrepancies in reported global_step + value when running on GPUs. + """ + + def setUp(self): + """Mock out logging calls to verify if correct info is being monitored.""" + self._logger = mock_lib.MockBenchmarkLogger() + + self.graph = tf.Graph() + with self.graph.as_default(): + tf.compat.v1.train.create_global_step() + self.train_op = tf.compat.v1.assign_add( + tf.compat.v1.train.get_global_step(), 1) + self.global_step = tf.compat.v1.train.get_global_step() + + def test_raise_in_both_secs_and_steps(self): + with self.assertRaises(ValueError): + hooks.ExamplesPerSecondHook( + batch_size=256, + every_n_steps=10, + every_n_secs=20, + metric_logger=self._logger) + + def test_raise_in_none_secs_and_steps(self): + with self.assertRaises(ValueError): + hooks.ExamplesPerSecondHook( + batch_size=256, + every_n_steps=None, + every_n_secs=None, + metric_logger=self._logger) + + def _validate_log_every_n_steps(self, every_n_steps, warm_steps): + hook = hooks.ExamplesPerSecondHook( + batch_size=256, + every_n_steps=every_n_steps, + warm_steps=warm_steps, + metric_logger=self._logger) + + with tf.compat.v1.train.MonitoredSession( + tf.compat.v1.train.ChiefSessionCreator(), [hook]) as mon_sess: + for _ in range(every_n_steps): + # Explicitly run global_step after train_op to get the accurate + # global_step value + mon_sess.run(self.train_op) + mon_sess.run(self.global_step) + # Nothing should be in the list yet + self.assertFalse(self._logger.logged_metric) + + mon_sess.run(self.train_op) + global_step_val = mon_sess.run(self.global_step) + + if global_step_val > warm_steps: + self._assert_metrics() + else: + # Nothing should be in the list yet + self.assertFalse(self._logger.logged_metric) + + # Add additional run to verify proper reset when called multiple times. + prev_log_len = len(self._logger.logged_metric) + mon_sess.run(self.train_op) + global_step_val = mon_sess.run(self.global_step) + + if every_n_steps == 1 and global_step_val > warm_steps: + # Each time, we log two additional metrics. Did exactly 2 get added? + self.assertEqual(len(self._logger.logged_metric), prev_log_len + 2) + else: + # No change in the size of the metric list. 
+ self.assertEqual(len(self._logger.logged_metric), prev_log_len) + + def test_examples_per_sec_every_1_steps(self): + with self.graph.as_default(): + self._validate_log_every_n_steps(1, 0) + + def test_examples_per_sec_every_5_steps(self): + with self.graph.as_default(): + self._validate_log_every_n_steps(5, 0) + + def test_examples_per_sec_every_1_steps_with_warm_steps(self): + with self.graph.as_default(): + self._validate_log_every_n_steps(1, 10) + + def test_examples_per_sec_every_5_steps_with_warm_steps(self): + with self.graph.as_default(): + self._validate_log_every_n_steps(5, 10) + + def _validate_log_every_n_secs(self, every_n_secs): + hook = hooks.ExamplesPerSecondHook( + batch_size=256, + every_n_steps=None, + every_n_secs=every_n_secs, + metric_logger=self._logger) + + with tf.compat.v1.train.MonitoredSession( + tf.compat.v1.train.ChiefSessionCreator(), [hook]) as mon_sess: + # Explicitly run global_step after train_op to get the accurate + # global_step value + mon_sess.run(self.train_op) + mon_sess.run(self.global_step) + # Nothing should be in the list yet + self.assertFalse(self._logger.logged_metric) + time.sleep(every_n_secs) + + mon_sess.run(self.train_op) + mon_sess.run(self.global_step) + self._assert_metrics() + + def test_examples_per_sec_every_1_secs(self): + with self.graph.as_default(): + self._validate_log_every_n_secs(1) + + def test_examples_per_sec_every_5_secs(self): + with self.graph.as_default(): + self._validate_log_every_n_secs(5) + + def _assert_metrics(self): + metrics = self._logger.logged_metric + self.assertEqual(metrics[-2]["name"], "average_examples_per_sec") + self.assertEqual(metrics[-1]["name"], "current_examples_per_sec") + + +if __name__ == "__main__": + tf.test.main() diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/logger.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/logger.py new file mode 100644 index 000000000..398aa8a51 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/logger.py @@ -0,0 +1,423 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Logging utilities for benchmark. + +For collecting local environment metrics like CPU and memory, certain python +packages need be installed. See README for details. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import contextlib +import datetime +import json +import multiprocessing +import numbers +import os +import threading +import uuid + +from six.moves import _thread as thread +from absl import flags +import tensorflow as tf +from tensorflow.python.client import device_lib + +from official.utils.logs import cloud_lib + +METRIC_LOG_FILE_NAME = "metric.log" +BENCHMARK_RUN_LOG_FILE_NAME = "benchmark_run.log" +_DATE_TIME_FORMAT_PATTERN = "%Y-%m-%dT%H:%M:%S.%fZ" +GCP_TEST_ENV = "GCP" +RUN_STATUS_SUCCESS = "success" +RUN_STATUS_FAILURE = "failure" +RUN_STATUS_RUNNING = "running" + + +FLAGS = flags.FLAGS + +# Don't use it directly. Use get_benchmark_logger to access a logger. +_benchmark_logger = None +_logger_lock = threading.Lock() + + +def config_benchmark_logger(flag_obj=None): + """Config the global benchmark logger.""" + _logger_lock.acquire() + try: + global _benchmark_logger + if not flag_obj: + flag_obj = FLAGS + + if (not hasattr(flag_obj, "benchmark_logger_type") or + flag_obj.benchmark_logger_type == "BaseBenchmarkLogger"): + _benchmark_logger = BaseBenchmarkLogger() + elif flag_obj.benchmark_logger_type == "BenchmarkFileLogger": + _benchmark_logger = BenchmarkFileLogger(flag_obj.benchmark_log_dir) + elif flag_obj.benchmark_logger_type == "BenchmarkBigQueryLogger": + from official.benchmark import benchmark_uploader as bu # pylint: disable=g-import-not-at-top + bq_uploader = bu.BigQueryUploader(gcp_project=flag_obj.gcp_project) + _benchmark_logger = BenchmarkBigQueryLogger( + bigquery_uploader=bq_uploader, + bigquery_data_set=flag_obj.bigquery_data_set, + bigquery_run_table=flag_obj.bigquery_run_table, + bigquery_run_status_table=flag_obj.bigquery_run_status_table, + bigquery_metric_table=flag_obj.bigquery_metric_table, + run_id=str(uuid.uuid4())) + else: + raise ValueError("Unrecognized benchmark_logger_type: %s" + % flag_obj.benchmark_logger_type) + + finally: + _logger_lock.release() + return _benchmark_logger + + +def get_benchmark_logger(): + if not _benchmark_logger: + config_benchmark_logger() + return _benchmark_logger + + +@contextlib.contextmanager +def benchmark_context(flag_obj): + """Context of benchmark, which will update status of the run accordingly.""" + benchmark_logger = config_benchmark_logger(flag_obj) + try: + yield + benchmark_logger.on_finish(RUN_STATUS_SUCCESS) + except Exception: # pylint: disable=broad-except + # Catch all the exception, update the run status to be failure, and re-raise + benchmark_logger.on_finish(RUN_STATUS_FAILURE) + raise + + +class BaseBenchmarkLogger(object): + """Class to log the benchmark information to STDOUT.""" + + def log_evaluation_result(self, eval_results): + """Log the evaluation result. + + The evaluate result is a dictionary that contains metrics defined in + model_fn. It also contains a entry for global_step which contains the value + of the global step when evaluation was performed. + + Args: + eval_results: dict, the result of evaluate. + """ + if not isinstance(eval_results, dict): + tf.compat.v1.logging.warning( + "eval_results should be dictionary for logging. 
Got %s", + type(eval_results)) + return + global_step = eval_results[tf.compat.v1.GraphKeys.GLOBAL_STEP] + for key in sorted(eval_results): + if key != tf.compat.v1.GraphKeys.GLOBAL_STEP: + self.log_metric(key, eval_results[key], global_step=global_step) + + def log_metric(self, name, value, unit=None, global_step=None, extras=None): + """Log the benchmark metric information to local file. + + Currently the logging is done in a synchronized way. This should be updated + to log asynchronously. + + Args: + name: string, the name of the metric to log. + value: number, the value of the metric. The value will not be logged if it + is not a number type. + unit: string, the unit of the metric, E.g "image per second". + global_step: int, the global_step when the metric is logged. + extras: map of string:string, the extra information about the metric. + """ + metric = _process_metric_to_json(name, value, unit, global_step, extras) + if metric: + tf.compat.v1.logging.info("Benchmark metric: %s", metric) + + def log_run_info(self, model_name, dataset_name, run_params, test_id=None): + tf.compat.v1.logging.info( + "Benchmark run: %s", _gather_run_info(model_name, dataset_name, + run_params, test_id)) + + def on_finish(self, status): + pass + + +class BenchmarkFileLogger(BaseBenchmarkLogger): + """Class to log the benchmark information to local disk.""" + + def __init__(self, logging_dir): + super(BenchmarkFileLogger, self).__init__() + self._logging_dir = logging_dir + if not tf.io.gfile.isdir(self._logging_dir): + tf.io.gfile.makedirs(self._logging_dir) + self._metric_file_handler = tf.io.gfile.GFile( + os.path.join(self._logging_dir, METRIC_LOG_FILE_NAME), "a") + + def log_metric(self, name, value, unit=None, global_step=None, extras=None): + """Log the benchmark metric information to local file. + + Currently the logging is done in a synchronized way. This should be updated + to log asynchronously. + + Args: + name: string, the name of the metric to log. + value: number, the value of the metric. The value will not be logged if it + is not a number type. + unit: string, the unit of the metric, E.g "image per second". + global_step: int, the global_step when the metric is logged. + extras: map of string:string, the extra information about the metric. + """ + metric = _process_metric_to_json(name, value, unit, global_step, extras) + if metric: + try: + json.dump(metric, self._metric_file_handler) + self._metric_file_handler.write("\n") + self._metric_file_handler.flush() + except (TypeError, ValueError) as e: + tf.compat.v1.logging.warning( + "Failed to dump metric to log file: name %s, value %s, error %s", + name, value, e) + + def log_run_info(self, model_name, dataset_name, run_params, test_id=None): + """Collect most of the TF runtime information for the local env. + + The schema of the run info follows official/benchmark/datastore/schema. + + Args: + model_name: string, the name of the model. + dataset_name: string, the name of dataset for training and evaluation. + run_params: dict, the dictionary of parameters for the run, it could + include hyperparameters or other params that are important for the run. + test_id: string, the unique name of the test run by the combination of key + parameters, eg batch size, num of GPU. It is hardware independent. 
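A minimal sketch of what `BenchmarkFileLogger.log_metric` appends to `metric.log`, assuming an arbitrary logging directory (the timestamp is elided):

```
from official.utils.logs import logger

file_logger = logger.BenchmarkFileLogger("/tmp/benchmark_logs")
file_logger.log_metric("accuracy", 0.999, global_step=10000,
                       extras={"name": "value"})
file_logger.on_finish(logger.RUN_STATUS_SUCCESS)  # flush and close metric.log

# metric.log now contains one JSON object per line, roughly:
# {"name": "accuracy", "value": 0.999, "unit": null, "global_step": 10000,
#  "timestamp": "...", "extras": [{"name": "name", "value": "value"}]}
```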
+ """ + run_info = _gather_run_info(model_name, dataset_name, run_params, test_id) + + with tf.io.gfile.GFile(os.path.join( + self._logging_dir, BENCHMARK_RUN_LOG_FILE_NAME), "w") as f: + try: + json.dump(run_info, f) + f.write("\n") + except (TypeError, ValueError) as e: + tf.compat.v1.logging.warning( + "Failed to dump benchmark run info to log file: %s", e) + + def on_finish(self, status): + self._metric_file_handler.flush() + self._metric_file_handler.close() + + +class BenchmarkBigQueryLogger(BaseBenchmarkLogger): + """Class to log the benchmark information to BigQuery data store.""" + + def __init__(self, + bigquery_uploader, + bigquery_data_set, + bigquery_run_table, + bigquery_run_status_table, + bigquery_metric_table, + run_id): + super(BenchmarkBigQueryLogger, self).__init__() + self._bigquery_uploader = bigquery_uploader + self._bigquery_data_set = bigquery_data_set + self._bigquery_run_table = bigquery_run_table + self._bigquery_run_status_table = bigquery_run_status_table + self._bigquery_metric_table = bigquery_metric_table + self._run_id = run_id + + def log_metric(self, name, value, unit=None, global_step=None, extras=None): + """Log the benchmark metric information to bigquery. + + Args: + name: string, the name of the metric to log. + value: number, the value of the metric. The value will not be logged if it + is not a number type. + unit: string, the unit of the metric, E.g "image per second". + global_step: int, the global_step when the metric is logged. + extras: map of string:string, the extra information about the metric. + """ + metric = _process_metric_to_json(name, value, unit, global_step, extras) + if metric: + # Starting new thread for bigquery upload in case it might take long time + # and impact the benchmark and performance measurement. Starting a new + # thread might have potential performance impact for model that run on + # CPU. + thread.start_new_thread( + self._bigquery_uploader.upload_benchmark_metric_json, + (self._bigquery_data_set, + self._bigquery_metric_table, + self._run_id, + [metric])) + + def log_run_info(self, model_name, dataset_name, run_params, test_id=None): + """Collect most of the TF runtime information for the local env. + + The schema of the run info follows official/benchmark/datastore/schema. + + Args: + model_name: string, the name of the model. + dataset_name: string, the name of dataset for training and evaluation. + run_params: dict, the dictionary of parameters for the run, it could + include hyperparameters or other params that are important for the run. + test_id: string, the unique name of the test run by the combination of key + parameters, eg batch size, num of GPU. It is hardware independent. + """ + run_info = _gather_run_info(model_name, dataset_name, run_params, test_id) + # Starting new thread for bigquery upload in case it might take long time + # and impact the benchmark and performance measurement. Starting a new + # thread might have potential performance impact for model that run on CPU. 
+ thread.start_new_thread( + self._bigquery_uploader.upload_benchmark_run_json, + (self._bigquery_data_set, + self._bigquery_run_table, + self._run_id, + run_info)) + thread.start_new_thread( + self._bigquery_uploader.insert_run_status, + (self._bigquery_data_set, + self._bigquery_run_status_table, + self._run_id, + RUN_STATUS_RUNNING)) + + def on_finish(self, status): + self._bigquery_uploader.update_run_status( + self._bigquery_data_set, + self._bigquery_run_status_table, + self._run_id, + status) + + +def _gather_run_info(model_name, dataset_name, run_params, test_id): + """Collect the benchmark run information for the local environment.""" + run_info = { + "model_name": model_name, + "dataset": {"name": dataset_name}, + "machine_config": {}, + "test_id": test_id, + "run_date": datetime.datetime.utcnow().strftime( + _DATE_TIME_FORMAT_PATTERN)} + _collect_tensorflow_info(run_info) + _collect_tensorflow_environment_variables(run_info) + _collect_run_params(run_info, run_params) + _collect_cpu_info(run_info) + _collect_memory_info(run_info) + _collect_test_environment(run_info) + return run_info + + +def _process_metric_to_json( + name, value, unit=None, global_step=None, extras=None): + """Validate the metric data and generate JSON for insert.""" + if not isinstance(value, numbers.Number): + tf.compat.v1.logging.warning( + "Metric value to log should be a number. Got %s", type(value)) + return None + + extras = _convert_to_json_dict(extras) + return { + "name": name, + "value": float(value), + "unit": unit, + "global_step": global_step, + "timestamp": datetime.datetime.utcnow().strftime( + _DATE_TIME_FORMAT_PATTERN), + "extras": extras} + + +def _collect_tensorflow_info(run_info): + run_info["tensorflow_version"] = { + "version": tf.version.VERSION, "git_hash": tf.version.GIT_VERSION} + + +def _collect_run_params(run_info, run_params): + """Log the parameter information for the benchmark run.""" + def process_param(name, value): + type_check = { + str: {"name": name, "string_value": value}, + int: {"name": name, "long_value": value}, + bool: {"name": name, "bool_value": str(value)}, + float: {"name": name, "float_value": value}, + } + return type_check.get(type(value), + {"name": name, "string_value": str(value)}) + if run_params: + run_info["run_parameters"] = [ + process_param(k, v) for k, v in sorted(run_params.items())] + + +def _collect_tensorflow_environment_variables(run_info): + run_info["tensorflow_environment_variables"] = [ + {"name": k, "value": v} + for k, v in sorted(os.environ.items()) if k.startswith("TF_")] + + +# The following code is mirrored from tensorflow/tools/test/system_info_lib +# which is not exposed for import. +def _collect_cpu_info(run_info): + """Collect the CPU information for the local environment.""" + cpu_info = {} + + cpu_info["num_cores"] = multiprocessing.cpu_count() + + try: + # Note: cpuinfo is not installed in the TensorFlow OSS tree. + # It is installable via pip. + import cpuinfo # pylint: disable=g-import-not-at-top + + info = cpuinfo.get_cpu_info() + cpu_info["cpu_info"] = info["brand"] + cpu_info["mhz_per_cpu"] = info["hz_advertised_raw"][0] / 1.0e6 + + run_info["machine_config"]["cpu_info"] = cpu_info + except ImportError: + tf.compat.v1.logging.warn( + "'cpuinfo' not imported. CPU info will not be logged.") + + +def _collect_memory_info(run_info): + try: + # Note: psutil is not installed in the TensorFlow OSS tree. + # It is installable via pip. 
+ import psutil # pylint: disable=g-import-not-at-top + vmem = psutil.virtual_memory() + run_info["machine_config"]["memory_total"] = vmem.total + run_info["machine_config"]["memory_available"] = vmem.available + except ImportError: + tf.compat.v1.logging.warn( + "'psutil' not imported. Memory info will not be logged.") + + +def _collect_test_environment(run_info): + """Detect the local environment, eg GCE, AWS or DGX, etc.""" + if cloud_lib.on_gcp(): + run_info["test_environment"] = GCP_TEST_ENV + # TODO(scottzhu): Add more testing env detection for other platform + + +def _parse_gpu_model(physical_device_desc): + # Assume all the GPU connected are same model + for kv in physical_device_desc.split(","): + k, _, v = kv.partition(":") + if k.strip() == "name": + return v.strip() + return None + + +def _convert_to_json_dict(input_dict): + if input_dict: + return [{"name": k, "value": v} for k, v in sorted(input_dict.items())] + else: + return [] diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/logger_test.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/logger_test.py new file mode 100644 index 000000000..520db5ffd --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/logger_test.py @@ -0,0 +1,365 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for benchmark logger.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import os +import tempfile +import time +import unittest + +import mock +from absl.testing import flagsaver +import tensorflow as tf # pylint: disable=g-bad-import-order + +try: + from google.cloud import bigquery +except ImportError: + bigquery = None + +from official.utils.misc import keras_utils +from official.utils.flags import core as flags_core +from official.utils.logs import logger + + +class BenchmarkLoggerTest(tf.test.TestCase): + + @classmethod + def setUpClass(cls): # pylint: disable=invalid-name + super(BenchmarkLoggerTest, cls).setUpClass() + flags_core.define_benchmark() + + def test_get_default_benchmark_logger(self): + with flagsaver.flagsaver(benchmark_logger_type="foo"): + self.assertIsInstance(logger.get_benchmark_logger(), + logger.BaseBenchmarkLogger) + + def test_config_base_benchmark_logger(self): + with flagsaver.flagsaver(benchmark_logger_type="BaseBenchmarkLogger"): + logger.config_benchmark_logger() + self.assertIsInstance(logger.get_benchmark_logger(), + logger.BaseBenchmarkLogger) + + def test_config_benchmark_file_logger(self): + # Set the benchmark_log_dir first since the benchmark_logger_type will need + # the value to be set when it does the validation. 
+ with flagsaver.flagsaver(benchmark_log_dir="/tmp"): + with flagsaver.flagsaver(benchmark_logger_type="BenchmarkFileLogger"): + logger.config_benchmark_logger() + self.assertIsInstance(logger.get_benchmark_logger(), + logger.BenchmarkFileLogger) + + @unittest.skipIf(bigquery is None, "Bigquery dependency is not installed.") + @mock.patch.object(bigquery, "Client") + def test_config_benchmark_bigquery_logger(self, mock_bigquery_client): + with flagsaver.flagsaver(benchmark_logger_type="BenchmarkBigQueryLogger"): + logger.config_benchmark_logger() + self.assertIsInstance(logger.get_benchmark_logger(), + logger.BenchmarkBigQueryLogger) + + @mock.patch("official.utils.logs.logger.config_benchmark_logger") + def test_benchmark_context(self, mock_config_benchmark_logger): + mock_logger = mock.MagicMock() + mock_config_benchmark_logger.return_value = mock_logger + with logger.benchmark_context(None): + tf.compat.v1.logging.info("start benchmarking") + mock_logger.on_finish.assert_called_once_with(logger.RUN_STATUS_SUCCESS) + + @mock.patch("official.utils.logs.logger.config_benchmark_logger") + def test_benchmark_context_failure(self, mock_config_benchmark_logger): + mock_logger = mock.MagicMock() + mock_config_benchmark_logger.return_value = mock_logger + with self.assertRaises(RuntimeError): + with logger.benchmark_context(None): + raise RuntimeError("training error") + mock_logger.on_finish.assert_called_once_with(logger.RUN_STATUS_FAILURE) + + +class BaseBenchmarkLoggerTest(tf.test.TestCase): + + def setUp(self): + super(BaseBenchmarkLoggerTest, self).setUp() + self._actual_log = tf.compat.v1.logging.info + self.logged_message = None + + def mock_log(*args, **kwargs): + self.logged_message = args + self._actual_log(*args, **kwargs) + + tf.compat.v1.logging.info = mock_log + + def tearDown(self): + super(BaseBenchmarkLoggerTest, self).tearDown() + tf.compat.v1.logging.info = self._actual_log + + def test_log_metric(self): + log = logger.BaseBenchmarkLogger() + log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"}) + + expected_log_prefix = "Benchmark metric:" + self.assertRegexpMatches(str(self.logged_message), expected_log_prefix) + + +class BenchmarkFileLoggerTest(tf.test.TestCase): + + def setUp(self): + super(BenchmarkFileLoggerTest, self).setUp() + # Avoid pulling extra env vars from test environment which affects the test + # result, eg. 
Kokoro test has a TF_PKG env which affect the test case + # test_collect_tensorflow_environment_variables() + self.original_environ = dict(os.environ) + os.environ.clear() + + def tearDown(self): + super(BenchmarkFileLoggerTest, self).tearDown() + tf.io.gfile.rmtree(self.get_temp_dir()) + os.environ.clear() + os.environ.update(self.original_environ) + + def test_create_logging_dir(self): + non_exist_temp_dir = os.path.join(self.get_temp_dir(), "unknown_dir") + self.assertFalse(tf.io.gfile.isdir(non_exist_temp_dir)) + + logger.BenchmarkFileLogger(non_exist_temp_dir) + self.assertTrue(tf.io.gfile.isdir(non_exist_temp_dir)) + + def test_log_metric(self): + log_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + log = logger.BenchmarkFileLogger(log_dir) + log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"}) + + metric_log = os.path.join(log_dir, "metric.log") + self.assertTrue(tf.io.gfile.exists(metric_log)) + with tf.io.gfile.GFile(metric_log) as f: + metric = json.loads(f.readline()) + self.assertEqual(metric["name"], "accuracy") + self.assertEqual(metric["value"], 0.999) + self.assertEqual(metric["unit"], None) + self.assertEqual(metric["global_step"], 1e4) + self.assertEqual(metric["extras"], [{"name": "name", "value": "value"}]) + + def test_log_multiple_metrics(self): + log_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + log = logger.BenchmarkFileLogger(log_dir) + log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"}) + log.log_metric("loss", 0.02, global_step=1e4) + + metric_log = os.path.join(log_dir, "metric.log") + self.assertTrue(tf.io.gfile.exists(metric_log)) + with tf.io.gfile.GFile(metric_log) as f: + accuracy = json.loads(f.readline()) + self.assertEqual(accuracy["name"], "accuracy") + self.assertEqual(accuracy["value"], 0.999) + self.assertEqual(accuracy["unit"], None) + self.assertEqual(accuracy["global_step"], 1e4) + self.assertEqual(accuracy["extras"], [{"name": "name", "value": "value"}]) + + loss = json.loads(f.readline()) + self.assertEqual(loss["name"], "loss") + self.assertEqual(loss["value"], 0.02) + self.assertEqual(loss["unit"], None) + self.assertEqual(loss["global_step"], 1e4) + self.assertEqual(loss["extras"], []) + + def test_log_non_number_value(self): + log_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + log = logger.BenchmarkFileLogger(log_dir) + const = tf.constant(1) + log.log_metric("accuracy", const) + + metric_log = os.path.join(log_dir, "metric.log") + self.assertFalse(tf.io.gfile.exists(metric_log)) + + def test_log_evaluation_result(self): + eval_result = {"loss": 0.46237424, + "global_step": 207082, + "accuracy": 0.9285} + log_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + log = logger.BenchmarkFileLogger(log_dir) + log.log_evaluation_result(eval_result) + + metric_log = os.path.join(log_dir, "metric.log") + self.assertTrue(tf.io.gfile.exists(metric_log)) + with tf.io.gfile.GFile(metric_log) as f: + accuracy = json.loads(f.readline()) + self.assertEqual(accuracy["name"], "accuracy") + self.assertEqual(accuracy["value"], 0.9285) + self.assertEqual(accuracy["unit"], None) + self.assertEqual(accuracy["global_step"], 207082) + + loss = json.loads(f.readline()) + self.assertEqual(loss["name"], "loss") + self.assertEqual(loss["value"], 0.46237424) + self.assertEqual(loss["unit"], None) + self.assertEqual(loss["global_step"], 207082) + + def test_log_evaluation_result_with_invalid_type(self): + eval_result = "{'loss': 0.46237424, 'global_step': 207082}" + log_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) 
+ log = logger.BenchmarkFileLogger(log_dir) + log.log_evaluation_result(eval_result) + + metric_log = os.path.join(log_dir, "metric.log") + self.assertFalse(tf.io.gfile.exists(metric_log)) + + @mock.patch("official.utils.logs.logger._gather_run_info") + def test_log_run_info(self, mock_gather_run_info): + log_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + log = logger.BenchmarkFileLogger(log_dir) + run_info = {"model_name": "model_name", + "dataset": "dataset_name", + "run_info": "run_value"} + mock_gather_run_info.return_value = run_info + log.log_run_info("model_name", "dataset_name", {}) + + run_log = os.path.join(log_dir, "benchmark_run.log") + self.assertTrue(tf.io.gfile.exists(run_log)) + with tf.io.gfile.GFile(run_log) as f: + run_info = json.loads(f.readline()) + self.assertEqual(run_info["model_name"], "model_name") + self.assertEqual(run_info["dataset"], "dataset_name") + self.assertEqual(run_info["run_info"], "run_value") + + def test_collect_tensorflow_info(self): + run_info = {} + logger._collect_tensorflow_info(run_info) + self.assertNotEqual(run_info["tensorflow_version"], {}) + self.assertEqual(run_info["tensorflow_version"]["version"], + tf.version.VERSION) + self.assertEqual(run_info["tensorflow_version"]["git_hash"], + tf.version.GIT_VERSION) + + def test_collect_run_params(self): + run_info = {} + run_parameters = { + "batch_size": 32, + "synthetic_data": True, + "train_epochs": 100.00, + "dtype": "fp16", + "resnet_size": 50, + "random_tensor": tf.constant(2.0) + } + logger._collect_run_params(run_info, run_parameters) + self.assertEqual(len(run_info["run_parameters"]), 6) + self.assertEqual(run_info["run_parameters"][0], + {"name": "batch_size", "long_value": 32}) + self.assertEqual(run_info["run_parameters"][1], + {"name": "dtype", "string_value": "fp16"}) + v1_tensor = {"name": "random_tensor", "string_value": + "Tensor(\"Const:0\", shape=(), dtype=float32)"} + v2_tensor = {"name": "random_tensor", "string_value": + "tf.Tensor(2.0, shape=(), dtype=float32)"} + self.assertIn(run_info["run_parameters"][2], [v1_tensor, v2_tensor]) + + + self.assertEqual(run_info["run_parameters"][3], + {"name": "resnet_size", "long_value": 50}) + self.assertEqual(run_info["run_parameters"][4], + {"name": "synthetic_data", "bool_value": "True"}) + self.assertEqual(run_info["run_parameters"][5], + {"name": "train_epochs", "float_value": 100.00}) + + def test_collect_tensorflow_environment_variables(self): + os.environ["TF_ENABLE_WINOGRAD_NONFUSED"] = "1" + os.environ["TF_OTHER"] = "2" + os.environ["OTHER"] = "3" + + run_info = {} + logger._collect_tensorflow_environment_variables(run_info) + self.assertIsNotNone(run_info["tensorflow_environment_variables"]) + expected_tf_envs = [ + {"name": "TF_ENABLE_WINOGRAD_NONFUSED", "value": "1"}, + {"name": "TF_OTHER", "value": "2"}, + ] + self.assertEqual(run_info["tensorflow_environment_variables"], + expected_tf_envs) + + def test_collect_memory_info(self): + run_info = {"machine_config": {}} + logger._collect_memory_info(run_info) + self.assertIsNotNone(run_info["machine_config"]["memory_total"]) + self.assertIsNotNone(run_info["machine_config"]["memory_available"]) + + +@unittest.skipIf(bigquery is None, "Bigquery dependency is not installed.") +class BenchmarkBigQueryLoggerTest(tf.test.TestCase): + + def setUp(self): + super(BenchmarkBigQueryLoggerTest, self).setUp() + # Avoid pulling extra env vars from test environment which affects the test + # result, eg. 
Kokoro test has a TF_PKG env which affect the test case + # test_collect_tensorflow_environment_variables() + self.original_environ = dict(os.environ) + os.environ.clear() + + self.mock_bq_uploader = mock.MagicMock() + self.logger = logger.BenchmarkBigQueryLogger( + self.mock_bq_uploader, "dataset", "run_table", "run_status_table", + "metric_table", "run_id") + + def tearDown(self): + super(BenchmarkBigQueryLoggerTest, self).tearDown() + tf.io.gfile.rmtree(self.get_temp_dir()) + os.environ.clear() + os.environ.update(self.original_environ) + + def test_log_metric(self): + self.logger.log_metric( + "accuracy", 0.999, global_step=1e4, extras={"name": "value"}) + expected_metric_json = [{ + "name": "accuracy", + "value": 0.999, + "unit": None, + "global_step": 1e4, + "timestamp": mock.ANY, + "extras": [{"name": "name", "value": "value"}] + }] + # log_metric will call upload_benchmark_metric_json in a separate thread. + # Give it some grace period for the new thread before assert. + time.sleep(1) + self.mock_bq_uploader.upload_benchmark_metric_json.assert_called_once_with( + "dataset", "metric_table", "run_id", expected_metric_json) + + @mock.patch("official.utils.logs.logger._gather_run_info") + def test_log_run_info(self, mock_gather_run_info): + run_info = {"model_name": "model_name", + "dataset": "dataset_name", + "run_info": "run_value"} + mock_gather_run_info.return_value = run_info + self.logger.log_run_info("model_name", "dataset_name", {}) + # log_metric will call upload_benchmark_metric_json in a separate thread. + # Give it some grace period for the new thread before assert. + time.sleep(1) + self.mock_bq_uploader.upload_benchmark_run_json.assert_called_once_with( + "dataset", "run_table", "run_id", run_info) + self.mock_bq_uploader.insert_run_status.assert_called_once_with( + "dataset", "run_status_table", "run_id", "running") + + def test_on_finish(self): + self.logger.on_finish(logger.RUN_STATUS_SUCCESS) + # log_metric will call upload_benchmark_metric_json in a separate thread. + # Give it some grace period for the new thread before assert. + time.sleep(1) + self.mock_bq_uploader.update_run_status.assert_called_once_with( + "dataset", "run_status_table", "run_id", logger.RUN_STATUS_SUCCESS) + + +if __name__ == "__main__": + tf.test.main() diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/metric_hook.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/metric_hook.py new file mode 100644 index 000000000..f408e3e95 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/metric_hook.py @@ -0,0 +1,97 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Session hook for logging benchmark metric.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf # pylint: disable=g-bad-import-order + + +class LoggingMetricHook(tf.estimator.LoggingTensorHook): + """Hook to log benchmark metric information. + + This hook is very similar as tf.train.LoggingTensorHook, which logs given + tensors every N local steps, every N seconds, or at the end. The metric + information will be logged to given log_dir or via metric_logger in JSON + format, which can be consumed by data analysis pipeline later. + + Note that if `at_end` is True, `tensors` should not include any tensor + whose evaluation produces a side effect such as consuming additional inputs. + """ + + def __init__(self, tensors, metric_logger=None, + every_n_iter=None, every_n_secs=None, at_end=False): + """Initializer for LoggingMetricHook. + + Args: + tensors: `dict` that maps string-valued tags to tensors/tensor names, + or `iterable` of tensors/tensor names. + metric_logger: instance of `BenchmarkLogger`, the benchmark logger that + hook should use to write the log. + every_n_iter: `int`, print the values of `tensors` once every N local + steps taken on the current worker. + every_n_secs: `int` or `float`, print the values of `tensors` once every N + seconds. Exactly one of `every_n_iter` and `every_n_secs` should be + provided. + at_end: `bool` specifying whether to print the values of `tensors` at the + end of the run. + + Raises: + ValueError: + 1. `every_n_iter` is non-positive, or + 2. Exactly one of every_n_iter and every_n_secs should be provided. + 3. Exactly one of log_dir and metric_logger should be provided. + """ + super(LoggingMetricHook, self).__init__( + tensors=tensors, + every_n_iter=every_n_iter, + every_n_secs=every_n_secs, + at_end=at_end) + + if metric_logger is None: + raise ValueError("metric_logger should be provided.") + self._logger = metric_logger + + def begin(self): + super(LoggingMetricHook, self).begin() + self._global_step_tensor = tf.compat.v1.train.get_global_step() + if self._global_step_tensor is None: + raise RuntimeError( + "Global step should be created to use LoggingMetricHook.") + if self._global_step_tensor.name not in self._current_tensors: + self._current_tensors[self._global_step_tensor.name] = ( + self._global_step_tensor) + + def after_run(self, unused_run_context, run_values): + # should_trigger is a internal state that populated at before_run, and it is + # using self_timer to determine whether it should trigger. 
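A minimal construction sketch for `LoggingMetricHook`, assuming an arbitrary tensor name and the stdout-based `BaseBenchmarkLogger`:

```
from official.utils.logs import logger
from official.utils.logs import metric_hook

metric_logging_hook = metric_hook.LoggingMetricHook(
    tensors={"train_accuracy": "train_accuracy"},  # tag -> tensor name (placeholder)
    metric_logger=logger.BaseBenchmarkLogger(),
    every_n_iter=100)  # or every_n_secs=..., never both

# Used like its parent LoggingTensorHook, e.g. passed to estimator.train(hooks=[...]);
# each trigger calls metric_logger.log_metric(tag, value, global_step=...).
```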
+ if self._should_trigger: + self._log_metric(run_values.results) + + self._iter_count += 1 + + def end(self, session): + if self._log_at_end: + values = session.run(self._current_tensors) + self._log_metric(values) + + def _log_metric(self, tensor_values): + self._timer.update_last_triggered_step(self._iter_count) + global_step = tensor_values[self._global_step_tensor.name] + # self._tag_order is populated during the init of LoggingTensorHook + for tag in self._tag_order: + self._logger.log_metric(tag, tensor_values[tag], global_step=global_step) diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/metric_hook_test.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/metric_hook_test.py new file mode 100644 index 000000000..870ed6eb0 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/metric_hook_test.py @@ -0,0 +1,217 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for metric_hook.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tempfile +import time + +import tensorflow as tf # pylint: disable=g-bad-import-order +from tensorflow.python.training import monitored_session # pylint: disable=g-bad-import-order + +from official.utils.logs import metric_hook +from official.utils.testing import mock_lib + + +class LoggingMetricHookTest(tf.test.TestCase): + """Tests for LoggingMetricHook.""" + + def setUp(self): + super(LoggingMetricHookTest, self).setUp() + + self._log_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + self._logger = mock_lib.MockBenchmarkLogger() + + def tearDown(self): + super(LoggingMetricHookTest, self).tearDown() + tf.io.gfile.rmtree(self.get_temp_dir()) + + def test_illegal_args(self): + with self.assertRaisesRegexp(ValueError, "nvalid every_n_iter"): + metric_hook.LoggingMetricHook(tensors=["t"], every_n_iter=0) + with self.assertRaisesRegexp(ValueError, "nvalid every_n_iter"): + metric_hook.LoggingMetricHook(tensors=["t"], every_n_iter=-10) + with self.assertRaisesRegexp(ValueError, "xactly one of"): + metric_hook.LoggingMetricHook( + tensors=["t"], every_n_iter=5, every_n_secs=5) + with self.assertRaisesRegexp(ValueError, "xactly one of"): + metric_hook.LoggingMetricHook(tensors=["t"]) + with self.assertRaisesRegexp(ValueError, "metric_logger"): + metric_hook.LoggingMetricHook(tensors=["t"], every_n_iter=5) + + def test_print_at_end_only(self): + with tf.Graph().as_default(), tf.compat.v1.Session() as sess: + tf.compat.v1.train.get_or_create_global_step() + t = tf.constant(42.0, name="foo") + train_op = tf.constant(3) + hook = metric_hook.LoggingMetricHook( + tensors=[t.name], at_end=True, metric_logger=self._logger) + hook.begin() + mon_sess = 
monitored_session._HookedSession(sess, [hook]) # pylint: disable=protected-access + sess.run(tf.compat.v1.global_variables_initializer()) + + for _ in range(3): + mon_sess.run(train_op) + self.assertEqual(self._logger.logged_metric, []) + + hook.end(sess) + self.assertEqual(len(self._logger.logged_metric), 1) + metric = self._logger.logged_metric[0] + self.assertRegexpMatches(metric["name"], "foo") + self.assertEqual(metric["value"], 42.0) + self.assertEqual(metric["unit"], None) + self.assertEqual(metric["global_step"], 0) + + def test_global_step_not_found(self): + with tf.Graph().as_default(): + t = tf.constant(42.0, name="foo") + hook = metric_hook.LoggingMetricHook( + tensors=[t.name], at_end=True, metric_logger=self._logger) + + with self.assertRaisesRegexp( + RuntimeError, "should be created to use LoggingMetricHook."): + hook.begin() + + def test_log_tensors(self): + with tf.Graph().as_default(), tf.compat.v1.Session() as sess: + tf.compat.v1.train.get_or_create_global_step() + t1 = tf.constant(42.0, name="foo") + t2 = tf.constant(43.0, name="bar") + train_op = tf.constant(3) + hook = metric_hook.LoggingMetricHook( + tensors=[t1, t2], at_end=True, metric_logger=self._logger) + hook.begin() + mon_sess = monitored_session._HookedSession(sess, [hook]) # pylint: disable=protected-access + sess.run(tf.compat.v1.global_variables_initializer()) + + for _ in range(3): + mon_sess.run(train_op) + self.assertEqual(self._logger.logged_metric, []) + + hook.end(sess) + self.assertEqual(len(self._logger.logged_metric), 2) + metric1 = self._logger.logged_metric[0] + self.assertRegexpMatches(str(metric1["name"]), "foo") + self.assertEqual(metric1["value"], 42.0) + self.assertEqual(metric1["unit"], None) + self.assertEqual(metric1["global_step"], 0) + + metric2 = self._logger.logged_metric[1] + self.assertRegexpMatches(str(metric2["name"]), "bar") + self.assertEqual(metric2["value"], 43.0) + self.assertEqual(metric2["unit"], None) + self.assertEqual(metric2["global_step"], 0) + + def _validate_print_every_n_steps(self, sess, at_end): + t = tf.constant(42.0, name="foo") + + train_op = tf.constant(3) + hook = metric_hook.LoggingMetricHook( + tensors=[t.name], every_n_iter=10, at_end=at_end, + metric_logger=self._logger) + hook.begin() + mon_sess = monitored_session._HookedSession(sess, [hook]) # pylint: disable=protected-access + sess.run(tf.compat.v1.global_variables_initializer()) + mon_sess.run(train_op) + self.assertRegexpMatches(str(self._logger.logged_metric), t.name) + for _ in range(3): + self._logger.logged_metric = [] + for _ in range(9): + mon_sess.run(train_op) + # assertNotRegexpMatches is not supported by python 3.1 and later + self.assertEqual(str(self._logger.logged_metric).find(t.name), -1) + mon_sess.run(train_op) + self.assertRegexpMatches(str(self._logger.logged_metric), t.name) + + # Add additional run to verify proper reset when called multiple times. 
+ self._logger.logged_metric = [] + mon_sess.run(train_op) + # assertNotRegexpMatches is not supported by python 3.1 and later + self.assertEqual(str(self._logger.logged_metric).find(t.name), -1) + + self._logger.logged_metric = [] + hook.end(sess) + if at_end: + self.assertRegexpMatches(str(self._logger.logged_metric), t.name) + else: + # assertNotRegexpMatches is not supported by python 3.1 and later + self.assertEqual(str(self._logger.logged_metric).find(t.name), -1) + + def test_print_every_n_steps(self): + with tf.Graph().as_default(), tf.compat.v1.Session() as sess: + tf.compat.v1.train.get_or_create_global_step() + self._validate_print_every_n_steps(sess, at_end=False) + # Verify proper reset. + self._validate_print_every_n_steps(sess, at_end=False) + + def test_print_every_n_steps_and_end(self): + with tf.Graph().as_default(), tf.compat.v1.Session() as sess: + tf.compat.v1.train.get_or_create_global_step() + self._validate_print_every_n_steps(sess, at_end=True) + # Verify proper reset. + self._validate_print_every_n_steps(sess, at_end=True) + + def _validate_print_every_n_secs(self, sess, at_end): + t = tf.constant(42.0, name="foo") + train_op = tf.constant(3) + + hook = metric_hook.LoggingMetricHook( + tensors=[t.name], every_n_secs=1.0, at_end=at_end, + metric_logger=self._logger) + hook.begin() + mon_sess = monitored_session._HookedSession(sess, [hook]) # pylint: disable=protected-access + sess.run(tf.compat.v1.global_variables_initializer()) + + mon_sess.run(train_op) + self.assertRegexpMatches(str(self._logger.logged_metric), t.name) + + # assertNotRegexpMatches is not supported by python 3.1 and later + self._logger.logged_metric = [] + mon_sess.run(train_op) + self.assertEqual(str(self._logger.logged_metric).find(t.name), -1) + time.sleep(1.0) + + self._logger.logged_metric = [] + mon_sess.run(train_op) + self.assertRegexpMatches(str(self._logger.logged_metric), t.name) + + self._logger.logged_metric = [] + hook.end(sess) + if at_end: + self.assertRegexpMatches(str(self._logger.logged_metric), t.name) + else: + # assertNotRegexpMatches is not supported by python 3.1 and later + self.assertEqual(str(self._logger.logged_metric).find(t.name), -1) + + def test_print_every_n_secs(self): + with tf.Graph().as_default(), tf.compat.v1.Session() as sess: + tf.compat.v1.train.get_or_create_global_step() + self._validate_print_every_n_secs(sess, at_end=False) + # Verify proper reset. + self._validate_print_every_n_secs(sess, at_end=False) + + def test_print_every_n_secs_and_end(self): + with tf.Graph().as_default(), tf.compat.v1.Session() as sess: + tf.compat.v1.train.get_or_create_global_step() + self._validate_print_every_n_secs(sess, at_end=True) + # Verify proper reset. + self._validate_print_every_n_secs(sess, at_end=True) + + +if __name__ == "__main__": + tf.test.main() diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/mlperf_helper.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/mlperf_helper.py new file mode 100644 index 000000000..8695f9808 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/official/utils/logs/mlperf_helper.py @@ -0,0 +1,193 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Wrapper for the mlperf logging utils. + +MLPerf compliance logging is only desired under a limited set of circumstances. +This module is intended to keep users from needing to consider logging (or +install the module) unless they are performing mlperf runs. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import namedtuple +import json +import os +import re +import subprocess +import sys +import typing +import shlex + +import tensorflow as tf + +_MIN_VERSION = (0, 0, 10) +_STACK_OFFSET = 2 + +SUDO = "sudo" if os.geteuid() else "" + +# This indirection is used in docker. +DROP_CACHE_LOC = os.getenv("DROP_CACHE_LOC", "/proc/sys/vm/drop_caches") + +_NCF_PREFIX = "NCF_RAW_" + +# TODO(robieta): move line parsing to mlperf util +_PREFIX = r"(?:{})?:::MLPv([0-9]+).([0-9]+).([0-9]+)".format(_NCF_PREFIX) +_BENCHMARK = r"([a-zA-Z0-9_]+)" +_TIMESTAMP = r"([0-9]+\.[0-9]+)" +_CALLSITE = r"\((.+):([0-9]+)\)" +_TAG = r"([a-zA-Z0-9_]+)" +_VALUE = r"(.*)" + +ParsedLine = namedtuple("ParsedLine", ["version", "benchmark", "timestamp", + "callsite", "tag", "value"]) + +LINE_PATTERN = re.compile( + "^{prefix} {benchmark} {timestamp} {callsite} {tag}(: |$){value}?$".format( + prefix=_PREFIX, benchmark=_BENCHMARK, timestamp=_TIMESTAMP, + callsite=_CALLSITE, tag=_TAG, value=_VALUE)) + + +def parse_line(line): # type: (str) -> typing.Optional[ParsedLine] + match = LINE_PATTERN.match(line.strip()) + if not match: + return + + major, minor, micro, benchmark, timestamp = match.groups()[:5] + call_file, call_line, tag, _, value = match.groups()[5:] + + return ParsedLine(version=(int(major), int(minor), int(micro)), + benchmark=benchmark, timestamp=timestamp, + callsite=(call_file, call_line), tag=tag, value=value) + + +def unparse_line(parsed_line): # type: (ParsedLine) -> str + version_str = "{}.{}.{}".format(*parsed_line.version) + callsite_str = "({}:{})".format(*parsed_line.callsite) + value_str = ": {}".format(parsed_line.value) if parsed_line.value else "" + return ":::MLPv{} {} {} {} {} {}".format( + version_str, parsed_line.benchmark, parsed_line.timestamp, callsite_str, + parsed_line.tag, value_str) + + +def get_mlperf_log(): + """Shielded import of mlperf_log module.""" + try: + import mlperf_compliance + + def test_mlperf_log_pip_version(): + """Check that mlperf_compliance is up to date.""" + import pkg_resources + version = pkg_resources.get_distribution("mlperf_compliance") + version = tuple(int(i) for i in version.version.split(".")) + if version < _MIN_VERSION: + tf.compat.v1.logging.warning( + "mlperf_compliance is version {}, must be >= {}".format( + ".".join([str(i) for i in version]), + ".".join([str(i) for i in _MIN_VERSION]))) + raise ImportError + return mlperf_compliance.mlperf_log + + mlperf_log = test_mlperf_log_pip_version() + + except ImportError: + mlperf_log = None + + return mlperf_log + + +class Logger(object): + """MLPerf logger indirection class. 
+ + This logger only logs for MLPerf runs, and prevents various errors associated + with not having the mlperf_compliance package installed. + """ + class Tags(object): + def __init__(self, mlperf_log): + self._enabled = False + self._mlperf_log = mlperf_log + + def __getattr__(self, item): + if self._mlperf_log is None or not self._enabled: + return + return getattr(self._mlperf_log, item) + + def __init__(self): + self._enabled = False + self._mlperf_log = get_mlperf_log() + self.tags = self.Tags(self._mlperf_log) + + def __call__(self, enable=False): + if enable and self._mlperf_log is None: + raise ImportError("MLPerf logging was requested, but mlperf_compliance " + "module could not be loaded.") + + self._enabled = enable + self.tags._enabled = enable + return self + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + self._enabled = False + self.tags._enabled = False + + @property + def log_file(self): + if self._mlperf_log is None: + return + return self._mlperf_log.LOG_FILE + + @property + def enabled(self): + return self._enabled + + def ncf_print(self, key, value=None, stack_offset=_STACK_OFFSET, + deferred=False, extra_print=False, prefix=_NCF_PREFIX): + if self._mlperf_log is None or not self.enabled: + return + self._mlperf_log.ncf_print(key=key, value=value, stack_offset=stack_offset, + deferred=deferred, extra_print=extra_print, + prefix=prefix) + + def set_ncf_root(self, path): + if self._mlperf_log is None: + return + self._mlperf_log.ROOT_DIR_NCF = path + + +LOGGER = Logger() +ncf_print, set_ncf_root = LOGGER.ncf_print, LOGGER.set_ncf_root +TAGS = LOGGER.tags + + +def clear_system_caches(): + if not LOGGER.enabled: + return + ret_code = subprocess.call( + [shlex.split("sync && echo 3 | {} tee {}".format(SUDO, DROP_CACHE_LOC))], + shell=False) + + if ret_code: + raise ValueError("Failed to clear caches") + + +if __name__ == "__main__": + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) + with LOGGER(True): + ncf_print(key=TAGS.RUN_START) diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/__init__.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/metrics.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/metrics.py new file mode 100644 index 000000000..3e41f985c --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/metrics.py @@ -0,0 +1,490 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functions for calculating loss, accuracy, and other model metrics. + +Metrics: + - Padded loss, accuracy, and negative log perplexity. 
Source: + https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/metrics.py + - BLEU approximation. Source: + https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py + - ROUGE score. Source: + https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/rouge.py +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import math + +import numpy as np +import six +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf + + +def _pad_tensors_to_same_length(x, y): + """Pad x and y so that the results have the same length (second dimension).""" + with tf.name_scope("pad_to_same_length"): + x_length = tf.shape(x)[1] + y_length = tf.shape(y)[1] + + max_length = tf.maximum(x_length, y_length) + + x = tf.pad(x, [[0, 0], [0, max_length - x_length], [0, 0]]) + y = tf.pad(y, [[0, 0], [0, max_length - y_length]]) + return x, y + + +def padded_cross_entropy_loss(logits, labels, smoothing, vocab_size): + """Calculate cross entropy loss while ignoring padding. + + Args: + logits: Tensor of size [batch_size, length_logits, vocab_size] + labels: Tensor of size [batch_size, length_labels] + smoothing: Label smoothing constant, used to determine the on and off values + vocab_size: int size of the vocabulary + Returns: + Returns the cross entropy loss and weight tensors: float32 tensors with + shape [batch_size, max(length_logits, length_labels)] + """ + with tf.name_scope("loss", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + + # Calculate smoothing cross entropy + with tf.name_scope("smoothing_cross_entropy", values=[logits, labels]): + confidence = 1.0 - smoothing + low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1) + soft_targets = tf.one_hot( + tf.cast(labels, tf.int32), + depth=vocab_size, + on_value=confidence, + off_value=low_confidence) + xentropy = tf.nn.softmax_cross_entropy_with_logits_v2( + logits=logits, labels=soft_targets) + + # Calculate the best (lowest) possible value of cross entropy, and + # subtract from the cross entropy loss. + normalizing_constant = -( + confidence * tf.log(confidence) + tf.to_float(vocab_size - 1) * + low_confidence * tf.log(low_confidence + 1e-20)) + xentropy -= normalizing_constant + + weights = tf.to_float(tf.not_equal(labels, 0)) + return xentropy * weights, weights + + +def _convert_to_eval_metric(metric_fn): + """Wrap a metric fn that returns scores and weights as an eval metric fn. + + The input metric_fn returns values for the current batch. The wrapper + aggregates the return values collected over all of the batches evaluated. + + Args: + metric_fn: function that returns scores and weights for the current batch's + logits and predicted labels. + + Returns: + function that aggregates the scores and weights from metric_fn. + """ + def problem_metric_fn(*args): + """Returns an aggregation of the metric_fn's returned values.""" + (scores, weights) = metric_fn(*args) + + # The tf.metrics.mean function assures correct aggregation. 
+ return tf.metrics.mean(scores, weights) + return problem_metric_fn + + +def get_eval_metrics(logits, labels, params): + """Return dictionary of model evaluation metrics.""" + metrics = { + "accuracy": _convert_to_eval_metric(padded_accuracy)(logits, labels), + "accuracy_top5": _convert_to_eval_metric(padded_accuracy_top5)( + logits, labels), + "accuracy_per_sequence": _convert_to_eval_metric( + padded_sequence_accuracy)(logits, labels), + "neg_log_perplexity": _convert_to_eval_metric(padded_neg_log_perplexity)( + logits, labels, params["vocab_size"]), + } + + if not params["use_tpu"]: + # TPU does not support tf.py_func + metrics.update({ + "approx_bleu_score": _convert_to_eval_metric( + bleu_score)(logits, labels), + "rouge_2_fscore": _convert_to_eval_metric( + rouge_2_fscore)(logits, labels), + "rouge_L_fscore": _convert_to_eval_metric( + rouge_l_fscore)(logits, labels), + }) + + # Prefix each of the metric names with "metrics/". This allows the metric + # graphs to display under the "metrics" category in TensorBoard. + metrics = {"metrics/%s" % k: v for k, v in six.iteritems(metrics)} + return metrics + + +def padded_accuracy(logits, labels): + """Percentage of times that predictions matches labels on non-0s.""" + with tf.variable_scope("padded_accuracy", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + weights = tf.to_float(tf.not_equal(labels, 0)) + outputs = tf.to_int32(tf.argmax(logits, axis=-1)) + padded_labels = tf.to_int32(labels) + return tf.to_float(tf.equal(outputs, padded_labels)), weights + + +def padded_accuracy_topk(logits, labels, k): + """Percentage of times that top-k predictions matches labels on non-0s.""" + with tf.variable_scope("padded_accuracy_topk", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + weights = tf.to_float(tf.not_equal(labels, 0)) + effective_k = tf.minimum(k, tf.shape(logits)[-1]) + _, outputs = tf.nn.top_k(logits, k=effective_k) + outputs = tf.to_int32(outputs) + padded_labels = tf.to_int32(labels) + padded_labels = tf.expand_dims(padded_labels, axis=-1) + padded_labels += tf.zeros_like(outputs) # Pad to same shape. + same = tf.to_float(tf.equal(outputs, padded_labels)) + same_topk = tf.reduce_sum(same, axis=-1) + return same_topk, weights + + +def padded_accuracy_top5(logits, labels): + return padded_accuracy_topk(logits, labels, 5) + + +def padded_sequence_accuracy(logits, labels): + """Percentage of times that predictions matches labels everywhere (non-0).""" + with tf.variable_scope("padded_sequence_accuracy", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + weights = tf.to_float(tf.not_equal(labels, 0)) + outputs = tf.to_int32(tf.argmax(logits, axis=-1)) + padded_labels = tf.to_int32(labels) + not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights + axis = list(range(1, len(outputs.get_shape()))) + correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis)) + return correct_seq, tf.constant(1.0) + + +def padded_neg_log_perplexity(logits, labels, vocab_size): + """Average log-perplexity excluding padding 0s. No smoothing.""" + num, den = padded_cross_entropy_loss(logits, labels, 0, vocab_size) + return -num, den + + +def bleu_score(logits, labels): + """Approximate BLEU score computation between labels and predictions. + + An approximate BLEU scoring method since we do not glue word pieces or + decode the ids and tokenize the output. 
By default, we use ngram order of 4 + and use brevity penalty. Also, this does not have beam search. + + Args: + logits: Tensor of size [batch_size, length_logits, vocab_size] + labels: Tensor of size [batch-size, length_labels] + + Returns: + bleu: int, approx bleu score + """ + predictions = tf.to_int32(tf.argmax(logits, axis=-1)) + # TODO: Look into removing use of py_func + bleu = tf.py_func(compute_bleu, (labels, predictions), tf.float32) + return bleu, tf.constant(1.0) + + +def _get_ngrams_with_counter(segment, max_order): + """Extracts all n-grams up to a given maximum order from an input segment. + + Args: + segment: text segment from which n-grams will be extracted. + max_order: maximum length in tokens of the n-grams returned by this + methods. + + Returns: + The Counter containing all n-grams upto max_order in segment + with a count of how many times each n-gram occurred. + """ + ngram_counts = collections.Counter() + for order in xrange(1, max_order + 1): + for i in xrange(0, len(segment) - order + 1): + ngram = tuple(segment[i:i + order]) + ngram_counts[ngram] += 1 + return ngram_counts + + +def compute_bleu(reference_corpus, translation_corpus, max_order=4, + use_bp=True): + """Computes BLEU score of translated segments against one or more references. + + Args: + reference_corpus: list of references for each translation. Each + reference should be tokenized into a list of tokens. + translation_corpus: list of translations to score. Each translation + should be tokenized into a list of tokens. + max_order: Maximum n-gram order to use when computing BLEU score. + use_bp: boolean, whether to apply brevity penalty. + + Returns: + BLEU score. + """ + reference_length = 0 + translation_length = 0 + bp = 1.0 + geo_mean = 0 + + matches_by_order = [0] * max_order + possible_matches_by_order = [0] * max_order + precisions = [] + + for (references, translations) in zip(reference_corpus, translation_corpus): + reference_length += len(references) + translation_length += len(translations) + ref_ngram_counts = _get_ngrams_with_counter(references, max_order) + translation_ngram_counts = _get_ngrams_with_counter(translations, max_order) + + overlap = dict((ngram, + min(count, translation_ngram_counts[ngram])) + for ngram, count in ref_ngram_counts.items()) + + for ngram in overlap: + matches_by_order[len(ngram) - 1] += overlap[ngram] + for ngram in translation_ngram_counts: + possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[ + ngram] + + precisions = [0] * max_order + smooth = 1.0 + + for i in xrange(0, max_order): + if possible_matches_by_order[i] > 0: + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i] + if matches_by_order[i] > 0: + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[ + i] + else: + smooth *= 2 + precisions[i] = 1.0 / (smooth * possible_matches_by_order[i]) + else: + precisions[i] = 0.0 + + if max(precisions) > 0: + p_log_sum = sum(math.log(p) for p in precisions if p) + geo_mean = math.exp(p_log_sum / max_order) + + if use_bp: + ratio = translation_length / reference_length + bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0 + bleu = geo_mean * bp + return np.float32(bleu) + + +def rouge_2_fscore(logits, labels): + """ROUGE-2 F1 score computation between labels and predictions. + + This is an approximate ROUGE scoring method since we do not glue word pieces + or decode the ids and tokenize the output. + + Args: + logits: tensor, model predictions + labels: tensor, gold output. 
+ + Returns: + rouge2_fscore: approx rouge-2 f1 score. + """ + predictions = tf.to_int32(tf.argmax(logits, axis=-1)) + # TODO: Look into removing use of py_func + rouge_2_f_score = tf.py_func(rouge_n, (predictions, labels), tf.float32) + return rouge_2_f_score, tf.constant(1.0) + + +def _get_ngrams(n, text): + """Calculates n-grams. + + Args: + n: which n-grams to calculate + text: An array of tokens + + Returns: + A set of n-grams + """ + ngram_set = set() + text_length = len(text) + max_index_ngram_start = text_length - n + for i in range(max_index_ngram_start + 1): + ngram_set.add(tuple(text[i:i + n])) + return ngram_set + + +def rouge_n(eval_sentences, ref_sentences, n=2): + """Computes ROUGE-N f1 score of two text collections of sentences. + + Source: https://www.microsoft.com/en-us/research/publication/ + rouge-a-package-for-automatic-evaluation-of-summaries/ + + Args: + eval_sentences: Predicted sentences. + ref_sentences: Sentences from the reference set + n: Size of ngram. Defaults to 2. + + Returns: + f1 score for ROUGE-N + """ + f1_scores = [] + for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences): + eval_ngrams = _get_ngrams(n, eval_sentence) + ref_ngrams = _get_ngrams(n, ref_sentence) + ref_count = len(ref_ngrams) + eval_count = len(eval_ngrams) + + # Count the overlapping ngrams between evaluated and reference + overlapping_ngrams = eval_ngrams.intersection(ref_ngrams) + overlapping_count = len(overlapping_ngrams) + + # Handle edge case. This isn't mathematically correct, but it's good enough + if eval_count == 0: + precision = 0.0 + else: + precision = float(overlapping_count) / eval_count + if ref_count == 0: + recall = 0.0 + else: + recall = float(overlapping_count) / ref_count + f1_scores.append(2.0 * ((precision * recall) / (precision + recall + 1e-8))) + + # return overlapping_count / reference_count + return np.mean(f1_scores, dtype=np.float32) + + +def rouge_l_fscore(predictions, labels): + """ROUGE scores computation between labels and predictions. + + This is an approximate ROUGE scoring method since we do not glue word pieces + or decode the ids and tokenize the output. + + Args: + predictions: tensor, model predictions + labels: tensor, gold output. + + Returns: + rouge_l_fscore: approx rouge-l f1 score. + """ + outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) + rouge_l_f_score = tf.py_func(rouge_l_sentence_level, (outputs, labels), + tf.float32) + return rouge_l_f_score, tf.constant(1.0) + + +def rouge_l_sentence_level(eval_sentences, ref_sentences): + """Computes ROUGE-L (sentence level) of two collections of sentences. 
+ + Source: https://www.microsoft.com/en-us/research/publication/ + rouge-a-package-for-automatic-evaluation-of-summaries/ + + Calculated according to: + R_lcs = LCS(X,Y)/m + P_lcs = LCS(X,Y)/n + F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs) + + where: + X = reference summary + Y = Candidate summary + m = length of reference summary + n = length of candidate summary + + Args: + eval_sentences: The sentences that have been picked by the summarizer + ref_sentences: The sentences from the reference set + + Returns: + A float: F_lcs + """ + + f1_scores = [] + for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences): + m = float(len(ref_sentence)) + n = float(len(eval_sentence)) + lcs = _len_lcs(eval_sentence, ref_sentence) + f1_scores.append(_f_lcs(lcs, m, n)) + return np.mean(f1_scores, dtype=np.float32) + + +def _len_lcs(x, y): + """Returns the length of the Longest Common Subsequence between two seqs. + + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: sequence of words + y: sequence of words + + Returns + integer: Length of LCS between x and y + """ + table = _lcs(x, y) + n, m = len(x), len(y) + return table[n, m] + + +def _lcs(x, y): + """Computes the length of the LCS between two seqs. + + The implementation below uses a DP programming algorithm and runs + in O(nm) time where n = len(x) and m = len(y). + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: collection of words + y: collection of words + + Returns: + Table of dictionary of coord and len lcs + """ + n, m = len(x), len(y) + table = dict() + for i in range(n + 1): + for j in range(m + 1): + if i == 0 or j == 0: + table[i, j] = 0 + elif x[i - 1] == y[j - 1]: + table[i, j] = table[i - 1, j - 1] + 1 + else: + table[i, j] = max(table[i - 1, j], table[i, j - 1]) + return table + + +def _f_lcs(llcs, m, n): + """Computes the LCS-based F-measure score. + + Source: http://research.microsoft.com/en-us/um/people/cyl/download/papers/ + rouge-working-note-v1.3.1.pdf + + Args: + llcs: Length of LCS + m: number of words in reference summary + n: number of words in candidate summary + + Returns: + Float. LCS-based F-measure score + """ + r_lcs = llcs / m + p_lcs = llcs / n + beta = p_lcs / (r_lcs + 1e-12) + num = (1 + (beta ** 2)) * r_lcs * p_lcs + denom = r_lcs + ((beta ** 2) * p_lcs) + f_lcs = num / (denom + 1e-12) + return f_lcs diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/tokenizer.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/tokenizer.py new file mode 100644 index 000000000..20302266a --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/tokenizer.py @@ -0,0 +1,620 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Defines Subtokenizer class to encode and decode strings.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import re +import sys +import unicodedata + +import numpy as np +import six +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf + +PAD = "<pad>" +PAD_ID = 0 +EOS = "<EOS>" +EOS_ID = 1 +RESERVED_TOKENS = [PAD, EOS] + +# Set of characters that will be used in the function _escape_token() (see func +# docstring for more details). +# This set is added to the alphabet list to ensure that all escaped tokens can +# be encoded. +_ESCAPE_CHARS = set(u"\\_u;0123456789") +# Regex for the function _unescape_token(), the inverse of _escape_token(). +# This is used to find "\u", "\\", and "\###;" substrings in the token. +_UNESCAPE_REGEX = re.compile(r"\\u|\\\\|\\([0-9]+);") + +_UNDEFINED_UNICODE = u"\u3013" + +# Set contains all letter and number characters. +_ALPHANUMERIC_CHAR_SET = set( + six.unichr(i) for i in xrange(sys.maxunicode) + if (unicodedata.category(six.unichr(i)).startswith("L") or + unicodedata.category(six.unichr(i)).startswith("N"))) + +# min_count is the minimum number of times a subtoken must appear in the data +# before it is added to the vocabulary. The value is found using binary +# search to obtain the target vocabulary size. +_MIN_MIN_COUNT = 1 # min value to use when binary searching for min_count +_MAX_MIN_COUNT = 1000 # max value to use when binary searching for min_count + + +class Subtokenizer(object): + """Encodes and decodes strings to/from integer IDs.""" + + def __init__(self, vocab_file, reserved_tokens=None): + """Initializes class, creating a vocab file if data_files is provided.""" + tf.compat.v1.logging.info("Initializing Subtokenizer from file %s." % + vocab_file) + + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + self.subtoken_list = _load_vocab_file(vocab_file, reserved_tokens) + self.alphabet = _generate_alphabet_dict(self.subtoken_list) + self.subtoken_to_id_dict = _list_to_index_dict(self.subtoken_list) + + self.max_subtoken_length = 0 + for subtoken in self.subtoken_list: + self.max_subtoken_length = max(self.max_subtoken_length, len(subtoken)) + + # Create cache to speed up subtokenization + self._cache_size = 2 ** 20 + self._cache = [(None, None)] * self._cache_size + + @staticmethod + def init_from_files( + vocab_file, files, target_vocab_size, threshold, min_count=None, + file_byte_limit=1e6, reserved_tokens=None, correct_strip=True): + """Create subtoken vocabulary based on files, and save vocab to file. + + Args: + vocab_file: String name of vocab file to store subtoken vocabulary. + files: List of file paths that will be used to generate vocabulary. + target_vocab_size: target vocabulary size to generate. + threshold: int threshold of vocabulary size to accept. + min_count: int minimum count to use for generating the vocabulary. The min + count is the minimum number of times a subtoken should appear in the + files before it is added to the vocabulary. If set to none, this value + is found using binary search. + file_byte_limit: (Default 1e6) Maximum number of bytes of sample text that + will be drawn from the files. + reserved_tokens: List of string tokens that are guaranteed to be at the + beginning of the subtoken vocabulary list. + correct_strip: Whether to convert text to unicode before strip.
+ + Returns: + Subtokenizer object + """ + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + if tf.io.gfile.exists(vocab_file): + tf.compat.v1.logging.info("Vocab file already exists (%s)" % vocab_file) + else: + tf.compat.v1.logging.info("Begin steps to create subtoken vocabulary...") + token_counts = _count_tokens(files, file_byte_limit, correct_strip) + alphabet = _generate_alphabet_dict(token_counts) + subtoken_list = _generate_subtokens_with_target_vocab_size( + token_counts, alphabet, target_vocab_size, threshold, min_count, + reserved_tokens) + tf.compat.v1.logging.info("Generated vocabulary with %d subtokens." % + len(subtoken_list)) + _save_vocab_file(vocab_file, subtoken_list) + return Subtokenizer(vocab_file) + + def encode(self, raw_string, add_eos=False): + """Encodes a string into a list of int subtoken ids.""" + ret = [] + tokens = _split_string_to_tokens(native_to_unicode(raw_string)) + for token in tokens: + ret.extend(self._token_to_subtoken_ids(token)) + if add_eos: + ret.append(EOS_ID) + return ret + + def _token_to_subtoken_ids(self, token): + """Encode a single token into a list of subtoken ids.""" + cache_location = hash(token) % self._cache_size + cache_key, cache_value = self._cache[cache_location] + if cache_key == token: + return cache_value + + ret = _split_token_to_subtokens( + _escape_token(token, self.alphabet), self.subtoken_to_id_dict, + self.max_subtoken_length) + ret = [self.subtoken_to_id_dict[subtoken_id] for subtoken_id in ret] + + self._cache[cache_location] = (token, ret) + return ret + + def decode(self, subtokens): + """Converts list of int subtokens ids into a string.""" + if isinstance(subtokens, np.ndarray): + # Note that list(subtokens) converts subtokens to a python list, but the + # items remain as np.int32. This converts both the array and its items. + subtokens = subtokens.tolist() + + if not subtokens: + return "" + + assert isinstance(subtokens, list) and isinstance(subtokens[0], int), ( + "Subtokens argument passed into decode() must be a list of integers.") + + return _unicode_to_native( + _join_tokens_to_string(self._subtoken_ids_to_tokens(subtokens))) + + def _subtoken_ids_to_tokens(self, subtokens): + """Convert list of int subtoken ids to a list of string tokens.""" + escaped_tokens = "".join([ + self.subtoken_list[s] for s in subtokens + if s < len(self.subtoken_list)]) + escaped_tokens = escaped_tokens.split("_") + + # All tokens in the vocabulary list have been escaped (see _escape_token()) + # so each token must be unescaped when decoding. 
+ ret = [] + for token in escaped_tokens: + if token: + ret.append(_unescape_token(token)) + return ret + + +def _save_vocab_file(vocab_file, subtoken_list): + """Save subtokens to file.""" + with tf.io.gfile.GFile(vocab_file, mode="w") as f: + for subtoken in subtoken_list: + f.write("'%s'\n" % _unicode_to_native(subtoken)) + + +def _load_vocab_file(vocab_file, reserved_tokens=None): + """Load vocabulary while ensuring reserved tokens are at the top.""" + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + subtoken_list = [] + with tf.io.gfile.GFile(vocab_file, mode="r") as f: + for line in f: + subtoken = native_to_unicode(line.strip()) + subtoken = subtoken[1:-1] # Remove surrounding single-quotes + if subtoken in reserved_tokens: + continue + subtoken_list.append(native_to_unicode(subtoken)) + return reserved_tokens + subtoken_list + + +def native_to_unicode(s): + """Convert string to unicode (required in Python 2).""" + try: # Python 2 + return s if isinstance(s, unicode) else s.decode("utf-8") + except NameError: # Python 3 + return s + + +def _unicode_to_native(s): + """Convert string from unicode to native format (required in Python 2).""" + try: # Python 2 + return s.encode("utf-8") if isinstance(s, unicode) else s + except NameError: # Python 3 + return s + + +def _split_string_to_tokens(text): + """Splits text to a list of string tokens.""" + if not text: + return [] + ret = [] + token_start = 0 + # Classify each character in the input string + is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text] + for pos in xrange(1, len(text)): + if is_alnum[pos] != is_alnum[pos - 1]: + token = text[token_start:pos] + if token != u" " or token_start == 0: + ret.append(token) + token_start = pos + final_token = text[token_start:] + ret.append(final_token) + return ret + + +def _join_tokens_to_string(tokens): + """Join a list of string tokens into a single string.""" + token_is_alnum = [t[0] in _ALPHANUMERIC_CHAR_SET for t in tokens] + ret = [] + for i, token in enumerate(tokens): + if i > 0 and token_is_alnum[i - 1] and token_is_alnum[i]: + ret.append(u" ") + ret.append(token) + return "".join(ret) + + +def _escape_token(token, alphabet): + r"""Replace characters that aren't in the alphabet and append "_" to token. + + Apply three transformations to the token: + 1. Replace underline character "_" with "\u", and backslash "\" with "\\". + 2. Replace characters outside of the alphabet with "\###;", where ### is the + character's Unicode code point. + 3. Appends "_" to mark the end of a token. + + Args: + token: unicode string to be escaped + alphabet: list of all known characters + + Returns: + escaped string + """ + token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") + ret = [c if c in alphabet and c != u"\n" else r"\%d;" % ord(c) for c in token] + return u"".join(ret) + "_" + + +def _unescape_token(token): + r"""Replaces escaped characters in the token with their unescaped versions. + + Applies inverse transformations as _escape_token(): + 1. Replace "\u" with "_", and "\\" with "\". + 2. Replace "\###;" with the unicode character the ### refers to. + + Args: + token: escaped string + + Returns: + unescaped string + """ + + def match(m): + r"""Returns replacement string for matched object. + + Matched objects contain one of the strings that matches the regex pattern: + r"\\u|\\\\|\\([0-9]+);" + The strings can be '\u', '\\', or '\###;' (### is any digit number). + + m.group(0) refers to the entire matched string ('\u', '\\', or '\###;'). 
+ m.group(1) refers to the first parenthesized subgroup ('###'). + + m.group(0) exists for all match objects, while m.group(1) exists only for + the string '\###;'. + + This function looks to see if m.group(1) exists. If it doesn't, then the + matched string must be '\u' or '\\' . In this case, the corresponding + replacement ('_' and '\') are returned. Note that in python, a single + backslash is written as '\\', and double backslash as '\\\\'. + + If m.goup(1) exists, then use the integer in m.group(1) to return a + unicode character. + + Args: + m: match object + + Returns: + String to replace matched object with. + """ + # Check if the matched strings are '\u' or '\\'. + if m.group(1) is None: + return u"_" if m.group(0) == u"\\u" else u"\\" + + # If m.group(1) exists, try and return unicode character. + try: + return six.unichr(int(m.group(1))) + except (ValueError, OverflowError) as _: + return _UNDEFINED_UNICODE + + # Use match function to replace escaped substrings in the token. + return _UNESCAPE_REGEX.sub(match, token) + + +def _count_tokens(files, file_byte_limit=1e6, correct_strip=True): + """Return token counts of words in the files. + + Samples file_byte_limit bytes from each file, and counts the words that appear + in the samples. The samples are semi-evenly distributed across the file. + + Args: + files: List of filepaths + file_byte_limit: Max number of bytes that will be read from each file. + correct_strip: Whether to convert text to unicode before strip. This affects + vocabulary generation for PY2. Sets correct_strip to False in PY2 to + reproduce previous common public result. Sets correct_strip to True will + let PY2 and PY3 get a consistent vocabulary. + + Returns: + Dictionary mapping tokens to the number of times they appear in the sampled + lines from the files. + """ + token_counts = collections.defaultdict(int) + + for filepath in files: + with tf.io.gfile.GFile(filepath, mode="r") as reader: + file_byte_budget = file_byte_limit + counter = 0 + lines_to_skip = int(reader.size() / (file_byte_budget * 2)) + for line in reader: + if counter < lines_to_skip: + counter += 1 + else: + if file_byte_budget < 0: + break + if correct_strip: + line = native_to_unicode(line) + line = line.strip() + file_byte_budget -= len(line) + counter = 0 + + # Add words to token counts + for token in _split_string_to_tokens(native_to_unicode(line)): + token_counts[token] += 1 + return token_counts + + +def _list_to_index_dict(lst): + """Create dictionary mapping list items to their indices in the list.""" + return {item: n for n, item in enumerate(lst)} + + +def _split_token_to_subtokens(token, subtoken_dict, max_subtoken_length): + """Splits a token into subtokens defined in the subtoken dict.""" + ret = [] + start = 0 + token_len = len(token) + while start < token_len: + # Find the longest subtoken, so iterate backwards. + for end in xrange(min(token_len, start + max_subtoken_length), start, -1): + subtoken = token[start:end] + if subtoken in subtoken_dict: + ret.append(subtoken) + start = end + break + else: # Did not break + # If there is no possible encoding of the escaped token then one of the + # characters in the token is not in the alphabet. This should be + # impossible and would be indicative of a bug. + raise ValueError("Was unable to split token \"%s\" into subtokens." 
% + token) + return ret + + +def _generate_subtokens_with_target_vocab_size( + token_counts, alphabet, target_size, threshold, min_count=None, + reserved_tokens=None): + """Generate subtoken vocabulary close to the target size.""" + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + if min_count is not None: + tf.compat.v1.logging.info( + "Using min_count=%d to generate vocab with target size %d" % + (min_count, target_size)) + return _generate_subtokens( + token_counts, alphabet, min_count, reserved_tokens=reserved_tokens) + + def bisect(min_val, max_val): + """Recursive function to binary search for subtoken vocabulary.""" + cur_count = (min_val + max_val) // 2 + tf.compat.v1.logging.info("Binary search: trying min_count=%d (%d %d)" % + (cur_count, min_val, max_val)) + subtoken_list = _generate_subtokens( + token_counts, alphabet, cur_count, reserved_tokens=reserved_tokens) + + val = len(subtoken_list) + tf.compat.v1.logging.info( + "Binary search: min_count=%d resulted in %d tokens" % (cur_count, val)) + + within_threshold = abs(val - target_size) < threshold + if within_threshold or min_val >= max_val or cur_count < 2: + return subtoken_list + if val > target_size: + other_subtoken_list = bisect(cur_count + 1, max_val) + else: + other_subtoken_list = bisect(min_val, cur_count - 1) + + # Return vocabulary dictionary with the closest number of tokens. + other_val = len(other_subtoken_list) + if abs(other_val - target_size) < abs(val - target_size): + return other_subtoken_list + return subtoken_list + + tf.compat.v1.logging.info("Finding best min_count to get target size of %d" % + target_size) + return bisect(_MIN_MIN_COUNT, _MAX_MIN_COUNT) + + +def _generate_alphabet_dict(iterable, reserved_tokens=None): + """Create set of characters that appear in any element in the iterable.""" + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + alphabet = {c for token in iterable for c in token} + alphabet |= {c for token in reserved_tokens for c in token} + alphabet |= _ESCAPE_CHARS # Add escape characters to alphabet set. + return alphabet + + +def _count_and_gen_subtokens( + token_counts, alphabet, subtoken_dict, max_subtoken_length): + """Count number of times subtokens appear, and generate new subtokens. + + Args: + token_counts: dict mapping tokens to the number of times they appear in the + original files. + alphabet: list of allowed characters. Used to escape the tokens, which + guarantees that all tokens can be split into subtokens. + subtoken_dict: dict mapping subtokens to ids. + max_subtoken_length: maximum length of subtoken in subtoken_dict. + + Returns: + A defaultdict mapping subtokens to the number of times they appear in the + tokens. The dict may contain new subtokens. + """ + subtoken_counts = collections.defaultdict(int) + for token, count in six.iteritems(token_counts): + token = _escape_token(token, alphabet) + subtokens = _split_token_to_subtokens( + token, subtoken_dict, max_subtoken_length) + + # Generate new subtokens by taking substrings from token. + start = 0 + for subtoken in subtokens: + for end in xrange(start + 1, len(token) + 1): + new_subtoken = token[start:end] + subtoken_counts[new_subtoken] += count + start += len(subtoken) + + return subtoken_counts + + +def _filter_and_bucket_subtokens(subtoken_counts, min_count): + """Return a bucketed list of subtokens that are filtered by count. 
+ + Args: + subtoken_counts: defaultdict mapping subtokens to their counts + min_count: int count used to filter subtokens + + Returns: + List of subtoken sets, where subtokens in set i have the same length=i. + """ + # Create list of buckets, where subtokens in bucket i have length i. + subtoken_buckets = [] + for subtoken, count in six.iteritems(subtoken_counts): + if count < min_count: # Filter out subtokens that don't appear enough + continue + while len(subtoken_buckets) <= len(subtoken): + subtoken_buckets.append(set()) + subtoken_buckets[len(subtoken)].add(subtoken) + return subtoken_buckets + + +def _gen_new_subtoken_list( + subtoken_counts, min_count, alphabet, reserved_tokens=None): + """Generate candidate subtokens ordered by count, and new max subtoken length. + + Add subtokens to the candidate list in order of length (longest subtokens + first). When a subtoken is added, the counts of each of its prefixes are + decreased. Prefixes that don't appear much outside the subtoken are not added + to the candidate list. + + For example: + subtoken being added to candidate list: 'translate' + subtoken_counts: {'translate':10, 't':40, 'tr':16, 'tra':12, ...} + min_count: 5 + + When 'translate' is added, subtoken_counts is updated to: + {'translate':0, 't':30, 'tr':6, 'tra': 2, ...} + + The subtoken 'tra' will not be added to the candidate list, because it appears + twice (less than min_count) outside of 'translate'. + + Args: + subtoken_counts: defaultdict mapping str subtokens to int counts + min_count: int minimum count requirement for subtokens + alphabet: set of characters. Each character is added to the subtoken list to + guarantee that all tokens can be encoded. + reserved_tokens: list of tokens that will be added to the beginning of the + returned subtoken list. + + Returns: + List of candidate subtokens in decreasing count order, and maximum subtoken + length + """ + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + # Create a list of (count, subtoken) for each candidate subtoken. + subtoken_candidates = [] + + # Use bucketed list to iterate through subtokens in order of length. + # subtoken_buckets[i] = set(subtokens), where each subtoken has length i. + subtoken_buckets = _filter_and_bucket_subtokens(subtoken_counts, min_count) + max_subtoken_length = len(subtoken_buckets) - 1 + + # Go through the list in reverse order to consider longer subtokens first. + for subtoken_len in xrange(max_subtoken_length, 0, -1): + for subtoken in subtoken_buckets[subtoken_len]: + count = subtoken_counts[subtoken] + + # Possible if this subtoken is a prefix of another token. + if count < min_count: + continue + + # Ignore alphabet/reserved tokens, which will be added manually later. + if subtoken not in alphabet and subtoken not in reserved_tokens: + subtoken_candidates.append((count, subtoken)) + + # Decrement count of the subtoken's prefixes (if a longer subtoken is + # added, its prefixes lose priority to be added). + for end in xrange(1, subtoken_len): + subtoken_counts[subtoken[:end]] -= count + + # Add alphabet subtokens (guarantees that all strings are encodable). + subtoken_candidates.extend((subtoken_counts.get(a, 0), a) for a in alphabet) + + # Order subtoken candidates by decreasing count. + subtoken_list = [t for _, t in sorted(subtoken_candidates, reverse=True)] + + # Add reserved tokens to beginning of the list.
+ subtoken_list = reserved_tokens + subtoken_list + return subtoken_list, max_subtoken_length + + +def _generate_subtokens( + token_counts, alphabet, min_count, num_iterations=4, + reserved_tokens=None): + """Create a list of subtokens in decreasing order of frequency. + + Args: + token_counts: dict mapping str tokens -> int count + alphabet: set of characters + min_count: int minimum number of times a subtoken must appear before it is + added to the vocabulary. + num_iterations: int number of iterations to generate new tokens. + reserved_tokens: list of tokens that will be added to the beginning to the + returned subtoken list. + + Returns: + Sorted list of subtokens (most frequent first) + """ + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + # Use alphabet set to create initial list of subtokens + subtoken_list = reserved_tokens + list(alphabet) + max_subtoken_length = 1 + + # On each iteration, segment all words using the subtokens defined in + # subtoken_dict, count how often the resulting subtokens appear, and update + # the dictionary with subtokens w/ high enough counts. + for i in xrange(num_iterations): + tf.compat.v1.logging.info("\tGenerating subtokens: iteration %d" % i) + # Generate new subtoken->id dictionary using the new subtoken list. + subtoken_dict = _list_to_index_dict(subtoken_list) + + # Create dict mapping subtoken->count, with additional subtokens created + # from substrings taken from the tokens. + subtoken_counts = _count_and_gen_subtokens( + token_counts, alphabet, subtoken_dict, max_subtoken_length) + + # Generate new list of subtokens sorted by subtoken count. + subtoken_list, max_subtoken_length = _gen_new_subtoken_list( + subtoken_counts, min_count, alphabet, reserved_tokens) + + tf.compat.v1.logging.info("\tVocab size: %d" % len(subtoken_list)) + return subtoken_list diff --git a/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/tokenizer_test.py b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/tokenizer_test.py new file mode 100644 index 000000000..f757389f3 --- /dev/null +++ b/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/utils/tokenizer_test.py @@ -0,0 +1,182 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Test Subtokenizer and string helper methods.""" + +import collections +import tempfile + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from official.transformer.utils import tokenizer + + +class SubtokenizerTest(tf.test.TestCase): + + def _init_subtokenizer(self, vocab_list): + temp_file = tempfile.NamedTemporaryFile(delete=False) + with tf.io.gfile.GFile(temp_file.name, "w") as w: + for subtoken in vocab_list: + w.write("'%s'" % subtoken) + w.write("\n") + return tokenizer.Subtokenizer(temp_file.name, reserved_tokens=[]) + + def test_encode(self): + vocab_list = ["123_", "test", "ing_"] + subtokenizer = self._init_subtokenizer(vocab_list) + s = "testing 123" + encoded_list = subtokenizer.encode(s) + self.assertEqual([1, 2, 0], encoded_list) + + def test_decode(self): + vocab_list = ["123_", "test", "ing_"] + subtokenizer = self._init_subtokenizer(vocab_list) + encoded_list = [1, 2, 0] # testing 123 + decoded_str = subtokenizer.decode(encoded_list) + self.assertEqual("testing 123", decoded_str) + + def test_subtoken_ids_to_tokens(self): + vocab_list = ["123_", "test", "ing_"] + subtokenizer = self._init_subtokenizer(vocab_list) + encoded_list = [1, 2, 0] # testing 123 + token_list = subtokenizer._subtoken_ids_to_tokens(encoded_list) + self.assertEqual([u"testing", u"123"], token_list) + + +class StringHelperTest(tf.test.TestCase): + + def test_split_string_to_tokens(self): + text = "test? testing 123." + + tokens = tokenizer._split_string_to_tokens(text) + self.assertEqual(["test", "? ", "testing", "123", "."], tokens) + + def test_join_tokens_to_string(self): + tokens = ["test", "? ", "testing", "123", "."] + + s = tokenizer._join_tokens_to_string(tokens) + self.assertEqual("test? 
testing 123.", s) + + def test_escape_token(self): + token = u"abc_\\4" + alphabet = set("abc_\\u;") + + escaped_token = tokenizer._escape_token(token, alphabet) + self.assertEqual("abc\\u\\\\\\52;_", escaped_token) + + def test_unescape_token(self): + escaped_token = u"Underline: \\u, Backslash: \\\\, Unicode: \\52;" + + unescaped_token = tokenizer._unescape_token(escaped_token) + self.assertEqual( + "Underline: _, Backslash: \\, Unicode: 4", unescaped_token) + + def test_list_to_index_dict(self): + lst = ["test", "strings"] + + d = tokenizer._list_to_index_dict(lst) + self.assertDictEqual({"test": 0, "strings": 1}, d) + + def test_split_token_to_subtokens(self): + token = "abc" + subtoken_dict = {"a": 0, "b": 1, "c": 2, "ab": 3} + max_subtoken_length = 2 + + subtokens = tokenizer._split_token_to_subtokens( + token, subtoken_dict, max_subtoken_length) + self.assertEqual(["ab", "c"], subtokens) + + def test_generate_alphabet_dict(self): + s = ["testing", "123"] + reserved_tokens = ["???"] + + alphabet = tokenizer._generate_alphabet_dict(s, reserved_tokens) + self.assertIn("?", alphabet) + self.assertIn("t", alphabet) + self.assertIn("e", alphabet) + self.assertIn("s", alphabet) + self.assertIn("i", alphabet) + self.assertIn("n", alphabet) + self.assertIn("g", alphabet) + self.assertIn("1", alphabet) + self.assertIn("2", alphabet) + self.assertIn("3", alphabet) + + def test_count_and_gen_subtokens(self): + token_counts = {"abc": 5} + alphabet = set("abc_") + subtoken_dict = {"a": 0, "b": 1, "c": 2, "_": 3} + max_subtoken_length = 2 + + subtoken_counts = tokenizer._count_and_gen_subtokens( + token_counts, alphabet, subtoken_dict, max_subtoken_length) + + self.assertIsInstance(subtoken_counts, collections.defaultdict) + self.assertDictEqual( + {"a": 5, "b": 5, "c": 5, "_": 5, "ab": 5, "bc": 5, "c_": 5, + "abc": 5, "bc_": 5, "abc_": 5}, subtoken_counts) + + def test_filter_and_bucket_subtokens(self): + subtoken_counts = collections.defaultdict( + int, {"a": 2, "b": 4, "c": 1, "ab": 6, "ac": 3, "abbc": 5}) + min_count = 3 + + subtoken_buckets = tokenizer._filter_and_bucket_subtokens( + subtoken_counts, min_count) + + self.assertEqual(len(subtoken_buckets[0]), 0) + self.assertEqual(set("b"), subtoken_buckets[1]) + self.assertEqual(set(["ab", "ac"]), subtoken_buckets[2]) + self.assertEqual(len(subtoken_buckets[3]), 0) + self.assertEqual(set(["abbc"]), subtoken_buckets[4]) + + def test_gen_new_subtoken_list(self): + subtoken_counts = collections.defaultdict( + int, {"translate": 10, "t": 40, "tr": 16, "tra": 12}) + min_count = 5 + alphabet = set("translate") + reserved_tokens = ["reserved", "tokens"] + + subtoken_list, max_token_length = tokenizer._gen_new_subtoken_list( + subtoken_counts, min_count, alphabet, reserved_tokens) + + # Check that "tra" isn't in the list (its count should be decremented to 2, + # so it should not be added to the candidate list).
+ self.assertNotIn("tra", subtoken_list) + + self.assertIn("tr", subtoken_list) + self.assertIn("t", subtoken_list) + + self.assertEqual(len("translate"), max_token_length) + + def test_generate_subtokens(self): + token_counts = {"ab": 1, "bc": 3, "abc": 5} + alphabet = set("abc_") + min_count = 100 + num_iterations = 1 + reserved_tokens = ["reserved", "tokens"] + + vocab_list = tokenizer._generate_subtokens( + token_counts, alphabet, min_count, num_iterations, reserved_tokens) + + # Check that reserved tokens are at the front of the list + self.assertEqual(vocab_list[:2], reserved_tokens) + + # Check that each character in alphabet is in the vocab list + for c in alphabet: + self.assertIn(c, vocab_list) + + +if __name__ == "__main__": + tf.test.main() diff --git a/models/object_detection/__init__.py b/models/object_detection/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/object_detection/__init__.py +++ b/models/object_detection/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/object_detection/tensorflow/__init__.py b/models/object_detection/tensorflow/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/object_detection/tensorflow/__init__.py +++ b/models/object_detection/tensorflow/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/object_detection/tensorflow/faster_rcnn/__init__.py b/models/object_detection/tensorflow/faster_rcnn/__init__.py deleted file mode 100644 index c4fdb7d61..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/__init__.py b/models/object_detection/tensorflow/faster_rcnn/inference/__init__.py deleted file mode 100644 index c4fdb7d61..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/__init__.py b/models/object_detection/tensorflow/faster_rcnn/inference/fp32/__init__.py deleted file mode 100644 index c4fdb7d61..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/coco_accuracy.sh b/models/object_detection/tensorflow/faster_rcnn/inference/fp32/coco_accuracy.sh deleted file mode 100644 index 442201506..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/coco_accuracy.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -########## Variables to be defined - run it in research folder - -SPLIT=faster-rcnn #change to your favorite name - -FROZEN_GRAPH=$1 -TF_RECORD_FILES=$2 -TF_MODELS_ROOT=$3 - -if [[ -z ${TF_MODELS_ROOT} ]] || [[ ! -d ${TF_MODELS_ROOT} ]]; then - echo "You must specify the root of the tensorflow/models source tree in the TF_MODELS_ROOT environment variable." 
- exit 1 -fi - -export PYTHONPATH=$PYTHONPATH:${TF_MODELS_ROOT}/research:${TF_MODELS_ROOT}/research/slim:${TF_MODELS_ROOT}/research/object_detection - -python -m object_detection.inference.infer_detections \ - --input_tfrecord_paths=${TF_RECORD_FILES} \ - --output_tfrecord_path=${SPLIT}_detections.tfrecord \ - --inference_graph=${FROZEN_GRAPH} \ - --discard_image_pixels=True - - -mkdir -p ${SPLIT}_eval_metrics - -echo " -label_map_path: '${TF_MODELS_ROOT}/research/object_detection/data/mscoco_label_map.pbtxt' -tf_record_input_reader: { input_path: '${SPLIT}_detections.tfrecord' } -" > ${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt - -echo " -metrics_set: 'coco_detection_metrics' -" > ${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt - - -python -m object_detection.metrics.offline_eval_map_corloc \ - --eval_dir=${SPLIT}_eval_metrics \ - --eval_config_path=${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt \ - --input_config_path=${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/dataset_util.py b/models/object_detection/tensorflow/faster_rcnn/inference/fp32/dataset_util.py deleted file mode 100644 index d21ff8bc7..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/dataset_util.py +++ /dev/null @@ -1,160 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utility functions for creating TFRecord data sets.""" - -import tensorflow as tf - - -def int64_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) - - -def int64_list_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - -def bytes_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -def bytes_list_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) - - -def float_list_feature(value): - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - -def read_examples_list(path): - """Read list of training or validation examples. - - The file is assumed to contain a single example per line where the first - token in the line is an identifier that allows us to find the image and - annotation xml for that example. 
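# A minimal sketch of how tf.train.Feature helpers like the ones in
# dataset_util.py above are typically combined into a serialized
# tf.train.Example; the feature names and values here are hypothetical
# placeholders, not the dataset's actual schema.
import tensorflow as tf

def int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

example = tf.train.Example(features=tf.train.Features(feature={
    "image/height": int64_feature(480),
    "image/width": int64_feature(640),
    "image/encoded": bytes_feature(b"<jpeg bytes>"),
}))
serialized = example.SerializeToString()  # ready to be written to a TFRecord file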
- - For example, the line: - xyz 3 - would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored). - - Args: - path: absolute path to examples list file. - - Returns: - list of example identifiers (strings). - """ - with tf.gfile.GFile(path) as fid: - lines = fid.readlines() - return [line.strip().split(' ')[0] for line in lines] - - -def recursive_parse_xml_to_dict(xml): - """Recursively parses XML contents to python dict. - - We assume that `object` tags are the only ones that can appear - multiple times at the same level of a tree. - - Args: - xml: xml tree obtained by parsing XML file contents using lxml.etree - - Returns: - Python dictionary holding XML contents. - """ - if not xml: - return {xml.tag: xml.text} - result = {} - for child in xml: - child_result = recursive_parse_xml_to_dict(child) - if child.tag != 'object': - result[child.tag] = child_result[child.tag] - else: - if child.tag not in result: - result[child.tag] = [] - result[child.tag].append(child_result[child.tag]) - return {xml.tag: result} - - -def make_initializable_iterator(dataset): - """Creates an iterator, and initializes tables. - - This is useful in cases where make_one_shot_iterator wouldn't work because - the graph contains a hash table that needs to be initialized. - - Args: - dataset: A `tf.data.Dataset` object. - - Returns: - A `tf.data.Iterator`. - """ - iterator = dataset.make_initializable_iterator() - tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer) - return iterator - - -def read_dataset(file_read_func, decode_func, input_files, config): - """Reads a dataset, and handles repetition and shuffling. - - Args: - file_read_func: Function to use in tf.data.Dataset.interleave, to read - every individual file into a tf.data.Dataset. - decode_func: Function to apply to all records. - input_files: A list of file paths to read. - config: A input_reader_builder.InputReader object. - - Returns: - A tf.data.Dataset based on config. - """ - # Shard, shuffle, and read files. - filenames = tf.concat([tf.matching_files(pattern) for pattern in input_files], - 0) - filename_dataset = tf.data.Dataset.from_tensor_slices(filenames) - if config.shuffle: - filename_dataset = filename_dataset.shuffle( - config.filenames_shuffle_buffer_size) - elif config.num_readers > 1: - tf.logging.warning('`shuffle` is false, but the input data stream is ' - 'still slightly shuffled since `num_readers` > 1.') - - filename_dataset = filename_dataset.repeat(config.num_epochs or None) - - records_dataset = filename_dataset.apply( - tf.contrib.data.parallel_interleave( - file_read_func, cycle_length=config.num_readers, - block_length=config.read_block_length, sloppy=True)) - if config.shuffle: - records_dataset.shuffle(config.shuffle_buffer_size) - tensor_dataset = records_dataset.map( - decode_func, num_parallel_calls=config.num_parallel_map_calls) - return tensor_dataset.prefetch(config.prefetch_size) diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/eval.py b/models/object_detection/tensorflow/faster_rcnn/inference/fp32/eval.py deleted file mode 100644 index d1885a8c6..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/eval.py +++ /dev/null @@ -1,165 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
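# A hedged sketch of the shard/interleave/shuffle pattern implemented by
# read_dataset above, written against the non-contrib tf.data API (assuming a
# recent TensorFlow release); parameter values are placeholders. Dataset
# transformations return new datasets, so the result of .shuffle() is
# re-assigned here.
import tensorflow as tf

def read_tfrecords(file_patterns, shuffle=True, num_readers=4,
                   shuffle_buffer_size=2048):
    filenames = tf.data.Dataset.list_files(file_patterns, shuffle=shuffle)
    records = filenames.interleave(
        tf.data.TFRecordDataset,  # one reader per matched file
        cycle_length=num_readers,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    if shuffle:
        records = records.shuffle(shuffle_buffer_size)
    return records.prefetch(tf.data.experimental.AUTOTUNE)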
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Evaluation executable for detection models. - -This executable is used to evaluate DetectionModels. There are two ways of -configuring the eval job. - -1) A single pipeline_pb2.TrainEvalPipelineConfig file maybe specified instead. -In this mode, the --eval_training_data flag may be given to force the pipeline -to evaluate on training data instead. - -Example usage: - ./eval \ - --logtostderr \ - --checkpoint_dir=path/to/checkpoint_dir \ - --eval_dir=path/to/eval_dir \ - --pipeline_config_path=pipeline_config.pbtxt - -2) Three configuration files may be provided: a model_pb2.DetectionModel -configuration file to define what type of DetectionModel is being evaluated, an -input_reader_pb2.InputReader file to specify what data the model is evaluating -and an eval_pb2.EvalConfig file to configure evaluation parameters. - -Example usage: - ./eval \ - --logtostderr \ - --checkpoint_dir=path/to/checkpoint_dir \ - --eval_dir=path/to/eval_dir \ - --eval_config_path=eval_config.pbtxt \ - --model_config_path=model_config.pbtxt \ - --input_config_path=eval_input_config.pbtxt -""" -import functools -import os -import tensorflow as tf - -import evaluator -import dataset_util -from object_detection.builders import dataset_builder -from object_detection.builders import model_builder -from object_detection.utils import config_util -from object_detection.utils import label_map_util - - -tf.logging.set_verbosity(tf.logging.INFO) - -flags = tf.app.flags -flags.DEFINE_boolean('eval_training_data', False, - 'If training data should be evaluated for this job.') -flags.DEFINE_string('checkpoint_dir', '', - 'Directory containing checkpoints to evaluate, typically ' - 'set to `train_dir` used in the training job.') -flags.DEFINE_string('eval_dir', '', - 'Directory to write eval summaries to.') -flags.DEFINE_string('pipeline_config_path', '', - 'Path to a pipeline_pb2.TrainEvalPipelineConfig config ' - 'file. If provided, other configs are ignored') -flags.DEFINE_string('eval_config_path', '', - 'Path to an eval_pb2.EvalConfig config file.') -flags.DEFINE_string('input_config_path', '', - 'Path to an input_reader_pb2.InputReader config file.') -flags.DEFINE_string('model_config_path', '', - 'Path to a model_pb2.DetectionModel config file.') -flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of ' - 'evaluation. 
Overrides the `max_evals` parameter in the ' - 'provided config.') - -# add flags for mkl -flags.DEFINE_integer('num_intra_threads', 1, - 'Number of threads to use for intra-op parallelism. If ' - 'set to 0, the system will pick an appropriate number.') -flags.DEFINE_integer('num_inter_threads', 0, - 'Number of threads to use for inter-op parallelism. If ' - 'set to 0, the system will pick an appropriate number.') - -FLAGS = flags.FLAGS - - -def main(unused_argv): - assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.' - assert FLAGS.eval_dir, '`eval_dir` is missing.' - tf.gfile.MakeDirs(FLAGS.eval_dir) - if FLAGS.pipeline_config_path: - configs = config_util.get_configs_from_pipeline_file( - FLAGS.pipeline_config_path) - tf.gfile.Copy(FLAGS.pipeline_config_path, - os.path.join(FLAGS.eval_dir, 'pipeline.config'), - overwrite=True) - else: - configs = config_util.get_configs_from_multiple_files( - model_config_path=FLAGS.model_config_path, - eval_config_path=FLAGS.eval_config_path, - eval_input_config_path=FLAGS.input_config_path) - for name, config in [('model.config', FLAGS.model_config_path), - ('eval.config', FLAGS.eval_config_path), - ('input.config', FLAGS.input_config_path)]: - tf.gfile.Copy(config, - os.path.join(FLAGS.eval_dir, name), - overwrite=True) - - model_config = configs['model'] - eval_config = configs['eval_config'] - input_config = configs['eval_input_config'] - if FLAGS.eval_training_data: - input_config = configs['train_input_config'] - - model_fn = functools.partial( - model_builder.build, - model_config=model_config, - is_training=False) - - def get_next(config): - return dataset_util.make_initializable_iterator( - dataset_builder.build(config)).get_next() - - create_input_dict_fn = functools.partial(get_next, input_config) - - label_map = label_map_util.load_labelmap(input_config.label_map_path) - max_num_classes = max([item.id for item in label_map.item]) - categories = label_map_util.convert_label_map_to_categories( - label_map, max_num_classes) - - if FLAGS.run_once: - eval_config.max_evals = 1 - - evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories, - FLAGS.checkpoint_dir, FLAGS.eval_dir, - inter_op=FLAGS.num_inter_threads, intra_op=FLAGS.num_intra_threads) - - -if __name__ == '__main__': - tf.app.run() diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/eval_util.py b/models/object_detection/tensorflow/faster_rcnn/inference/fp32/eval_util.py deleted file mode 100644 index dffdc601f..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/eval_util.py +++ /dev/null @@ -1,669 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Common utility functions for evaluation.""" -import collections -import logging -import os -import time - -import numpy as np -import tensorflow as tf - -from object_detection.core import box_list -from object_detection.core import box_list_ops -from object_detection.core import keypoint_ops -from object_detection.core import standard_fields as fields -from object_detection.metrics import coco_evaluation -from object_detection.utils import label_map_util -from object_detection.utils import ops -from object_detection.utils import visualization_utils as vis_utils - -slim = tf.contrib.slim - - -def write_metrics(metrics, global_step, summary_dir): - """Write metrics to a summary directory. - - Args: - metrics: A dictionary containing metric names and values. - global_step: Global step at which the metrics are computed. - summary_dir: Directory to write tensorflow summaries to. - """ - logging.info('Writing metrics to tf summary.') - summary_writer = tf.summary.FileWriterCache.get(summary_dir) - for key in sorted(metrics): - summary = tf.Summary(value=[ - tf.Summary.Value(tag=key, simple_value=metrics[key]), - ]) - summary_writer.add_summary(summary, global_step) - logging.info('%s: %f', key, metrics[key]) - logging.info('Metrics written to tf summary.') - - -# TODO(rathodv): Add tests. -def visualize_detection_results(result_dict, - tag, - global_step, - categories, - summary_dir='', - export_dir='', - agnostic_mode=False, - show_groundtruth=False, - groundtruth_box_visualization_color='black', - min_score_thresh=.5, - max_num_predictions=20, - skip_scores=False, - skip_labels=False, - keep_image_id_for_visualization_export=False): - """Visualizes detection results and writes visualizations to image summaries. - - This function visualizes an image with its detected bounding boxes and writes - to image summaries which can be viewed on tensorboard. It optionally also - writes images to a directory. In the case of missing entry in the label map, - unknown class name in the visualization is shown as "N/A". - - Args: - result_dict: a dictionary holding groundtruth and detection - data corresponding to each image being evaluated. The following keys - are required: - 'original_image': a numpy array representing the image with shape - [1, height, width, 3] or [1, height, width, 1] - 'detection_boxes': a numpy array of shape [N, 4] - 'detection_scores': a numpy array of shape [N] - 'detection_classes': a numpy array of shape [N] - The following keys are optional: - 'groundtruth_boxes': a numpy array of shape [N, 4] - 'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2] - Detections are assumed to be provided in decreasing order of score and for - display, and we assume that scores are probabilities between 0 and 1. - tag: tensorboard tag (string) to associate with image. - global_step: global step at which the visualization are generated. - categories: a list of dictionaries representing all possible categories. 
- Each dict in this list has the following keys: - 'id': (required) an integer id uniquely identifying this category - 'name': (required) string representing category name - e.g., 'cat', 'dog', 'pizza' - 'supercategory': (optional) string representing the supercategory - e.g., 'animal', 'vehicle', 'food', etc - summary_dir: the output directory to which the image summaries are written. - export_dir: the output directory to which images are written. If this is - empty (default), then images are not exported. - agnostic_mode: boolean (default: False) controlling whether to evaluate in - class-agnostic mode or not. - show_groundtruth: boolean (default: False) controlling whether to show - groundtruth boxes in addition to detected boxes - groundtruth_box_visualization_color: box color for visualizing groundtruth - boxes - min_score_thresh: minimum score threshold for a box to be visualized - max_num_predictions: maximum number of detections to visualize - skip_scores: whether to skip score when drawing a single detection - skip_labels: whether to skip label when drawing a single detection - keep_image_id_for_visualization_export: whether to keep image identifier in - filename when exported to export_dir - Raises: - ValueError: if result_dict does not contain the expected keys (i.e., - 'original_image', 'detection_boxes', 'detection_scores', - 'detection_classes') - """ - detection_fields = fields.DetectionResultFields - input_fields = fields.InputDataFields - if not set([ - input_fields.original_image, - detection_fields.detection_boxes, - detection_fields.detection_scores, - detection_fields.detection_classes, - ]).issubset(set(result_dict.keys())): - raise ValueError('result_dict does not contain all expected keys.') - if show_groundtruth and input_fields.groundtruth_boxes not in result_dict: - raise ValueError('If show_groundtruth is enabled, result_dict must contain ' - 'groundtruth_boxes.') - logging.info('Creating detection visualizations.') - category_index = label_map_util.create_category_index(categories) - - image = np.squeeze(result_dict[input_fields.original_image], axis=0) - if image.shape[2] == 1: # If one channel image, repeat in RGB. 
- image = np.tile(image, [1, 1, 3]) - detection_boxes = result_dict[detection_fields.detection_boxes] - detection_scores = result_dict[detection_fields.detection_scores] - detection_classes = np.int32((result_dict[ - detection_fields.detection_classes])) - detection_keypoints = result_dict.get(detection_fields.detection_keypoints) - detection_masks = result_dict.get(detection_fields.detection_masks) - detection_boundaries = result_dict.get(detection_fields.detection_boundaries) - - # Plot groundtruth underneath detections - if show_groundtruth: - groundtruth_boxes = result_dict[input_fields.groundtruth_boxes] - groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints) - vis_utils.visualize_boxes_and_labels_on_image_array( - image=image, - boxes=groundtruth_boxes, - classes=None, - scores=None, - category_index=category_index, - keypoints=groundtruth_keypoints, - use_normalized_coordinates=False, - max_boxes_to_draw=None, - groundtruth_box_visualization_color=groundtruth_box_visualization_color) - vis_utils.visualize_boxes_and_labels_on_image_array( - image, - detection_boxes, - detection_classes, - detection_scores, - category_index, - instance_masks=detection_masks, - instance_boundaries=detection_boundaries, - keypoints=detection_keypoints, - use_normalized_coordinates=False, - max_boxes_to_draw=max_num_predictions, - min_score_thresh=min_score_thresh, - agnostic_mode=agnostic_mode, - skip_scores=skip_scores, - skip_labels=skip_labels) - - if export_dir: - if keep_image_id_for_visualization_export and result_dict[fields. - InputDataFields() - .key]: - export_path = os.path.join(export_dir, 'export-{}-{}.png'.format( - tag, result_dict[fields.InputDataFields().key])) - else: - export_path = os.path.join(export_dir, 'export-{}.png'.format(tag)) - vis_utils.save_image_array_as_png(image, export_path) - - summary = tf.Summary(value=[ - tf.Summary.Value( - tag=tag, - image=tf.Summary.Image( - encoded_image_string=vis_utils.encode_image_array_as_png_str( - image))) - ]) - summary_writer = tf.summary.FileWriterCache.get(summary_dir) - summary_writer.add_summary(summary, global_step) - - logging.info('Detection visualizations written to summary with tag %s.', tag) - - -def _run_checkpoint_once(tensor_dict, - evaluators=None, - batch_processor=None, - checkpoint_dirs=None, - variables_to_restore=None, - restore_fn=None, - num_batches=1, - master='', - save_graph=False, - save_graph_dir='', - losses_dict=None, inter_op=0, intra_op=1): - """Evaluates metrics defined in evaluators and returns summaries. - - This function loads the latest checkpoint in checkpoint_dirs and evaluates - all metrics defined in evaluators. The metrics are processed in batch by the - batch_processor. - - Args: - tensor_dict: a dictionary holding tensors representing a batch of detections - and corresponding groundtruth annotations. - evaluators: a list of object of type DetectionEvaluator to be used for - evaluation. Note that the metric names produced by different evaluators - must be unique. - batch_processor: a function taking four arguments: - 1. tensor_dict: the same tensor_dict that is passed in as the first - argument to this function. - 2. sess: a tensorflow session - 3. batch_index: an integer representing the index of the batch amongst - all batches - By default, batch_processor is None, which defaults to running: - return sess.run(tensor_dict) - To skip an image, it suffices to return an empty dictionary in place of - result_dict. 
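# Minimal sketch (assuming the TF 1.x API used throughout this file) of how
# inter_op / intra_op arguments like those in _run_checkpoint_once above become
# session thread-pool settings; the default values are placeholders.
import tensorflow as tf

def make_session(inter_op=0, intra_op=1):
    config = tf.ConfigProto(
        inter_op_parallelism_threads=inter_op,   # independent ops run concurrently
        intra_op_parallelism_threads=intra_op)   # threads used inside a single op
    return tf.Session(config=config)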
- checkpoint_dirs: list of directories to load into an EnsembleModel. If it - has only one directory, EnsembleModel will not be used -- - a DetectionModel - will be instantiated directly. Not used if restore_fn is set. - variables_to_restore: None, or a dictionary mapping variable names found in - a checkpoint to model variables. The dictionary would normally be - generated by creating a tf.train.ExponentialMovingAverage object and - calling its variables_to_restore() method. Not used if restore_fn is set. - restore_fn: None, or a function that takes a tf.Session object and correctly - restores all necessary variables from the correct checkpoint file. If - None, attempts to restore from the first directory in checkpoint_dirs. - num_batches: the number of batches to use for evaluation. - master: the location of the Tensorflow session. - save_graph: whether or not the Tensorflow graph is stored as a pbtxt file. - save_graph_dir: where to store the Tensorflow graph on disk. If save_graph - is True this must be non-empty. - losses_dict: optional dictionary of scalar detection losses. - - Returns: - global_step: the count of global steps. - all_evaluator_metrics: A dictionary containing metric names and values. - - Raises: - ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least - one element. - ValueError: if save_graph is True and save_graph_dir is not defined. - """ - if save_graph and not save_graph_dir: - raise ValueError('`save_graph_dir` must be defined.') - config = tf.ConfigProto(inter_op_parallelism_threads=inter_op, - intra_op_parallelism_threads=intra_op) - sess = tf.Session(master, graph=tf.get_default_graph(), config=config) - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) - sess.run(tf.tables_initializer()) - if restore_fn: - restore_fn(sess) - else: - if not checkpoint_dirs: - raise ValueError('`checkpoint_dirs` must have at least one entry.') - checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0]) - saver = tf.train.Saver(variables_to_restore) - saver.restore(sess, checkpoint_file) - - if save_graph: - tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt') - - counters = {'skipped': 0, 'success': 0} - aggregate_result_losses_dict = collections.defaultdict(list) - with tf.contrib.slim.queues.QueueRunners(sess): - try: - loop_start_time = time.time() - for batch in range(int(num_batches)): - if (batch + 1) % 100 == 0: - logging.info('Running eval ops batch %d/%d', batch + 1, num_batches) - if not batch_processor: - try: - if not losses_dict: - losses_dict = {} - result_dict, result_losses_dict = sess.run([tensor_dict, - losses_dict]) - counters['success'] += 1 - except tf.errors.InvalidArgumentError: - logging.info('Skipping image') - counters['skipped'] += 1 - result_dict = {} - else: - result_dict, result_losses_dict = batch_processor( - tensor_dict, sess, batch, counters, losses_dict=losses_dict) - if not result_dict: - continue - for key, value in iter(result_losses_dict.items()): - aggregate_result_losses_dict[key].append(value) - for evaluator in evaluators: - # TODO(b/65130867): Use image_id tensor once we fix the input data - # decoders to return correct image_id. - # TODO(akuznetsa): result_dict contains batches of images, while - # add_single_ground_truth_image_info expects a single image. 
Fix - evaluator.add_single_ground_truth_image_info( - image_id=batch, groundtruth_dict=result_dict) - evaluator.add_single_detected_image_info( - image_id=batch, detections_dict=result_dict) - loop_end_time = time.time() - elapsed_time = loop_end_time - loop_start_time - print('Time spent : %.3f seconds.' % elapsed_time) - print('Time spent per BATCH: %.3f seconds.' % (elapsed_time / num_batches)) - logging.info('Running eval batches done.') - except tf.errors.OutOfRangeError: - logging.info('Done evaluating -- epoch limit reached') - finally: - # When done, ask the threads to stop. - logging.info('# success: %d', counters['success']) - logging.info('# skipped: %d', counters['skipped']) - all_evaluator_metrics = {} - for evaluator in evaluators: - metrics = evaluator.evaluate() - evaluator.clear() - if any(key in all_evaluator_metrics for key in metrics): - raise ValueError('Metric names between evaluators must not collide.') - all_evaluator_metrics.update(metrics) - global_step = tf.train.global_step(sess, tf.train.get_global_step()) - - for key, value in iter(aggregate_result_losses_dict.items()): - all_evaluator_metrics['Losses/' + key] = np.mean(value) - sess.close() - return (global_step, all_evaluator_metrics) - - -# TODO(rathodv): Add tests. -def repeated_checkpoint_run(tensor_dict, - summary_dir, - evaluators, - batch_processor=None, - checkpoint_dirs=None, - variables_to_restore=None, - restore_fn=None, - num_batches=1, - eval_interval_secs=120, - max_number_of_evaluations=None, - master='', - save_graph=False, - save_graph_dir='', - losses_dict=None, - inter_op=0, - intra_op=1): - """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn. - - This function repeatedly loads a checkpoint and evaluates a desired - set of tensors (provided by tensor_dict) and hands the resulting numpy - arrays to a function result_processor which can be used to further - process/save/visualize the results. - - Args: - tensor_dict: a dictionary holding tensors representing a batch of detections - and corresponding groundtruth annotations. - summary_dir: a directory to write metrics summaries. - evaluators: a list of object of type DetectionEvaluator to be used for - evaluation. Note that the metric names produced by different evaluators - must be unique. - batch_processor: a function taking three arguments: - 1. tensor_dict: the same tensor_dict that is passed in as the first - argument to this function. - 2. sess: a tensorflow session - 3. batch_index: an integer representing the index of the batch amongst - all batches - By default, batch_processor is None, which defaults to running: - return sess.run(tensor_dict) - checkpoint_dirs: list of directories to load into a DetectionModel or an - EnsembleModel if restore_fn isn't set. Also used to determine when to run - next evaluation. Must have at least one element. - variables_to_restore: None, or a dictionary mapping variable names found in - a checkpoint to model variables. The dictionary would normally be - generated by creating a tf.train.ExponentialMovingAverage object and - calling its variables_to_restore() method. Not used if restore_fn is set. - restore_fn: a function that takes a tf.Session object and correctly restores - all necessary variables from the correct checkpoint file. - num_batches: the number of batches to use for evaluation. - eval_interval_secs: the number of seconds between each evaluation run. - max_number_of_evaluations: the max number of iterations of the evaluation. 
- If the value is left as None the evaluation continues indefinitely. - master: the location of the Tensorflow session. - save_graph: whether or not the Tensorflow graph is saved as a pbtxt file. - save_graph_dir: where to save on disk the Tensorflow graph. If store_graph - is True this must be non-empty. - losses_dict: optional dictionary of scalar detection losses. - - Returns: - metrics: A dictionary containing metric names and values in the latest - evaluation. - - Raises: - ValueError: if max_num_of_evaluations is not None or a positive number. - ValueError: if checkpoint_dirs doesn't have at least one element. - """ - if max_number_of_evaluations and max_number_of_evaluations <= 0: - raise ValueError( - '`number_of_steps` must be either None or a positive number.') - - if not checkpoint_dirs: - raise ValueError('`checkpoint_dirs` must have at least one entry.') - - last_evaluated_model_path = None - number_of_evaluations = 0 - while True: - start = time.time() - logging.info('Starting evaluation at ' + time.strftime( - '%Y-%m-%d-%H:%M:%S', time.gmtime())) - model_path = tf.train.latest_checkpoint(checkpoint_dirs[0]) - if not model_path: - logging.info('No model found in %s. Will try again in %d seconds', - checkpoint_dirs[0], eval_interval_secs) - elif model_path == last_evaluated_model_path: - logging.info('Found already evaluated checkpoint. Will try again in %d ' - 'seconds', eval_interval_secs) - else: - last_evaluated_model_path = model_path - global_step, metrics = _run_checkpoint_once(tensor_dict, evaluators, - batch_processor, - checkpoint_dirs, - variables_to_restore, - restore_fn, num_batches, - master, save_graph, - save_graph_dir, - losses_dict=losses_dict, - inter_op=inter_op, - intra_op=intra_op) - write_metrics(metrics, global_step, summary_dir) - number_of_evaluations += 1 - - if (max_number_of_evaluations and - number_of_evaluations >= max_number_of_evaluations): - logging.info('Finished evaluation!') - break - time_to_next_eval = start + eval_interval_secs - time.time() - if time_to_next_eval > 0: - time.sleep(time_to_next_eval) - - return metrics - - -def result_dict_for_single_example(image, - key, - detections, - groundtruth=None, - class_agnostic=False, - scale_to_absolute=False): - """Merges all detection and groundtruth information for a single example. - - Note that evaluation tools require classes that are 1-indexed, and so this - function performs the offset. If `class_agnostic` is True, all output classes - have label 1. - - Args: - image: A single 4D uint8 image tensor of shape [1, H, W, C]. - key: A single string tensor identifying the image. - detections: A dictionary of detections, returned from - DetectionModel.postprocess(). - groundtruth: (Optional) Dictionary of groundtruth items, with fields: - 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in - normalized coordinates. - 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. - 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) - 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) - 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) - 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) - 'groundtruth_instance_masks': 3D int64 tensor of instance masks - (Optional). - class_agnostic: Boolean indicating whether the detections are class-agnostic - (i.e. binary). Default False. - scale_to_absolute: Boolean indicating whether boxes and keypoints should be - scaled to absolute coordinates. 
Note that for IoU based evaluations, it - does not matter whether boxes are expressed in absolute or relative - coordinates. Default False. - - Returns: - A dictionary with: - 'original_image': A [1, H, W, C] uint8 image tensor. - 'key': A string tensor with image identifier. - 'detection_boxes': [max_detections, 4] float32 tensor of boxes, in - normalized or absolute coordinates, depending on the value of - `scale_to_absolute`. - 'detection_scores': [max_detections] float32 tensor of scores. - 'detection_classes': [max_detections] int64 tensor of 1-indexed classes. - 'detection_masks': [max_detections, H, W] float32 tensor of binarized - masks, reframed to full image masks. - 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in - normalized or absolute coordinates, depending on the value of - `scale_to_absolute`. (Optional) - 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. - (Optional) - 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) - 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) - 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) - 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) - 'groundtruth_instance_masks': 3D int64 tensor of instance masks - (Optional). - - """ - label_id_offset = 1 # Applying label id offset (b/63711816) - - input_data_fields = fields.InputDataFields - output_dict = { - input_data_fields.original_image: image, - input_data_fields.key: key, - } - - detection_fields = fields.DetectionResultFields - detection_boxes = detections[detection_fields.detection_boxes][0] - image_shape = tf.shape(image) - detection_scores = detections[detection_fields.detection_scores][0] - - if class_agnostic: - detection_classes = tf.ones_like(detection_scores, dtype=tf.int64) - else: - detection_classes = ( - tf.to_int64(detections[detection_fields.detection_classes][0]) + - label_id_offset) - - num_detections = tf.to_int32(detections[detection_fields.num_detections][0]) - detection_boxes = tf.slice( - detection_boxes, begin=[0, 0], size=[num_detections, -1]) - detection_classes = tf.slice( - detection_classes, begin=[0], size=[num_detections]) - detection_scores = tf.slice( - detection_scores, begin=[0], size=[num_detections]) - - if scale_to_absolute: - absolute_detection_boxlist = box_list_ops.to_absolute_coordinates( - box_list.BoxList(detection_boxes), image_shape[1], image_shape[2]) - output_dict[detection_fields.detection_boxes] = ( - absolute_detection_boxlist.get()) - else: - output_dict[detection_fields.detection_boxes] = detection_boxes - output_dict[detection_fields.detection_classes] = detection_classes - output_dict[detection_fields.detection_scores] = detection_scores - - if detection_fields.detection_masks in detections: - detection_masks = detections[detection_fields.detection_masks][0] - # TODO(rathodv): This should be done in model's postprocess - # function ideally. 
- detection_masks = tf.slice( - detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1]) - detection_masks_reframed = ops.reframe_box_masks_to_image_masks( - detection_masks, detection_boxes, image_shape[1], image_shape[2]) - detection_masks_reframed = tf.cast( - tf.greater(detection_masks_reframed, 0.5), tf.uint8) - output_dict[detection_fields.detection_masks] = detection_masks_reframed - if detection_fields.detection_keypoints in detections: - detection_keypoints = detections[detection_fields.detection_keypoints][0] - output_dict[detection_fields.detection_keypoints] = detection_keypoints - if scale_to_absolute: - absolute_detection_keypoints = keypoint_ops.scale( - detection_keypoints, image_shape[1], image_shape[2]) - output_dict[detection_fields.detection_keypoints] = ( - absolute_detection_keypoints) - - if groundtruth: - if input_data_fields.groundtruth_instance_masks in groundtruth: - groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast( - groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8) - output_dict.update(groundtruth) - if scale_to_absolute: - groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes] - absolute_gt_boxlist = box_list_ops.to_absolute_coordinates( - box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2]) - output_dict[input_data_fields.groundtruth_boxes] = ( - absolute_gt_boxlist.get()) - # For class-agnostic models, groundtruth classes all become 1. - if class_agnostic: - groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes] - groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64) - output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes - - return output_dict - - -def get_eval_metric_ops_for_evaluators(evaluation_metrics, - categories, - eval_dict, - include_metrics_per_category=False): - """Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`. - - Args: - evaluation_metrics: List of evaluation metric names. Current options are - 'coco_detection_metrics' and 'coco_mask_metrics'. - categories: A list of dicts, each of which has the following keys - - 'id': (required) an integer id uniquely identifying this category. - 'name': (required) string representing category name e.g., 'cat', 'dog'. - eval_dict: An evaluation dictionary, returned from - result_dict_for_single_example(). - include_metrics_per_category: If True, include metrics for each category. - - Returns: - A dictionary of metric names to tuple of value_op and update_op that can be - used as eval metric ops in tf.EstimatorSpec. - - Raises: - ValueError: If any of the metrics in `evaluation_metric` is not - 'coco_detection_metrics' or 'coco_mask_metrics'. 
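# Hedged sketch of how a metric-ops dictionary such as the one documented above
# is typically attached to an Estimator's EVAL spec; `loss` and
# `eval_metric_ops` are placeholders produced elsewhere in a model_fn.
import tensorflow as tf

def make_eval_spec(loss, eval_metric_ops):
    return tf.estimator.EstimatorSpec(
        mode=tf.estimator.ModeKeys.EVAL,
        loss=loss,
        eval_metric_ops=eval_metric_ops)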
- """ - evaluation_metrics = list(set(evaluation_metrics)) - - input_data_fields = fields.InputDataFields - detection_fields = fields.DetectionResultFields - eval_metric_ops = {} - for metric in evaluation_metrics: - if metric == 'coco_detection_metrics': - coco_evaluator = coco_evaluation.CocoDetectionEvaluator( - categories, include_metrics_per_category=include_metrics_per_category) - eval_metric_ops.update( - coco_evaluator.get_estimator_eval_metric_ops( - image_id=eval_dict[input_data_fields.key], - groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes], - groundtruth_classes=eval_dict[ - input_data_fields.groundtruth_classes], - detection_boxes=eval_dict[detection_fields.detection_boxes], - detection_scores=eval_dict[detection_fields.detection_scores], - detection_classes=eval_dict[detection_fields.detection_classes])) - elif metric == 'coco_mask_metrics': - coco_mask_evaluator = coco_evaluation.CocoMaskEvaluator( - categories, include_metrics_per_category=include_metrics_per_category) - eval_metric_ops.update( - coco_mask_evaluator.get_estimator_eval_metric_ops( - image_id=eval_dict[input_data_fields.key], - groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes], - groundtruth_classes=eval_dict[ - input_data_fields.groundtruth_classes], - groundtruth_instance_masks=eval_dict[ - input_data_fields.groundtruth_instance_masks], - detection_scores=eval_dict[detection_fields.detection_scores], - detection_classes=eval_dict[detection_fields.detection_classes], - detection_masks=eval_dict[detection_fields.detection_masks])) - else: - raise ValueError('The only evaluation metrics supported are ' - '"coco_detection_metrics" and "coco_mask_metrics". ' - 'Found {} in the evaluation metrics'.format(metric)) - - return eval_metric_ops diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/evaluator.py b/models/object_detection/tensorflow/faster_rcnn/inference/fp32/evaluator.py deleted file mode 100644 index 8e9e0bb98..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/evaluator.py +++ /dev/null @@ -1,297 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Detection model evaluator. 
- -This file provides a generic evaluation method that can be used to evaluate a -DetectionModel. -""" - -import logging -import tensorflow as tf - -import eval_util -from object_detection.core import prefetcher -from object_detection.core import standard_fields as fields -from object_detection.metrics import coco_evaluation -from object_detection.utils import object_detection_evaluation - -# A dictionary of metric names to classes that implement the metric. The classes -# in the dictionary must implement -# utils.object_detection_evaluation.DetectionEvaluator interface. -EVAL_METRICS_CLASS_DICT = { - 'pascal_voc_detection_metrics': - object_detection_evaluation.PascalDetectionEvaluator, - 'weighted_pascal_voc_detection_metrics': - object_detection_evaluation.WeightedPascalDetectionEvaluator, - 'pascal_voc_instance_segmentation_metrics': - object_detection_evaluation.PascalInstanceSegmentationEvaluator, - 'weighted_pascal_voc_instance_segmentation_metrics': - object_detection_evaluation.WeightedPascalInstanceSegmentationEvaluator, - 'open_images_detection_metrics': - object_detection_evaluation.OpenImagesDetectionEvaluator, - 'coco_detection_metrics': - coco_evaluation.CocoDetectionEvaluator, - 'coco_mask_metrics': - coco_evaluation.CocoMaskEvaluator, -} - -EVAL_DEFAULT_METRIC = 'pascal_voc_detection_metrics' - - -def _extract_predictions_and_losses(model, - create_input_dict_fn, - ignore_groundtruth=False): - """Constructs tensorflow detection graph and returns output tensors. - - Args: - model: model to perform predictions with. - create_input_dict_fn: function to create input tensor dictionaries. - ignore_groundtruth: whether groundtruth should be ignored. - - Returns: - prediction_groundtruth_dict: A dictionary with postprocessed tensors (keyed - by standard_fields.DetectionResultsFields) and optional groundtruth - tensors (keyed by standard_fields.InputDataFields). - losses_dict: A dictionary containing detection losses. This is empty when - ignore_groundtruth is true. 
- """ - input_dict = create_input_dict_fn() - prefetch_queue = prefetcher.prefetch(input_dict, capacity=500) - input_dict = prefetch_queue.dequeue() - original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0) - preprocessed_image, true_image_shapes = model.preprocess( - tf.to_float(original_image)) - prediction_dict = model.predict(preprocessed_image, true_image_shapes) - detections = model.postprocess(prediction_dict, true_image_shapes) - - groundtruth = None - losses_dict = {} - if not ignore_groundtruth: - groundtruth = { - fields.InputDataFields.groundtruth_boxes: - input_dict[fields.InputDataFields.groundtruth_boxes], - fields.InputDataFields.groundtruth_classes: - input_dict[fields.InputDataFields.groundtruth_classes], - fields.InputDataFields.groundtruth_area: - input_dict[fields.InputDataFields.groundtruth_area], - fields.InputDataFields.groundtruth_is_crowd: - input_dict[fields.InputDataFields.groundtruth_is_crowd], - fields.InputDataFields.groundtruth_difficult: - input_dict[fields.InputDataFields.groundtruth_difficult] - } - if fields.InputDataFields.groundtruth_group_of in input_dict: - groundtruth[fields.InputDataFields.groundtruth_group_of] = ( - input_dict[fields.InputDataFields.groundtruth_group_of]) - groundtruth_masks_list = None - if fields.DetectionResultFields.detection_masks in detections: - groundtruth[fields.InputDataFields.groundtruth_instance_masks] = ( - input_dict[fields.InputDataFields.groundtruth_instance_masks]) - groundtruth_masks_list = [ - input_dict[fields.InputDataFields.groundtruth_instance_masks]] - groundtruth_keypoints_list = None - if fields.DetectionResultFields.detection_keypoints in detections: - groundtruth[fields.InputDataFields.groundtruth_keypoints] = ( - input_dict[fields.InputDataFields.groundtruth_keypoints]) - groundtruth_keypoints_list = [ - input_dict[fields.InputDataFields.groundtruth_keypoints]] - label_id_offset = 1 - model.provide_groundtruth( - [input_dict[fields.InputDataFields.groundtruth_boxes]], - [tf.one_hot(input_dict[fields.InputDataFields.groundtruth_classes] - - label_id_offset, depth=model.num_classes)], - groundtruth_masks_list, groundtruth_keypoints_list) - losses_dict.update(model.loss(prediction_dict, true_image_shapes)) - - result_dict = eval_util.result_dict_for_single_example( - original_image, - input_dict[fields.InputDataFields.source_id], - detections, - groundtruth, - class_agnostic=( - fields.DetectionResultFields.detection_classes not in detections), - scale_to_absolute=True) - return result_dict, losses_dict - - -def get_evaluators(eval_config, categories): - """Returns the evaluator class according to eval_config, valid for categories. - - Args: - eval_config: evaluation configurations. - categories: a list of categories to evaluate. - Returns: - An list of instances of DetectionEvaluator. - - Raises: - ValueError: if metric is not in the metric class dictionary. 
- """ - eval_metric_fn_keys = eval_config.metrics_set - if not eval_metric_fn_keys: - eval_metric_fn_keys = [EVAL_DEFAULT_METRIC] - evaluators_list = [] - for eval_metric_fn_key in eval_metric_fn_keys: - if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT: - raise ValueError('Metric not found: {}'.format(eval_metric_fn_key)) - evaluators_list.append( - EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](categories=categories)) - return evaluators_list - - -def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories, - checkpoint_dir, eval_dir, inter_op=0, intra_op=1, graph_hook_fn=None, evaluator_list=None): - """Evaluation function for detection models. - - Args: - create_input_dict_fn: a function to create a tensor input dictionary. - create_model_fn: a function that creates a DetectionModel. - eval_config: a eval_pb2.EvalConfig protobuf. - categories: a list of category dictionaries. Each dict in the list should - have an integer 'id' field and string 'name' field. - checkpoint_dir: directory to load the checkpoints to evaluate from. - eval_dir: directory to write evaluation metrics summary to. - graph_hook_fn: Optional function that is called after the training graph is - completely built. This is helpful to perform additional changes to the - training graph such as optimizing batchnorm. The function should modify - the default graph. - evaluator_list: Optional list of instances of DetectionEvaluator. If not - given, this list of metrics is created according to the eval_config. - - Returns: - metrics: A dictionary containing metric names and values from the latest - run. - """ - - model = create_model_fn() - - if eval_config.ignore_groundtruth and not eval_config.export_path: - logging.fatal('If ignore_groundtruth=True then an export_path is ' - 'required. Aborting!!!') - - tensor_dict, losses_dict = _extract_predictions_and_losses( - model=model, - create_input_dict_fn=create_input_dict_fn, - ignore_groundtruth=eval_config.ignore_groundtruth) - - def _process_batch(tensor_dict, sess, batch_index, counters, - losses_dict=None): - """Evaluates tensors in tensor_dict, losses_dict and visualizes examples. - - This function calls sess.run on tensor_dict, evaluating the original_image - tensor only on the first K examples and visualizing detections overlaid - on this original_image. - - Args: - tensor_dict: a dictionary of tensors - sess: tensorflow session - batch_index: the index of the batch amongst all batches in the run. - counters: a dictionary holding 'success' and 'skipped' fields which can - be updated to keep track of number of successful and failed runs, - respectively. If these fields are not updated, then the success/skipped - counter values shown at the end of evaluation will be incorrect. - losses_dict: Optional dictonary of scalar loss tensors. - - Returns: - result_dict: a dictionary of numpy arrays - result_losses_dict: a dictionary of scalar losses. This is empty if input - losses_dict is None. 
- """ - try: - if not losses_dict: - losses_dict = {} - result_dict, result_losses_dict = sess.run([tensor_dict, losses_dict]) - counters['success'] += 1 - except tf.errors.InvalidArgumentError: - logging.info('Skipping image') - counters['skipped'] += 1 - return {}, {} - global_step = tf.train.global_step(sess, tf.train.get_global_step()) - if batch_index < eval_config.num_visualizations: - tag = 'image-{}'.format(batch_index) - eval_util.visualize_detection_results( - result_dict, - tag, - global_step, - categories=categories, - summary_dir=eval_dir, - export_dir=eval_config.visualization_export_dir, - show_groundtruth=eval_config.visualize_groundtruth_boxes, - groundtruth_box_visualization_color=eval_config. - groundtruth_box_visualization_color, - min_score_thresh=eval_config.min_score_threshold, - max_num_predictions=eval_config.max_num_boxes_to_visualize, - skip_scores=eval_config.skip_scores, - skip_labels=eval_config.skip_labels, - keep_image_id_for_visualization_export=eval_config. - keep_image_id_for_visualization_export) - return result_dict, result_losses_dict - - variables_to_restore = tf.global_variables() - global_step = tf.train.get_or_create_global_step() - variables_to_restore.append(global_step) - - if graph_hook_fn: - graph_hook_fn() - - if eval_config.use_moving_averages: - variable_averages = tf.train.ExponentialMovingAverage(0.0) - variables_to_restore = variable_averages.variables_to_restore() - saver = tf.train.Saver(variables_to_restore) - - def _restore_latest_checkpoint(sess): - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) - saver.restore(sess, latest_checkpoint) - - if not evaluator_list: - evaluator_list = get_evaluators(eval_config, categories) - - metrics = eval_util.repeated_checkpoint_run( - tensor_dict=tensor_dict, - summary_dir=eval_dir, - evaluators=evaluator_list, - batch_processor=_process_batch, - checkpoint_dirs=[checkpoint_dir], - variables_to_restore=None, - restore_fn=_restore_latest_checkpoint, - num_batches=eval_config.num_examples, - eval_interval_secs=eval_config.eval_interval_secs, - max_number_of_evaluations=(1 if eval_config.ignore_groundtruth else - eval_config.max_evals - if eval_config.max_evals else None), - master=eval_config.eval_master, - save_graph=eval_config.save_graph, - save_graph_dir=(eval_dir if eval_config.save_graph else ''), - losses_dict=losses_dict, inter_op=inter_op, intra_op=intra_op) - - return metrics diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/run_frozen_graph_rcnn.py b/models/object_detection/tensorflow/faster_rcnn/inference/fp32/run_frozen_graph_rcnn.py deleted file mode 100644 index b4629770a..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/fp32/run_frozen_graph_rcnn.py +++ /dev/null @@ -1,222 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import numpy as np -import os -import sys -import tensorflow as tf - -import matplotlib -matplotlib.use('Agg') -from matplotlib import pyplot as plt -from PIL import Image -import time -import argparse -from tensorflow.python.client import timeline - - -parser = argparse.ArgumentParser() -parser.add_argument('-g', '--graph', help='Path to input graph to run', type=str, required=True) -parser.add_argument('-d', '--dataset', help='Full Path to input dataset to run', type=str, required=True) -parser.add_argument('-s', '--single_image', help='Run for single image onle, if false, run for the whole dataset', action='store_true') -parser.add_argument('-x', '--single_socket', help='Run for single socket, if false, run both sockets', action='store_true') -parser.add_argument('-v', '--visualize', help='Whether to visulize the output image', action='store_true' ) -parser.add_argument('-t', '--timeline', help='Output file name for TF timeline', type=str, default=None) -parser.add_argument('-e', '--evaluate_tensor', help='Full tensor name to evaluate', type=str, default=None) -parser.add_argument('-p', '--print_accuracy', help='Print accuracy results', action='store_true') -parser.add_argument('-n', '--steps', help='Run for n number of steps', type=int, default=None) -parser.add_argument('--num-inter-threads', help='Num inter threads', type=int, default=None, dest="num_inter_threads") -parser.add_argument('--num-intra-threads', help='Num intra threads', type=int, default=None, dest="num_intra_threads") - -args = parser.parse_args() - - -# This is needed since the notebook is stored in the object_detection folder. -sys.path.append("..") - -# This is needed to display the images. -if args.visualize and args.single_image and args.evaluate_tensor is None: - from IPython import get_ipython - get_ipython().run_line_magic('matplotlib', 'tk') - -import importlib -label_map_util = importlib.import_module('..label_map_util', package='object_detection.utils.label_map_util') -vis_util = importlib.import_module('..visualization_utils', package='object_detection.utils.visualization_utils') - - -# List of the strings that is used to add correct label for each box. -PATH_TO_LABELS = os.path.join('object_detection/data', 'mscoco_label_map.pbtxt') - -NUM_CLASSES = 90 - -detection_graph = tf.Graph() -with detection_graph.as_default(): - od_graph_def = tf.GraphDef() - with tf.gfile.GFile(args.graph, 'rb') as fid: - serialized_graph = fid.read() - od_graph_def.ParseFromString(serialized_graph) - tf.import_graph_def(od_graph_def, name='') - -label_map = label_map_util.load_labelmap(PATH_TO_LABELS) -categories = label_map_util.convert_label_map_to_categories( - label_map, max_num_classes=NUM_CLASSES, use_display_name=True) -category_index = label_map_util.create_category_index(categories) - - -def load_image_into_numpy_array(image): - (im_width, im_height) = image.size - if image.mode == 'L': - np_image = np.array(image.getdata()).reshape( - (im_height, im_width)).astype(np.uint8) - return np.stack((np_image,)*3, -1) - else: - return np.array(image.getdata()).reshape( - (im_height, im_width, 3)).astype(np.uint8) - - -if args.single_image: - TEST_IMAGE_PATHS = [args.dataset + "/000000578871.jpg"] -else: - PATH_TO_TEST_IMAGES_DIR = args.dataset - print(PATH_TO_TEST_IMAGES_DIR) - TEST_IMAGE_PATHS = [] - for root, dirs, files in os.walk(PATH_TO_TEST_IMAGES_DIR): - for file in files: - TEST_IMAGE_PATHS.append(os.path.join(PATH_TO_TEST_IMAGES_DIR, file)) - -# Size, in inches, of the output images. 
-IMAGE_SIZE = (12, 8) - - -def run_inference_for_single_image(graph): - sess_config = tf.ConfigProto() - sess_config.intra_op_parallelism_threads = args.num_intra_threads - sess_config.inter_op_parallelism_threads = args.num_inter_threads - if not os.environ.get("OMP_NUM_THREADS"): - os.environ["OMP_NUM_THREADS"] = args.num_intra_threads - - with graph.as_default(): - with tf.Session(config=sess_config) as sess: - # Get handles to input and output tensors - tensor_dict = {} - if args.evaluate_tensor is None: - ops = tf.get_default_graph().get_operations() - all_tensor_names = {output.name for op in ops for output in op.outputs} - for key in [ - 'num_detections', 'detection_boxes', 'detection_scores', - 'detection_classes' - ]: - tensor_name = key + ':0' - if tensor_name in all_tensor_names: - tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( - tensor_name) - else: - our_op = tf.get_default_graph().get_operation_by_name(args.evaluate_tensor) - tensor_names = our_op.outputs - list_ops = [] - for i, tensor in enumerate(tensor_names): - list_ops.append(tensor.name) - tensor_dict[args.evaluate_tensor] = list_ops - - if args.timeline is not None: - run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - run_metadata = tf.RunMetadata() - total_duration = 0 - if args.steps is not None and args.steps < 20: - print("This model has set 20 warm-up steps, please enlarge your steps number") - for index, image_path in enumerate(TEST_IMAGE_PATHS): - image = Image.open(image_path) - # the array based representation of the image will be used later in order to prepare the - # result image with boxes and labels on it. - image_np = load_image_into_numpy_array(image) - image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') - - # Run inference - start_time = time.time() - if args.timeline is not None: - output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image_np, 0)}, - options=run_options, run_metadata=run_metadata) - else: - output_dict = sess.run(tensor_dict, - feed_dict={image_tensor: np.expand_dims(image_np, 0)}) - step_duration = time.time() - start_time - if index > 20: - total_duration = total_duration + step_duration - - if args.single_image: - if index == 0: - print('Avg. Duration per Step:' + str(total_duration / 1)) - else: - if index % 10 == 0: - print('Step ' + str(index) + ': ' + str(step_duration) + ' seconds') - if index == 4999: - print('Avg. Duration per Step:' + str(total_duration / 5000)) - - if args.steps is not None: - if args.single_image: - sys.exit("single_iamge and steps cannot be both enabled!") - elif index == (args.steps - 1): - print('Avg. 
Duration per Step:' + - str(total_duration / (args.steps - 20))) - break - - if args.timeline is not None: - trace = timeline.Timeline(step_stats=run_metadata.step_stats) - with open('tl-' + time.strftime("%Y%m%d-%H%M%S") + '-' + args.timeline, 'w') as file: - file.write(trace.generate_chrome_trace_format(show_memory=False)) - - - if args.evaluate_tensor is not None: - for tensor in output_dict[args.evaluate_tensor]: - print(tensor.shape) - return None, None - - # all outputs are float32 numpy arrays, so convert types as appropriate - output_dict['num_detections'] = int(output_dict['num_detections'][0]) - output_dict['detection_classes'] = output_dict[ - 'detection_classes'][0].astype(np.uint8) - output_dict['detection_boxes'] = output_dict['detection_boxes'][0] - output_dict['detection_scores'] = output_dict['detection_scores'][0] - - if args.print_accuracy: - print('num_detections:\n' + str(output_dict['num_detections'])) - print('detection_classes:\n' + str(output_dict['detection_classes'])) - print('detection_boxes:\n' + str(output_dict['detection_boxes'])) - print('detection_scores:\n' + str(output_dict['detection_scores'])) - - if 'detection_masks' in output_dict: - output_dict['detection_masks'] = output_dict['detection_masks'][0] - return output_dict, image_np - - -# Actual detection. -output_dict, image_np = run_inference_for_single_image(detection_graph) - -# Visualization of the results of a detection. -if args.visualize and args.single_image and args.evaluate_tensor is None: - vis_util.visualize_boxes_and_labels_on_image_array( - image_np, - output_dict['detection_boxes'], - output_dict['detection_classes'], - output_dict['detection_scores'], - category_index, - instance_masks=output_dict.get('detection_masks'), - use_normalized_coordinates=True, - line_thickness=8) - plt.figure(figsize=IMAGE_SIZE) - plt.imshow(image_np) diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/int8/coco_int8.sh b/models/object_detection/tensorflow/faster_rcnn/inference/int8/coco_int8.sh deleted file mode 100755 index c83de0b58..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/int8/coco_int8.sh +++ /dev/null @@ -1,50 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
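# This helper chains two object_detection entry points: infer_detections runs the frozen
# graph over the input TFRecord and writes ${SPLIT}_detections.tfrecord, then
# offline_eval_map_corloc reads that file together with the generated
# *_input_config.pbtxt and *_eval_config.pbtxt to report COCO detection metrics.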
-# - -SPLIT=faster-rcnn #change to your favorite name - -FROZEN_GRAPH=$1 -TF_RECORD_FILES=$2 -TF_MODEL_ROOT=$3 - -export PYTHONPATH=${PYTHONPATH}:${TF_MODEL_ROOT}/research:${TF_MODEL_ROOT}/research/object_detection - -python -m object_detection.inference.infer_detections \ - --input_tfrecord_paths=${TF_RECORD_FILES} \ - --output_tfrecord_path=${SPLIT}_detections.tfrecord \ - --inference_graph=${FROZEN_GRAPH} \ - --discard_image_pixels=True - - -mkdir -p ${SPLIT}_eval_metrics - -echo " -label_map_path: '${TF_MODEL_ROOT}/research/object_detection/data/mscoco_label_map.pbtxt' -tf_record_input_reader: { input_path: '${SPLIT}_detections.tfrecord' } -" > ${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt - -echo " -metrics_set: 'coco_detection_metrics' -" > ${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt - - -python -m object_detection.metrics.offline_eval_map_corloc \ - --eval_dir=${SPLIT}_eval_metrics \ - --eval_config_path=${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt \ - --input_config_path=${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt - diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/int8/run_frozen_graph_rcnn.py b/models/object_detection/tensorflow/faster_rcnn/inference/int8/run_frozen_graph_rcnn.py deleted file mode 100644 index b4629770a..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/int8/run_frozen_graph_rcnn.py +++ /dev/null @@ -1,222 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
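The --timeline option in the script below wraps sess.run with full tracing and dumps a Chrome-trace JSON. A minimal sketch of that mechanism on a toy graph (the constant graph and the output file name are placeholders):

    import tensorflow as tf
    from tensorflow.python.client import timeline

    a = tf.constant(3.0)
    b = a * 2.0

    # Ask the runtime to record per-op timing for this run.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()

    with tf.Session() as sess:
        sess.run(b, options=run_options, run_metadata=run_metadata)

    # Convert the collected step stats into a trace viewable at chrome://tracing.
    trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    with open('timeline.json', 'w') as f:
        f.write(trace.generate_chrome_trace_format(show_memory=False))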
-# - -import numpy as np -import os -import sys -import tensorflow as tf - -import matplotlib -matplotlib.use('Agg') -from matplotlib import pyplot as plt -from PIL import Image -import time -import argparse -from tensorflow.python.client import timeline - - -parser = argparse.ArgumentParser() -parser.add_argument('-g', '--graph', help='Path to input graph to run', type=str, required=True) -parser.add_argument('-d', '--dataset', help='Full Path to input dataset to run', type=str, required=True) -parser.add_argument('-s', '--single_image', help='Run for single image onle, if false, run for the whole dataset', action='store_true') -parser.add_argument('-x', '--single_socket', help='Run for single socket, if false, run both sockets', action='store_true') -parser.add_argument('-v', '--visualize', help='Whether to visulize the output image', action='store_true' ) -parser.add_argument('-t', '--timeline', help='Output file name for TF timeline', type=str, default=None) -parser.add_argument('-e', '--evaluate_tensor', help='Full tensor name to evaluate', type=str, default=None) -parser.add_argument('-p', '--print_accuracy', help='Print accuracy results', action='store_true') -parser.add_argument('-n', '--steps', help='Run for n number of steps', type=int, default=None) -parser.add_argument('--num-inter-threads', help='Num inter threads', type=int, default=None, dest="num_inter_threads") -parser.add_argument('--num-intra-threads', help='Num intra threads', type=int, default=None, dest="num_intra_threads") - -args = parser.parse_args() - - -# This is needed since the notebook is stored in the object_detection folder. -sys.path.append("..") - -# This is needed to display the images. -if args.visualize and args.single_image and args.evaluate_tensor is None: - from IPython import get_ipython - get_ipython().run_line_magic('matplotlib', 'tk') - -import importlib -label_map_util = importlib.import_module('..label_map_util', package='object_detection.utils.label_map_util') -vis_util = importlib.import_module('..visualization_utils', package='object_detection.utils.visualization_utils') - - -# List of the strings that is used to add correct label for each box. -PATH_TO_LABELS = os.path.join('object_detection/data', 'mscoco_label_map.pbtxt') - -NUM_CLASSES = 90 - -detection_graph = tf.Graph() -with detection_graph.as_default(): - od_graph_def = tf.GraphDef() - with tf.gfile.GFile(args.graph, 'rb') as fid: - serialized_graph = fid.read() - od_graph_def.ParseFromString(serialized_graph) - tf.import_graph_def(od_graph_def, name='') - -label_map = label_map_util.load_labelmap(PATH_TO_LABELS) -categories = label_map_util.convert_label_map_to_categories( - label_map, max_num_classes=NUM_CLASSES, use_display_name=True) -category_index = label_map_util.create_category_index(categories) - - -def load_image_into_numpy_array(image): - (im_width, im_height) = image.size - if image.mode == 'L': - np_image = np.array(image.getdata()).reshape( - (im_height, im_width)).astype(np.uint8) - return np.stack((np_image,)*3, -1) - else: - return np.array(image.getdata()).reshape( - (im_height, im_width, 3)).astype(np.uint8) - - -if args.single_image: - TEST_IMAGE_PATHS = [args.dataset + "/000000578871.jpg"] -else: - PATH_TO_TEST_IMAGES_DIR = args.dataset - print(PATH_TO_TEST_IMAGES_DIR) - TEST_IMAGE_PATHS = [] - for root, dirs, files in os.walk(PATH_TO_TEST_IMAGES_DIR): - for file in files: - TEST_IMAGE_PATHS.append(os.path.join(PATH_TO_TEST_IMAGES_DIR, file)) - -# Size, in inches, of the output images. 
-IMAGE_SIZE = (12, 8) - - -def run_inference_for_single_image(graph): - sess_config = tf.ConfigProto() - sess_config.intra_op_parallelism_threads = args.num_intra_threads - sess_config.inter_op_parallelism_threads = args.num_inter_threads - if not os.environ.get("OMP_NUM_THREADS"): - os.environ["OMP_NUM_THREADS"] = args.num_intra_threads - - with graph.as_default(): - with tf.Session(config=sess_config) as sess: - # Get handles to input and output tensors - tensor_dict = {} - if args.evaluate_tensor is None: - ops = tf.get_default_graph().get_operations() - all_tensor_names = {output.name for op in ops for output in op.outputs} - for key in [ - 'num_detections', 'detection_boxes', 'detection_scores', - 'detection_classes' - ]: - tensor_name = key + ':0' - if tensor_name in all_tensor_names: - tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( - tensor_name) - else: - our_op = tf.get_default_graph().get_operation_by_name(args.evaluate_tensor) - tensor_names = our_op.outputs - list_ops = [] - for i, tensor in enumerate(tensor_names): - list_ops.append(tensor.name) - tensor_dict[args.evaluate_tensor] = list_ops - - if args.timeline is not None: - run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - run_metadata = tf.RunMetadata() - total_duration = 0 - if args.steps is not None and args.steps < 20: - print("This model has set 20 warm-up steps, please enlarge your steps number") - for index, image_path in enumerate(TEST_IMAGE_PATHS): - image = Image.open(image_path) - # the array based representation of the image will be used later in order to prepare the - # result image with boxes and labels on it. - image_np = load_image_into_numpy_array(image) - image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') - - # Run inference - start_time = time.time() - if args.timeline is not None: - output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image_np, 0)}, - options=run_options, run_metadata=run_metadata) - else: - output_dict = sess.run(tensor_dict, - feed_dict={image_tensor: np.expand_dims(image_np, 0)}) - step_duration = time.time() - start_time - if index > 20: - total_duration = total_duration + step_duration - - if args.single_image: - if index == 0: - print('Avg. Duration per Step:' + str(total_duration / 1)) - else: - if index % 10 == 0: - print('Step ' + str(index) + ': ' + str(step_duration) + ' seconds') - if index == 4999: - print('Avg. Duration per Step:' + str(total_duration / 5000)) - - if args.steps is not None: - if args.single_image: - sys.exit("single_iamge and steps cannot be both enabled!") - elif index == (args.steps - 1): - print('Avg. 
Duration per Step:' + - str(total_duration / (args.steps - 20))) - break - - if args.timeline is not None: - trace = timeline.Timeline(step_stats=run_metadata.step_stats) - with open('tl-' + time.strftime("%Y%m%d-%H%M%S") + '-' + args.timeline, 'w') as file: - file.write(trace.generate_chrome_trace_format(show_memory=False)) - - - if args.evaluate_tensor is not None: - for tensor in output_dict[args.evaluate_tensor]: - print(tensor.shape) - return None, None - - # all outputs are float32 numpy arrays, so convert types as appropriate - output_dict['num_detections'] = int(output_dict['num_detections'][0]) - output_dict['detection_classes'] = output_dict[ - 'detection_classes'][0].astype(np.uint8) - output_dict['detection_boxes'] = output_dict['detection_boxes'][0] - output_dict['detection_scores'] = output_dict['detection_scores'][0] - - if args.print_accuracy: - print('num_detections:\n' + str(output_dict['num_detections'])) - print('detection_classes:\n' + str(output_dict['detection_classes'])) - print('detection_boxes:\n' + str(output_dict['detection_boxes'])) - print('detection_scores:\n' + str(output_dict['detection_scores'])) - - if 'detection_masks' in output_dict: - output_dict['detection_masks'] = output_dict['detection_masks'][0] - return output_dict, image_np - - -# Actual detection. -output_dict, image_np = run_inference_for_single_image(detection_graph) - -# Visualization of the results of a detection. -if args.visualize and args.single_image and args.evaluate_tensor is None: - vis_util.visualize_boxes_and_labels_on_image_array( - image_np, - output_dict['detection_boxes'], - output_dict['detection_classes'], - output_dict['detection_scores'], - category_index, - instance_masks=output_dict.get('detection_masks'), - use_normalized_coordinates=True, - line_thickness=8) - plt.figure(figsize=IMAGE_SIZE) - plt.imshow(image_np) diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/tf_models.patch b/models/object_detection/tensorflow/faster_rcnn/inference/tf_models.patch deleted file mode 100644 index 64c44592b..000000000 --- a/models/object_detection/tensorflow/faster_rcnn/inference/tf_models.patch +++ /dev/null @@ -1,68 +0,0 @@ -diff --git a/research/object_detection/inference/detection_inference.py b/research/object_detection/inference/detection_inference.py -old mode 100644 -new mode 100755 -index dc66686f..4d459a55 ---- a/research/object_detection/inference/detection_inference.py -+++ b/research/object_detection/inference/detection_inference.py -@@ -15,9 +15,12 @@ - """Utility functions for detection inference.""" - from __future__ import division - -+import os -+ - import tensorflow as tf - - from object_detection.core import standard_fields -+from object_detection.utils import label_map_util - - - def build_input(tfrecord_paths): -@@ -65,7 +68,7 @@ def build_inference_graph(image_tensor, inference_graph_path): - detected_labels_tensor: Detected labels. Int64 tensor, - shape=[num_detections] - """ -- with tf.gfile.Open(inference_graph_path, 'r') as graph_def_file: -+ with tf.gfile.Open(inference_graph_path, 'rb') as graph_def_file: - graph_content = graph_def_file.read() - graph_def = tf.GraphDef() - graph_def.MergeFromString(graph_content) -@@ -134,6 +137,13 @@ def infer_detections_and_add_to_example( - detection_bbox_xmax].float_list.value[:] = detected_boxes[3] - feature[standard_fields.TfExampleFields. 
- detection_class_label].int64_list.value[:] = detected_classes -+ label_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/mscoco_label_map.pbtxt') -+ if feature[standard_fields.TfExampleFields.object_class_text].HasField("bytes_list"): -+ cl = feature[standard_fields.TfExampleFields.object_class_text].bytes_list.value -+ cl = [x if type(x) == 'str' else x.decode('utf-8') for x in cl] -+ lm = label_map_util.get_label_map_dict(label_file, use_display_name = True) -+ label_id = [lm[x] for x in cl] -+ feature[standard_fields.TfExampleFields.object_class_label].int64_list.value[:] = label_id - - if discard_image_pixels: - del feature[standard_fields.TfExampleFields.image_encoded] -diff --git a/research/object_detection/metrics/tf_example_parser.py b/research/object_detection/metrics/tf_example_parser.py -index 9a5f130f..fa361bf5 100644 ---- a/research/object_detection/metrics/tf_example_parser.py -+++ b/research/object_detection/metrics/tf_example_parser.py -@@ -44,7 +44,7 @@ class StringParser(data_parser.DataToNumpyParser): - self.field_name = field_name - - def parse(self, tf_example): -- return "".join(tf_example.features.feature[self.field_name] -+ return b"".join(tf_example.features.feature[self.field_name] - .bytes_list.value) if tf_example.features.feature[ - self.field_name].HasField("bytes_list") else None - -diff --git a/research/object_detection/utils/object_detection_evaluation.py b/research/object_detection/utils/object_detection_evaluation.py -index 5826c581..450090f0 100644 ---- a/research/object_detection/utils/object_detection_evaluation.py -+++ b/research/object_detection/utils/object_detection_evaluation.py -@@ -304,7 +304,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator): - if idx + self._label_id_offset in category_index: - category_name = category_index[idx + self._label_id_offset]['name'] - try: -- category_name = unicode(category_name, 'utf-8') -+ category_name = str(category_name, 'utf-8') - except TypeError: - pass - category_name = unicodedata.normalize( diff --git a/models/object_detection/tensorflow/rfcn/__init__.py b/models/object_detection/tensorflow/rfcn/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/object_detection/tensorflow/rfcn/__init__.py +++ b/models/object_detection/tensorflow/rfcn/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/object_detection/tensorflow/rfcn/inference/__init__.py b/models/object_detection/tensorflow/rfcn/inference/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/object_detection/tensorflow/rfcn/inference/__init__.py +++ b/models/object_detection/tensorflow/rfcn/inference/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/object_detection/tensorflow/rfcn/inference/fp32/__init__.py b/models/object_detection/tensorflow/rfcn/inference/fp32/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/object_detection/tensorflow/rfcn/inference/fp32/__init__.py +++ b/models/object_detection/tensorflow/rfcn/inference/fp32/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/object_detection/tensorflow/rfcn/inference/fp32/coco_mAP.sh b/models/object_detection/tensorflow/rfcn/inference/fp32/coco_mAP.sh index 13c0ef177..ba2c8dc4a 100755 --- a/models/object_detection/tensorflow/rfcn/inference/fp32/coco_mAP.sh +++ b/models/object_detection/tensorflow/rfcn/inference/fp32/coco_mAP.sh @@ -20,12 +20,14 @@ # ########## Variables to be defined -FROZEN_GRAPH=$1 -TF_RECORD_FILE=$2 -TF_MODELS_ROOT=$3 -SPLIT=$4 -SPLIT="rfcn-${SPLIT}" +SPLIT=${SPLIT:-"RFCN_final_graph"} #change to your 
favorite room +FROZEN_GRAPH=${FROZEN_GRAPH:-"/in_graph/frozen_inference_graph.pb"} +TF_RECORD_FILE=${TF_RECORD_FILE:-"/dataset/coco_val.record"} +if [[ -z ${TF_MODELS_ROOT} ]] || [[ ! -d ${TF_MODELS_ROOT} ]]; then + echo "You must specify the root of the tensorflow/models source tree in the TF_MODELS_ROOT environment variable." + exit 1 +fi export PYTHONPATH=$PYTHONPATH:${TF_MODELS_ROOT}/research:${TF_MODELS_ROOT}/research/slim:${TF_MODELS_ROOT}/research/object_detection @@ -36,10 +38,10 @@ echo "PYTHONPATH=${PYTHONPATH}" echo "TF_MODELS_ROOT=$TF_MODELS_ROOT" python -m object_detection.inference.infer_detections \ - --input_tfrecord_paths=${TF_RECORD_FILE} \ + --input_tfrecord_paths=$TF_RECORD_FILE \ --output_tfrecord_path=${SPLIT}_detections.tfrecord \ - --inference_graph=${FROZEN_GRAPH} \ - --discard_image_pixels=True + --inference_graph=$FROZEN_GRAPH \ + --discard_image_pixels mkdir -p ${SPLIT}_eval_metrics diff --git a/models/object_detection/tensorflow/rfcn/inference/fp32/dataset_util.py b/models/object_detection/tensorflow/rfcn/inference/fp32/dataset_util.py index d21ff8bc7..d607ba372 100644 --- a/models/object_detection/tensorflow/rfcn/inference/fp32/dataset_util.py +++ b/models/object_detection/tensorflow/rfcn/inference/fp32/dataset_util.py @@ -39,122 +39,122 @@ def int64_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) def int64_list_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) def bytes_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) def bytes_list_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) + return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) def float_list_feature(value): - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) def read_examples_list(path): - """Read list of training or validation examples. + """Read list of training or validation examples. - The file is assumed to contain a single example per line where the first - token in the line is an identifier that allows us to find the image and - annotation xml for that example. + The file is assumed to contain a single example per line where the first + token in the line is an identifier that allows us to find the image and + annotation xml for that example. - For example, the line: - xyz 3 - would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored). + For example, the line: + xyz 3 + would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored). - Args: - path: absolute path to examples list file. + Args: + path: absolute path to examples list file. - Returns: - list of example identifiers (strings). - """ - with tf.gfile.GFile(path) as fid: - lines = fid.readlines() - return [line.strip().split(' ')[0] for line in lines] + Returns: + list of example identifiers (strings). + """ + with tf.io.gfile.GFile(path) as fid: + lines = fid.readlines() + return [line.strip().split(' ')[0] for line in lines] def recursive_parse_xml_to_dict(xml): - """Recursively parses XML contents to python dict. - - We assume that `object` tags are the only ones that can appear - multiple times at the same level of a tree. 
- - Args: - xml: xml tree obtained by parsing XML file contents using lxml.etree - - Returns: - Python dictionary holding XML contents. - """ - if not xml: - return {xml.tag: xml.text} - result = {} - for child in xml: - child_result = recursive_parse_xml_to_dict(child) - if child.tag != 'object': - result[child.tag] = child_result[child.tag] - else: - if child.tag not in result: - result[child.tag] = [] - result[child.tag].append(child_result[child.tag]) - return {xml.tag: result} + """Recursively parses XML contents to python dict. + + We assume that `object` tags are the only ones that can appear + multiple times at the same level of a tree. + + Args: + xml: xml tree obtained by parsing XML file contents using lxml.etree + + Returns: + Python dictionary holding XML contents. + """ + if not xml: + return {xml.tag: xml.text} + result = {} + for child in xml: + child_result = recursive_parse_xml_to_dict(child) + if child.tag != 'object': + result[child.tag] = child_result[child.tag] + else: + if child.tag not in result: + result[child.tag] = [] + result[child.tag].append(child_result[child.tag]) + return {xml.tag: result} def make_initializable_iterator(dataset): - """Creates an iterator, and initializes tables. + """Creates an iterator, and initializes tables. - This is useful in cases where make_one_shot_iterator wouldn't work because - the graph contains a hash table that needs to be initialized. + This is useful in cases where make_one_shot_iterator wouldn't work because + the graph contains a hash table that needs to be initialized. - Args: - dataset: A `tf.data.Dataset` object. + Args: + dataset: A `tf.data.Dataset` object. - Returns: - A `tf.data.Iterator`. - """ - iterator = dataset.make_initializable_iterator() - tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer) - return iterator + Returns: + A `tf.data.Iterator`. + """ + iterator = tf.compat.v1.data.make_initializable_iterator(dataset) + tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.TABLE_INITIALIZERS, iterator.initializer) + return iterator def read_dataset(file_read_func, decode_func, input_files, config): - """Reads a dataset, and handles repetition and shuffling. - - Args: - file_read_func: Function to use in tf.data.Dataset.interleave, to read - every individual file into a tf.data.Dataset. - decode_func: Function to apply to all records. - input_files: A list of file paths to read. - config: A input_reader_builder.InputReader object. - - Returns: - A tf.data.Dataset based on config. - """ - # Shard, shuffle, and read files. - filenames = tf.concat([tf.matching_files(pattern) for pattern in input_files], - 0) - filename_dataset = tf.data.Dataset.from_tensor_slices(filenames) - if config.shuffle: - filename_dataset = filename_dataset.shuffle( - config.filenames_shuffle_buffer_size) - elif config.num_readers > 1: - tf.logging.warning('`shuffle` is false, but the input data stream is ' - 'still slightly shuffled since `num_readers` > 1.') - - filename_dataset = filename_dataset.repeat(config.num_epochs or None) - - records_dataset = filename_dataset.apply( - tf.contrib.data.parallel_interleave( - file_read_func, cycle_length=config.num_readers, - block_length=config.read_block_length, sloppy=True)) - if config.shuffle: - records_dataset.shuffle(config.shuffle_buffer_size) - tensor_dataset = records_dataset.map( - decode_func, num_parallel_calls=config.num_parallel_map_calls) - return tensor_dataset.prefetch(config.prefetch_size) + """Reads a dataset, and handles repetition and shuffling. 
+ + Args: + file_read_func: Function to use in tf.data.Dataset.interleave, to read + every individual file into a tf.data.Dataset. + decode_func: Function to apply to all records. + input_files: A list of file paths to read. + config: A input_reader_builder.InputReader object. + + Returns: + A tf.data.Dataset based on config. + """ + # Shard, shuffle, and read files. + filenames = tf.concat([tf.io.matching_files(pattern) for pattern in input_files], + 0) + filename_dataset = tf.data.Dataset.from_tensor_slices(filenames) + if config.shuffle: + filename_dataset = filename_dataset.shuffle( + config.filenames_shuffle_buffer_size) + elif config.num_readers > 1: + tf.compat.v1.logging.warning('`shuffle` is false, but the input data stream is ' + 'still slightly shuffled since `num_readers` > 1.') + + filename_dataset = filename_dataset.repeat(config.num_epochs or None) + + records_dataset = filename_dataset.apply( + tf.data.experimental.parallel_interleave( + file_read_func, cycle_length=config.num_readers, + block_length=config.read_block_length, sloppy=True)) + if config.shuffle: + records_dataset.shuffle(config.shuffle_buffer_size) + tensor_dataset = records_dataset.map( + decode_func, num_parallel_calls=config.num_parallel_map_calls) + return tensor_dataset.prefetch(config.prefetch_size) diff --git a/models/object_detection/tensorflow/rfcn/inference/fp32/eval.py b/models/object_detection/tensorflow/rfcn/inference/fp32/eval.py index 8f1c43f55..edc3ddfc9 100644 --- a/models/object_detection/tensorflow/rfcn/inference/fp32/eval.py +++ b/models/object_detection/tensorflow/rfcn/inference/fp32/eval.py @@ -63,7 +63,6 @@ --model_config_path=model_config.pbtxt \ --input_config_path=eval_input_config.pbtxt """ -import logging import functools import os import tensorflow as tf @@ -76,7 +75,8 @@ from object_detection.utils import label_map_util -tf.logging.set_verbosity(tf.logging.INFO) +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) +import logging # logging.basicConfig(level=logging.INFO) flags = tf.app.flags @@ -109,65 +109,65 @@ def main(unused_argv): - if (FLAGS.omp > 0): - if not os.environ.get("OMP_NUM_THREADS"): - logging.info('OMP_NUM_THREADS value= %d', FLAGS.omp) - os.environ["OMP_NUM_THREADS"] = str(FLAGS.omp) - if not os.environ.get("KMP_BLOCKTIME"): - logging.info('KMP_BLOCKTIME value= %d', FLAGS.blocktime) - os.environ["KMP_BLOCKTIME"] = str(FLAGS.blocktime) - if not os.environ.get("KMP_SETTINGS"): - os.environ["KMP_SETTINGS"] = "1" - # os.environ["KMP_AFFINITY"]= "granularity=fine,verbose,compact,1,0" - assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.' - assert FLAGS.eval_dir, '`eval_dir` is missing.' 
- tf.gfile.MakeDirs(FLAGS.eval_dir) - if FLAGS.pipeline_config_path: - configs = config_util.get_configs_from_pipeline_file( - FLAGS.pipeline_config_path) - tf.gfile.Copy(FLAGS.pipeline_config_path, - os.path.join(FLAGS.eval_dir, 'pipeline.config'), - overwrite=True) - else: - configs = config_util.get_configs_from_multiple_files( - model_config_path=FLAGS.model_config_path, - eval_config_path=FLAGS.eval_config_path, - eval_input_config_path=FLAGS.input_config_path) - for name, config in [('model.config', FLAGS.model_config_path), - ('eval.config', FLAGS.eval_config_path), - ('input.config', FLAGS.input_config_path)]: - tf.gfile.Copy(config, - os.path.join(FLAGS.eval_dir, name), - overwrite=True) - - model_config = configs['model'] - eval_config = configs['eval_config'] - input_config = configs['eval_input_config'] - if FLAGS.eval_training_data: - input_config = configs['train_input_config'] - - model_fn = functools.partial( - model_builder.build, - model_config=model_config, - is_training=False) - - def get_next(config): - return dataset_util.make_initializable_iterator( - dataset_builder.build(config)).get_next() - - create_input_dict_fn = functools.partial(get_next, input_config) - - label_map = label_map_util.load_labelmap(input_config.label_map_path) - max_num_classes = max([item.id for item in label_map.item]) - categories = label_map_util.convert_label_map_to_categories( - label_map, max_num_classes) - - if FLAGS.run_once: - eval_config.max_evals = 1 - - evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories, - FLAGS.checkpoint_dir, FLAGS.eval_dir, intra_op=FLAGS.intra_op, inter_op=FLAGS.inter_op) + if (FLAGS.omp > 0): + if not os.environ.get("OMP_NUM_THREADS"): + logging.info('OMP_NUM_THREADS value= %d', FLAGS.omp) + os.environ["OMP_NUM_THREADS"] = str(FLAGS.omp) + if not os.environ.get("KMP_BLOCKTIME"): + logging.info('KMP_BLOCKTIME value= %d', FLAGS.blocktime) + os.environ["KMP_BLOCKTIME"] = str(FLAGS.blocktime) + if not os.environ.get("KMP_SETTINGS"): + os.environ["KMP_SETTINGS"] = "1" + # os.environ["KMP_AFFINITY"]= "granularity=fine,verbose,compact,1,0" + assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.' + assert FLAGS.eval_dir, '`eval_dir` is missing.' 
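# Background on the threading knobs applied above (each is only set when absent from the
# environment): OMP_NUM_THREADS caps the OpenMP worker threads used by the MKL-DNN kernels,
# KMP_BLOCKTIME is how long (in ms) a thread spins after a parallel region before sleeping,
# and KMP_SETTINGS=1 makes the OpenMP runtime print its effective settings at startup.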
+ tf.io.gfile.makedirs(FLAGS.eval_dir) + if FLAGS.pipeline_config_path: + configs = config_util.get_configs_from_pipeline_file( + FLAGS.pipeline_config_path) + tf.io.gfile.copy(FLAGS.pipeline_config_path, + os.path.join(FLAGS.eval_dir, 'pipeline.config'), + overwrite=True) + else: + configs = config_util.get_configs_from_multiple_files( + model_config_path=FLAGS.model_config_path, + eval_config_path=FLAGS.eval_config_path, + eval_input_config_path=FLAGS.input_config_path) + for name, config in [('model.config', FLAGS.model_config_path), + ('eval.config', FLAGS.eval_config_path), + ('input.config', FLAGS.input_config_path)]: + tf.io.gfile.copy(config, + os.path.join(FLAGS.eval_dir, name), + overwrite=True) + + model_config = configs['model'] + eval_config = configs['eval_config'] + input_config = configs['eval_input_config'] + if FLAGS.eval_training_data: + input_config = configs['train_input_config'] + + model_fn = functools.partial( + model_builder.build, + model_config=model_config, + is_training=False) + + def get_next(config): + return tf.compat.v1.data.make_initializable_iterator( + dataset_builder.build(config)).get_next() + + create_input_dict_fn = functools.partial(get_next, input_config) + + label_map = label_map_util.load_labelmap(input_config.label_map_path) + max_num_classes = max([item.id for item in label_map.item]) + categories = label_map_util.convert_label_map_to_categories( + label_map, max_num_classes) + + if FLAGS.run_once: + eval_config.max_evals = 1 + + evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories, + FLAGS.checkpoint_dir, FLAGS.eval_dir, intra_op=FLAGS.intra_op, inter_op=FLAGS.inter_op) if __name__ == '__main__': - tf.app.run() + tf.compat.v1.app.run() diff --git a/models/object_detection/tensorflow/rfcn/inference/fp32/eval_util.py b/models/object_detection/tensorflow/rfcn/inference/fp32/eval_util.py index f065111c8..5f3741e3a 100644 --- a/models/object_detection/tensorflow/rfcn/inference/fp32/eval_util.py +++ b/models/object_detection/tensorflow/rfcn/inference/fp32/eval_util.py @@ -50,26 +50,23 @@ from object_detection.utils import ops from object_detection.utils import visualization_utils as vis_utils -slim = tf.contrib.slim - - def write_metrics(metrics, global_step, summary_dir): - """Write metrics to a summary directory. - - Args: - metrics: A dictionary containing metric names and values. - global_step: Global step at which the metrics are computed. - summary_dir: Directory to write tensorflow summaries to. - """ - logging.info('Writing metrics to tf summary.') - summary_writer = tf.summary.FileWriterCache.get(summary_dir) - for key in sorted(metrics): - summary = tf.Summary(value=[ - tf.Summary.Value(tag=key, simple_value=metrics[key]), - ]) - summary_writer.add_summary(summary, global_step) - logging.info('%s: %f', key, metrics[key]) - logging.info('Metrics written to tf summary.') + """Write metrics to a summary directory. + + Args: + metrics: A dictionary containing metric names and values. + global_step: Global step at which the metrics are computed. + summary_dir: Directory to write tensorflow summaries to.
+ """ + logging.info('Writing metrics to tf summary.') + summary_writer = tf.compat.v1.summary.FileWriterCache.get(summary_dir) + for key in sorted(metrics): + summary = tf.compat.v1.Summary(value=[ + tf.compat.v1.Summary.Value(tag=key, simple_value=metrics[key]), + ]) + summary_writer.add_summary(summary, global_step) + logging.info('%s: %f', key, metrics[key]) + logging.info('Metrics written to tf summary.') # TODO(rathodv): Add tests. @@ -87,133 +84,133 @@ def visualize_detection_results(result_dict, skip_scores=False, skip_labels=False, keep_image_id_for_visualization_export=False): - """Visualizes detection results and writes visualizations to image summaries. - - This function visualizes an image with its detected bounding boxes and writes - to image summaries which can be viewed on tensorboard. It optionally also - writes images to a directory. In the case of missing entry in the label map, - unknown class name in the visualization is shown as "N/A". - - Args: - result_dict: a dictionary holding groundtruth and detection - data corresponding to each image being evaluated. The following keys - are required: - 'original_image': a numpy array representing the image with shape - [1, height, width, 3] or [1, height, width, 1] - 'detection_boxes': a numpy array of shape [N, 4] - 'detection_scores': a numpy array of shape [N] - 'detection_classes': a numpy array of shape [N] - The following keys are optional: - 'groundtruth_boxes': a numpy array of shape [N, 4] - 'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2] - Detections are assumed to be provided in decreasing order of score and for - display, and we assume that scores are probabilities between 0 and 1. - tag: tensorboard tag (string) to associate with image. - global_step: global step at which the visualization are generated. - categories: a list of dictionaries representing all possible categories. - Each dict in this list has the following keys: - 'id': (required) an integer id uniquely identifying this category - 'name': (required) string representing category name - e.g., 'cat', 'dog', 'pizza' - 'supercategory': (optional) string representing the supercategory - e.g., 'animal', 'vehicle', 'food', etc - summary_dir: the output directory to which the image summaries are written. - export_dir: the output directory to which images are written. If this is - empty (default), then images are not exported. - agnostic_mode: boolean (default: False) controlling whether to evaluate in - class-agnostic mode or not. 
- show_groundtruth: boolean (default: False) controlling whether to show - groundtruth boxes in addition to detected boxes - groundtruth_box_visualization_color: box color for visualizing groundtruth - boxes - min_score_thresh: minimum score threshold for a box to be visualized - max_num_predictions: maximum number of detections to visualize - skip_scores: whether to skip score when drawing a single detection - skip_labels: whether to skip label when drawing a single detection - keep_image_id_for_visualization_export: whether to keep image identifier in - filename when exported to export_dir - Raises: - ValueError: if result_dict does not contain the expected keys (i.e., - 'original_image', 'detection_boxes', 'detection_scores', - 'detection_classes') - """ - detection_fields = fields.DetectionResultFields - input_fields = fields.InputDataFields - if not set([ - input_fields.original_image, - detection_fields.detection_boxes, - detection_fields.detection_scores, - detection_fields.detection_classes, - ]).issubset(set(result_dict.keys())): - raise ValueError('result_dict does not contain all expected keys.') - if show_groundtruth and input_fields.groundtruth_boxes not in result_dict: - raise ValueError('If show_groundtruth is enabled, result_dict must contain ' - 'groundtruth_boxes.') - logging.info('Creating detection visualizations.') - category_index = label_map_util.create_category_index(categories) - - image = np.squeeze(result_dict[input_fields.original_image], axis=0) - if image.shape[2] == 1: # If one channel image, repeat in RGB. - image = np.tile(image, [1, 1, 3]) - detection_boxes = result_dict[detection_fields.detection_boxes] - detection_scores = result_dict[detection_fields.detection_scores] - detection_classes = np.int32((result_dict[ - detection_fields.detection_classes])) - detection_keypoints = result_dict.get(detection_fields.detection_keypoints) - detection_masks = result_dict.get(detection_fields.detection_masks) - detection_boundaries = result_dict.get(detection_fields.detection_boundaries) - - # Plot groundtruth underneath detections - if show_groundtruth: - groundtruth_boxes = result_dict[input_fields.groundtruth_boxes] - groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints) - vis_utils.visualize_boxes_and_labels_on_image_array( - image=image, - boxes=groundtruth_boxes, - classes=None, - scores=None, - category_index=category_index, - keypoints=groundtruth_keypoints, - use_normalized_coordinates=False, - max_boxes_to_draw=None, - groundtruth_box_visualization_color=groundtruth_box_visualization_color) + """Visualizes detection results and writes visualizations to image summaries. + + This function visualizes an image with its detected bounding boxes and writes + to image summaries which can be viewed on tensorboard. It optionally also + writes images to a directory. In the case of missing entry in the label map, + unknown class name in the visualization is shown as "N/A". + + Args: + result_dict: a dictionary holding groundtruth and detection + data corresponding to each image being evaluated. 
The following keys + are required: + 'original_image': a numpy array representing the image with shape + [1, height, width, 3] or [1, height, width, 1] + 'detection_boxes': a numpy array of shape [N, 4] + 'detection_scores': a numpy array of shape [N] + 'detection_classes': a numpy array of shape [N] + The following keys are optional: + 'groundtruth_boxes': a numpy array of shape [N, 4] + 'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2] + Detections are assumed to be provided in decreasing order of score and for + display, and we assume that scores are probabilities between 0 and 1. + tag: tensorboard tag (string) to associate with image. + global_step: global step at which the visualization are generated. + categories: a list of dictionaries representing all possible categories. + Each dict in this list has the following keys: + 'id': (required) an integer id uniquely identifying this category + 'name': (required) string representing category name + e.g., 'cat', 'dog', 'pizza' + 'supercategory': (optional) string representing the supercategory + e.g., 'animal', 'vehicle', 'food', etc + summary_dir: the output directory to which the image summaries are written. + export_dir: the output directory to which images are written. If this is + empty (default), then images are not exported. + agnostic_mode: boolean (default: False) controlling whether to evaluate in + class-agnostic mode or not. + show_groundtruth: boolean (default: False) controlling whether to show + groundtruth boxes in addition to detected boxes + groundtruth_box_visualization_color: box color for visualizing groundtruth + boxes + min_score_thresh: minimum score threshold for a box to be visualized + max_num_predictions: maximum number of detections to visualize + skip_scores: whether to skip score when drawing a single detection + skip_labels: whether to skip label when drawing a single detection + keep_image_id_for_visualization_export: whether to keep image identifier in + filename when exported to export_dir + Raises: + ValueError: if result_dict does not contain the expected keys (i.e., + 'original_image', 'detection_boxes', 'detection_scores', + 'detection_classes') + """ + detection_fields = fields.DetectionResultFields + input_fields = fields.InputDataFields + if not set([ + input_fields.original_image, + detection_fields.detection_boxes, + detection_fields.detection_scores, + detection_fields.detection_classes, + ]).issubset(set(result_dict.keys())): + raise ValueError('result_dict does not contain all expected keys.') + if show_groundtruth and input_fields.groundtruth_boxes not in result_dict: + raise ValueError('If show_groundtruth is enabled, result_dict must contain ' + 'groundtruth_boxes.') + logging.info('Creating detection visualizations.') + category_index = label_map_util.create_category_index(categories) + + image = np.squeeze(result_dict[input_fields.original_image], axis=0) + if image.shape[2] == 1: # If one channel image, repeat in RGB. 
+ image = np.tile(image, [1, 1, 3]) + detection_boxes = result_dict[detection_fields.detection_boxes] + detection_scores = result_dict[detection_fields.detection_scores] + detection_classes = np.int32((result_dict[ + detection_fields.detection_classes])) + detection_keypoints = result_dict.get(detection_fields.detection_keypoints) + detection_masks = result_dict.get(detection_fields.detection_masks) + detection_boundaries = result_dict.get(detection_fields.detection_boundaries) + + # Plot groundtruth underneath detections + if show_groundtruth: + groundtruth_boxes = result_dict[input_fields.groundtruth_boxes] + groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints) vis_utils.visualize_boxes_and_labels_on_image_array( - image, - detection_boxes, - detection_classes, - detection_scores, - category_index, - instance_masks=detection_masks, - instance_boundaries=detection_boundaries, - keypoints=detection_keypoints, + image=image, + boxes=groundtruth_boxes, + classes=None, + scores=None, + category_index=category_index, + keypoints=groundtruth_keypoints, use_normalized_coordinates=False, - max_boxes_to_draw=max_num_predictions, - min_score_thresh=min_score_thresh, - agnostic_mode=agnostic_mode, - skip_scores=skip_scores, - skip_labels=skip_labels) - - if export_dir: - if keep_image_id_for_visualization_export and result_dict[fields. - InputDataFields() - .key]: - export_path = os.path.join(export_dir, 'export-{}-{}.png'.format( - tag, result_dict[fields.InputDataFields().key])) - else: - export_path = os.path.join(export_dir, 'export-{}.png'.format(tag)) - vis_utils.save_image_array_as_png(image, export_path) - - summary = tf.Summary(value=[ - tf.Summary.Value( - tag=tag, - image=tf.Summary.Image( - encoded_image_string=vis_utils.encode_image_array_as_png_str( - image))) - ]) - summary_writer = tf.summary.FileWriterCache.get(summary_dir) - summary_writer.add_summary(summary, global_step) + max_boxes_to_draw=None, + groundtruth_box_visualization_color=groundtruth_box_visualization_color) + vis_utils.visualize_boxes_and_labels_on_image_array( + image, + detection_boxes, + detection_classes, + detection_scores, + category_index, + instance_masks=detection_masks, + instance_boundaries=detection_boundaries, + keypoints=detection_keypoints, + use_normalized_coordinates=False, + max_boxes_to_draw=max_num_predictions, + min_score_thresh=min_score_thresh, + agnostic_mode=agnostic_mode, + skip_scores=skip_scores, + skip_labels=skip_labels) + + if export_dir: + if keep_image_id_for_visualization_export and result_dict[fields. + InputDataFields() + .key]: + export_path = os.path.join(export_dir, 'export-{}-{}.png'.format( + tag, result_dict[fields.InputDataFields().key])) + else: + export_path = os.path.join(export_dir, 'export-{}.png'.format(tag)) + vis_utils.save_image_array_as_png(image, export_path) - logging.info('Detection visualizations written to summary with tag %s.', tag) + summary = tf.compat.v1.Summary(value=[ + tf.compat.v1.Summary.Value( + tag=tag, + image=tf.compat.v1.Summary.Image( + encoded_image_string=vis_utils.encode_image_array_as_png_str( + image))) + ]) + summary_writer = tf.compat.v1.summary.FileWriterCache.get(summary_dir) + summary_writer.add_summary(summary, global_step) + + logging.info('Detection visualizations written to summary with tag %s.', tag) def _run_checkpoint_once(tensor_dict, @@ -229,142 +226,143 @@ def _run_checkpoint_once(tensor_dict, losses_dict=None, intra_op=0, inter_op=0): - """Evaluates metrics defined in evaluators and returns summaries. 
- - This function loads the latest checkpoint in checkpoint_dirs and evaluates - all metrics defined in evaluators. The metrics are processed in batch by the - batch_processor. - - Args: - tensor_dict: a dictionary holding tensors representing a batch of detections - and corresponding groundtruth annotations. - evaluators: a list of object of type DetectionEvaluator to be used for - evaluation. Note that the metric names produced by different evaluators - must be unique. - batch_processor: a function taking four arguments: - 1. tensor_dict: the same tensor_dict that is passed in as the first - argument to this function. - 2. sess: a tensorflow session - 3. batch_index: an integer representing the index of the batch amongst - all batches - By default, batch_processor is None, which defaults to running: - return sess.run(tensor_dict) - To skip an image, it suffices to return an empty dictionary in place of - result_dict. - checkpoint_dirs: list of directories to load into an EnsembleModel. If it - has only one directory, EnsembleModel will not be used -- - a DetectionModel - will be instantiated directly. Not used if restore_fn is set. - variables_to_restore: None, or a dictionary mapping variable names found in - a checkpoint to model variables. The dictionary would normally be - generated by creating a tf.train.ExponentialMovingAverage object and - calling its variables_to_restore() method. Not used if restore_fn is set. - restore_fn: None, or a function that takes a tf.Session object and correctly - restores all necessary variables from the correct checkpoint file. If - None, attempts to restore from the first directory in checkpoint_dirs. - num_batches: the number of batches to use for evaluation. - master: the location of the Tensorflow session. - save_graph: whether or not the Tensorflow graph is stored as a pbtxt file. - save_graph_dir: where to store the Tensorflow graph on disk. If save_graph - is True this must be non-empty. - losses_dict: optional dictionary of scalar detection losses. - - Returns: - global_step: the count of global steps. - all_evaluator_metrics: A dictionary containing metric names and values. - - Raises: - ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least - one element. - ValueError: if save_graph is True and save_graph_dir is not defined. 
- """ - if save_graph and not save_graph_dir: - raise ValueError('`save_graph_dir` must be defined.') - if (inter_op > 0 or intra_op > 0): - config = tf.ConfigProto(inter_op_parallelism_threads=inter_op, - intra_op_parallelism_threads=intra_op) - logging.info('inter_op value= %d', inter_op) - logging.info('intra_op value= %d', intra_op) - else: - config = None - sess = tf.Session(master, graph=tf.get_default_graph(), config=config) - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) - sess.run(tf.tables_initializer()) - if restore_fn: - restore_fn(sess) - else: - if not checkpoint_dirs: - raise ValueError('`checkpoint_dirs` must have at least one entry.') - checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0]) - saver = tf.train.Saver(variables_to_restore) - saver.restore(sess, checkpoint_file) - - if save_graph: - tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt') - - counters = {'skipped': 0, 'success': 0} - aggregate_result_losses_dict = collections.defaultdict(list) - with tf.contrib.slim.queues.QueueRunners(sess): - try: - loop_start_time = time.time() - for batch in range(int(num_batches)): - if (batch + 1) % 100 == 0: - logging.info('Running eval ops batch %d/%d', batch + 1, num_batches) - start_time = time.time() - if not batch_processor: - try: - if not losses_dict: - losses_dict = {} - result_dict, result_losses_dict = sess.run([tensor_dict, - losses_dict]) - counters['success'] += 1 - except tf.errors.InvalidArgumentError: - logging.info('Skipping image') - counters['skipped'] += 1 - result_dict = {} - else: - result_dict, result_losses_dict = batch_processor( - tensor_dict, sess, batch, counters, losses_dict=losses_dict) - if ((batch % 100) == 0): - logging.info('Iteration %d: %.3f sec', batch, time.time() - start_time) - if not result_dict: - continue - for key, value in iter(result_losses_dict.items()): - aggregate_result_losses_dict[key].append(value) - for evaluator in evaluators: - # TODO(b/65130867): Use image_id tensor once we fix the input data - # decoders to return correct image_id. - # TODO(akuznetsa): result_dict contains batches of images, while - # add_single_ground_truth_image_info expects a single image. Fix - evaluator.add_single_ground_truth_image_info( - image_id=batch, groundtruth_dict=result_dict) - evaluator.add_single_detected_image_info( - image_id=batch, detections_dict=result_dict) - loop_end_time = time.time() - total_run_time = loop_end_time - loop_start_time - avg_time_per_batch = total_run_time / num_batches - print('Average time per step: %.3f sec' % avg_time_per_batch) - logging.info('Running eval batches done.') - except tf.errors.OutOfRangeError: - logging.info('Done evaluating -- epoch limit reached') - finally: - # When done, ask the threads to stop. - logging.info('# success: %d', counters['success']) - logging.info('# skipped: %d', counters['skipped']) - all_evaluator_metrics = {} - for evaluator in evaluators: - metrics = evaluator.evaluate() - evaluator.clear() - if any(key in all_evaluator_metrics for key in metrics): - raise ValueError('Metric names between evaluators must not collide.') - all_evaluator_metrics.update(metrics) - global_step = tf.train.global_step(sess, tf.train.get_global_step()) - - for key, value in iter(aggregate_result_losses_dict.items()): - all_evaluator_metrics['Losses/' + key] = np.mean(value) - sess.close() - return (global_step, all_evaluator_metrics) + + """Evaluates metrics defined in evaluators and returns summaries. 
+ + This function loads the latest checkpoint in checkpoint_dirs and evaluates + all metrics defined in evaluators. The metrics are processed in batch by the + batch_processor. + + Args: + tensor_dict: a dictionary holding tensors representing a batch of detections + and corresponding groundtruth annotations. + evaluators: a list of object of type DetectionEvaluator to be used for + evaluation. Note that the metric names produced by different evaluators + must be unique. + batch_processor: a function taking four arguments: + 1. tensor_dict: the same tensor_dict that is passed in as the first + argument to this function. + 2. sess: a tensorflow session + 3. batch_index: an integer representing the index of the batch amongst + all batches + By default, batch_processor is None, which defaults to running: + return sess.run(tensor_dict) + To skip an image, it suffices to return an empty dictionary in place of + result_dict. + checkpoint_dirs: list of directories to load into an EnsembleModel. If it + has only one directory, EnsembleModel will not be used -- + a DetectionModel + will be instantiated directly. Not used if restore_fn is set. + variables_to_restore: None, or a dictionary mapping variable names found in + a checkpoint to model variables. The dictionary would normally be + generated by creating a tf.train.ExponentialMovingAverage object and + calling its variables_to_restore() method. Not used if restore_fn is set. + restore_fn: None, or a function that takes a tf.Session object and correctly + restores all necessary variables from the correct checkpoint file. If + None, attempts to restore from the first directory in checkpoint_dirs. + num_batches: the number of batches to use for evaluation. + master: the location of the Tensorflow session. + save_graph: whether or not the Tensorflow graph is stored as a pbtxt file. + save_graph_dir: where to store the Tensorflow graph on disk. If save_graph + is True this must be non-empty. + losses_dict: optional dictionary of scalar detection losses. + + Returns: + global_step: the count of global steps. + all_evaluator_metrics: A dictionary containing metric names and values. + + Raises: + ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least + one element. + ValueError: if save_graph is True and save_graph_dir is not defined. 
+ """ + if save_graph and not save_graph_dir: + raise ValueError('`save_graph_dir` must be defined.') + if (inter_op > 0 or intra_op > 0): + config = tf.compat.v1.ConfigProto(inter_op_parallelism_threads=inter_op, + intra_op_parallelism_threads=intra_op) + logging.info('inter_op value= %d', inter_op) + logging.info('intra_op value= %d', intra_op) + else: + config = None + sess = tf.compat.v1.Session(master, graph=tf.compat.v1.get_default_graph(), config=config) + sess.run(tf.compat.v1.global_variables_initializer()) + sess.run(tf.compat.v1.local_variables_initializer()) + sess.run(tf.compat.v1.tables_initializer()) + if restore_fn: + restore_fn(sess) + else: + if not checkpoint_dirs: + raise ValueError('`checkpoint_dirs` must have at least one entry.') + checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0]) + saver = tf.compat.v1.train.Saver(variables_to_restore) + saver.restore(sess, checkpoint_file) + + if save_graph: + tf.io.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt') + + counters = {'skipped': 0, 'success': 0} + aggregate_result_losses_dict = collections.defaultdict(list) + with tf.compat.v1.train.QueueRunner(sess): + try: + loop_start_time = time.time() + for batch in range(int(num_batches)): + if (batch + 1) % 100 == 0: + logging.info('Running eval ops batch %d/%d', batch + 1, num_batches) + start_time = time.time() + if not batch_processor: + try: + if not losses_dict: + losses_dict = {} + result_dict, result_losses_dict = sess.run([tensor_dict, + losses_dict]) + counters['success'] += 1 + except tf.errors.InvalidArgumentError: + logging.info('Skipping image') + counters['skipped'] += 1 + result_dict = {} + else: + result_dict, result_losses_dict = batch_processor( + tensor_dict, sess, batch, counters, losses_dict=losses_dict) + if ((batch % 100) == 0): + logging.info('Iteration %d: %.3f sec', batch, time.time() - start_time) + if not result_dict: + continue + for key, value in iter(result_losses_dict.items()): + aggregate_result_losses_dict[key].append(value) + for evaluator in evaluators: + # TODO(b/65130867): Use image_id tensor once we fix the input data + # decoders to return correct image_id. + # TODO(akuznetsa): result_dict contains batches of images, while + # add_single_ground_truth_image_info expects a single image. Fix + evaluator.add_single_ground_truth_image_info( + image_id=batch, groundtruth_dict=result_dict) + evaluator.add_single_detected_image_info( + image_id=batch, detections_dict=result_dict) + loop_end_time = time.time() + total_run_time = loop_end_time - loop_start_time + avg_time_per_batch = total_run_time / num_batches + print('Average time per step: %.3f sec' % avg_time_per_batch) + logging.info('Running eval batches done.') + except tf.errors.OutOfRangeError: + logging.info('Done evaluating -- epoch limit reached') + finally: + # When done, ask the threads to stop. 
+ logging.info('# success: %d', counters['success']) + logging.info('# skipped: %d', counters['skipped']) + all_evaluator_metrics = {} + for evaluator in evaluators: + metrics = evaluator.evaluate() + evaluator.clear() + if any(key in all_evaluator_metrics for key in metrics): + raise ValueError('Metric names between evaluators must not collide.') + all_evaluator_metrics.update(metrics) + global_step = tf.compat.v1.train.global_step(sess, tf.compat.v1.train.get_global_step()) + + for key, value in iter(aggregate_result_losses_dict.items()): + all_evaluator_metrics['Losses/' + key] = np.mean(value) + sess.close() + return (global_step, all_evaluator_metrics) # TODO(rathodv): Add tests. @@ -382,97 +380,97 @@ def repeated_checkpoint_run(tensor_dict, save_graph=False, save_graph_dir='', losses_dict=None): - """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn. - - This function repeatedly loads a checkpoint and evaluates a desired - set of tensors (provided by tensor_dict) and hands the resulting numpy - arrays to a function result_processor which can be used to further - process/save/visualize the results. - - Args: - tensor_dict: a dictionary holding tensors representing a batch of detections - and corresponding groundtruth annotations. - summary_dir: a directory to write metrics summaries. - evaluators: a list of object of type DetectionEvaluator to be used for - evaluation. Note that the metric names produced by different evaluators - must be unique. - batch_processor: a function taking three arguments: - 1. tensor_dict: the same tensor_dict that is passed in as the first - argument to this function. - 2. sess: a tensorflow session - 3. batch_index: an integer representing the index of the batch amongst - all batches - By default, batch_processor is None, which defaults to running: - return sess.run(tensor_dict) - checkpoint_dirs: list of directories to load into a DetectionModel or an - EnsembleModel if restore_fn isn't set. Also used to determine when to run - next evaluation. Must have at least one element. - variables_to_restore: None, or a dictionary mapping variable names found in - a checkpoint to model variables. The dictionary would normally be - generated by creating a tf.train.ExponentialMovingAverage object and - calling its variables_to_restore() method. Not used if restore_fn is set. - restore_fn: a function that takes a tf.Session object and correctly restores - all necessary variables from the correct checkpoint file. - num_batches: the number of batches to use for evaluation. - eval_interval_secs: the number of seconds between each evaluation run. - max_number_of_evaluations: the max number of iterations of the evaluation. - If the value is left as None the evaluation continues indefinitely. - master: the location of the Tensorflow session. - save_graph: whether or not the Tensorflow graph is saved as a pbtxt file. - save_graph_dir: where to save on disk the Tensorflow graph. If store_graph - is True this must be non-empty. - losses_dict: optional dictionary of scalar detection losses. - - Returns: - metrics: A dictionary containing metric names and values in the latest - evaluation. - - Raises: - ValueError: if max_num_of_evaluations is not None or a positive number. - ValueError: if checkpoint_dirs doesn't have at least one element. 
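The ConfigProto handling added above is one of the benchmark-specific parts of this change: when positive inter_op/intra_op values are passed in, the evaluation session is created with those thread-pool sizes. A minimal, self-contained sketch of the same pattern follows; the thread counts are illustrative only and would typically come from the benchmark's --num-inter-threads / --num-intra-threads style arguments rather than being hard-coded.

import tensorflow as tf

# Illustrative thread counts; not part of this patch.
inter_op = 1    # independent ops the runtime may execute in parallel
intra_op = 28   # threads used inside a single op (e.g. matmul, conv)

config = None
if inter_op > 0 or intra_op > 0:
    config = tf.compat.v1.ConfigProto(
        inter_op_parallelism_threads=inter_op,
        intra_op_parallelism_threads=intra_op)

# A session built this way matches what _run_checkpoint_once does above.
sess = tf.compat.v1.Session(config=config)
sess.close()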
- """ - if max_number_of_evaluations and max_number_of_evaluations <= 0: - raise ValueError( - '`number_of_steps` must be either None or a positive number.') - - if not checkpoint_dirs: - raise ValueError('`checkpoint_dirs` must have at least one entry.') - - last_evaluated_model_path = None - number_of_evaluations = 0 - while True: - start = time.time() - logging.info('Starting evaluation at ' + time.strftime( - '%Y-%m-%d-%H:%M:%S', time.gmtime())) - model_path = tf.train.latest_checkpoint(checkpoint_dirs[0]) - if not model_path: - logging.info('No model found in %s. Will try again in %d seconds', - checkpoint_dirs[0], eval_interval_secs) - elif model_path == last_evaluated_model_path: - logging.info('Found already evaluated checkpoint. Will try again in %d ' - 'seconds', eval_interval_secs) - else: - last_evaluated_model_path = model_path - global_step, metrics = _run_checkpoint_once(tensor_dict, evaluators, - batch_processor, - checkpoint_dirs, - variables_to_restore, - restore_fn, num_batches, - master, save_graph, - save_graph_dir, - losses_dict=losses_dict) - write_metrics(metrics, global_step, summary_dir) - number_of_evaluations += 1 - - if (max_number_of_evaluations and - number_of_evaluations >= max_number_of_evaluations): - logging.info('Finished evaluation!') - break - time_to_next_eval = start + eval_interval_secs - time.time() - if time_to_next_eval > 0: - time.sleep(time_to_next_eval) - - return metrics + """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn. + + This function repeatedly loads a checkpoint and evaluates a desired + set of tensors (provided by tensor_dict) and hands the resulting numpy + arrays to a function result_processor which can be used to further + process/save/visualize the results. + + Args: + tensor_dict: a dictionary holding tensors representing a batch of detections + and corresponding groundtruth annotations. + summary_dir: a directory to write metrics summaries. + evaluators: a list of object of type DetectionEvaluator to be used for + evaluation. Note that the metric names produced by different evaluators + must be unique. + batch_processor: a function taking three arguments: + 1. tensor_dict: the same tensor_dict that is passed in as the first + argument to this function. + 2. sess: a tensorflow session + 3. batch_index: an integer representing the index of the batch amongst + all batches + By default, batch_processor is None, which defaults to running: + return sess.run(tensor_dict) + checkpoint_dirs: list of directories to load into a DetectionModel or an + EnsembleModel if restore_fn isn't set. Also used to determine when to run + next evaluation. Must have at least one element. + variables_to_restore: None, or a dictionary mapping variable names found in + a checkpoint to model variables. The dictionary would normally be + generated by creating a tf.train.ExponentialMovingAverage object and + calling its variables_to_restore() method. Not used if restore_fn is set. + restore_fn: a function that takes a tf.Session object and correctly restores + all necessary variables from the correct checkpoint file. + num_batches: the number of batches to use for evaluation. + eval_interval_secs: the number of seconds between each evaluation run. + max_number_of_evaluations: the max number of iterations of the evaluation. + If the value is left as None the evaluation continues indefinitely. + master: the location of the Tensorflow session. + save_graph: whether or not the Tensorflow graph is saved as a pbtxt file. 
+ save_graph_dir: where to save on disk the Tensorflow graph. If store_graph + is True this must be non-empty. + losses_dict: optional dictionary of scalar detection losses. + + Returns: + metrics: A dictionary containing metric names and values in the latest + evaluation. + + Raises: + ValueError: if max_num_of_evaluations is not None or a positive number. + ValueError: if checkpoint_dirs doesn't have at least one element. + """ + if max_number_of_evaluations and max_number_of_evaluations <= 0: + raise ValueError( + '`number_of_steps` must be either None or a positive number.') + + if not checkpoint_dirs: + raise ValueError('`checkpoint_dirs` must have at least one entry.') + + last_evaluated_model_path = None + number_of_evaluations = 0 + while True: + start = time.time() + logging.info('Starting evaluation at ' + time.strftime( + '%Y-%m-%d-%H:%M:%S', time.gmtime())) + model_path = tf.train.latest_checkpoint(checkpoint_dirs[0]) + if not model_path: + logging.info('No model found in %s. Will try again in %d seconds', + checkpoint_dirs[0], eval_interval_secs) + elif model_path == last_evaluated_model_path: + logging.info('Found already evaluated checkpoint. Will try again in %d ' + 'seconds', eval_interval_secs) + else: + last_evaluated_model_path = model_path + global_step, metrics = _run_checkpoint_once(tensor_dict, evaluators, + batch_processor, + checkpoint_dirs, + variables_to_restore, + restore_fn, num_batches, + master, save_graph, + save_graph_dir, + losses_dict=losses_dict) + write_metrics(metrics, global_step, summary_dir) + number_of_evaluations += 1 + + if (max_number_of_evaluations and + number_of_evaluations >= max_number_of_evaluations): + logging.info('Finished evaluation!') + break + time_to_next_eval = start + eval_interval_secs - time.time() + if time_to_next_eval > 0: + time.sleep(time_to_next_eval) + + return metrics def result_dict_for_single_example(image, @@ -481,195 +479,195 @@ def result_dict_for_single_example(image, groundtruth=None, class_agnostic=False, scale_to_absolute=False): - """Merges all detection and groundtruth information for a single example. - - Note that evaluation tools require classes that are 1-indexed, and so this - function performs the offset. If `class_agnostic` is True, all output classes - have label 1. - - Args: - image: A single 4D uint8 image tensor of shape [1, H, W, C]. - key: A single string tensor identifying the image. - detections: A dictionary of detections, returned from - DetectionModel.postprocess(). - groundtruth: (Optional) Dictionary of groundtruth items, with fields: - 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in - normalized coordinates. - 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. - 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) - 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) - 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) - 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) - 'groundtruth_instance_masks': 3D int64 tensor of instance masks - (Optional). - class_agnostic: Boolean indicating whether the detections are class-agnostic - (i.e. binary). Default False. - scale_to_absolute: Boolean indicating whether boxes and keypoints should be - scaled to absolute coordinates. Note that for IoU based evaluations, it - does not matter whether boxes are expressed in absolute or relative - coordinates. Default False. - - Returns: - A dictionary with: - 'original_image': A [1, H, W, C] uint8 image tensor. 
- 'key': A string tensor with image identifier. - 'detection_boxes': [max_detections, 4] float32 tensor of boxes, in - normalized or absolute coordinates, depending on the value of - `scale_to_absolute`. - 'detection_scores': [max_detections] float32 tensor of scores. - 'detection_classes': [max_detections] int64 tensor of 1-indexed classes. - 'detection_masks': [max_detections, H, W] float32 tensor of binarized - masks, reframed to full image masks. + """Merges all detection and groundtruth information for a single example. + + Note that evaluation tools require classes that are 1-indexed, and so this + function performs the offset. If `class_agnostic` is True, all output classes + have label 1. + + Args: + image: A single 4D uint8 image tensor of shape [1, H, W, C]. + key: A single string tensor identifying the image. + detections: A dictionary of detections, returned from + DetectionModel.postprocess(). + groundtruth: (Optional) Dictionary of groundtruth items, with fields: 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in - normalized or absolute coordinates, depending on the value of - `scale_to_absolute`. (Optional) + normalized coordinates. 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. - (Optional) 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) 'groundtruth_instance_masks': 3D int64 tensor of instance masks (Optional). - - """ - label_id_offset = 1 # Applying label id offset (b/63711816) - - input_data_fields = fields.InputDataFields - output_dict = { - input_data_fields.original_image: image, - input_data_fields.key: key, - } - - detection_fields = fields.DetectionResultFields - detection_boxes = detections[detection_fields.detection_boxes][0] - image_shape = tf.shape(image) - detection_scores = detections[detection_fields.detection_scores][0] - + class_agnostic: Boolean indicating whether the detections are class-agnostic + (i.e. binary). Default False. + scale_to_absolute: Boolean indicating whether boxes and keypoints should be + scaled to absolute coordinates. Note that for IoU based evaluations, it + does not matter whether boxes are expressed in absolute or relative + coordinates. Default False. + + Returns: + A dictionary with: + 'original_image': A [1, H, W, C] uint8 image tensor. + 'key': A string tensor with image identifier. + 'detection_boxes': [max_detections, 4] float32 tensor of boxes, in + normalized or absolute coordinates, depending on the value of + `scale_to_absolute`. + 'detection_scores': [max_detections] float32 tensor of scores. + 'detection_classes': [max_detections] int64 tensor of 1-indexed classes. + 'detection_masks': [max_detections, H, W] float32 tensor of binarized + masks, reframed to full image masks. + 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in + normalized or absolute coordinates, depending on the value of + `scale_to_absolute`. (Optional) + 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. + (Optional) + 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) + 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) + 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) + 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) + 'groundtruth_instance_masks': 3D int64 tensor of instance masks + (Optional). 
+ + """ + label_id_offset = 1 # Applying label id offset (b/63711816) + + input_data_fields = fields.InputDataFields + output_dict = { + input_data_fields.original_image: image, + input_data_fields.key: key, + } + + detection_fields = fields.DetectionResultFields + detection_boxes = detections[detection_fields.detection_boxes][0] + image_shape = tf.shape(input=image) + detection_scores = detections[detection_fields.detection_scores][0] + + if class_agnostic: + detection_classes = tf.ones_like(detection_scores, dtype=tf.int64) + else: + detection_classes = ( + tf.cast(detections[detection_fields.detection_classes][0], dtype=tf.int64) + + label_id_offset) + + num_detections = tf.cast(detections[detection_fields.num_detections][0], dtype=tf.int32) + detection_boxes = tf.slice( + detection_boxes, begin=[0, 0], size=[num_detections, -1]) + detection_classes = tf.slice( + detection_classes, begin=[0], size=[num_detections]) + detection_scores = tf.slice( + detection_scores, begin=[0], size=[num_detections]) + + if scale_to_absolute: + absolute_detection_boxlist = box_list_ops.to_absolute_coordinates( + box_list.BoxList(detection_boxes), image_shape[1], image_shape[2]) + output_dict[detection_fields.detection_boxes] = ( + absolute_detection_boxlist.get()) + else: + output_dict[detection_fields.detection_boxes] = detection_boxes + output_dict[detection_fields.detection_classes] = detection_classes + output_dict[detection_fields.detection_scores] = detection_scores + + if detection_fields.detection_masks in detections: + detection_masks = detections[detection_fields.detection_masks][0] + # TODO(rathodv): This should be done in model's postprocess + # function ideally. + detection_masks = tf.slice( + detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1]) + detection_masks_reframed = ops.reframe_box_masks_to_image_masks( + detection_masks, detection_boxes, image_shape[1], image_shape[2]) + detection_masks_reframed = tf.cast( + tf.greater(detection_masks_reframed, 0.5), tf.uint8) + output_dict[detection_fields.detection_masks] = detection_masks_reframed + if detection_fields.detection_keypoints in detections: + detection_keypoints = detections[detection_fields.detection_keypoints][0] + output_dict[detection_fields.detection_keypoints] = detection_keypoints + if scale_to_absolute: + absolute_detection_keypoints = keypoint_ops.scale( + detection_keypoints, image_shape[1], image_shape[2]) + output_dict[detection_fields.detection_keypoints] = ( + absolute_detection_keypoints) + + if groundtruth: + if input_data_fields.groundtruth_instance_masks in groundtruth: + groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast( + groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8) + output_dict.update(groundtruth) + if scale_to_absolute: + groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes] + absolute_gt_boxlist = box_list_ops.to_absolute_coordinates( + box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2]) + output_dict[input_data_fields.groundtruth_boxes] = ( + absolute_gt_boxlist.get()) + # For class-agnostic models, groundtruth classes all become 1. 
if class_agnostic: - detection_classes = tf.ones_like(detection_scores, dtype=tf.int64) - else: - detection_classes = ( - tf.to_int64(detections[detection_fields.detection_classes][0]) + - label_id_offset) - - num_detections = tf.to_int32(detections[detection_fields.num_detections][0]) - detection_boxes = tf.slice( - detection_boxes, begin=[0, 0], size=[num_detections, -1]) - detection_classes = tf.slice( - detection_classes, begin=[0], size=[num_detections]) - detection_scores = tf.slice( - detection_scores, begin=[0], size=[num_detections]) + groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes] + groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64) + output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes - if scale_to_absolute: - absolute_detection_boxlist = box_list_ops.to_absolute_coordinates( - box_list.BoxList(detection_boxes), image_shape[1], image_shape[2]) - output_dict[detection_fields.detection_boxes] = ( - absolute_detection_boxlist.get()) - else: - output_dict[detection_fields.detection_boxes] = detection_boxes - output_dict[detection_fields.detection_classes] = detection_classes - output_dict[detection_fields.detection_scores] = detection_scores - - if detection_fields.detection_masks in detections: - detection_masks = detections[detection_fields.detection_masks][0] - # TODO(rathodv): This should be done in model's postprocess - # function ideally. - detection_masks = tf.slice( - detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1]) - detection_masks_reframed = ops.reframe_box_masks_to_image_masks( - detection_masks, detection_boxes, image_shape[1], image_shape[2]) - detection_masks_reframed = tf.cast( - tf.greater(detection_masks_reframed, 0.5), tf.uint8) - output_dict[detection_fields.detection_masks] = detection_masks_reframed - if detection_fields.detection_keypoints in detections: - detection_keypoints = detections[detection_fields.detection_keypoints][0] - output_dict[detection_fields.detection_keypoints] = detection_keypoints - if scale_to_absolute: - absolute_detection_keypoints = keypoint_ops.scale( - detection_keypoints, image_shape[1], image_shape[2]) - output_dict[detection_fields.detection_keypoints] = ( - absolute_detection_keypoints) - - if groundtruth: - if input_data_fields.groundtruth_instance_masks in groundtruth: - groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast( - groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8) - output_dict.update(groundtruth) - if scale_to_absolute: - groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes] - absolute_gt_boxlist = box_list_ops.to_absolute_coordinates( - box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2]) - output_dict[input_data_fields.groundtruth_boxes] = ( - absolute_gt_boxlist.get()) - # For class-agnostic models, groundtruth classes all become 1. - if class_agnostic: - groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes] - groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64) - output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes - - return output_dict + return output_dict def get_eval_metric_ops_for_evaluators(evaluation_metrics, categories, eval_dict, include_metrics_per_category=False): - """Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`. - - Args: - evaluation_metrics: List of evaluation metric names. Current options are - 'coco_detection_metrics' and 'coco_mask_metrics'. 
- categories: A list of dicts, each of which has the following keys - - 'id': (required) an integer id uniquely identifying this category. - 'name': (required) string representing category name e.g., 'cat', 'dog'. - eval_dict: An evaluation dictionary, returned from - result_dict_for_single_example(). - include_metrics_per_category: If True, include metrics for each category. - - Returns: - A dictionary of metric names to tuple of value_op and update_op that can be - used as eval metric ops in tf.EstimatorSpec. - - Raises: - ValueError: If any of the metrics in `evaluation_metric` is not - 'coco_detection_metrics' or 'coco_mask_metrics'. - """ - evaluation_metrics = list(set(evaluation_metrics)) - - input_data_fields = fields.InputDataFields - detection_fields = fields.DetectionResultFields - eval_metric_ops = {} - for metric in evaluation_metrics: - if metric == 'coco_detection_metrics': - coco_evaluator = coco_evaluation.CocoDetectionEvaluator( - categories, include_metrics_per_category=include_metrics_per_category) - eval_metric_ops.update( - coco_evaluator.get_estimator_eval_metric_ops( - image_id=eval_dict[input_data_fields.key], - groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes], - groundtruth_classes=eval_dict[ - input_data_fields.groundtruth_classes], - detection_boxes=eval_dict[detection_fields.detection_boxes], - detection_scores=eval_dict[detection_fields.detection_scores], - detection_classes=eval_dict[detection_fields.detection_classes])) - elif metric == 'coco_mask_metrics': - coco_mask_evaluator = coco_evaluation.CocoMaskEvaluator( - categories, include_metrics_per_category=include_metrics_per_category) - eval_metric_ops.update( - coco_mask_evaluator.get_estimator_eval_metric_ops( - image_id=eval_dict[input_data_fields.key], - groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes], - groundtruth_classes=eval_dict[ - input_data_fields.groundtruth_classes], - groundtruth_instance_masks=eval_dict[ - input_data_fields.groundtruth_instance_masks], - detection_scores=eval_dict[detection_fields.detection_scores], - detection_classes=eval_dict[detection_fields.detection_classes], - detection_masks=eval_dict[detection_fields.detection_masks])) - else: - raise ValueError('The only evaluation metrics supported are ' - '"coco_detection_metrics" and "coco_mask_metrics". ' - 'Found {} in the evaluation metrics'.format(metric)) + """Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`. + + Args: + evaluation_metrics: List of evaluation metric names. Current options are + 'coco_detection_metrics' and 'coco_mask_metrics'. + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + eval_dict: An evaluation dictionary, returned from + result_dict_for_single_example(). + include_metrics_per_category: If True, include metrics for each category. + + Returns: + A dictionary of metric names to tuple of value_op and update_op that can be + used as eval metric ops in tf.EstimatorSpec. + + Raises: + ValueError: If any of the metrics in `evaluation_metric` is not + 'coco_detection_metrics' or 'coco_mask_metrics'. 
+ """ + evaluation_metrics = list(set(evaluation_metrics)) + + input_data_fields = fields.InputDataFields + detection_fields = fields.DetectionResultFields + eval_metric_ops = {} + for metric in evaluation_metrics: + if metric == 'coco_detection_metrics': + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + categories, include_metrics_per_category=include_metrics_per_category) + eval_metric_ops.update( + coco_evaluator.get_estimator_eval_metric_ops( + image_id=eval_dict[input_data_fields.key], + groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes], + groundtruth_classes=eval_dict[ + input_data_fields.groundtruth_classes], + detection_boxes=eval_dict[detection_fields.detection_boxes], + detection_scores=eval_dict[detection_fields.detection_scores], + detection_classes=eval_dict[detection_fields.detection_classes])) + elif metric == 'coco_mask_metrics': + coco_mask_evaluator = coco_evaluation.CocoMaskEvaluator( + categories, include_metrics_per_category=include_metrics_per_category) + eval_metric_ops.update( + coco_mask_evaluator.get_estimator_eval_metric_ops( + image_id=eval_dict[input_data_fields.key], + groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes], + groundtruth_classes=eval_dict[ + input_data_fields.groundtruth_classes], + groundtruth_instance_masks=eval_dict[ + input_data_fields.groundtruth_instance_masks], + detection_scores=eval_dict[detection_fields.detection_scores], + detection_classes=eval_dict[detection_fields.detection_classes], + detection_masks=eval_dict[detection_fields.detection_masks])) + else: + raise ValueError('The only evaluation metrics supported are ' + '"coco_detection_metrics" and "coco_mask_metrics". ' + 'Found {} in the evaluation metrics'.format(metric)) - return eval_metric_ops + return eval_metric_ops diff --git a/models/object_detection/tensorflow/rfcn/inference/fp32/evaluator.py b/models/object_detection/tensorflow/rfcn/inference/fp32/evaluator.py index 0db392a65..61a2136ea 100644 --- a/models/object_detection/tensorflow/rfcn/inference/fp32/evaluator.py +++ b/models/object_detection/tensorflow/rfcn/inference/fp32/evaluator.py @@ -76,243 +76,242 @@ def _extract_predictions_and_losses(model, create_input_dict_fn, ignore_groundtruth=False): - """Constructs tensorflow detection graph and returns output tensors. - - Args: - model: model to perform predictions with. - create_input_dict_fn: function to create input tensor dictionaries. - ignore_groundtruth: whether groundtruth should be ignored. - - Returns: - prediction_groundtruth_dict: A dictionary with postprocessed tensors (keyed - by standard_fields.DetectionResultsFields) and optional groundtruth - tensors (keyed by standard_fields.InputDataFields). - losses_dict: A dictionary containing detection losses. This is empty when - ignore_groundtruth is true. 
- """ - input_dict = create_input_dict_fn() - prefetch_queue = prefetcher.prefetch(input_dict, capacity=500) - input_dict = prefetch_queue.dequeue() - original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0) - preprocessed_image, true_image_shapes = model.preprocess( - tf.to_float(original_image)) - prediction_dict = model.predict(preprocessed_image, true_image_shapes) - detections = model.postprocess(prediction_dict, true_image_shapes) - - groundtruth = None - losses_dict = {} - if not ignore_groundtruth: - groundtruth = { - fields.InputDataFields.groundtruth_boxes: - input_dict[fields.InputDataFields.groundtruth_boxes], - fields.InputDataFields.groundtruth_classes: - input_dict[fields.InputDataFields.groundtruth_classes], - fields.InputDataFields.groundtruth_area: - input_dict[fields.InputDataFields.groundtruth_area], - fields.InputDataFields.groundtruth_is_crowd: - input_dict[fields.InputDataFields.groundtruth_is_crowd], - fields.InputDataFields.groundtruth_difficult: - input_dict[fields.InputDataFields.groundtruth_difficult] - } - if fields.InputDataFields.groundtruth_group_of in input_dict: - groundtruth[fields.InputDataFields.groundtruth_group_of] = ( - input_dict[fields.InputDataFields.groundtruth_group_of]) - groundtruth_masks_list = None - if fields.DetectionResultFields.detection_masks in detections: - groundtruth[fields.InputDataFields.groundtruth_instance_masks] = ( - input_dict[fields.InputDataFields.groundtruth_instance_masks]) - groundtruth_masks_list = [ - input_dict[fields.InputDataFields.groundtruth_instance_masks]] - groundtruth_keypoints_list = None - if fields.DetectionResultFields.detection_keypoints in detections: - groundtruth[fields.InputDataFields.groundtruth_keypoints] = ( - input_dict[fields.InputDataFields.groundtruth_keypoints]) - groundtruth_keypoints_list = [ - input_dict[fields.InputDataFields.groundtruth_keypoints]] - label_id_offset = 1 - model.provide_groundtruth( - [input_dict[fields.InputDataFields.groundtruth_boxes]], - [tf.one_hot(input_dict[fields.InputDataFields.groundtruth_classes] - - label_id_offset, depth=model.num_classes)], - groundtruth_masks_list, groundtruth_keypoints_list) - losses_dict.update(model.loss(prediction_dict, true_image_shapes)) - - result_dict = eval_util.result_dict_for_single_example( - original_image, - input_dict[fields.InputDataFields.source_id], - detections, - groundtruth, - class_agnostic=( - fields.DetectionResultFields.detection_classes not in detections), - scale_to_absolute=True) - return result_dict, losses_dict + """Constructs tensorflow detection graph and returns output tensors. + + Args: + model: model to perform predictions with. + create_input_dict_fn: function to create input tensor dictionaries. + ignore_groundtruth: whether groundtruth should be ignored. + + Returns: + prediction_groundtruth_dict: A dictionary with postprocessed tensors (keyed + by standard_fields.DetectionResultsFields) and optional groundtruth + tensors (keyed by standard_fields.InputDataFields). + losses_dict: A dictionary containing detection losses. This is empty when + ignore_groundtruth is true. 
+ """ + input_dict = create_input_dict_fn() + prefetch_queue = prefetcher.prefetch(input_dict, capacity=500) + input_dict = prefetch_queue.dequeue() + original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0) + preprocessed_image, true_image_shapes = model.preprocess( + tf.cast(original_image, dtype=tf.float32)) + prediction_dict = model.predict(preprocessed_image, true_image_shapes) + detections = model.postprocess(prediction_dict, true_image_shapes) + + groundtruth = None + losses_dict = {} + if not ignore_groundtruth: + groundtruth = { + fields.InputDataFields.groundtruth_boxes: + input_dict[fields.InputDataFields.groundtruth_boxes], + fields.InputDataFields.groundtruth_classes: + input_dict[fields.InputDataFields.groundtruth_classes], + fields.InputDataFields.groundtruth_area: + input_dict[fields.InputDataFields.groundtruth_area], + fields.InputDataFields.groundtruth_is_crowd: + input_dict[fields.InputDataFields.groundtruth_is_crowd], + fields.InputDataFields.groundtruth_difficult: + input_dict[fields.InputDataFields.groundtruth_difficult] + } + if fields.InputDataFields.groundtruth_group_of in input_dict: + groundtruth[fields.InputDataFields.groundtruth_group_of] = ( + input_dict[fields.InputDataFields.groundtruth_group_of]) + groundtruth_masks_list = None + if fields.DetectionResultFields.detection_masks in detections: + groundtruth[fields.InputDataFields.groundtruth_instance_masks] = ( + input_dict[fields.InputDataFields.groundtruth_instance_masks]) + groundtruth_masks_list = [ + input_dict[fields.InputDataFields.groundtruth_instance_masks]] + groundtruth_keypoints_list = None + if fields.DetectionResultFields.detection_keypoints in detections: + groundtruth[fields.InputDataFields.groundtruth_keypoints] = ( + input_dict[fields.InputDataFields.groundtruth_keypoints]) + groundtruth_keypoints_list = [ + input_dict[fields.InputDataFields.groundtruth_keypoints]] + label_id_offset = 1 + model.provide_groundtruth( + [input_dict[fields.InputDataFields.groundtruth_boxes]], + [tf.one_hot(input_dict[fields.InputDataFields.groundtruth_classes] + - label_id_offset, depth=model.num_classes)], + groundtruth_masks_list, groundtruth_keypoints_list) + losses_dict.update(model.loss(prediction_dict, true_image_shapes)) + + result_dict = eval_util.result_dict_for_single_example( + original_image, + input_dict[fields.InputDataFields.source_id], + detections, + groundtruth, + class_agnostic=( + fields.DetectionResultFields.detection_classes not in detections), + scale_to_absolute=True) + return result_dict, losses_dict def get_evaluators(eval_config, categories): - """Returns the evaluator class according to eval_config, valid for categories. - - Args: - eval_config: evaluation configurations. - categories: a list of categories to evaluate. - Returns: - An list of instances of DetectionEvaluator. - - Raises: - ValueError: if metric is not in the metric class dictionary. - """ - eval_metric_fn_keys = eval_config.metrics_set - if not eval_metric_fn_keys: - eval_metric_fn_keys = [EVAL_DEFAULT_METRIC] - evaluators_list = [] - for eval_metric_fn_key in eval_metric_fn_keys: - if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT: - raise ValueError('Metric not found: {}'.format(eval_metric_fn_key)) - evaluators_list.append( - EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](categories=categories)) - return evaluators_list + """Returns the evaluator class according to eval_config, valid for categories. + + Args: + eval_config: evaluation configurations. 
+ categories: a list of categories to evaluate. + Returns: + An list of instances of DetectionEvaluator. + + Raises: + ValueError: if metric is not in the metric class dictionary. + """ + eval_metric_fn_keys = eval_config.metrics_set + if not eval_metric_fn_keys: + eval_metric_fn_keys = [EVAL_DEFAULT_METRIC] + evaluators_list = [] + for eval_metric_fn_key in eval_metric_fn_keys: + if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT: + raise ValueError('Metric not found: {}'.format(eval_metric_fn_key)) + evaluators_list.append( + EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](categories=categories)) + return evaluators_list def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories, checkpoint_dir, eval_dir, graph_hook_fn=None, evaluator_list=None, intra_op=0, inter_op=0): - """Evaluation function for detection models. + """Evaluation function for detection models. + + Args: + create_input_dict_fn: a function to create a tensor input dictionary. + create_model_fn: a function that creates a DetectionModel. + eval_config: a eval_pb2.EvalConfig protobuf. + categories: a list of category dictionaries. Each dict in the list should + have an integer 'id' field and string 'name' field. + checkpoint_dir: directory to load the checkpoints to evaluate from. + eval_dir: directory to write evaluation metrics summary to. + graph_hook_fn: Optional function that is called after the training graph is + completely built. This is helpful to perform additional changes to the + training graph such as optimizing batchnorm. The function should modify + the default graph. + evaluator_list: Optional list of instances of DetectionEvaluator. If not + given, this list of metrics is created according to the eval_config. + + Returns: + metrics: A dictionary containing metric names and values from the latest + run. + """ + + model = create_model_fn() + + if eval_config.ignore_groundtruth and not eval_config.export_path: + logging.fatal('If ignore_groundtruth=True then an export_path is ' + 'required. Aborting!!!') + + tensor_dict, losses_dict = _extract_predictions_and_losses( + model=model, + create_input_dict_fn=create_input_dict_fn, + ignore_groundtruth=eval_config.ignore_groundtruth) + + def _process_batch(tensor_dict, sess, batch_index, counters, + losses_dict=None): + """Evaluates tensors in tensor_dict, losses_dict and visualizes examples. + + This function calls sess.run on tensor_dict, evaluating the original_image + tensor only on the first K examples and visualizing detections overlaid + on this original_image. Args: - create_input_dict_fn: a function to create a tensor input dictionary. - create_model_fn: a function that creates a DetectionModel. - eval_config: a eval_pb2.EvalConfig protobuf. - categories: a list of category dictionaries. Each dict in the list should - have an integer 'id' field and string 'name' field. - checkpoint_dir: directory to load the checkpoints to evaluate from. - eval_dir: directory to write evaluation metrics summary to. - graph_hook_fn: Optional function that is called after the training graph is - completely built. This is helpful to perform additional changes to the - training graph such as optimizing batchnorm. The function should modify - the default graph. - evaluator_list: Optional list of instances of DetectionEvaluator. If not - given, this list of metrics is created according to the eval_config. + tensor_dict: a dictionary of tensors + sess: tensorflow session + batch_index: the index of the batch amongst all batches in the run. 
+ counters: a dictionary holding 'success' and 'skipped' fields which can + be updated to keep track of number of successful and failed runs, + respectively. If these fields are not updated, then the success/skipped + counter values shown at the end of evaluation will be incorrect. + losses_dict: Optional dictonary of scalar loss tensors. Returns: - metrics: A dictionary containing metric names and values from the latest - run. + result_dict: a dictionary of numpy arrays + result_losses_dict: a dictionary of scalar losses. This is empty if input + losses_dict is None. """ - - model = create_model_fn() - - if eval_config.ignore_groundtruth and not eval_config.export_path: - logging.fatal('If ignore_groundtruth=True then an export_path is ' - 'required. Aborting!!!') - - tensor_dict, losses_dict = _extract_predictions_and_losses( - model=model, - create_input_dict_fn=create_input_dict_fn, - ignore_groundtruth=eval_config.ignore_groundtruth) - - def _process_batch(tensor_dict, sess, batch_index, counters, - losses_dict=None): - """Evaluates tensors in tensor_dict, losses_dict and visualizes examples. - - This function calls sess.run on tensor_dict, evaluating the original_image - tensor only on the first K examples and visualizing detections overlaid - on this original_image. - - Args: - tensor_dict: a dictionary of tensors - sess: tensorflow session - batch_index: the index of the batch amongst all batches in the run. - counters: a dictionary holding 'success' and 'skipped' fields which can - be updated to keep track of number of successful and failed runs, - respectively. If these fields are not updated, then the success/skipped - counter values shown at the end of evaluation will be incorrect. - losses_dict: Optional dictonary of scalar loss tensors. - - Returns: - result_dict: a dictionary of numpy arrays - result_losses_dict: a dictionary of scalar losses. This is empty if input - losses_dict is None. - """ - try: - if not losses_dict: - losses_dict = {} - trace = False - if batch_index == 0 and trace: - run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - run_metadata = tf.RunMetadata() - else: - run_options = None - run_metadata = None - start_time = time.time() - result_dict, result_losses_dict = sess.run( - [tensor_dict, losses_dict], options=run_options, run_metadata=run_metadata) - if (batch_index % 100 == 0): - logging.info('Step %d: %.3f sec', batch_index, time.time() - start_time) - if batch_index == 0 and trace: - trace = timeline.Timeline(step_stats=run_metadata.step_stats) - dir = 'logs' - if not os.path.exists(dir): - os.makedirs(dir) - with open(dir + '/rfcn-timeline-' + time.strftime("%Y%m%d-%H%M%S") + '.json', 'w') as file: - file.write(trace.generate_chrome_trace_format(show_memory=False)) - counters['success'] += 1 - except tf.errors.InvalidArgumentError: - logging.info('Skipping image') - counters['skipped'] += 1 - return {}, {} - global_step = tf.train.global_step(sess, tf.train.get_global_step()) - if batch_index < eval_config.num_visualizations: - tag = 'image-{}'.format(batch_index) - eval_util.visualize_detection_results( - result_dict, - tag, - global_step, - categories=categories, - summary_dir=eval_dir, - export_dir=eval_config.visualization_export_dir, - show_groundtruth=eval_config.visualize_groundtruth_boxes, - groundtruth_box_visualization_color=eval_config. 
- groundtruth_box_visualization_color, - min_score_thresh=eval_config.min_score_threshold, - max_num_predictions=eval_config.max_num_boxes_to_visualize, - skip_scores=eval_config.skip_scores, - skip_labels=eval_config.skip_labels, - keep_image_id_for_visualization_export=eval_config. - keep_image_id_for_visualization_export) - return result_dict, result_losses_dict - - variables_to_restore = tf.global_variables() - global_step = tf.train.get_or_create_global_step() - variables_to_restore.append(global_step) - - if graph_hook_fn: - graph_hook_fn() - - if eval_config.use_moving_averages: - variable_averages = tf.train.ExponentialMovingAverage(0.0) - variables_to_restore = variable_averages.variables_to_restore() - saver = tf.train.Saver(variables_to_restore) - - def _restore_latest_checkpoint(sess): - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) - saver.restore(sess, latest_checkpoint) - - if not evaluator_list: - evaluator_list = get_evaluators(eval_config, categories) - - metrics = eval_util.repeated_checkpoint_run( - tensor_dict=tensor_dict, - summary_dir=eval_dir, - evaluators=evaluator_list, - batch_processor=_process_batch, - checkpoint_dirs=[checkpoint_dir], - variables_to_restore=None, - restore_fn=_restore_latest_checkpoint, - num_batches=eval_config.num_examples, - eval_interval_secs=eval_config.eval_interval_secs, - max_number_of_evaluations=(1 if eval_config.ignore_groundtruth else - eval_config.max_evals - if eval_config.max_evals else None), - master=eval_config.eval_master, - save_graph=eval_config.save_graph, - save_graph_dir=(eval_dir if eval_config.save_graph else ''), - losses_dict=losses_dict) - - return metrics + try: + if not losses_dict: + losses_dict = {} + trace = False + if batch_index == 0 and trace: + run_options = tf.compat.v1.RunOptions(trace_level=tf.compat.v1.RunOptions.FULL_TRACE) + run_metadata = tf.compat.v1.RunMetadata() + else: + run_options = None + run_metadata = None + start_time = time.time() + result_dict, result_losses_dict = sess.run( + [tensor_dict, losses_dict], options=run_options, run_metadata=run_metadata) + if (batch_index % 100 == 0): + logging.info('Step %d: %.3f sec', batch_index, time.time() - start_time) + if batch_index == 0 and trace: + trace = timeline.Timeline(step_stats=run_metadata.step_stats) + dir = 'logs' + if not os.path.exists(dir): + os.makedirs(dir) + with open(dir + '/rfcn-timeline-' + time.strftime("%Y%m%d-%H%M%S") + '.json', 'w') as file: + file.write(trace.generate_chrome_trace_format(show_memory=False)) + counters['success'] += 1 + except tf.errors.InvalidArgumentError: + logging.info('Skipping image') + counters['skipped'] += 1 + return {}, {} + global_step = tf.compat.v1.train.global_step(sess, tf.compat.v1.train.get_global_step()) + if batch_index < eval_config.num_visualizations: + tag = 'image-{}'.format(batch_index) + eval_util.visualize_detection_results( + result_dict, + tag, + global_step, + categories=categories, + summary_dir=eval_dir, + export_dir=eval_config.visualization_export_dir, + show_groundtruth=eval_config.visualize_groundtruth_boxes, + groundtruth_box_visualization_color=eval_config. + groundtruth_box_visualization_color, + min_score_thresh=eval_config.min_score_threshold, + max_num_predictions=eval_config.max_num_boxes_to_visualize, + skip_scores=eval_config.skip_scores, + skip_labels=eval_config.skip_labels, + keep_image_id_for_visualization_export=eval_config. 
+ keep_image_id_for_visualization_export) + return result_dict, result_losses_dict + + variables_to_restore = tf.compat.v1.global_variables() + global_step = tf.compat.v1.train.get_or_create_global_step() + variables_to_restore.append(global_step) + + if graph_hook_fn: graph_hook_fn() + + if eval_config.use_moving_averages: + variable_averages = tf.train.ExponentialMovingAverage(0.0) + variables_to_restore = variable_averages.variables_to_restore() + saver = tf.compat.v1.train.Saver(variables_to_restore) + + def _restore_latest_checkpoint(sess): + latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) + saver.restore(sess, latest_checkpoint) + + if not evaluator_list: + evaluator_list = get_evaluators(eval_config, categories) + + metrics = eval_util.repeated_checkpoint_run( + tensor_dict=tensor_dict, + summary_dir=eval_dir, + evaluators=evaluator_list, + batch_processor=_process_batch, + checkpoint_dirs=[checkpoint_dir], + variables_to_restore=None, + restore_fn=_restore_latest_checkpoint, + num_batches=eval_config.num_examples, + eval_interval_secs=eval_config.eval_interval_secs, + max_number_of_evaluations=(1 if eval_config.ignore_groundtruth else + eval_config.max_evals + if eval_config.max_evals else None), + master=eval_config.eval_master, + save_graph=eval_config.save_graph, + save_graph_dir=(eval_dir if eval_config.save_graph else ''), + losses_dict=losses_dict) + + return metrics diff --git a/models/object_detection/tensorflow/rfcn/inference/fp32/run_rfcn_inference.py b/models/object_detection/tensorflow/rfcn/inference/fp32/run_rfcn_inference.py old mode 100755 new mode 100644 index a953175cd..e3be1bb75 --- a/models/object_detection/tensorflow/rfcn/inference/fp32/run_rfcn_inference.py +++ b/models/object_detection/tensorflow/rfcn/inference/fp32/run_rfcn_inference.py @@ -15,11 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +# import numpy as np import os +import six.moves.urllib as urllib import sys +import tarfile import tensorflow as tf +import zipfile +import subprocess + +from collections import defaultdict +from io import StringIO import matplotlib matplotlib.use('Agg') from matplotlib import pyplot as plt @@ -29,7 +37,6 @@ from tensorflow.python.client import timeline import importlib - class RFCNRunner: '''Add code here to detect the environment and set necessary variables before launching the model''' args=None @@ -61,7 +68,7 @@ def __init__(self, args): def parse_args(self): parser = argparse.ArgumentParser() mutex_group = parser.add_mutually_exclusive_group() - mutex_group.add_argument('-x', '--steps', help='Run for n number of steps', type=int, default=None) + mutex_group.add_argument('-x', '--number_of_steps', help='Run for n number of steps', type=int, default=None) mutex_group.add_argument('-z', '--visualize', help='Whether to visulize the output image', action='store_true' ) parser.add_argument('-v', '--verbose', help='Print some useful info.', action='store_true' ) parser.add_argument('-t', '--timeline', help='Output file name for TF timeline', type=str, default=None) @@ -69,8 +76,8 @@ def parse_args(self): parser.add_argument('-p', '--print_accuracy', help='Print accuracy results', action='store_true') parser.add_argument('-g', '--input_graph', help='The input frozen graph pb file', dest='input_graph', required=True, default=None) parser.add_argument('-d', '--data_location', help='The location of the image data to be analyzed.', dest='data_location', default=None, required=True) - parser.add_argument('-m', '--tensorflow-models-path', - help='Path to the tensorflow-models directory (or clone of github.com/tensorflow/models', + parser.add_argument('-m', '--tensorflow-models-path', + help='Path to the tensorflow-models directory (or clone of github.com/tensorflow/models', dest='tf_models_path', default=None, required=True) parser.add_argument( '--num-inter-threads', dest='num_inter_threads', @@ -85,26 +92,25 @@ def parse_args(self): self.finish_import() def log(self, msg): - if self.args.verbose: - print(msg) + if self.args.verbose: print(msg) def validate_args(self): self.log('Validating Args...') self.research_dir = os.path.join(self.args.tf_models_path, self.RESEARCH_DIR) - if not ( self.args.data_location and + if not ( self.args.data_location and os.path.exists(os.path.join(self.args.data_location, self.TEST_IMG_FILE))): raise ValueError ("Unable to locate images for evaluation at {}".format(self.args.data_location)) if os.path.isdir(self.research_dir): # List of the strings that is used to add correct label for each box. - self.label_map_file = os.path.join(self.research_dir, - self.OBJ_DETECTION_DIR, - self.DATA_DIR, + self.label_map_file = os.path.join(self.research_dir, + self.OBJ_DETECTION_DIR, + self.DATA_DIR, self.label_map_file) if not os.path.exists(self.label_map_file): raise ValueError ("Unable to locate label map file at {}".format(self.label_map_file)) else: raise ValueError ("{} is not a valid path to the TensorFlow models.".format(self.args.tf_models_path)) - + if not os.path.exists(self.args.input_graph): raise ValueError("Unable to find the input graph protobuf file: {}".format(self.args.input_graph)) @@ -121,17 +127,16 @@ def finish_import(self): def run(self): self.log("Running performance test") self.read_graph() - self.load_label_map() self.get_image_paths() - self.load_label_map() + #self.load_label_map() # Actual detection. 
output_dict, image_np = self.run_inference(self.detection_graph) self.visualize(output_dict, image_np) def visualize(self, output_dict, image_np): # Visualization of the results of a detection. - if (self.args.visualize and - self.args.evaluate_tensor is None and + if (self.args.visualize and + self.args.evaluate_tensor is None and self.category_index and output_dict and image_np ): @@ -150,15 +155,15 @@ def visualize(self, output_dict, image_np): def read_graph(self): self.detection_graph = tf.Graph() with self.detection_graph.as_default(): - od_graph_def = tf.GraphDef() - with tf.gfile.GFile(self.args.input_graph, 'rb') as fid: + od_graph_def = tf.compat.v1.GraphDef() + with tf.io.gfile.GFile(self.args.input_graph, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') def get_image_paths(self): - if self.args.visualize: + if (self.args.visualize): self.test_image_paths = [os.path.join(self.args.data_location, self.TEST_IMG_FILE)] else: self.test_image_paths = [] @@ -170,7 +175,7 @@ def load_label_map(self): label_map = self.label_map_util.load_labelmap(self.label_map_file) categories = self.label_map_util.convert_label_map_to_categories( label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True) - if self.args.visualize and self.args.evaluate_tensor is None: + if (self.args.visualize and self.args.evaluate_tensor is None): self.category_index = self.label_map_util.create_category_index(categories) def load_image_into_numpy_array(self, image): @@ -184,23 +189,23 @@ def load_image_into_numpy_array(self, image): (im_height, im_width, 3)).astype(np.uint8) def run_inference(self,graph): - sess_config = tf.ConfigProto() + sess_config = tf.compat.v1.ConfigProto() sess_config.intra_op_parallelism_threads = self.args.num_intra_threads sess_config.inter_op_parallelism_threads = self.args.num_inter_threads with self.detection_graph.as_default(): - with tf.Session(config=sess_config) as sess: + with tf.compat.v1.Session(config=sess_config) as sess: # Get handles to input and output tensors tensor_dict = {} if not self.args.evaluate_tensor: - ops = tf.get_default_graph().get_operations() + ops = tf.compat.v1.get_default_graph().get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} for key in self.RFCN_OUTPUTS: tensor_name = key + ':0' if tensor_name in all_tensor_names: - tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( + tensor_dict[key] = tf.compat.v1.get_default_graph().get_tensor_by_name( tensor_name) else: - our_op = tf.get_default_graph().get_operation_by_name(self.args.evaluate_tensor) + our_op = tf.compat.v1.get_default_graph().get_operation_by_name(self.args.evaluate_tensor) tensor_names = our_op.outputs list_ops = [] for i, tensor in enumerate(tensor_names): @@ -210,8 +215,8 @@ def run_inference(self,graph): run_options = None run_metadata = None if self.args.timeline: - run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - run_metadata = tf.RunMetadata() + run_options = tf.compat.v1.RunOptions(trace_level=tf.compat.v1.RunOptions.FULL_TRACE) + run_metadata = tf.compat.v1.RunMetadata() total_duration = 0 for index, image_path in enumerate(self.test_image_paths): @@ -219,7 +224,7 @@ def run_inference(self,graph): # the array based representation of the image will be used later in order to prepare the # result image with boxes and labels on it. 
image_np = self.load_image_into_numpy_array(image) - image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') + image_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name('image_tensor:0') # Run inference start_time = time.time() @@ -235,16 +240,16 @@ def run_inference(self,graph): if (self.args.visualize): if index == 0: - print('Avg. Duration per Step:' + str(total_duration / 1)) + print ('Avg. Duration per Step:' + str(total_duration / 1)) else: if (index % self.STEP_SIZE == 0): - print('Step ' + str(index) + ': ' + str(step_duration) + ' seconds') + print ('Step ' + str(index) + ': ' + str(step_duration) + ' seconds') if index == self.MAX_STEPS - 1: - print('Avg. Duration per Step:' + str(total_duration / self.MAX_STEPS)) + print ('Avg. Duration per Step:' + str(total_duration / self.MAX_STEPS)) - if self.args.steps and index == (self.args.steps - 1): - print('Avg. Duration per Step:' + - str(total_duration / self.args.steps)) + if self.args.number_of_steps and index == (self.args.number_of_steps - 1): + print ('Avg. Duration per Step:' + + str(total_duration / self.args.number_of_steps)) break if self.args.timeline: @@ -254,7 +259,7 @@ def run_inference(self,graph): if self.args.evaluate_tensor: for tensor in output_dict[self.args.evaluate_tensor]: - print(tensor.shape) + print (tensor.shape) return None, None # all outputs are float32 numpy arrays, so convert types as appropriate @@ -264,17 +269,16 @@ def run_inference(self,graph): output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][0] - if self.args.print_accuracy: - print('num_detections:\n' + str(output_dict['num_detections'])) - print('detection_classes:\n' + str(output_dict['detection_classes'])) - print('detection_boxes:\n' + str(output_dict['detection_boxes'])) - print('detection_scores:\n' + str(output_dict['detection_scores'])) + if (self.args.print_accuracy): + print ('num_detections:\n' + str(output_dict['num_detections'])) + print ('detection_classes:\n' + str(output_dict['detection_classes'])) + print ('detection_boxes:\n' + str(output_dict['detection_boxes'])) + print ('detection_scores:\n' + str(output_dict['detection_scores'])) if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict['detection_masks'][0] return output_dict, image_np - if __name__ == "__main__": rr = RFCNRunner(sys.argv) rr.run() diff --git a/models/object_detection/tensorflow/rfcn/inference/int8/__init__.py b/models/object_detection/tensorflow/rfcn/inference/int8/__init__.py index 8cb0c8d8d..c4fdb7d61 100644 --- a/models/object_detection/tensorflow/rfcn/inference/int8/__init__.py +++ b/models/object_detection/tensorflow/rfcn/inference/int8/__init__.py @@ -15,3 +15,5 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +# diff --git a/models/object_detection/tensorflow/rfcn/inference/int8/coco_mAP.sh b/models/object_detection/tensorflow/rfcn/inference/int8/coco_mAP.sh index fabd224c1..94b4d8b05 100755 --- a/models/object_detection/tensorflow/rfcn/inference/int8/coco_mAP.sh +++ b/models/object_detection/tensorflow/rfcn/inference/int8/coco_mAP.sh @@ -17,14 +17,19 @@ # limitations under the License. 
# -FROZEN_GRAPH=$1 -TF_RECORD_FILE=$2 -TF_MODELS_ROOT=$3 -SPLIT=$4 +# + +########## Variables to be defined -SPLIT="rfcn-${SPLIT}" +SPLIT=${SPLIT:-"RFCN_final_graph"} #change to your favorite room +FROZEN_GRAPH=${FROZEN_GRAPH:-"/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb"} +TF_RECORD_FILE=${TF_RECORD_FILE:-"/dataset/coco_val.record"} +if [[ -z ${TF_MODELS_ROOT} ]] || [[ ! -d ${TF_MODELS_ROOT} ]]; then + echo "You must specify the root of the tensorflow/models source tree in the TF_MODELS_ROOT environment variable." + exit 1 +fi -export PYTHONPATH=${PYTHONPATH}:${TF_MODELS_ROOT}/research:${TF_MODELS_ROOT}/research/object_detection +export PYTHONPATH=$PYTHONPATH:${TF_MODELS_ROOT}/research:${TF_MODELS_ROOT}/research/slim:${TF_MODELS_ROOT}/research/object_detection echo "SPLIT=${SPLIT}" echo "FROZEN_GRAPH=${FROZEN_GRAPH}" @@ -33,10 +38,10 @@ echo "PYTHONPATH=${PYTHONPATH}" echo "TF_MODELS_ROOT=$TF_MODELS_ROOT" python -m object_detection.inference.infer_detections \ - --input_tfrecord_paths=${TF_RECORD_FILE} \ + --input_tfrecord_paths=$TF_RECORD_FILE \ --output_tfrecord_path=${SPLIT}_detections.tfrecord \ - --inference_graph=${FROZEN_GRAPH} \ - --discard_image_pixels=True + --inference_graph=$FROZEN_GRAPH \ + --discard_image_pixels mkdir -p ${SPLIT}_eval_metrics diff --git a/models/object_detection/tensorflow/rfcn/inference/int8/run_rfcn_inference.py b/models/object_detection/tensorflow/rfcn/inference/int8/run_rfcn_inference.py old mode 100755 new mode 100644 index a953175cd..e2ff30c20 --- a/models/object_detection/tensorflow/rfcn/inference/int8/run_rfcn_inference.py +++ b/models/object_detection/tensorflow/rfcn/inference/int8/run_rfcn_inference.py @@ -15,11 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. # +# import numpy as np import os +import six.moves.urllib as urllib import sys +import tarfile import tensorflow as tf +import zipfile +import subprocess + +from collections import defaultdict +from io import StringIO import matplotlib matplotlib.use('Agg') from matplotlib import pyplot as plt @@ -29,7 +37,6 @@ from tensorflow.python.client import timeline import importlib - class RFCNRunner: '''Add code here to detect the environment and set necessary variables before launching the model''' args=None @@ -61,7 +68,7 @@ def __init__(self, args): def parse_args(self): parser = argparse.ArgumentParser() mutex_group = parser.add_mutually_exclusive_group() - mutex_group.add_argument('-x', '--steps', help='Run for n number of steps', type=int, default=None) + mutex_group.add_argument('-x', '--number_of_steps', help='Run for n number of steps', type=int, default=None) mutex_group.add_argument('-z', '--visualize', help='Whether to visulize the output image', action='store_true' ) parser.add_argument('-v', '--verbose', help='Print some useful info.', action='store_true' ) parser.add_argument('-t', '--timeline', help='Output file name for TF timeline', type=str, default=None) @@ -85,8 +92,7 @@ def parse_args(self): self.finish_import() def log(self, msg): - if self.args.verbose: - print(msg) + if self.args.verbose: print(msg) def validate_args(self): self.log('Validating Args...') @@ -121,9 +127,8 @@ def finish_import(self): def run(self): self.log("Running performance test") self.read_graph() - self.load_label_map() self.get_image_paths() - self.load_label_map() + #self.load_label_map() # Actual detection. 
output_dict, image_np = self.run_inference(self.detection_graph) self.visualize(output_dict, image_np) @@ -150,15 +155,15 @@ def visualize(self, output_dict, image_np): def read_graph(self): self.detection_graph = tf.Graph() with self.detection_graph.as_default(): - od_graph_def = tf.GraphDef() - with tf.gfile.GFile(self.args.input_graph, 'rb') as fid: + od_graph_def = tf.compat.v1.GraphDef() + with tf.io.gfile.GFile(self.args.input_graph, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') def get_image_paths(self): - if self.args.visualize: + if (self.args.visualize): self.test_image_paths = [os.path.join(self.args.data_location, self.TEST_IMG_FILE)] else: self.test_image_paths = [] @@ -170,7 +175,7 @@ def load_label_map(self): label_map = self.label_map_util.load_labelmap(self.label_map_file) categories = self.label_map_util.convert_label_map_to_categories( label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True) - if self.args.visualize and self.args.evaluate_tensor is None: + if (self.args.visualize and self.args.evaluate_tensor is None): self.category_index = self.label_map_util.create_category_index(categories) def load_image_into_numpy_array(self, image): @@ -184,23 +189,23 @@ def load_image_into_numpy_array(self, image): (im_height, im_width, 3)).astype(np.uint8) def run_inference(self,graph): - sess_config = tf.ConfigProto() + sess_config = tf.compat.v1.ConfigProto() sess_config.intra_op_parallelism_threads = self.args.num_intra_threads sess_config.inter_op_parallelism_threads = self.args.num_inter_threads with self.detection_graph.as_default(): - with tf.Session(config=sess_config) as sess: + with tf.compat.v1.Session(config=sess_config) as sess: # Get handles to input and output tensors tensor_dict = {} if not self.args.evaluate_tensor: - ops = tf.get_default_graph().get_operations() + ops = tf.compat.v1.get_default_graph().get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} for key in self.RFCN_OUTPUTS: tensor_name = key + ':0' if tensor_name in all_tensor_names: - tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( + tensor_dict[key] = tf.compat.v1.get_default_graph().get_tensor_by_name( tensor_name) else: - our_op = tf.get_default_graph().get_operation_by_name(self.args.evaluate_tensor) + our_op = tf.compat.v1.get_default_graph().get_operation_by_name(self.args.evaluate_tensor) tensor_names = our_op.outputs list_ops = [] for i, tensor in enumerate(tensor_names): @@ -210,8 +215,8 @@ def run_inference(self,graph): run_options = None run_metadata = None if self.args.timeline: - run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - run_metadata = tf.RunMetadata() + run_options = tf.compat.v1.RunOptions(trace_level=tf.compat.v1.RunOptions.FULL_TRACE) + run_metadata = tf.compat.v1.RunMetadata() total_duration = 0 for index, image_path in enumerate(self.test_image_paths): @@ -219,7 +224,7 @@ def run_inference(self,graph): # the array based representation of the image will be used later in order to prepare the # result image with boxes and labels on it. image_np = self.load_image_into_numpy_array(image) - image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') + image_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name('image_tensor:0') # Run inference start_time = time.time() @@ -235,16 +240,16 @@ def run_inference(self,graph): if (self.args.visualize): if index == 0: - print('Avg. 
Duration per Step:' + str(total_duration / 1)) + print ('Avg. Duration per Step:' + str(total_duration / 1)) else: if (index % self.STEP_SIZE == 0): - print('Step ' + str(index) + ': ' + str(step_duration) + ' seconds') + print ('Step ' + str(index) + ': ' + str(step_duration) + ' seconds') if index == self.MAX_STEPS - 1: - print('Avg. Duration per Step:' + str(total_duration / self.MAX_STEPS)) + print ('Avg. Duration per Step:' + str(total_duration / self.MAX_STEPS)) - if self.args.steps and index == (self.args.steps - 1): - print('Avg. Duration per Step:' + - str(total_duration / self.args.steps)) + if self.args.number_of_steps and index == (self.args.number_of_steps - 1): + print ('Avg. Duration per Step:' + + str(total_duration / self.args.number_of_steps)) break if self.args.timeline: @@ -254,7 +259,7 @@ def run_inference(self,graph): if self.args.evaluate_tensor: for tensor in output_dict[self.args.evaluate_tensor]: - print(tensor.shape) + print (tensor.shape) return None, None # all outputs are float32 numpy arrays, so convert types as appropriate @@ -264,17 +269,16 @@ def run_inference(self,graph): output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][0] - if self.args.print_accuracy: - print('num_detections:\n' + str(output_dict['num_detections'])) - print('detection_classes:\n' + str(output_dict['detection_classes'])) - print('detection_boxes:\n' + str(output_dict['detection_boxes'])) - print('detection_scores:\n' + str(output_dict['detection_scores'])) + if (self.args.print_accuracy): + print ('num_detections:\n' + str(output_dict['num_detections'])) + print ('detection_classes:\n' + str(output_dict['detection_classes'])) + print ('detection_boxes:\n' + str(output_dict['detection_boxes'])) + print ('detection_scores:\n' + str(output_dict['detection_scores'])) if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict['detection_masks'][0] return output_dict, image_np - if __name__ == "__main__": rr = RFCNRunner(sys.argv) rr.run() diff --git a/models/object_detection/tensorflow/rfcn/inference/tf-2.0.patch b/models/object_detection/tensorflow/rfcn/inference/tf-2.0.patch new file mode 100644 index 000000000..adb5c9566 --- /dev/null +++ b/models/object_detection/tensorflow/rfcn/inference/tf-2.0.patch @@ -0,0 +1,617 @@ +diff --git a/research/object_detection/eval_util.py b/research/object_detection/eval_util.py +index 51c29455..e3970e4f 100644 +--- a/research/object_detection/eval_util.py ++++ b/research/object_detection/eval_util.py +@@ -30,8 +30,6 @@ from object_detection.utils import label_map_util + from object_detection.utils import ops + from object_detection.utils import visualization_utils as vis_utils + +-slim = tf.contrib.slim +- + + def write_metrics(metrics, global_step, summary_dir): + """Write metrics to a summary directory. +@@ -42,10 +40,10 @@ def write_metrics(metrics, global_step, summary_dir): + summary_dir: Directory to write tensorflow summaries to. 
+ """ + logging.info('Writing metrics to tf summary.') +- summary_writer = tf.summary.FileWriterCache.get(summary_dir) ++ summary_writer = tf.compat.v1.summary.FileWriterCache.get(summary_dir) + for key in sorted(metrics): +- summary = tf.Summary(value=[ +- tf.Summary.Value(tag=key, simple_value=metrics[key]), ++ summary = tf.compat.v1.Summary(value=[ ++ tf.compat.v1.Summary.Value(tag=key, simple_value=metrics[key]), + ]) + summary_writer.add_summary(summary, global_step) + logging.info('%s: %f', key, metrics[key]) +@@ -183,14 +181,14 @@ def visualize_detection_results(result_dict, + export_path = os.path.join(export_dir, 'export-{}.png'.format(tag)) + vis_utils.save_image_array_as_png(image, export_path) + +- summary = tf.Summary(value=[ +- tf.Summary.Value( ++ summary = tf.compat.v1.Summary(value=[ ++ tf.compat.v1.Summary.Value( + tag=tag, +- image=tf.Summary.Image( ++ image=tf.compat.v1.Summary.Image( + encoded_image_string=vis_utils.encode_image_array_as_png_str( + image))) + ]) +- summary_writer = tf.summary.FileWriterCache.get(summary_dir) ++ summary_writer = tf.compat.v1.summary.FileWriterCache.get(summary_dir) + summary_writer.add_summary(summary, global_step) + + logging.info('Detection visualizations written to summary with tag %s.', tag) +@@ -258,25 +256,25 @@ def _run_checkpoint_once(tensor_dict, + """ + if save_graph and not save_graph_dir: + raise ValueError('`save_graph_dir` must be defined.') +- sess = tf.Session(master, graph=tf.get_default_graph()) +- sess.run(tf.global_variables_initializer()) +- sess.run(tf.local_variables_initializer()) +- sess.run(tf.tables_initializer()) ++ sess = tf.compat.v1.Session(master, graph=tf.compat.v1.get_default_graph()) ++ sess.run(tf.compat.v1.global_variables_initializer()) ++ sess.run(tf.compat.v1.local_variables_initializer()) ++ sess.run(tf.compat.v1.tables_initializer()) + if restore_fn: + restore_fn(sess) + else: + if not checkpoint_dirs: + raise ValueError('`checkpoint_dirs` must have at least one entry.') + checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0]) +- saver = tf.train.Saver(variables_to_restore) ++ saver = tf.compat.v1.train.Saver(variables_to_restore) + saver.restore(sess, checkpoint_file) + + if save_graph: +- tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt') ++ tf.io.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt') + + counters = {'skipped': 0, 'success': 0} + aggregate_result_losses_dict = collections.defaultdict(list) +- with tf.contrib.slim.queues.QueueRunners(sess): ++ with queues.QueueRunners(sess): + try: + for batch in range(int(num_batches)): + if (batch + 1) % 100 == 0: +@@ -322,7 +320,7 @@ def _run_checkpoint_once(tensor_dict, + if any(key in all_evaluator_metrics for key in metrics): + raise ValueError('Metric names between evaluators must not collide.') + all_evaluator_metrics.update(metrics) +- global_step = tf.train.global_step(sess, tf.train.get_global_step()) ++ global_step = tf.compat.v1.train.global_step(sess, tf.compat.v1.train.get_global_step()) + + for key, value in iter(aggregate_result_losses_dict.items()): + all_evaluator_metrics['Losses/' + key] = np.mean(value) +@@ -506,17 +504,17 @@ def result_dict_for_single_example(image, + + detection_fields = fields.DetectionResultFields + detection_boxes = detections[detection_fields.detection_boxes][0] +- image_shape = tf.shape(image) ++ image_shape = tf.shape(input=image) + detection_scores = detections[detection_fields.detection_scores][0] + + if class_agnostic: + detection_classes = 
tf.ones_like(detection_scores, dtype=tf.int64) + else: + detection_classes = ( +- tf.to_int64(detections[detection_fields.detection_classes][0]) + ++ tf.cast(detections[detection_fields.detection_classes][0], dtype=tf.int64) + + label_id_offset) + +- num_detections = tf.to_int32(detections[detection_fields.num_detections][0]) ++ num_detections = tf.cast(detections[detection_fields.num_detections][0], dtype=tf.int32) + detection_boxes = tf.slice( + detection_boxes, begin=[0, 0], size=[num_detections, -1]) + detection_classes = tf.slice( +@@ -558,11 +556,10 @@ def result_dict_for_single_example(image, + if input_data_fields.groundtruth_instance_masks in groundtruth: + masks = groundtruth[input_data_fields.groundtruth_instance_masks] + masks = tf.expand_dims(masks, 3) +- masks = tf.image.resize_images( ++ masks = tf.image.resize( + masks, + image_shape[1:3], +- method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, +- align_corners=True) ++ method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + masks = tf.squeeze(masks, 3) + groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast( + masks, tf.uint8) +diff --git a/research/object_detection/inference/detection_inference.py b/research/object_detection/inference/detection_inference.py +index dc66686f..cbdf106b 100644 +--- a/research/object_detection/inference/detection_inference.py ++++ b/research/object_detection/inference/detection_inference.py +@@ -31,16 +31,16 @@ def build_input(tfrecord_paths): + image_tensor: The decoded image of the example. Uint8 tensor, + shape=[1, None, None,3] + """ +- filename_queue = tf.train.string_input_producer( ++ filename_queue = tf.compat.v1.train.string_input_producer( + tfrecord_paths, shuffle=False, num_epochs=1) + +- tf_record_reader = tf.TFRecordReader() ++ tf_record_reader = tf.compat.v1.TFRecordReader() + _, serialized_example_tensor = tf_record_reader.read(filename_queue) +- features = tf.parse_single_example( +- serialized_example_tensor, ++ features = tf.io.parse_single_example( ++ serialized=serialized_example_tensor, + features={ + standard_fields.TfExampleFields.image_encoded: +- tf.FixedLenFeature([], tf.string), ++ tf.io.FixedLenFeature([], tf.string), + }) + encoded_image = features[standard_fields.TfExampleFields.image_encoded] + image_tensor = tf.image.decode_image(encoded_image, channels=3) +@@ -65,15 +65,15 @@ def build_inference_graph(image_tensor, inference_graph_path): + detected_labels_tensor: Detected labels. 
Int64 tensor, + shape=[num_detections] + """ +- with tf.gfile.Open(inference_graph_path, 'r') as graph_def_file: ++ with tf.io.gfile.GFile(inference_graph_path, 'rb') as graph_def_file: + graph_content = graph_def_file.read() +- graph_def = tf.GraphDef() ++ graph_def = tf.compat.v1.GraphDef() + graph_def.MergeFromString(graph_content) + + tf.import_graph_def( + graph_def, name='', input_map={'image_tensor': image_tensor}) + +- g = tf.get_default_graph() ++ g = tf.compat.v1.get_default_graph() + + num_detections_tensor = tf.squeeze( + g.get_tensor_by_name('num_detections:0'), 0) +@@ -114,7 +114,7 @@ def infer_detections_and_add_to_example( + """ + tf_example = tf.train.Example() + (serialized_example, detected_boxes, detected_scores, +- detected_classes) = tf.get_default_session().run([ ++ detected_classes) = tf.compat.v1.get_default_session().run([ + serialized_example_tensor, detected_boxes_tensor, detected_scores_tensor, + detected_labels_tensor + ]) +diff --git a/research/object_detection/inference/infer_detections.py b/research/object_detection/inference/infer_detections.py +index a251009e..23a61f20 100644 +--- a/research/object_detection/inference/infer_detections.py ++++ b/research/object_detection/inference/infer_detections.py +@@ -37,24 +37,27 @@ metrics). + import itertools + import tensorflow as tf + from object_detection.inference import detection_inference ++import argparse + +-tf.flags.DEFINE_string('input_tfrecord_paths', None, +- 'A comma separated list of paths to input TFRecords.') +-tf.flags.DEFINE_string('output_tfrecord_path', None, +- 'Path to the output TFRecord.') +-tf.flags.DEFINE_string('inference_graph', None, +- 'Path to the inference graph with embedded weights.') +-tf.flags.DEFINE_boolean('discard_image_pixels', False, +- 'Discards the images in the output TFExamples. This' +- ' significantly reduces the output size and is useful' +- ' if the subsequent tools don\'t need access to the' +- ' images (e.g. when computing evaluation measures).') +- +-FLAGS = tf.flags.FLAGS +- ++parser = argparse.ArgumentParser() ++parser.add_argument('-i', '--input_tfrecord_paths', ++ help='A comma separated list of paths to input TFRecords.', ++ default=None) ++parser.add_argument('-o', '--output_tfrecord_path', ++ help='Path to the output TFRecord.', default=None) ++parser.add_argument('-g', '--inference_graph', ++ help='Path to the inference graph with embedded weights.', ++ default=None) ++parser.add_argument('-d', '--discard_image_pixels', ++ help='Discards the images in the output TFExamples. This' ++ ' significantly reduces the output size and is useful' ++ ' if the subsequent tools don\'t need access to the' ++ ' images (e.g. 
when computing evaluation measures).', ++ action='store_true', default=False) ++FLAGS = parser.parse_args() + + def main(_): +- tf.logging.set_verbosity(tf.logging.INFO) ++ tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) + + required_flags = ['input_tfrecord_paths', 'output_tfrecord_path', + 'inference_graph'] +@@ -62,26 +65,26 @@ def main(_): + if not getattr(FLAGS, flag_name): + raise ValueError('Flag --{} is required'.format(flag_name)) + +- with tf.Session() as sess: ++ with tf.compat.v1.Session() as sess: + input_tfrecord_paths = [ + v for v in FLAGS.input_tfrecord_paths.split(',') if v] +- tf.logging.info('Reading input from %d files', len(input_tfrecord_paths)) ++ tf.compat.v1.logging.info('Reading input from %d files', len(input_tfrecord_paths)) + serialized_example_tensor, image_tensor = detection_inference.build_input( + input_tfrecord_paths) +- tf.logging.info('Reading graph and building model...') ++ tf.compat.v1.logging.info('Reading graph and building model...') + (detected_boxes_tensor, detected_scores_tensor, + detected_labels_tensor) = detection_inference.build_inference_graph( + image_tensor, FLAGS.inference_graph) + +- tf.logging.info('Running inference and writing output to {}'.format( ++ tf.compat.v1.logging.info('Running inference and writing output to {}'.format( + FLAGS.output_tfrecord_path)) +- sess.run(tf.local_variables_initializer()) +- tf.train.start_queue_runners() +- with tf.python_io.TFRecordWriter( ++ sess.run(tf.compat.v1.local_variables_initializer()) ++ tf.compat.v1.train.start_queue_runners() ++ with tf.io.TFRecordWriter( + FLAGS.output_tfrecord_path) as tf_record_writer: + try: + for counter in itertools.count(): +- tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 10, ++ tf.compat.v1.logging.log_every_n(tf.compat.v1.logging.INFO, 'Processed %d images...', 10, + counter) + tf_example = detection_inference.infer_detections_and_add_to_example( + serialized_example_tensor, detected_boxes_tensor, +@@ -89,8 +92,8 @@ def main(_): + FLAGS.discard_image_pixels) + tf_record_writer.write(tf_example.SerializeToString()) + except tf.errors.OutOfRangeError: +- tf.logging.info('Finished processing records') ++ tf.compat.v1.logging.info('Finished processing records') + + + if __name__ == '__main__': +- tf.app.run() ++ tf.compat.v1.app.run() +diff --git a/research/object_detection/metrics/coco_tools.py b/research/object_detection/metrics/coco_tools.py +index 71b747bc..145e93ba 100644 +--- a/research/object_detection/metrics/coco_tools.py ++++ b/research/object_detection/metrics/coco_tools.py +@@ -106,7 +106,7 @@ class COCOWrapper(coco.COCO): + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + +- tf.logging.info('Loading and preparing annotation results...') ++ tf.compat.v1.logging.info('Loading and preparing annotation results...') + tic = time.time() + + if not isinstance(annotations, list): +@@ -128,7 +128,7 @@ class COCOWrapper(coco.COCO): + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 +- tf.logging.info('DONE (t=%0.2fs)', (time.time() - tic)) ++ tf.compat.v1.logging.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() +@@ -479,7 +479,7 @@ def ExportGroundtruthToCOCO(image_ids, + 'categories': categories + } + if output_path: +- with tf.gfile.GFile(output_path, 'w') as fid: ++ with tf.io.gfile.GFile(output_path, 'w') as fid: + json_utils.Dump(groundtruth_dict, fid, float_digits=4, 
indent=2) + return groundtruth_dict + +@@ -666,7 +666,7 @@ def ExportDetectionsToCOCO(image_ids, + scores, + classes)) + if output_path: +- with tf.gfile.GFile(output_path, 'w') as fid: ++ with tf.io.gfile.GFile(output_path, 'w') as fid: + json_utils.Dump(detections_export_list, fid, float_digits=4, indent=2) + return detections_export_list + +@@ -746,7 +746,7 @@ def ExportSegmentsToCOCO(image_ids, + image_id, category_id_set, np.squeeze(masks, axis=3), scores, classes)) + + if output_path: +- with tf.gfile.GFile(output_path, 'w') as fid: ++ with tf.io.gfile.GFile(output_path, 'w') as fid: + json_utils.Dump(segment_export_list, fid, float_digits=4, indent=2) + return segment_export_list + +@@ -845,6 +845,6 @@ def ExportKeypointsToCOCO(image_ids, + }) + + if output_path: +- with tf.gfile.GFile(output_path, 'w') as fid: ++ with tf.io.gfile.GFile(output_path, 'w') as fid: + json_utils.Dump(keypoints_export_list, fid, float_digits=4, indent=2) + return keypoints_export_list +diff --git a/research/object_detection/metrics/offline_eval_map_corloc.py b/research/object_detection/metrics/offline_eval_map_corloc.py +index ff2efbaf..dbf6a82d 100644 +--- a/research/object_detection/metrics/offline_eval_map_corloc.py ++++ b/research/object_detection/metrics/offline_eval_map_corloc.py +@@ -35,6 +35,7 @@ import csv + import os + import re + import tensorflow as tf ++import argparse + + from object_detection.core import standard_fields + from object_detection.legacy import evaluator +@@ -42,16 +43,16 @@ from object_detection.metrics import tf_example_parser + from object_detection.utils import config_util + from object_detection.utils import label_map_util + +-flags = tf.app.flags +-tf.logging.set_verbosity(tf.logging.INFO) ++tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) + +-flags.DEFINE_string('eval_dir', None, 'Directory to write eval summaries to.') +-flags.DEFINE_string('eval_config_path', None, +- 'Path to an eval_pb2.EvalConfig config file.') +-flags.DEFINE_string('input_config_path', None, +- 'Path to an eval_pb2.InputConfig config file.') +- +-FLAGS = flags.FLAGS ++parser = argparse.ArgumentParser() ++parser.add_argument('-d', '--eval_dir', ++ help='Directory to write eval summaries to.', default=None) ++parser.add_argument('-e', '--eval_config_path', ++ help='Path to an eval_pb2.EvalConfig config file.', default=None) ++parser.add_argument('-p', '--input_config_path', ++ help='Path to an eval_pb2.InputConfig config file.', default=None) ++FLAGS = parser.parse_args() + + + def _generate_sharded_filenames(filename): +@@ -104,13 +105,13 @@ def read_data_and_evaluate(input_config, eval_config): + skipped_images = 0 + processed_images = 0 + for input_path in _generate_filenames(input_paths): +- tf.logging.info('Processing file: {0}'.format(input_path)) ++ tf.compat.v1.logging.info('Processing file: {0}'.format(input_path)) + +- record_iterator = tf.python_io.tf_record_iterator(path=input_path) ++ record_iterator = tf.compat.v1.python_io.tf_record_iterator(path=input_path) + data_parser = tf_example_parser.TfExampleDetectionAndGTParser() + + for string_record in record_iterator: +- tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000, ++ tf.compat.v1.logging.log_every_n(tf.compat.v1.logging.INFO, 'Processed %d images...', 1000, + processed_images) + processed_images += 1 + +@@ -127,7 +128,7 @@ def read_data_and_evaluate(input_config, eval_config): + decoded_dict) + else: + skipped_images += 1 +- tf.logging.info('Skipped images: {0}'.format(skipped_images)) ++ 
tf.compat.v1.logging.info('Skipped images: {0}'.format(skipped_images)) + + return object_detection_evaluator.evaluate() + +@@ -141,7 +142,7 @@ def write_metrics(metrics, output_dir): + metrics: A dictionary containing metric names and values. + output_dir: Directory to write metrics to. + """ +- tf.logging.info('Writing metrics.') ++ tf.compat.v1.logging.info('Writing metrics.') + + with open(os.path.join(output_dir, 'metrics.csv'), 'w') as csvfile: + metrics_writer = csv.writer(csvfile, delimiter=',') +@@ -170,4 +171,4 @@ def main(argv): + + + if __name__ == '__main__': +- tf.app.run(main) ++ tf.compat.v1.app.run(main) +diff --git a/research/object_detection/metrics/tf_example_parser.py b/research/object_detection/metrics/tf_example_parser.py +index 9a5f130f..fa361bf5 100644 +--- a/research/object_detection/metrics/tf_example_parser.py ++++ b/research/object_detection/metrics/tf_example_parser.py +@@ -44,7 +44,7 @@ class StringParser(data_parser.DataToNumpyParser): + self.field_name = field_name + + def parse(self, tf_example): +- return "".join(tf_example.features.feature[self.field_name] ++ return b"".join(tf_example.features.feature[self.field_name] + .bytes_list.value) if tf_example.features.feature[ + self.field_name].HasField("bytes_list") else None + +diff --git a/research/object_detection/queues.py b/research/object_detection/queues.py +new file mode 100644 +index 00000000..1bc8d2c8 +--- /dev/null ++++ b/research/object_detection/queues.py +@@ -0,0 +1,74 @@ ++# Copyright 2015 The TensorFlow Authors. All Rights Reserved. ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++# ============================================================================== ++"""Contains a helper context for running queue runners. ++ ++@@NestedQueueRunnerError ++@@QueueRunners ++""" ++ ++from __future__ import absolute_import ++from __future__ import division ++from __future__ import print_function ++ ++from contextlib import contextmanager ++import threading ++ ++from tensorflow.python.framework import ops ++from tensorflow.python.training import coordinator ++ ++__all__ = [ ++ 'NestedQueueRunnerError', ++ 'QueueRunners', ++] ++ ++_queue_runner_lock = threading.Lock() ++ ++ ++class NestedQueueRunnerError(Exception): ++ pass ++ ++ ++@contextmanager ++def QueueRunners(session): ++ """Creates a context manager that handles starting and stopping queue runners. ++ ++ Args: ++ session: the currently running session. ++ ++ Yields: ++ a context in which queues are run. ++ ++ Raises: ++ NestedQueueRunnerError: if a QueueRunners context is nested within another. 
++ """ ++ if not _queue_runner_lock.acquire(False): ++ raise NestedQueueRunnerError('QueueRunners cannot be nested') ++ ++ coord = coordinator.Coordinator() ++ threads = [] ++ for qr in ops.get_collection(ops.GraphKeys.QUEUE_RUNNERS): ++ threads.extend( ++ qr.create_threads( ++ session, coord=coord, daemon=True, start=True)) ++ try: ++ yield ++ finally: ++ coord.request_stop() ++ try: ++ coord.join(threads, stop_grace_period_secs=120) ++ except RuntimeError: ++ session.close() ++ ++ _queue_runner_lock.release() +diff --git a/research/object_detection/utils/config_util.py b/research/object_detection/utils/config_util.py +index e7835223..4e8125ab 100644 +--- a/research/object_detection/utils/config_util.py ++++ b/research/object_detection/utils/config_util.py +@@ -89,7 +89,7 @@ def get_configs_from_pipeline_file(pipeline_config_path): + corresponding config objects. + """ + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() +- with tf.gfile.GFile(pipeline_config_path, "r") as f: ++ with tf.io.gfile.GFile(pipeline_config_path, "r") as f: + proto_str = f.read() + text_format.Merge(proto_str, pipeline_config) + return create_configs_from_pipeline_proto(pipeline_config) +@@ -128,7 +128,7 @@ def get_graph_rewriter_config_from_file(graph_rewriter_config_file): + graph_rewriter_pb2.GraphRewriter proto + """ + graph_rewriter_config = graph_rewriter_pb2.GraphRewriter() +- with tf.gfile.GFile(graph_rewriter_config_file, "r") as f: ++ with tf.io.gfile.GFile(graph_rewriter_config_file, "r") as f: + text_format.Merge(f.read(), graph_rewriter_config) + return graph_rewriter_config + +@@ -168,8 +168,8 @@ def save_pipeline_config(pipeline_config, directory): + file_io.recursive_create_dir(directory) + pipeline_config_path = os.path.join(directory, "pipeline.config") + config_text = text_format.MessageToString(pipeline_config) +- with tf.gfile.Open(pipeline_config_path, "wb") as f: +- tf.logging.info("Writing pipeline config file to %s", ++ with tf.io.gfile.GFile(pipeline_config_path, "wb") as f: ++ tf.compat.v1.logging.info("Writing pipeline config file to %s", + pipeline_config_path) + f.write(config_text) + +@@ -198,31 +198,31 @@ def get_configs_from_multiple_files(model_config_path="", + configs = {} + if model_config_path: + model_config = model_pb2.DetectionModel() +- with tf.gfile.GFile(model_config_path, "r") as f: ++ with tf.io.gfile.GFile(model_config_path, "r") as f: + text_format.Merge(f.read(), model_config) + configs["model"] = model_config + + if train_config_path: + train_config = train_pb2.TrainConfig() +- with tf.gfile.GFile(train_config_path, "r") as f: ++ with tf.io.gfile.GFile(train_config_path, "r") as f: + text_format.Merge(f.read(), train_config) + configs["train_config"] = train_config + + if train_input_config_path: + train_input_config = input_reader_pb2.InputReader() +- with tf.gfile.GFile(train_input_config_path, "r") as f: ++ with tf.io.gfile.GFile(train_input_config_path, "r") as f: + text_format.Merge(f.read(), train_input_config) + configs["train_input_config"] = train_input_config + + if eval_config_path: + eval_config = eval_pb2.EvalConfig() +- with tf.gfile.GFile(eval_config_path, "r") as f: ++ with tf.io.gfile.GFile(eval_config_path, "r") as f: + text_format.Merge(f.read(), eval_config) + configs["eval_config"] = eval_config + + if eval_input_config_path: + eval_input_config = input_reader_pb2.InputReader() +- with tf.gfile.GFile(eval_input_config_path, "r") as f: ++ with tf.io.gfile.GFile(eval_input_config_path, "r") as f: + text_format.Merge(f.read(), 
eval_input_config) + configs["eval_input_config"] = eval_input_config + +@@ -325,7 +325,7 @@ def merge_external_params_with_configs(configs, hparams=None, **kwargs): + if hparams: + kwargs.update(hparams.values()) + for key, value in kwargs.items(): +- tf.logging.info("Maybe overwriting %s: %s", key, value) ++ tf.compat.v1.logging.info("Maybe overwriting %s: %s", key, value) + # pylint: disable=g-explicit-bool-comparison + if value == "" or value is None: + continue +@@ -366,7 +366,7 @@ def merge_external_params_with_configs(configs, hparams=None, **kwargs): + elif _is_generic_key(key): + _update_generic(configs, key, value) + else: +- tf.logging.info("Ignoring config override key: %s", key) ++ tf.compat.v1.logging.info("Ignoring config override key: %s", key) + return configs + + +diff --git a/research/object_detection/utils/label_map_util.py b/research/object_detection/utils/label_map_util.py +index aef46c1d..8ccad0cb 100644 +--- a/research/object_detection/utils/label_map_util.py ++++ b/research/object_detection/utils/label_map_util.py +@@ -128,7 +128,7 @@ def load_labelmap(path): + Returns: + a StringIntLabelMapProto + """ +- with tf.gfile.GFile(path, 'r') as fid: ++ with tf.io.gfile.GFile(path, 'r') as fid: + label_map_string = fid.read() + label_map = string_int_label_map_pb2.StringIntLabelMap() + try: +diff --git a/research/object_detection/utils/object_detection_evaluation.py b/research/object_detection/utils/object_detection_evaluation.py +index 8a38d8c2..99af2324 100644 +--- a/research/object_detection/utils/object_detection_evaluation.py ++++ b/research/object_detection/utils/object_detection_evaluation.py +@@ -839,9 +839,9 @@ class ObjectDetectionEvaluation(object): + if self.use_weighted_mean_ap: + all_scores = np.append(all_scores, scores) + all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels) +- print 'Scores and tpfp per class label: {}'.format(class_index) +- print tp_fp_labels +- print scores ++ print('Scores and tpfp per class label: {}'.format(class_index)) ++ print(tp_fp_labels) ++ print(scores) + precision, recall = metrics.compute_precision_recall( + scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) + self.precisions_per_class.append(precision) diff --git a/models/object_detection/tensorflow/rfcn/inference/tf_models.patch b/models/object_detection/tensorflow/rfcn/inference/tf_models.patch deleted file mode 100644 index 64c44592b..000000000 --- a/models/object_detection/tensorflow/rfcn/inference/tf_models.patch +++ /dev/null @@ -1,68 +0,0 @@ -diff --git a/research/object_detection/inference/detection_inference.py b/research/object_detection/inference/detection_inference.py -old mode 100644 -new mode 100755 -index dc66686f..4d459a55 ---- a/research/object_detection/inference/detection_inference.py -+++ b/research/object_detection/inference/detection_inference.py -@@ -15,9 +15,12 @@ - """Utility functions for detection inference.""" - from __future__ import division - -+import os -+ - import tensorflow as tf - - from object_detection.core import standard_fields -+from object_detection.utils import label_map_util - - - def build_input(tfrecord_paths): -@@ -65,7 +68,7 @@ def build_inference_graph(image_tensor, inference_graph_path): - detected_labels_tensor: Detected labels. 
Int64 tensor,
-          shape=[num_detections]
-    """
--  with tf.gfile.Open(inference_graph_path, 'r') as graph_def_file:
-+  with tf.gfile.Open(inference_graph_path, 'rb') as graph_def_file:
-     graph_content = graph_def_file.read()
-     graph_def = tf.GraphDef()
-     graph_def.MergeFromString(graph_content)
-@@ -134,6 +137,13 @@ def infer_detections_and_add_to_example(
-         detection_bbox_xmax].float_list.value[:] = detected_boxes[3]
-     feature[standard_fields.TfExampleFields.
-             detection_class_label].int64_list.value[:] = detected_classes
-+    label_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/mscoco_label_map.pbtxt')
-+    if feature[standard_fields.TfExampleFields.object_class_text].HasField("bytes_list"):
-+        cl = feature[standard_fields.TfExampleFields.object_class_text].bytes_list.value
-+        cl = [x if type(x) == 'str' else x.decode('utf-8') for x in cl]
-+        lm = label_map_util.get_label_map_dict(label_file, use_display_name = True)
-+        label_id = [lm[x] for x in cl]
-+        feature[standard_fields.TfExampleFields.object_class_label].int64_list.value[:] = label_id
- 
-   if discard_image_pixels:
-     del feature[standard_fields.TfExampleFields.image_encoded]
-diff --git a/research/object_detection/metrics/tf_example_parser.py b/research/object_detection/metrics/tf_example_parser.py
-index 9a5f130f..fa361bf5 100644
---- a/research/object_detection/metrics/tf_example_parser.py
-+++ b/research/object_detection/metrics/tf_example_parser.py
-@@ -44,7 +44,7 @@ class StringParser(data_parser.DataToNumpyParser):
-     self.field_name = field_name
- 
-   def parse(self, tf_example):
--    return "".join(tf_example.features.feature[self.field_name]
-+    return b"".join(tf_example.features.feature[self.field_name]
-                    .bytes_list.value) if tf_example.features.feature[
-                        self.field_name].HasField("bytes_list") else None
- 
-diff --git a/research/object_detection/utils/object_detection_evaluation.py b/research/object_detection/utils/object_detection_evaluation.py
-index 5826c581..450090f0 100644
---- a/research/object_detection/utils/object_detection_evaluation.py
-+++ b/research/object_detection/utils/object_detection_evaluation.py
-@@ -304,7 +304,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
-       if idx + self._label_id_offset in category_index:
-         category_name = category_index[idx + self._label_id_offset]['name']
-         try:
--          category_name = unicode(category_name, 'utf-8')
-+          category_name = str(category_name, 'utf-8')
-         except TypeError:
-           pass
-         category_name = unicodedata.normalize(
diff --git a/models/object_detection/tensorflow/ssd-mobilenet/inference/coco_detection_evaluator.py b/models/object_detection/tensorflow/ssd-mobilenet/inference/coco_detection_evaluator.py
new file mode 100644
index 000000000..d2582ea5a
--- /dev/null
+++ b/models/object_detection/tensorflow/ssd-mobilenet/inference/coco_detection_evaluator.py
@@ -0,0 +1,103 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+
+from inference import coco_tools
+from inference import coco_label_map
+
+class CocoDetectionEvaluator:
+    """Class to evaluate COCO detection metrics."""
+
+    def __init__(self):
+        self._image_ids = {}
+        self._groundtruth_list = []
+        self._detection_boxes_list = []
+        self._annotation_id = 1
+        self._category_id_set = set([cat for cat in coco_label_map.category_map])
+
+    def add_single_ground_truth_image_info(self,
+                                           image_id,
+                                           groundtruth_dict):
+        if image_id in self._image_ids:
+            return
+
+        self._groundtruth_list.extend(
+            coco_tools.ExportSingleImageGroundtruthToCoco(
+                image_id=image_id,
+                next_annotation_id=self._annotation_id,
+                category_id_set=self._category_id_set,
+                groundtruth_boxes=groundtruth_dict['boxes'],
+                groundtruth_classes=groundtruth_dict['classes']))
+        self._annotation_id += groundtruth_dict['boxes'].shape[0]
+
+        self._image_ids[image_id] = False
+        is_debug = False
+        if image_id == '000000059386.jpg':
+            is_debug = True
+        if is_debug:
+            is_debug = False
+            print(groundtruth_dict['boxes'])
+            print(groundtruth_dict['classes'])
+            print(image_id)
+
+    def add_single_detected_image_info(self,
+                                       image_id,
+                                       detections_dict):
+        assert (image_id in self._image_ids)
+
+        if self._image_ids[image_id]:
+            return
+
+        self._detection_boxes_list.extend(
+            coco_tools.ExportSingleImageDetectionBoxesToCoco(
+                image_id=image_id,
+                category_id_set=self._category_id_set,
+                detection_boxes=detections_dict['boxes'],
+                detection_scores=detections_dict['scores'],
+                detection_classes=detections_dict['classes']))
+
+        self._image_ids[image_id] = True
+        is_debug = False
+        if image_id == '000000059386.jpg':
+            is_debug = True
+        if is_debug:
+            is_debug = False
+            print(detections_dict['boxes'])
+            print(detections_dict['classes'])
+            print(detections_dict['scores'])
+            print(image_id)
+
+    def evaluate(self):
+        groundtruth_dict = {
+            'annotations': self._groundtruth_list,
+            'images': [{'id': image_id} for image_id in self._image_ids],
+            'categories': [{'id': k, 'name': v} for k, v in coco_label_map.category_map.items()]
+        }
+        coco_wrapped_groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
+        coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(
+            self._detection_boxes_list)
+        box_evaluator = coco_tools.COCOEvalWrapper(
+            coco_wrapped_groundtruth, coco_wrapped_detections, agnostic_mode=False)
+        box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics(
+            include_metrics_per_category=False,
+            all_metrics_per_category=False)
+        box_metrics.update(box_per_category_ap)
+        box_metrics = {'DetectionBoxes_' + key: value
+                       for key, value in iter(box_metrics.items())}
+        return box_metrics
+
diff --git a/models/object_detection/tensorflow/ssd-mobilenet/inference/coco_label_map.py b/models/object_detection/tensorflow/ssd-mobilenet/inference/coco_label_map.py
new file mode 100644
index 000000000..6127c2aab
--- /dev/null
+++ b/models/object_detection/tensorflow/ssd-mobilenet/inference/coco_label_map.py
@@ -0,0 +1,103 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+
+category_map = {
+    1: 'person',
+    2: 'bicycle',
+    3: 'car',
+    4: 'motorcycle',
+    5: 'airplane',
+    6: 'bus',
+    7: 'train',
+    8: 'truck',
+    9: 'boat',
+    10: 'traffic light',
+    11: 'fire hydrant',
+    13: 'stop sign',
+    14: 'parking meter',
+    15: 'bench',
+    16: 'bird',
+    17: 'cat',
+    18: 'dog',
+    19: 'horse',
+    20: 'sheep',
+    21: 'cow',
+    22: 'elephant',
+    23: 'bear',
+    24: 'zebra',
+    25: 'giraffe',
+    27: 'backpack',
+    28: 'umbrella',
+    31: 'handbag',
+    32: 'tie',
+    33: 'suitcase',
+    34: 'frisbee',
+    35: 'skis',
+    36: 'snowboard',
+    37: 'sports ball',
+    38: 'kite',
+    39: 'baseball bat',
+    40: 'baseball glove',
+    41: 'skateboard',
+    42: 'surfboard',
+    43: 'tennis racket',
+    44: 'bottle',
+    46: 'wine glass',
+    47: 'cup',
+    48: 'fork',
+    49: 'knife',
+    50: 'spoon',
+    51: 'bowl',
+    52: 'banana',
+    53: 'apple',
+    54: 'sandwich',
+    55: 'orange',
+    56: 'broccoli',
+    57: 'carrot',
+    58: 'hot dog',
+    59: 'pizza',
+    60: 'donut',
+    61: 'cake',
+    62: 'chair',
+    63: 'couch',
+    64: 'potted plant',
+    65: 'bed',
+    67: 'dining table',
+    70: 'toilet',
+    72: 'tv',
+    73: 'laptop',
+    74: 'mouse',
+    75: 'remote',
+    76: 'keyboard',
+    77: 'cell phone',
+    78: 'microwave',
+    79: 'oven',
+    80: 'toaster',
+    81: 'sink',
+    82: 'refrigerator',
+    84: 'book',
+    85: 'clock',
+    86: 'vase',
+    87: 'scissors',
+    88: 'teddy bear',
+    89: 'hair drier',
+    90: 'toothbrush'
+}
+
+
diff --git a/models/object_detection/tensorflow/ssd-mobilenet/inference/coco_tools.py b/models/object_detection/tensorflow/ssd-mobilenet/inference/coco_tools.py
new file mode 100644
index 000000000..58e9483b7
--- /dev/null
+++ b/models/object_detection/tensorflow/ssd-mobilenet/inference/coco_tools.py
@@ -0,0 +1,530 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Wrappers for third party pycocotools to be used within object_detection.
+
+Note that nothing in this file is tensorflow related and thus cannot
+be called directly as a slim metric, for example.
+ +TODO(jonathanhuang): wrap as a slim metric in metrics.py + + +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + +""" +from collections import OrderedDict +import copy +import time +import numpy as np + +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask + +import tensorflow as tf + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class.""" + + def __init__(self, dataset, detection_type='bbox'): + """COCOWrapper constructor. + + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations): + """Load annotations dictionary into COCO datastructure. + + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + + Returns: + a coco.COCO datastructure holding object detection annotations results + + Raises: + ValueError: if annotations is not a list + ValueError: if annotations do not correspond to the images contained + in self. 
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + tf.compat.v1.logging.info('Loading and preparing annotation results...') + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + tf.compat.v1.logging.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, groundtruth=None, detections=None, agnostic_mode=False, + iou_type='bbox'): + """COCOEvalWrapper constructor. + + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + """ + cocoeval.COCOeval.__init__(self, groundtruth, detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + + def GetCategory(self, category_id): + """Fetches dictionary holding category information given category id. + + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self): + """Returns true if COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self): + """Returns list of valid category ids.""" + return self.params.catIds + + def ComputeMetrics(self, + include_metrics_per_category=False, + all_metrics_per_category=False): + """Computes detection metrics. + + Args: + include_metrics_per_category: If True, will include metrics per category. + all_metrics_per_category: If true, include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. + + Returns: + 1. 
summary_metrics: a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments + 'Precision/mAP@.50IOU': mean average precision at 50% IOU + 'Precision/mAP@.75IOU': mean average precision at 75% IOU + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels) + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels) + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels) + 'Recall/AR@1': average recall with 1 detection + 'Recall/AR@10': average recall with 10 detections + 'Recall/AR@100': average recall with 100 detections + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections + 2. per_category_ap: a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. + If evaluating class-agnostic mode, per_category_ap is an empty + dictionary. + + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), + ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + 
per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Converts a box in [ymin, xmin, ymax, xmax] format to COCO format. + + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + + Args: + box: a [ymin, xmin, ymax, xmax] numpy array + + Returns: + a list of floats representing [xmin, ymin, width, height] + """ + return [float(box[1]), float(box[0]), float(box[3] - box[1]), + float(box[2] - box[0])] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id, + next_annotation_id, + category_id_set, + groundtruth_boxes, + groundtruth_classes, + groundtruth_masks=None, + groundtruth_is_crowd=None): + """Export groundtruth of a single image to COCO format. + + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + + Returns: + a list of groundtruth annotations for a single image in the COCO format. 
+ + Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' + 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' + 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' + 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError('Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % ( + groundtruth_classes.shape[0], + groundtruth_boxes.shape[0], image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress(groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id, + category_id_set, + detection_boxes, + detection_scores, + detection_classes): + """Export detections of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + + Returns: + a list of detection annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError('All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % ( + detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0] + )) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': image_id, + 'category_id': int(detection_classes[i]), + 'bbox': list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id, + category_id_set, + detection_masks, + detection_scores, + detection_classes): + """Export detection masks of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + + Returns: + a list of detection mask annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError('All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % ( + detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0] + )) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': image_id, + 'category_id': int(detection_classes[i]), + 'segmentation': _RleCompress(detection_masks[i]), + 'score': float(detection_scores[i]) + }) + return detections_list diff --git a/models/object_detection/tensorflow/ssd-mobilenet/inference/detection_inference.patch b/models/object_detection/tensorflow/ssd-mobilenet/inference/detection_inference.patch deleted file mode 100644 index 0c3e8b11d..000000000 --- a/models/object_detection/tensorflow/ssd-mobilenet/inference/detection_inference.patch +++ /dev/null @@ -1,33 +0,0 @@ -diff --git a/research/object_detection/inference/detection_inference.py b/research/object_detection/inference/detection_inference.py -old mode 100644 -new mode 100755 -index dc66686f..84e5973c ---- a/research/object_detection/inference/detection_inference.py -+++ b/research/object_detection/inference/detection_inference.py -@@ -15,9 +15,12 @@ - """Utility functions for detection inference.""" - from __future__ import division - -+import os -+ - import tensorflow as tf - - from object_detection.core import standard_fields -+from object_detection.utils import label_map_util - - - def build_input(tfrecord_paths): -@@ -134,6 +137,13 @@ def infer_detections_and_add_to_example( - detection_bbox_xmax].float_list.value[:] = detected_boxes[3] - feature[standard_fields.TfExampleFields. - detection_class_label].int64_list.value[:] = detected_classes -+ label_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/mscoco_label_map.pbtxt') -+ if feature[standard_fields.TfExampleFields.object_class_text].HasField("bytes_list"): -+ cl = feature[standard_fields.TfExampleFields.object_class_text].bytes_list.value -+ cl = [x if type(x) == 'str' else x.decode('utf-8') for x in cl] -+ lm = label_map_util.get_label_map_dict(label_file, use_display_name = True) -+ label_id = [lm[x] for x in cl] -+ feature[standard_fields.TfExampleFields.object_class_label].int64_list.value[:] = label_id - - if discard_image_pixels: - del feature[standard_fields.TfExampleFields.image_encoded] diff --git a/benchmarks/content_creation/tensorflow/draw/inference/__init__.py b/models/object_detection/tensorflow/ssd-mobilenet/inference/fp32/__init__.py similarity index 100% rename from benchmarks/content_creation/tensorflow/draw/inference/__init__.py rename to models/object_detection/tensorflow/ssd-mobilenet/inference/fp32/__init__.py diff --git a/models/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py b/models/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py new file mode 100644 index 000000000..b3e1c0867 --- /dev/null +++ b/models/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py @@ -0,0 +1,269 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +from __future__ import division + +import tensorflow as tf +from tensorflow.python.data.experimental import parallel_interleave +from tensorflow.python.data.experimental import map_and_batch +from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference +from tensorflow.python.framework import dtypes +import time + +from argparse import ArgumentParser +from inference.coco_detection_evaluator import CocoDetectionEvaluator +from inference.coco_label_map import category_map + +IMAGE_SIZE = 300 +COCO_NUM_VAL_IMAGES = 4952 + +import os + +import numpy as np + +def parse_and_preprocess(serialized_example): + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.compat.v1.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/object/class/text': tf.compat.v1.VarLenFeature(dtype=tf.string), + 'image/source_id': tf.compat.v1.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.compat.v1.parse_single_example(serialized_example, feature_map) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + encoded_image = features['image/encoded'] + image_tensor = tf.image.decode_image(encoded_image, channels=3) + image_tensor.set_shape([None, None, 3]) + + label = features['image/object/class/text'].values + + image_id = features['image/source_id'] + + return image_tensor, bbox[0], label, image_id + +class model_infer: + + def __init__(self): + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--inter-op-parallelism-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--intra-op-parallelism-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph.', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. 
' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + + arg_parser.add_argument('-i', "--iter", + help='For accuracy measurement only.', + dest='total_iter', default=1000, type=int) + + arg_parser.add_argument('-w', "--warmup_iter", + help='For accuracy measurement only.', + dest='warmup_iter', default=200, type=int) + + # parse the arguments + self.args = arg_parser.parse_args() + + self.config = tf.compat.v1.ConfigProto() + self.config.intra_op_parallelism_threads = self.args.num_intra_threads + self.config.inter_op_parallelism_threads = self.args.num_inter_threads + self.config.use_per_session_threads = 1 + + self.input_layer = 'image_tensor' + self.output_layers = ['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes'] + self.load_graph() + + if self.args.batch_size == -1: + self.args.batch_size = 1 + + + self.input_tensor = self.infer_graph.get_tensor_by_name(self.input_layer + ":0") + self.output_tensors = [self.infer_graph.get_tensor_by_name(x + ":0") for x in self.output_layers] + + self.category_map_reverse = {v : k for k, v in category_map.items()} + + def build_data_sess(self): + data_graph = tf.Graph() + with data_graph.as_default(): + self.input_images, self.bbox, self.label, self.image_id = self.get_input() + self.data_sess = tf.compat.v1.Session(graph=data_graph, config=self.config) + + def load_graph(self): + print('load graph from: ' + self.args.input_graph) + + self.infer_graph = tf.Graph() + with self.infer_graph.as_default(): + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + output_graph = optimize_for_inference(graph_def, [self.input_layer], + self.output_layers, dtypes.uint8.as_datatype_enum, False) + tf.import_graph_def(output_graph, name='') + + def run_benchmark(self): + if self.args.data_location: + print("Inference with real data.") + else: + print("Inference with dummy data.") + + with tf.compat.v1.Session(graph=self.infer_graph, config=self.config) as sess: + + if self.args.data_location: + self.build_data_sess() + else: + input_images = sess.run(tf.random.truncated_normal( + [self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3], + dtype=tf.float32, + stddev=10, + name='synthetic_images')) + + total_iter = self.args.total_iter + warmup_iter = self.args.warmup_iter + ttime = 0.0 + + print('total iteration is {0}'.format(str(total_iter))) + print('warm up iteration is {0}'.format(str(warmup_iter))) + + for step in range(total_iter): + start_time = time.time() + if self.args.data_location: + input_images = self.data_sess.run([self.input_images]) + input_images = input_images[0] + _ = sess.run(self.output_tensors, {self.input_tensor: input_images}) + end_time = time.time() + + duration = end_time - start_time + if (step + 1) % 10 == 0: + print('steps = {0}, {1} sec'.format(str(step), str(duration))) + + if step + 1 > warmup_iter: + ttime += duration + + total_batches = total_iter - warmup_iter + print ('Batchsize: {0}'.format(str(self.args.batch_size))) + print ('Time spent per BATCH: {0:10.4f} ms'.format(ttime / total_batches * 1000)) + print ('Total samples/sec: {0:10.4f} samples/s'.format(total_batches * self.args.batch_size / ttime)) + + + def get_input(self): + 
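+        # Input pipeline for real-data runs: list the TFRecord file(s) given by
+        # --data-location, read them with parallel_interleave, decode each example
+        # with parse_and_preprocess into (image, boxes, class text, source_id), and
+        # group the results into batches of --batch-size with map_and_batch.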
tfrecord_paths = [self.args.data_location] + ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) + + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=28, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + ds = ds.prefetch(buffer_size=10000) + ds = ds.apply( + map_and_batch( + map_func=parse_and_preprocess, + batch_size=self.args.batch_size, + num_parallel_batches=28, + num_parallel_calls=None)) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) + images, bbox, label, image_id = ds_iterator.get_next() + + return images, bbox, label, image_id + + def accuracy_check(self): + print("Inference for accuracy check.") + self.build_data_sess() + evaluator = CocoDetectionEvaluator() + with tf.compat.v1.Session(graph=self.infer_graph, config=self.config) as sess: + iter = 0 + while True: + print('Run {0} iter'.format(iter)) + iter += 1 + input_images, bbox, label, image_id = self.data_sess.run([self.input_images, self.bbox, self.label, self.image_id]) + ground_truth = {} + ground_truth['boxes'] = np.asarray(bbox[0]) + label = [x if type(x) == 'str' else x.decode('utf-8') for x in label[0]] + ground_truth['classes'] = np.asarray([self.category_map_reverse[x] for x in label]) + image_id = image_id[0] if type(image_id[0]) == 'str' else image_id[0].decode('utf-8') + evaluator.add_single_ground_truth_image_info(image_id, ground_truth) + num, boxes, scores, labels = sess.run(self.output_tensors, {self.input_tensor: input_images}) + detection = {} + num = int(num[0]) + detection['boxes'] = np.asarray(boxes[0])[0:num] + detection['scores'] = np.asarray(scores[0])[0:num] + detection['classes'] = np.asarray(labels[0])[0:num] + evaluator.add_single_detected_image_info(image_id, detection) + if iter * self.args.batch_size >= COCO_NUM_VAL_IMAGES: + evaluator.evaluate() + break + + def run(self): + if self.args.accuracy_only: + self.accuracy_check() + else: + self.run_benchmark() + + + +if __name__ == "__main__": + infer = model_infer() + infer.run() + diff --git a/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/coco_int8.sh b/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/coco_int8.sh deleted file mode 100644 index 08f7466bc..000000000 --- a/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/coco_int8.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env bash -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# - -########## Variables to be defined - run it in research folder - -SPLIT=ssdmob - -FROZEN_GRAPH=$1 -TF_RECORD_FILES=$2 - -python -m object_detection.inference.infer_detections \ - --input_tfrecord_paths=$TF_RECORD_FILES \ - --output_tfrecord_path=${SPLIT}_detections.tfrecord \ - --inference_graph=$FROZEN_GRAPH \ - --discard_image_pixels=True - -mkdir -p ${SPLIT}_eval_metrics - -echo " -label_map_path: '${PWD}/object_detection/data/mscoco_label_map.pbtxt' -tf_record_input_reader: { input_path: '${SPLIT}_detections.tfrecord' } -" > ${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt - -echo " -metrics_set: 'coco_detection_metrics' -" > ${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt - -python -m object_detection.metrics.offline_eval_map_corloc \ - --eval_dir=${SPLIT}_eval_metrics \ - --eval_config_path=${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt \ - --input_config_path=${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt diff --git a/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/infer_detections.py b/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/infer_detections.py new file mode 100644 index 000000000..506ca66d7 --- /dev/null +++ b/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/infer_detections.py @@ -0,0 +1,282 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +from __future__ import division + +import tensorflow as tf +from tensorflow.python.data.experimental import parallel_interleave +from tensorflow.python.data.experimental import map_and_batch +import time + +from argparse import ArgumentParser +from inference.coco_detection_evaluator import CocoDetectionEvaluator +from inference.coco_label_map import category_map + +IMAGE_SIZE = 300 +COCO_NUM_VAL_IMAGES = 4952 + +import os + +import numpy as np + +def parse_and_preprocess(serialized_example): + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.compat.v1.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/object/class/text': tf.compat.v1.VarLenFeature(dtype=tf.string), + 'image/source_id': tf.compat.v1.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.compat.v1.parse_single_example(serialized_example, feature_map) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. 
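+    # The four [1, num_boxes] coordinate tensors are stacked below into a
+    # [4, num_boxes] tensor and then reshaped to [1, num_boxes, 4], giving one
+    # [ymin, xmin, ymax, xmax] row per box, which is the layout later handed to
+    # CocoDetectionEvaluator as groundtruth boxes.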
+ bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + encoded_image = features['image/encoded'] + image_tensor = tf.image.decode_image(encoded_image, channels=3) + image_tensor.set_shape([None, None, 3]) + + label = features['image/object/class/text'].values + + image_id = features['image/source_id'] + + return image_tensor, bbox[0], label, image_id + +class model_infer: + + def __init__(self): + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--inter-op-parallelism-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--intra-op-parallelism-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph.', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. ' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + + arg_parser.add_argument('-i', "--iter", + help='For accuracy measurement only.', + dest='total_iter', default=1000, type=int) + + arg_parser.add_argument('-w', "--warmup_iter", + help='For accuracy measurement only.', + dest='warmup_iter', default=200, type=int) + + # parse the arguments + self.args = arg_parser.parse_args() + + self.config = tf.compat.v1.ConfigProto() + self.config.intra_op_parallelism_threads = self.args.num_intra_threads + self.config.inter_op_parallelism_threads = self.args.num_inter_threads + self.config.use_per_session_threads = 1 + + self.load_graph() + + if self.args.batch_size == -1: + self.args.batch_size = 1 + + input_layer = 'Preprocessor/subpart2' + output_layers = ['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes'] + self.input_tensor = self.infer_graph.get_tensor_by_name(input_layer + ":0") + self.output_tensors = [self.infer_graph.get_tensor_by_name(x + ":0") for x in output_layers] + + self.category_map_reverse = {v : k for k, v in category_map.items()} + + def build_data_sess(self): + data_graph = tf.Graph() + with data_graph.as_default(): + self.input_images, self.bbox, self.label, self.image_id = self.get_input() + self.data_sess = tf.compat.v1.Session(graph=data_graph, config=self.config) + + dir_path = os.path.dirname(os.path.realpath(__file__)) + preprocess_graph = tf.Graph() + with preprocess_graph.as_default(): + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.gfile.FastGFile(os.path.join(os.path.dirname(dir_path), 'ssdmobilenet_preprocess.pb'), 'rb') as input_file: + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + + tf.import_graph_def(graph_def, name='') + + self.pre_sess = tf.compat.v1.Session(graph=preprocess_graph, config=self.config) + self.pre_output = preprocess_graph.get_tensor_by_name("Preprocessor/sub:0") + self.pre_input = 
preprocess_graph.get_tensor_by_name("image_tensor:0") + + + def load_graph(self): + print('load graph from: ' + self.args.input_graph) + + self.infer_graph = tf.Graph() + with self.infer_graph.as_default(): + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + + tf.import_graph_def(graph_def, name='') + + def run_benchmark(self): + if self.args.data_location: + print("Inference with real data.") + else: + print("Inference with dummy data.") + + with tf.compat.v1.Session(graph=self.infer_graph, config=self.config) as sess: + + if self.args.data_location: + self.build_data_sess() + else: + input_images = sess.run(tf.random.truncated_normal( + [self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3], + dtype=tf.float32, + stddev=10, + name='synthetic_images')) + + total_iter = self.args.total_iter + warmup_iter = self.args.warmup_iter + ttime = 0.0 + + print('total iteration is {0}'.format(str(total_iter))) + print('warm up iteration is {0}'.format(str(warmup_iter))) + + for step in range(total_iter): + start_time = time.time() + if self.args.data_location: + input_images = self.data_sess.run([self.input_images]) + input_images = input_images[0] + input_images = self.pre_sess.run(self.pre_output, {self.pre_input: input_images}) + _ = sess.run(self.output_tensors, {self.input_tensor: input_images}) + end_time = time.time() + + duration = end_time - start_time + if (step + 1) % 10 == 0: + print('steps = {0}, {1} sec'.format(str(step), str(duration))) + + if step + 1 > warmup_iter: + ttime += duration + + total_batches = total_iter - warmup_iter + print ('Batchsize: {0}'.format(str(self.args.batch_size))) + print ('Time spent per BATCH: {0:10.4f} ms'.format(ttime / total_batches * 1000)) + print ('Total samples/sec: {0:10.4f} samples/s'.format(total_batches * self.args.batch_size / ttime)) + + + def get_input(self): + tfrecord_paths = [self.args.data_location] + ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) + + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=28, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + ds = ds.prefetch(buffer_size=10000) + ds = ds.apply( + map_and_batch( + map_func=parse_and_preprocess, + batch_size=self.args.batch_size, + num_parallel_batches=28, + num_parallel_calls=None)) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(ds) + images, bbox, label, image_id = ds_iterator.get_next() + + return images, bbox, label, image_id + + def accuracy_check(self): + print("Inference for accuracy check.") + self.build_data_sess() + evaluator = CocoDetectionEvaluator() + with tf.compat.v1.Session(graph=self.infer_graph, config=self.config) as sess: + iter = 0 + while True: + print('Run {0} iter'.format(iter)) + iter += 1 + input_images, bbox, label, image_id = self.data_sess.run([self.input_images, self.bbox, self.label, self.image_id]) + ground_truth = {} + ground_truth['boxes'] = np.asarray(bbox[0]) + label = [x if type(x) == 'str' else x.decode('utf-8') for x in label[0]] + ground_truth['classes'] = np.asarray([self.category_map_reverse[x] for x in label]) + image_id = image_id[0] if type(image_id[0]) == 'str' else image_id[0].decode('utf-8') + evaluator.add_single_ground_truth_image_info(image_id, ground_truth) + input_images = self.pre_sess.run(self.pre_output, {self.pre_input: 
input_images}) + num, boxes, scores, labels = sess.run(self.output_tensors, {self.input_tensor: input_images}) + detection = {} + num = int(num[0]) + detection['boxes'] = np.asarray(boxes[0])[0:num] + detection['scores'] = np.asarray(scores[0])[0:num] + detection['classes'] = np.asarray(labels[0])[0:num] + evaluator.add_single_detected_image_info(image_id, detection) + if iter * self.args.batch_size >= COCO_NUM_VAL_IMAGES: + evaluator.evaluate() + break + + def run(self): + if self.args.accuracy_only: + self.accuracy_check() + else: + self.run_benchmark() + + + +if __name__ == "__main__": + infer = model_infer() + infer.run() + diff --git a/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/run_frozen_graph_ssdmob.py b/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/run_frozen_graph_ssdmob.py deleted file mode 100644 index 241c63c05..000000000 --- a/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/run_frozen_graph_ssdmob.py +++ /dev/null @@ -1,224 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -from utils import visualization_utils as vis_util -from utils import label_map_util -from tensorflow.python.client import timeline -import argparse -import time -from PIL import Image -from matplotlib import pyplot as plt -import numpy as np -import os -import six.moves.urllib as urllib -import sys -import tarfile -import tensorflow as tf -import zipfile - -from collections import defaultdict -from io import StringIO -import matplotlib -matplotlib.use('Agg') - -parser = argparse.ArgumentParser() -parser.add_argument('-g', '--graph', help='Path to input graph to run', type=str, required=True) -parser.add_argument('-d', '--dataset', help='Full Path to input dataset to run', type=str, required=True) -parser.add_argument('-s', '--single_image', - help='Run for single image onle, if false, run for the whole dataset', action='store_true') -parser.add_argument('-x', '--single_socket', - help='Run for single socket, if false, run both sockets', action='store_true') -parser.add_argument('-v', '--visualize', help='Whether to visulize the output image', action='store_true') -parser.add_argument('-t', '--timeline', help='Output file name for TF timeline', type=str, default=None) -parser.add_argument('-e', '--evaluate_tensor', help='Full tensor name to evaluate', type=str, default=None) -parser.add_argument('-p', '--print_accuracy', help='Print accuracy results', action='store_true') -parser.add_argument('-n', '--number_of_steps', help='Run for n number of steps', type=int, default=None) -parser.add_argument('--num-inter-threads', - help='Number of inter threads', type=int, default=None, dest="num_inter_threads") -parser.add_argument('--num-intra-threads', - help='Number of intra threads', type=int, default=None, dest="num_intra_threads") - -args = parser.parse_args() - -# This is needed to display the images. 
-if (args.visualize and args.single_image and args.evaluate_tensor is None): - from IPython import get_ipython - get_ipython().run_line_magic('matplotlib', 'tk') - - -# List of the strings that is used to add correct label for each box. -PATH_TO_LABELS = os.path.join(os.getcwd(), 'object_detection', 'data', - 'mscoco_label_map.pbtxt') - -NUM_CLASSES = 90 - -detection_graph = tf.Graph() -with detection_graph.as_default(): - od_graph_def = tf.GraphDef() - with tf.gfile.GFile(args.graph, 'rb') as fid: - serialized_graph = fid.read() - od_graph_def.ParseFromString(serialized_graph) - tf.import_graph_def(od_graph_def, name='') - -label_map = label_map_util.load_labelmap(PATH_TO_LABELS) -categories = label_map_util.convert_label_map_to_categories( - label_map, max_num_classes=NUM_CLASSES, use_display_name=True) -category_index = label_map_util.create_category_index(categories) - - -def load_image_into_numpy_array(image): - (im_width, im_height) = image.size - if image.mode == 'L': - np_image = np.array(image.getdata()).reshape( - (im_height, im_width)).astype(np.uint8) - return np.stack((np_image,) * 3, -1) - else: - return np.array(image.getdata()).reshape( - (im_height, im_width, 3)).astype(np.uint8) - - -if (args.single_image): - TEST_IMAGE_PATHS = [args.dataset + "/000000578871.jpg"] -else: - PATH_TO_TEST_IMAGES_DIR = args.dataset - print(PATH_TO_TEST_IMAGES_DIR) - TEST_IMAGE_PATHS = [] - for root, dirs, files in os.walk(PATH_TO_TEST_IMAGES_DIR): - for file in files: - TEST_IMAGE_PATHS.append(os.path.join(PATH_TO_TEST_IMAGES_DIR, file)) - -# Size, in inches, of the output images. -IMAGE_SIZE = (12, 8) - - -def run_inference_for_single_image(graph): - sess_config = tf.ConfigProto() - sess_config.intra_op_parallelism_threads = int(args.num_intra_threads) - sess_config.inter_op_parallelism_threads = int(args.num_inter_threads) - print("intra_op_parallelism_threads {}".format( - sess_config.intra_op_parallelism_threads)) - - with graph.as_default(): - with tf.Session(config=sess_config) as sess: - # Get handles to input and output tensors - tensor_dict = {} - if (args.evaluate_tensor is None): - ops = tf.get_default_graph().get_operations() - all_tensor_names = {output.name for op in ops for output in op.outputs} - for key in [ - 'num_detections', 'detection_boxes', 'detection_scores', - 'detection_classes' - ]: - tensor_name = key + ':0' - if tensor_name in all_tensor_names: - tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( - tensor_name) - else: - our_op = tf.get_default_graph().get_operation_by_name(args.evaluate_tensor) - tensor_names = our_op.outputs - list_ops = [] - for i, tensor in enumerate(tensor_names): - list_ops.append(tensor.name) - tensor_dict[args.evaluate_tensor] = list_ops - - if (args.timeline is not None): - run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - run_metadata = tf.RunMetadata() - total_duration = 0 - for index, image_path in enumerate(TEST_IMAGE_PATHS): - image = Image.open(image_path) - # the array based representation of the image will be used later in order to prepare the - # result image with boxes and labels on it. 
- image_np = load_image_into_numpy_array(image) - image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') - - # Run inference - start_time = time.time() - if (args.timeline is not None): - output_dict = sess.run(tensor_dict, - feed_dict={image_tensor: np.expand_dims(image_np, 0)}, options=run_options, run_metadata=run_metadata) - else: - output_dict = sess.run(tensor_dict, - feed_dict={image_tensor: np.expand_dims(image_np, 0)}) - step_duration = time.time() - start_time - if(index > 20): - total_duration = total_duration + step_duration - - if (args.single_image): - if index == 0: - print('Avg. Duration per Step:' + str(total_duration / 1)) - else: - if (index % 10 == 0): - print('Step ' + str(index) + ': ' + str(step_duration) + ' seconds') - if index == 4999: - print('Avg. Duration per Step:' + str(total_duration / 5000)) - - if (args.number_of_steps is not None): - if (args.single_image): - sys.exit("single_iamge and number_of_steps cannot be both enabled!") - elif (index == (args.number_of_steps - 1)): - print('Avg. Duration per Step:' + - str(total_duration / (args.number_of_steps - 20))) - break - - if (args.timeline is not None): - trace = timeline.Timeline(step_stats=run_metadata.step_stats) - with open('tl-' + time.strftime("%Y%m%d-%H%M%S") + '-' + args.timeline, 'w') as file: - file.write(trace.generate_chrome_trace_format(show_memory=False)) - - if (args.evaluate_tensor is not None): - for tensor in output_dict[args.evaluate_tensor]: - print(tensor.shape) - return None, None - - # all outputs are float32 numpy arrays, so convert types as appropriate - output_dict['num_detections'] = int(output_dict['num_detections'][0]) - output_dict['detection_classes'] = output_dict[ - 'detection_classes'][0].astype(np.uint8) - output_dict['detection_boxes'] = output_dict['detection_boxes'][0] - output_dict['detection_scores'] = output_dict['detection_scores'][0] - - if (args.print_accuracy): - print('num_detections:\n' + str(output_dict['num_detections'])) - print('detection_classes:\n' + str(output_dict['detection_classes'])) - print('detection_boxes:\n' + str(output_dict['detection_boxes'])) - print('detection_scores:\n' + str(output_dict['detection_scores'])) - - if 'detection_masks' in output_dict: - output_dict['detection_masks'] = output_dict['detection_masks'][0] - return output_dict, image_np - - -# Actual detection. -output_dict, image_np = run_inference_for_single_image(detection_graph) - -# Visualization of the results of a detection. -if (args.visualize and args.single_image and args.evaluate_tensor is None): - vis_util.visualize_boxes_and_labels_on_image_array( - image_np, - output_dict['detection_boxes'], - output_dict['detection_classes'], - output_dict['detection_scores'], - category_index, - instance_masks=output_dict.get('detection_masks'), - use_normalized_coordinates=True, - line_thickness=8) - plt.figure(figsize=IMAGE_SIZE) - plt.imshow(image_np) diff --git a/models/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_preprocess.pb b/models/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_preprocess.pb new file mode 100644 index 0000000000000000000000000000000000000000..e8b51b171de104b8eb55270db380116db03d42bd GIT binary patch literal 9162 zcmdT~%a7Yc7|$fxZus`0?Q~lfLrY8P14BuZEu}aj(QrV+_p{yoM^m?XDa^$|ImmJ#Y#z3T!t$!z1F>K zb#)JyFX*nP%!>*eCslxQ(`l_yE6eIURMwoH-O@d3_3;Ax?xO3l?+~1aeNkWat!mRZ zZ_0mB0zK+9Apa32uG{mpJ_sdPg_mO@T+cBLVrZ^qwn)|W^wwqqfF*Tb0N~upOr?a& zxL83ARDd-&nnk8o3#29sLLzFV%u?&Qq<&X~1EY<_vrUdilfj`#5LYt6sAL2t<9&5? 
z0pJeVFm0`)yPLSUR8aT89I-5O%QfA|MVs8y^lkAXO$A~bgPZy};wS4>+aTLC$5@aT z+_q!dn~|px{EiZUTk}iiyyNIQG`e%b$6L!v3eM`Yu$z~hruzbHy+c!;*5y#uQvXPY z*q+8KvD_#wA!cUFBD$mL>mG45hv)_^7o=jCHFl^Zv!%J_eO^KbV2S3p?U^mz(jvlK z_6&TE;6yAZ1Yer0BC3vVZ}`>WctOV1V3*dibl1hl;0Qi!bz6;dqXcO+YG1zzJ+m@R zYp6i$NA$M@-zDLEuWeeSddX(4(tP*dCUI@l@yb_38>hOhI z%iOuZwRq|XPf!#mZG3>#qIxk5X#>vS)5#&_Uab&2 zLhts6RFF)mnejL+GiZP1nGx?lS4*h+DS3v|*Pk0mGvE=)I?J3W@u0R%$fmy#+4 zenRkqM5yn106=EVLEw`-n0S`Ky3~5zFseI!7?cCIB|bLqnO>`%fXvY7G(nL_pn7oW zykRIWN^amP8Z||Rvu|k#8+@3sp5RfwL7T$Wpt0b&$VX*_eFNE_Na4CpoDG7f{1>UE zycT55D`F4Rd4u-8;z0TL2o6awx<*%;Y5gj)rV6FT>jX_c%R2D<2*YuinJ)#6Lz?b+ z=n~w8{Ug-TWdTp9-8h3oAT#v1H-lOy)p_AWwiNl17x*wt=rwA zjn%-F;GNuy&=&XxNde=3*gMZA)gd z-?nH$#J3KKc803i8^4Faw@8|@0c2s0$*c&+cGNRcl}P|NMo5Kodlpvt&WQpU%9{736DM&#*pF_5bQ z4PtqEu#<{c;LsM~SCp*;ebxg{tupdM$=eT<{8^Pphwm{XNY_lmC$Hdd1n*=?WTA~U zDbq@KDIz7&zjSLEFUD`A#Ma0jtz;?$AJ&krO<2ANibWzv372vUFpp>brCgs|hQAP; zj5*H-t{_buI;)~y21}uR&+6%I|?EL zh~N=|GmG$%v^#gmRy(q(@@+%B%*{^3XQrOIolP+gNW~iS(g*88V$UQr=X@1LI_Bw@rMb$3nIz5dQMVWM&{`R=-&ad%x0RkJr) zch~idyX(4?g$?+mf%@FL>pHwE<(V#Rn;tz!DzjIq4gG;OfB=3#upBEap<4JknRtPo zkI+?=ICY!;rG)SZk5TeN#>sKZ)l6taB<4Egi!7z{>q;l+9~I$PI+BsKcfJ!!Wt>1M znRUcg-s42%t0KIS4uTNHE4tSv4o#>%2^UjSDl6cV&pmN*fYd|mz+ODiUTDnYQsr@x z|DnGgflXM50@Jf_>3Yvn5`OBUjE7;^Sz!1FG;Z%sNbn~7W~NYha5mx)TpqHP3c#gi P?~W34V#t19Q)tM)4dT^s literal 0 HcmV?d00001 diff --git a/models/object_detection/tensorflow/ssd-resnet34/__init__.py b/models/object_detection/tensorflow/ssd-resnet34/__init__.py index fd96ab7af..5b3325518 100644 --- a/models/object_detection/tensorflow/ssd-resnet34/__init__.py +++ b/models/object_detection/tensorflow/ssd-resnet34/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/__init__.py b/models/object_detection/tensorflow/ssd-resnet34/inference/__init__.py index fd96ab7af..5b3325518 100644 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/__init__.py +++ b/models/object_detection/tensorflow/ssd-resnet34/inference/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/__init__.py b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/__init__.py index fd96ab7af..5b3325518 100644 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/__init__.py +++ b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/infer_detections.py b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/infer_detections.py index e544758fd..384d6ca1d 100644 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/infer_detections.py +++ b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/infer_detections.py @@ -18,7 +18,6 @@ # -import os import tensorflow as tf import time @@ -32,173 +31,181 @@ IMAGE_SIZE = 300 +import os class ssd_resnet34_infer: - def __init__(self): - arg_parser = ArgumentParser(description='Parse args') - - arg_parser.add_argument('-b', "--batch-size", - help="Specify the batch size. 
If this " - "parameter is not specified or is -1, the " - "largest ideal batch size for the model will " - "be used.", - dest="batch_size", type=int, default=-1) - - arg_parser.add_argument('-e', "--inter-op-parallelism-threads", - help='The number of inter-thread.', - dest='num_inter_threads', type=int, default=0) - - arg_parser.add_argument('-a', "--intra-op-parallelism-threads", - help='The number of intra-thread.', - dest='num_intra_threads', type=int, default=0) - - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph.', - dest='input_graph') - - arg_parser.add_argument('-d', "--data-location", - help='Specify the location of the data. ' - 'If this parameter is not specified, ' - 'the benchmark will use random/dummy data.', - dest="data_location", default=None) - - arg_parser.add_argument('-r', "--accuracy-only", - help='For accuracy measurement only.', - dest='accuracy_only', action='store_true') - - arg_parser.add_argument("--results-file-path", - help="File path for the inference results", - dest="results_file_path", default=None) - - # parse the arguments - self.args = arg_parser.parse_args() - - self.freeze_graph = self.load_graph(self.args.input_graph) - self.config = tf.ConfigProto() - self.config.intra_op_parallelism_threads = self.args.num_intra_threads - self.config.inter_op_parallelism_threads = self.args.num_inter_threads - - if self.args.batch_size == -1: - self.args.batch_size = 64 - - self.num_batches = (ssd_constants.COCO_NUM_VAL_IMAGES // self.args.batch_size) + \ - (ssd_constants.COCO_NUM_VAL_IMAGES % self.args.batch_size > 0) - - input_layer = 'input' - output_layers = ['v/stack', 'v/Softmax'] - self.input_tensor = self.freeze_graph.get_tensor_by_name(input_layer + ":0") - self.output_tensors = [self.freeze_graph.get_tensor_by_name(x + ":0") for x in output_layers] - - def load_graph(self, frozen_graph_filename): - print('load graph from: ' + frozen_graph_filename) - with tf.gfile.GFile(frozen_graph_filename, "rb") as f: - graph_def = tf.GraphDef() - graph_def.ParseFromString(f.read()) - - # Then, we import the graph_def into a new Graph and returns it - with tf.Graph().as_default() as graph: - # Since we load everything in a new graph, this is not needed - tf.import_graph_def(graph_def, name='') - return graph - - def run_benchmark(self): - print("Inference with dummy data.") - with tf.Session(graph=self.freeze_graph, config=self.config) as sess: - - input_images = sess.run(tf.truncated_normal( - [self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3], - dtype=tf.float32, - stddev=10, - name='synthetic_images')) - - total_iter = 1000 - warmup_iter = 200 - ttime = 0.0 - - print('total iteration is {0}'.format(str(total_iter))) - print('warm up iteration is {0}'.format(str(warmup_iter))) - - for step in range(total_iter): - start_time = time.time() - _ = sess.run(self.output_tensors, {self.input_tensor: input_images}) - end_time = time.time() - - duration = end_time - start_time - if (step + 1) % 10 == 0: - print('steps = {0}, {1} sec'.format(str(step), str(duration))) - - if step + 1 > warmup_iter: - ttime += duration - - total_batches = total_iter - warmup_iter - print('Batchsize: {0}'.format(str(self.args.batch_size))) - print('Time spent per BATCH: {0:10.4f} ms'.format(ttime / total_batches * 1000)) - print('Total samples/sec: {0:10.4f} samples/s'.format(total_batches * self.args.batch_size / ttime)) - - def __get_input(self): - preprocessor = COCOPreprocessor( - batch_size=self.args.batch_size, - output_shapes=[[self.args.batch_size, 
IMAGE_SIZE, IMAGE_SIZE, 3]], - num_splits=1, - dtype=tf.float32, - train=False, - distortions=True, - resize_method=None, - shift_ratio=0 - ) - - class params: - datasets_repeat_cached_sample = False - - self.params = params() - self.dataset = datasets.create_dataset(self.args.data_location, 'coco') - - return preprocessor.minibatch( - self.dataset, - subset='validation', - params=self.params, - shift_ratio=0) - - def accuracy_check(self): - print(self.args) - input_list = self.__get_input() - ds_init = tf.get_collection(tf.GraphKeys.TABLE_INITIALIZERS) - - ds_sess = tf.Session() - params = benchmark_cnn.make_params(data_dir=self.args.data_location) - self.model = ssd_model.SSD300Model(params=params) - - print("Inference for accuracy check.") - with tf.Session(graph=self.freeze_graph, config=self.config) as sess: - ds_sess.run(ds_init) - global_step = 0 - - for _ in range(self.num_batches): - results = {} - input_lists = ds_sess.run(input_list) - input_images = input_lists[0][0] - input_ids = input_lists[3][0] - input_raw_shapes = input_lists[4][0] - - result = sess.run(self.output_tensors, {self.input_tensor: input_images}) - # Make global_step available in results for postprocessing. - results['global_step'] = global_step - results[ssd_constants.SOURCE_ID] = input_ids - results[ssd_constants.RAW_SHAPE] = input_raw_shapes - - results[ssd_constants.PRED_BOXES] = result[0] - results[ssd_constants.PRED_SCORES] = result[1] - - results = self.model.postprocess(results) - - def run(self): - if self.args.accuracy_only: - self.accuracy_check() - else: - self.run_benchmark() + def __init__(self): + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--inter-op-parallelism-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--intra-op-parallelism-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph.', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. 
' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + + arg_parser.add_argument("--results-file-path", + help="File path for the inference results", + dest="results_file_path", default=None) + + # parse the arguments + self.args = arg_parser.parse_args() + + self.freeze_graph = self.load_graph(self.args.input_graph) + self.config = tf.compat.v1.ConfigProto() + self.config.intra_op_parallelism_threads = self.args.num_intra_threads + self.config.inter_op_parallelism_threads = self.args.num_inter_threads + + if self.args.batch_size == -1: + self.args.batch_size = 64 + + self.num_batches = (ssd_constants.COCO_NUM_VAL_IMAGES // self.args.batch_size) + \ + (ssd_constants.COCO_NUM_VAL_IMAGES % self.args.batch_size > 0) + + input_layer = 'input' + output_layers = ['v/stack', 'v/Softmax'] + self.input_tensor = self.freeze_graph.get_tensor_by_name(input_layer + ":0") + self.output_tensors = [self.freeze_graph.get_tensor_by_name(x + ":0") for x in output_layers] + + + def load_graph(self, frozen_graph_filename): + print('load graph from: ' + frozen_graph_filename) + with tf.io.gfile.GFile(frozen_graph_filename, "rb") as f: + graph_def = tf.compat.v1.GraphDef() + graph_def.ParseFromString(f.read()) + + # Then, we import the graph_def into a new Graph and returns it + with tf.Graph().as_default() as graph: + # Since we load everything in a new graph, this is not needed + tf.import_graph_def(graph_def, name='') + return graph + + def run_benchmark(self): + print("Inference with dummy data.") + with tf.compat.v1.Session(graph=self.freeze_graph, config=self.config) as sess: + + input_images = sess.run(tf.random.truncated_normal( + [self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3], + dtype=tf.float32, + stddev=10, + name='synthetic_images')) + + total_iter = 1000 + warmup_iter = 200 + ttime = 0.0 + + print('total iteration is {0}'.format(str(total_iter))) + print('warm up iteration is {0}'.format(str(warmup_iter))) + + for step in range(total_iter): + start_time = time.time() + _ = sess.run(self.output_tensors, {self.input_tensor: input_images}) + end_time = time.time() + + duration = end_time - start_time + if (step + 1) % 10 == 0: + print('steps = {0}, {1} sec'.format(str(step), str(duration))) + + if step + 1 > warmup_iter: + ttime += duration + + total_batches = total_iter - warmup_iter + print ('Batchsize: {0}'.format(str(self.args.batch_size))) + print ('Time spent per BATCH: {0:10.4f} ms'.format(ttime / total_batches * 1000)) + print ('Total samples/sec: {0:10.4f} samples/s'.format(total_batches * self.args.batch_size / ttime)) + + + def __get_input(self): + preprocessor = COCOPreprocessor( + batch_size=self.args.batch_size, + output_shapes=[[self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3]], + num_splits=1, + dtype=tf.float32, + train=False, + distortions=True, + resize_method=None, + shift_ratio=0 + ) + + class params: + datasets_repeat_cached_sample = False + + self.params = params() + self.dataset = datasets.create_dataset(self.args.data_location, 'coco') + + return preprocessor.minibatch( + self.dataset, + subset='validation', + params=self.params, + shift_ratio=0) + + + def accuracy_check(self): + print(self.args) + input_list = self.__get_input() + ds_init = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TABLE_INITIALIZERS) + + ds_sess = tf.compat.v1.Session() + params = 
benchmark_cnn.make_params(data_dir=self.args.data_location) + self.model = ssd_model.SSD300Model(params=params) + + print("Inference for accuracy check.") + with tf.compat.v1.Session(graph=self.freeze_graph, config=self.config) as sess: + ds_sess.run(ds_init) + global_step = 0 + + for _ in range(self.num_batches): + results = {} + input_lists = ds_sess.run(input_list) + input_images = input_lists[0][0] + input_ids = input_lists[3][0] + input_raw_shapes = input_lists[4][0] + + result = sess.run(self.output_tensors, {self.input_tensor: input_images}) + # Make global_step available in results for postprocessing. + results['global_step'] = global_step + results[ssd_constants.SOURCE_ID] = input_ids + results[ssd_constants.RAW_SHAPE] = input_raw_shapes + + results[ssd_constants.PRED_BOXES] = result[0] + results[ssd_constants.PRED_SCORES] = result[1] + + results = self.model.postprocess(results) + + + + def run(self): + if self.args.accuracy_only: + self.accuracy_check() + else: + self.run_benchmark() + if __name__ == "__main__": - infer = ssd_resnet34_infer() - infer.run() + infer = ssd_resnet34_infer() + infer.run() + diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py b/models/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py index fd96ab7af..5b3325518 100644 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py +++ b/models/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/int8/infer_detections.py b/models/object_detection/tensorflow/ssd-resnet34/inference/int8/infer_detections.py index e544758fd..384d6ca1d 100644 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/int8/infer_detections.py +++ b/models/object_detection/tensorflow/ssd-resnet34/inference/int8/infer_detections.py @@ -18,7 +18,6 @@ # -import os import tensorflow as tf import time @@ -32,173 +31,181 @@ IMAGE_SIZE = 300 +import os class ssd_resnet34_infer: - def __init__(self): - arg_parser = ArgumentParser(description='Parse args') - - arg_parser.add_argument('-b', "--batch-size", - help="Specify the batch size. If this " - "parameter is not specified or is -1, the " - "largest ideal batch size for the model will " - "be used.", - dest="batch_size", type=int, default=-1) - - arg_parser.add_argument('-e', "--inter-op-parallelism-threads", - help='The number of inter-thread.', - dest='num_inter_threads', type=int, default=0) - - arg_parser.add_argument('-a', "--intra-op-parallelism-threads", - help='The number of intra-thread.', - dest='num_intra_threads', type=int, default=0) - - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph.', - dest='input_graph') - - arg_parser.add_argument('-d', "--data-location", - help='Specify the location of the data. 
' - 'If this parameter is not specified, ' - 'the benchmark will use random/dummy data.', - dest="data_location", default=None) - - arg_parser.add_argument('-r', "--accuracy-only", - help='For accuracy measurement only.', - dest='accuracy_only', action='store_true') - - arg_parser.add_argument("--results-file-path", - help="File path for the inference results", - dest="results_file_path", default=None) - - # parse the arguments - self.args = arg_parser.parse_args() - - self.freeze_graph = self.load_graph(self.args.input_graph) - self.config = tf.ConfigProto() - self.config.intra_op_parallelism_threads = self.args.num_intra_threads - self.config.inter_op_parallelism_threads = self.args.num_inter_threads - - if self.args.batch_size == -1: - self.args.batch_size = 64 - - self.num_batches = (ssd_constants.COCO_NUM_VAL_IMAGES // self.args.batch_size) + \ - (ssd_constants.COCO_NUM_VAL_IMAGES % self.args.batch_size > 0) - - input_layer = 'input' - output_layers = ['v/stack', 'v/Softmax'] - self.input_tensor = self.freeze_graph.get_tensor_by_name(input_layer + ":0") - self.output_tensors = [self.freeze_graph.get_tensor_by_name(x + ":0") for x in output_layers] - - def load_graph(self, frozen_graph_filename): - print('load graph from: ' + frozen_graph_filename) - with tf.gfile.GFile(frozen_graph_filename, "rb") as f: - graph_def = tf.GraphDef() - graph_def.ParseFromString(f.read()) - - # Then, we import the graph_def into a new Graph and returns it - with tf.Graph().as_default() as graph: - # Since we load everything in a new graph, this is not needed - tf.import_graph_def(graph_def, name='') - return graph - - def run_benchmark(self): - print("Inference with dummy data.") - with tf.Session(graph=self.freeze_graph, config=self.config) as sess: - - input_images = sess.run(tf.truncated_normal( - [self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3], - dtype=tf.float32, - stddev=10, - name='synthetic_images')) - - total_iter = 1000 - warmup_iter = 200 - ttime = 0.0 - - print('total iteration is {0}'.format(str(total_iter))) - print('warm up iteration is {0}'.format(str(warmup_iter))) - - for step in range(total_iter): - start_time = time.time() - _ = sess.run(self.output_tensors, {self.input_tensor: input_images}) - end_time = time.time() - - duration = end_time - start_time - if (step + 1) % 10 == 0: - print('steps = {0}, {1} sec'.format(str(step), str(duration))) - - if step + 1 > warmup_iter: - ttime += duration - - total_batches = total_iter - warmup_iter - print('Batchsize: {0}'.format(str(self.args.batch_size))) - print('Time spent per BATCH: {0:10.4f} ms'.format(ttime / total_batches * 1000)) - print('Total samples/sec: {0:10.4f} samples/s'.format(total_batches * self.args.batch_size / ttime)) - - def __get_input(self): - preprocessor = COCOPreprocessor( - batch_size=self.args.batch_size, - output_shapes=[[self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3]], - num_splits=1, - dtype=tf.float32, - train=False, - distortions=True, - resize_method=None, - shift_ratio=0 - ) - - class params: - datasets_repeat_cached_sample = False - - self.params = params() - self.dataset = datasets.create_dataset(self.args.data_location, 'coco') - - return preprocessor.minibatch( - self.dataset, - subset='validation', - params=self.params, - shift_ratio=0) - - def accuracy_check(self): - print(self.args) - input_list = self.__get_input() - ds_init = tf.get_collection(tf.GraphKeys.TABLE_INITIALIZERS) - - ds_sess = tf.Session() - params = benchmark_cnn.make_params(data_dir=self.args.data_location) - self.model = 
ssd_model.SSD300Model(params=params) - - print("Inference for accuracy check.") - with tf.Session(graph=self.freeze_graph, config=self.config) as sess: - ds_sess.run(ds_init) - global_step = 0 - - for _ in range(self.num_batches): - results = {} - input_lists = ds_sess.run(input_list) - input_images = input_lists[0][0] - input_ids = input_lists[3][0] - input_raw_shapes = input_lists[4][0] - - result = sess.run(self.output_tensors, {self.input_tensor: input_images}) - # Make global_step available in results for postprocessing. - results['global_step'] = global_step - results[ssd_constants.SOURCE_ID] = input_ids - results[ssd_constants.RAW_SHAPE] = input_raw_shapes - - results[ssd_constants.PRED_BOXES] = result[0] - results[ssd_constants.PRED_SCORES] = result[1] - - results = self.model.postprocess(results) - - def run(self): - if self.args.accuracy_only: - self.accuracy_check() - else: - self.run_benchmark() + def __init__(self): + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--inter-op-parallelism-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--intra-op-parallelism-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph.', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. ' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + + arg_parser.add_argument("--results-file-path", + help="File path for the inference results", + dest="results_file_path", default=None) + + # parse the arguments + self.args = arg_parser.parse_args() + + self.freeze_graph = self.load_graph(self.args.input_graph) + self.config = tf.compat.v1.ConfigProto() + self.config.intra_op_parallelism_threads = self.args.num_intra_threads + self.config.inter_op_parallelism_threads = self.args.num_inter_threads + + if self.args.batch_size == -1: + self.args.batch_size = 64 + + self.num_batches = (ssd_constants.COCO_NUM_VAL_IMAGES // self.args.batch_size) + \ + (ssd_constants.COCO_NUM_VAL_IMAGES % self.args.batch_size > 0) + + input_layer = 'input' + output_layers = ['v/stack', 'v/Softmax'] + self.input_tensor = self.freeze_graph.get_tensor_by_name(input_layer + ":0") + self.output_tensors = [self.freeze_graph.get_tensor_by_name(x + ":0") for x in output_layers] + + + def load_graph(self, frozen_graph_filename): + print('load graph from: ' + frozen_graph_filename) + with tf.io.gfile.GFile(frozen_graph_filename, "rb") as f: + graph_def = tf.compat.v1.GraphDef() + graph_def.ParseFromString(f.read()) + + # Then, we import the graph_def into a new Graph and returns it + with tf.Graph().as_default() as graph: + # Since we load everything in a new graph, this is not needed + tf.import_graph_def(graph_def, name='') + return graph + + def run_benchmark(self): + print("Inference with dummy data.") + with tf.compat.v1.Session(graph=self.freeze_graph, config=self.config) 
as sess: + + input_images = sess.run(tf.random.truncated_normal( + [self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3], + dtype=tf.float32, + stddev=10, + name='synthetic_images')) + + total_iter = 1000 + warmup_iter = 200 + ttime = 0.0 + + print('total iteration is {0}'.format(str(total_iter))) + print('warm up iteration is {0}'.format(str(warmup_iter))) + + for step in range(total_iter): + start_time = time.time() + _ = sess.run(self.output_tensors, {self.input_tensor: input_images}) + end_time = time.time() + + duration = end_time - start_time + if (step + 1) % 10 == 0: + print('steps = {0}, {1} sec'.format(str(step), str(duration))) + + if step + 1 > warmup_iter: + ttime += duration + + total_batches = total_iter - warmup_iter + print ('Batchsize: {0}'.format(str(self.args.batch_size))) + print ('Time spent per BATCH: {0:10.4f} ms'.format(ttime / total_batches * 1000)) + print ('Total samples/sec: {0:10.4f} samples/s'.format(total_batches * self.args.batch_size / ttime)) + + + def __get_input(self): + preprocessor = COCOPreprocessor( + batch_size=self.args.batch_size, + output_shapes=[[self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3]], + num_splits=1, + dtype=tf.float32, + train=False, + distortions=True, + resize_method=None, + shift_ratio=0 + ) + + class params: + datasets_repeat_cached_sample = False + + self.params = params() + self.dataset = datasets.create_dataset(self.args.data_location, 'coco') + + return preprocessor.minibatch( + self.dataset, + subset='validation', + params=self.params, + shift_ratio=0) + + + def accuracy_check(self): + print(self.args) + input_list = self.__get_input() + ds_init = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TABLE_INITIALIZERS) + + ds_sess = tf.compat.v1.Session() + params = benchmark_cnn.make_params(data_dir=self.args.data_location) + self.model = ssd_model.SSD300Model(params=params) + + print("Inference for accuracy check.") + with tf.compat.v1.Session(graph=self.freeze_graph, config=self.config) as sess: + ds_sess.run(ds_init) + global_step = 0 + + for _ in range(self.num_batches): + results = {} + input_lists = ds_sess.run(input_list) + input_images = input_lists[0][0] + input_ids = input_lists[3][0] + input_raw_shapes = input_lists[4][0] + + result = sess.run(self.output_tensors, {self.input_tensor: input_images}) + # Make global_step available in results for postprocessing. 
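The new inference script above combines three small pieces: importing a frozen GraphDef into a fresh graph via the TF2 compat endpoints, a ceiling division so the COCO validation set is fully covered, and a throughput formula computed over the post-warmup iterations only. The following standalone sketch is not part of the patch; the 5000-image COCO validation size, the file path, and the example numbers are assumptions used purely for illustration.

import tensorflow as tf

def load_frozen_graph(path):
    # Read a frozen GraphDef and import it into a fresh graph
    # (same pattern as load_graph above, using the TF2 compat endpoints).
    with tf.io.gfile.GFile(path, "rb") as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')
    return graph

def num_batches(num_images, batch_size):
    # Ceiling division: add one partial batch when num_images is not a
    # multiple of batch_size (mirrors the COCO_NUM_VAL_IMAGES expression).
    return (num_images // batch_size) + (num_images % batch_size > 0)

def samples_per_sec(batch_size, measured_iters, total_time_sec):
    # Throughput is measured over post-warmup iterations only.
    return measured_iters * batch_size / total_time_sec

# Illustrative values only (COCO val size assumed to be 5000 images):
print(num_batches(5000, 64))           # 79 batches
print(samples_per_sec(64, 800, 10.0))  # 5120.0 samples/s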
+ results['global_step'] = global_step + results[ssd_constants.SOURCE_ID] = input_ids + results[ssd_constants.RAW_SHAPE] = input_raw_shapes + + results[ssd_constants.PRED_BOXES] = result[0] + results[ssd_constants.PRED_SCORES] = result[1] + + results = self.model.postprocess(results) + + + + def run(self): + if self.args.accuracy_only: + self.accuracy_check() + else: + self.run_benchmark() + if __name__ == "__main__": - infer = ssd_resnet34_infer() - infer.run() + infer = ssd_resnet34_infer() + infer.run() + diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/tensorflow_benchmarks_tf2.0.patch b/models/object_detection/tensorflow/ssd-resnet34/inference/tensorflow_benchmarks_tf2.0.patch new file mode 100644 index 000000000..dd35c6136 --- /dev/null +++ b/models/object_detection/tensorflow/ssd-resnet34/inference/tensorflow_benchmarks_tf2.0.patch @@ -0,0 +1,655 @@ +diff --git a/scripts/tf_cnn_benchmarks/allreduce.py b/scripts/tf_cnn_benchmarks/allreduce.py +index 56d8c88..dddf57d 100644 +--- a/scripts/tf_cnn_benchmarks/allreduce.py ++++ b/scripts/tf_cnn_benchmarks/allreduce.py +@@ -24,7 +24,7 @@ import re + from six.moves import xrange # pylint: disable=redefined-builtin + import tensorflow as tf + +-from tensorflow.contrib.all_reduce.python import all_reduce ++from tensorflow.python.distribute import all_reduce + from tensorflow.python.framework import device as pydev + from tensorflow.python.framework import ops + from tensorflow.python.ops import collective_ops +diff --git a/scripts/tf_cnn_benchmarks/models/model_config.py b/scripts/tf_cnn_benchmarks/models/model_config.py +index 9b8a8f6..6ef2bf6 100644 +--- a/scripts/tf_cnn_benchmarks/models/model_config.py ++++ b/scripts/tf_cnn_benchmarks/models/model_config.py +@@ -27,15 +27,15 @@ from models import densenet_model + from models import googlenet_model + from models import inception_model + from models import lenet_model +-from models import mobilenet_v2 +-from models import nasnet_model ++#from models import mobilenet_v2 ++#from models import nasnet_model + from models import official_resnet_model + from models import overfeat_model + from models import resnet_model + from models import ssd_model + from models import trivial_model + from models import vgg_model +-from models.experimental import deepspeech ++#from models.experimental import deepspeech + from models.experimental import official_ncf_model + + +@@ -81,9 +81,9 @@ _model_name_to_imagenet_model = { + 'resnet101_v2': resnet_model.create_resnet101_v2_model, + 'resnet152': resnet_model.create_resnet152_model, + 'resnet152_v2': resnet_model.create_resnet152_v2_model, +- 'nasnet': nasnet_model.NasnetModel, +- 'nasnetlarge': nasnet_model.NasnetLargeModel, +- 'mobilenet': mobilenet_v2.MobilenetModel, ++ #'nasnet': nasnet_model.NasnetModel, ++ #'nasnetlarge': nasnet_model.NasnetLargeModel, ++ #'mobilenet': mobilenet_v2.MobilenetModel, + 'ncf': official_ncf_model.NcfModel, + } + +@@ -104,7 +104,7 @@ _model_name_to_cifar_model = { + 'densenet40_k12': densenet_model.create_densenet40_k12_model, + 'densenet100_k12': densenet_model.create_densenet100_k12_model, + 'densenet100_k24': densenet_model.create_densenet100_k24_model, +- 'nasnet': nasnet_model.NasnetCifarModel, ++ #'nasnet': nasnet_model.NasnetCifarModel, + } + + +@@ -120,8 +120,8 @@ def _get_model_map(dataset_name): + return _model_name_to_cifar_model + elif dataset_name in ('imagenet', 'synthetic'): + return _model_name_to_imagenet_model +- elif dataset_name == 'librispeech': +- return {'deepspeech2': 
deepspeech.DeepSpeech2Model} ++ #elif dataset_name == 'librispeech': ++ # return {'deepspeech2': deepspeech.DeepSpeech2Model} + elif dataset_name == 'coco': + return _model_name_to_object_detection_model + else: +diff --git a/scripts/tf_cnn_benchmarks/preprocessing.py b/scripts/tf_cnn_benchmarks/preprocessing.py +index a6ceb7c..b2c48e9 100644 +--- a/scripts/tf_cnn_benchmarks/preprocessing.py ++++ b/scripts/tf_cnn_benchmarks/preprocessing.py +@@ -25,8 +25,8 @@ from six.moves import xrange # pylint: disable=redefined-builtin + import tensorflow as tf + + import cnn_util +-from tensorflow.contrib.data.python.ops import threadpool +-from tensorflow.contrib.image.python.ops import distort_image_ops ++from tensorflow.python.data.experimental.ops import threadpool ++from tensorflow_addons.image import distort_image_ops + from tensorflow.python.data.ops import multi_device_iterator_ops + from tensorflow.python.framework import function + from tensorflow.python.layers import utils +@@ -34,6 +34,7 @@ from tensorflow.python.ops import data_flow_ops + from tensorflow.python.platform import gfile + import mlperf + ++tf.compat.v1.disable_eager_execution() + + def parse_example_proto(example_serialized): + """Parses an Example proto containing a training example of an image. +@@ -72,14 +73,14 @@ def parse_example_proto(example_serialized): + """ + # Dense features in Example proto. + feature_map = { +- 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, ++ 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), +- 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, ++ 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), +- 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, ++ 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, + default_value=''), + } +- sparse_float32 = tf.VarLenFeature(dtype=tf.float32) ++ sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', +@@ -87,7 +88,7 @@ def parse_example_proto(example_serialized): + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + +- features = tf.parse_single_example(example_serialized, feature_map) ++ features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) +@@ -101,7 +102,7 @@ def parse_example_proto(example_serialized): + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) +- bbox = tf.transpose(bbox, [0, 2, 1]) ++ bbox = tf.transpose(a=bbox, perm=[0, 2, 1]) + + return features['image/encoded'], label, bbox, features['image/class/text'] + +@@ -167,7 +168,7 @@ def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): + """ + # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): + # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): +- with tf.name_scope(scope or 'decode_jpeg'): ++ with tf.compat.v1.name_scope(scope or 'decode_jpeg'): + # Decode the string as an RGB JPEG. + # Note that the resulting image contains an unknown height and width + # that is set dynamically by decode_jpeg. In other words, the height +@@ -227,12 +228,12 @@ def eval_image(image, + """ + # TODO(reedwm): Currently we resize then crop. Investigate if it's faster to + # crop then resize. 
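The hunks above are largely mechanical TF1-to-TF2 renames: tf.FixedLenFeature and tf.VarLenFeature move under tf.io, tf.parse_single_example becomes tf.io.parse_single_example with explicit keyword arguments, and graph-mode name scopes move under tf.compat.v1. A minimal standalone sketch of the new spelling follows; it is not part of the patch, assumes TensorFlow 2.x, and reuses only the feature keys shown in the hunk above.

import tensorflow as tf

# TF2 feature specs for the same keys used in parse_example_proto above.
feature_map = {
    'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''),
    'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
    'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32),
}

def parse(example_serialized):
    # tf.parse_single_example -> tf.io.parse_single_example (keyword args).
    features = tf.io.parse_single_example(serialized=example_serialized,
                                          features=feature_map)
    label = tf.cast(features['image/class/label'], dtype=tf.int32)
    xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
    return features['image/encoded'], label, xmin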
+- with tf.name_scope('eval_image'): ++ with tf.compat.v1.name_scope('eval_image'): + if summary_verbosity >= 3: +- tf.summary.image( ++ tf.compat.v1.summary.image( + 'original_image', tf.expand_dims(image, 0)) + +- shape = tf.shape(image) ++ shape = tf.shape(input=image) + image_height = shape[0] + image_width = shape[1] + image_height_float = tf.cast(image_height, tf.float32) +@@ -259,10 +260,9 @@ def eval_image(image, + + # Resize the image to shape (`resize_height`, `resize_width`) + image_resize_method = get_image_resize_method(resize_method, batch_position) +- distorted_image = tf.image.resize_images(image, ++ distorted_image = tf.image.resize(image, + [resize_height, resize_width], +- image_resize_method, +- align_corners=False) ++ image_resize_method) + + # Do a central crop of the image to size (height, width). + # MLPerf requires us to log (height, width) with two different keys. +@@ -277,7 +277,7 @@ def eval_image(image, + + distorted_image.set_shape([height, width, 3]) + if summary_verbosity >= 3: +- tf.summary.image( ++ tf.compat.v1.summary.image( + 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) + image = distorted_image + return image +@@ -322,7 +322,7 @@ def train_image(image_buffer, + """ + # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): + # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): +- with tf.name_scope(scope or 'distort_image'): ++ with tf.compat.v1.name_scope(scope or 'distort_image'): + # A large fraction of image datasets contain a human-annotated bounding box + # delineating the region of the image containing the object of interest. We + # choose to create a new bounding box for the object which is a randomly +@@ -344,7 +344,7 @@ def train_image(image_buffer, + value=max_attempts) + + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( +- tf.image.extract_jpeg_shape(image_buffer), ++ image_size=tf.image.extract_jpeg_shape(image_buffer), + bounding_boxes=bbox, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, +@@ -358,7 +358,7 @@ def train_image(image_buffer, + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distort_bbox) +- tf.summary.image( ++ tf.compat.v1.summary.image( + 'images_with_distorted_bounding_box', + image_with_distorted_box) + +@@ -381,15 +381,14 @@ def train_image(image_buffer, + # ratio is not respected. + mlperf.logger.log(key=mlperf.tags.INPUT_RESIZE, value=[height, width]) + image_resize_method = get_image_resize_method(resize_method, batch_position) +- distorted_image = tf.image.resize_images( ++ distorted_image = tf.image.resize( + distorted_image, [height, width], +- image_resize_method, +- align_corners=False) ++ image_resize_method) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. 
+ distorted_image.set_shape([height, width, 3]) + if summary_verbosity >= 3: +- tf.summary.image('cropped_resized_maybe_flipped_image', ++ tf.compat.v1.summary.image('cropped_resized_maybe_flipped_image', + tf.expand_dims(distorted_image, 0)) + + if distortions: +@@ -404,7 +403,7 @@ def train_image(image_buffer, + distorted_image *= 255 + + if summary_verbosity >= 3: +- tf.summary.image( ++ tf.compat.v1.summary.image( + 'final_distorted_image', + tf.expand_dims(distorted_image, 0)) + return distorted_image +@@ -429,7 +428,7 @@ def distort_color(image, batch_position=0, distort_color_in_yiq=False, + Returns: + color-distorted image + """ +- with tf.name_scope(scope or 'distort_color'): ++ with tf.compat.v1.name_scope(scope or 'distort_color'): + + def distort_fn_0(image=image): + """Variant 0 of distort function.""" +@@ -495,7 +494,7 @@ class InputPreprocessor(object): + """Creates a MultiDeviceIterator.""" + assert self.supports_datasets() + assert num_splits == len(gpu_devices) +- with tf.name_scope('batch_processing'): ++ with tf.compat.v1.name_scope('batch_processing'): + if doing_eval: + subset = 'validation' + else: +@@ -522,7 +521,7 @@ class InputPreprocessor(object): + gpu_devices, + source_device=cpu_device, + max_buffer_size=params.multi_device_iterator_max_buffer_size) +- tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, ++ tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.TABLE_INITIALIZERS, + multi_device_iterator.initializer) + return multi_device_iterator + +@@ -544,7 +543,7 @@ class InputPreprocessor(object): + + def create_iterator(self, ds): + ds_iterator = tf.compat.v1.data.make_initializable_iterator(ds) +- tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, ++ tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.TABLE_INITIALIZERS, + ds_iterator.initializer) + return ds_iterator + +@@ -558,7 +557,7 @@ class InputPreprocessor(object): + assert self.supports_datasets() + batch_size_per_split = batch_size // num_splits + assert batch_size_per_split == model_input_shapes[0][0] +- with tf.name_scope('batch_processing'): ++ with tf.compat.v1.name_scope('batch_processing'): + ds = self.create_dataset(batch_size, num_splits, batch_size_per_split, + dataset, subset, train, + datasets_repeat_cached_sample, num_threads, +@@ -572,7 +571,7 @@ class InputPreprocessor(object): + + @function.Defun(tf.string) + def _fn(h): +- remote_iterator = tf.data.Iterator.from_string_handle( ++ remote_iterator = tf.compat.v1.data.Iterator.from_string_handle( + h, ds_iterator.output_types, ds_iterator.output_shapes) + input_list = remote_iterator.get_next() + reshaped_input_list = [ +@@ -730,7 +729,7 @@ class RecordInputImagePreprocessor(BaseImagePreprocessor): + shift_ratio=-1): + if shift_ratio < 0: + shift_ratio = self.shift_ratio +- with tf.name_scope('batch_processing'): ++ with tf.compat.v1.name_scope('batch_processing'): + # Build final results per split. + images = [[] for _ in range(self.num_splits)] + labels = [[] for _ in range(self.num_splits)] +@@ -795,7 +794,7 @@ class ImagenetPreprocessor(RecordInputImagePreprocessor): + try: + from official.resnet.imagenet_preprocessing import preprocess_image + except ImportError: +- tf.logging.fatal('Please include tensorflow/models to the PYTHONPATH.') ++ tf.compat.v1.logging.fatal('Please include tensorflow/models to the PYTHONPATH.') + raise + if self.train: + image = preprocess_image( +@@ -824,28 +823,28 @@ class Cifar10ImagePreprocessor(BaseImagePreprocessor): + Returns: + distorted image. 
+ """ +- image = tf.image.resize_image_with_crop_or_pad( ++ image = tf.image.resize_with_crop_or_pad( + image, self.height + 8, self.width + 8) +- distorted_image = tf.random_crop(image, ++ distorted_image = tf.image.random_crop(image, + [self.height, self.width, self.depth]) + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + if self.summary_verbosity >= 3: +- tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0)) ++ tf.compat.v1.summary.image('distorted_image', tf.expand_dims(distorted_image, 0)) + return distorted_image + + def _eval_image(self, image): + """Get the image for model evaluation.""" +- distorted_image = tf.image.resize_image_with_crop_or_pad( ++ distorted_image = tf.image.resize_with_crop_or_pad( + image, self.width, self.height) + if self.summary_verbosity >= 3: +- tf.summary.image('cropped.image', tf.expand_dims(distorted_image, 0)) ++ tf.compat.v1.summary.image('cropped.image', tf.expand_dims(distorted_image, 0)) + return distorted_image + + def preprocess(self, raw_image): + """Preprocessing raw image.""" + if self.summary_verbosity >= 3: +- tf.summary.image('raw.image', tf.expand_dims(raw_image, 0)) ++ tf.compat.v1.summary.image('raw.image', tf.expand_dims(raw_image, 0)) + if self.train and self.distortions: + image = self._distort_image(raw_image) + else: +@@ -860,11 +859,11 @@ class Cifar10ImagePreprocessor(BaseImagePreprocessor): + shift_ratio=-1): + # TODO(jsimsa): Implement datasets code path + del shift_ratio, params +- with tf.name_scope('batch_processing'): ++ with tf.compat.v1.name_scope('batch_processing'): + all_images, all_labels = dataset.read_data_files(subset) + all_images = tf.constant(all_images) + all_labels = tf.constant(all_labels) +- input_image, input_label = tf.train.slice_input_producer( ++ input_image, input_label = tf.compat.v1.train.slice_input_producer( + [all_images, all_labels]) + input_image = tf.cast(input_image, self.dtype) + input_label = tf.cast(input_label, tf.int32) +@@ -872,7 +871,7 @@ class Cifar10ImagePreprocessor(BaseImagePreprocessor): + min_fraction_of_examples_in_queue = 0.4 + min_queue_examples = int(dataset.num_examples_per_epoch(subset) * + min_fraction_of_examples_in_queue) +- raw_images, raw_labels = tf.train.shuffle_batch( ++ raw_images, raw_labels = tf.compat.v1.train.shuffle_batch( + [input_image, input_label], batch_size=self.batch_size, + capacity=min_queue_examples + 3 * self.batch_size, + min_after_dequeue=min_queue_examples) +@@ -891,7 +890,7 @@ class Cifar10ImagePreprocessor(BaseImagePreprocessor): + # reshape to the format returned by minibatch. 
+ raw_image = tf.reshape(raw_images[i], + [dataset.depth, dataset.height, dataset.width]) +- raw_image = tf.transpose(raw_image, [1, 2, 0]) ++ raw_image = tf.transpose(a=raw_image, perm=[1, 2, 0]) + image = self.preprocess(raw_image) + images[split_index].append(image) + +@@ -912,7 +911,7 @@ class COCOPreprocessor(BaseImagePreprocessor): + params, + shift_ratio=-1): + del shift_ratio # Not used when using datasets instead of data_flow_ops +- with tf.name_scope('batch_processing'): ++ with tf.compat.v1.name_scope('batch_processing'): + ds = self.create_dataset( + self.batch_size, self.num_splits, self.batch_size_per_split, + dataset, subset, self.train, params.datasets_repeat_cached_sample) +@@ -946,13 +945,13 @@ class COCOPreprocessor(BaseImagePreprocessor): + image_buffer = data['image_buffer'] + boxes = data['groundtruth_boxes'] + classes = tf.reshape(data['groundtruth_classes'], [-1, 1]) +- source_id = tf.string_to_number(data['source_id']) ++ source_id = tf.strings.to_number(data['source_id']) + raw_shape = data['raw_shape'] + + ssd_encoder = ssd_dataloader.Encoder() + + # Only 80 of the 90 COCO classes are used. +- class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP) ++ class_map = tf.convert_to_tensor(value=ssd_constants.CLASS_MAP) + classes = tf.gather(class_map, classes) + classes = tf.cast(classes, dtype=tf.float32) + +@@ -984,7 +983,7 @@ class COCOPreprocessor(BaseImagePreprocessor): + + else: + image = tf.image.decode_jpeg(image_buffer) +- image = tf.image.resize_images( ++ image = tf.image.resize( + image, size=(ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE)) + # resize_image returns image of dtype float32 and does not change its + # range. Divide by 255 to convert image to [0, 1] range. +@@ -996,8 +995,8 @@ class COCOPreprocessor(BaseImagePreprocessor): + def trim_and_pad(inp_tensor): + """Limit the number of boxes, and pad if necessary.""" + inp_tensor = inp_tensor[:ssd_constants.MAX_NUM_EVAL_BOXES] +- num_pad = ssd_constants.MAX_NUM_EVAL_BOXES - tf.shape(inp_tensor)[0] +- inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]]) ++ num_pad = ssd_constants.MAX_NUM_EVAL_BOXES - tf.shape(input=inp_tensor)[0] ++ inp_tensor = tf.pad(tensor=inp_tensor, paddings=[[0, num_pad], [0, 0]]) + return tf.reshape(inp_tensor, [ssd_constants.MAX_NUM_EVAL_BOXES, + inp_tensor.get_shape()[1]]) + +@@ -1064,7 +1063,7 @@ class COCOPreprocessor(BaseImagePreprocessor): + + ds = ds.map(ssd_dataloader.ssd_parse_example_proto, num_parallel_calls=64) + ds = ds.filter( +- lambda data: tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0)) ++ lambda data: tf.greater(tf.shape(input=data['groundtruth_boxes'])[0], 0)) + ds = ds.apply( + tf.data.experimental.map_and_batch( + map_func=self.preprocess, +@@ -1142,12 +1141,12 @@ class TestImagePreprocessor(BaseImagePreprocessor): + fake_labels = cnn_util.roll_numpy_batches(self.fake_labels, self.batch_size, + shift_ratio) + +- with tf.name_scope('batch_processing'): +- image_slice, label_slice = tf.train.slice_input_producer( ++ with tf.compat.v1.name_scope('batch_processing'): ++ image_slice, label_slice = tf.compat.v1.train.slice_input_producer( + [fake_images, fake_labels], + shuffle=False, + name='image_slice') +- raw_images, raw_labels = tf.train.batch( ++ raw_images, raw_labels = tf.compat.v1.train.batch( + [image_slice, label_slice], batch_size=self.batch_size, + name='image_batch') + images = [[] for _ in range(self.num_splits)] +@@ -1247,7 +1246,7 @@ class LibrispeechPreprocessor(InputPreprocessor): + # TODO(laigd): in distributed mode we use 
shift_ratio so different workers + # won't work on same inputs, so we should respect that. + del shift_ratio +- with tf.name_scope('batch_processing'): ++ with tf.compat.v1.name_scope('batch_processing'): + ds = self.create_dataset( + self.batch_size, + self.num_splits, +@@ -1287,14 +1286,14 @@ class LibrispeechPreprocessor(InputPreprocessor): + del batch_position + assert self.supports_datasets() + context_features = { +- 'labels': tf.VarLenFeature(dtype=tf.int64), +- 'input_length': tf.FixedLenFeature([], dtype=tf.int64), +- 'label_length': tf.FixedLenFeature([], dtype=tf.int64), ++ 'labels': tf.io.VarLenFeature(dtype=tf.int64), ++ 'input_length': tf.io.FixedLenFeature([], dtype=tf.int64), ++ 'label_length': tf.io.FixedLenFeature([], dtype=tf.int64), + } + sequence_features = { +- 'features': tf.FixedLenSequenceFeature([161], dtype=tf.float32) ++ 'features': tf.io.FixedLenSequenceFeature([161], dtype=tf.float32) + } +- context_parsed, sequence_parsed = tf.parse_single_sequence_example( ++ context_parsed, sequence_parsed = tf.io.parse_single_sequence_example( + serialized=value, + context_features=context_features, + sequence_features=sequence_features, +@@ -1306,7 +1305,7 @@ class LibrispeechPreprocessor(InputPreprocessor): + # Label + tf.cast( + tf.reshape( +- tf.sparse_tensor_to_dense(context_parsed['labels']), [-1]), ++ tf.sparse.to_dense(context_parsed['labels']), [-1]), + dtype=tf.int32), + # Input length + tf.cast( +diff --git a/scripts/tf_cnn_benchmarks/ssd_dataloader.py b/scripts/tf_cnn_benchmarks/ssd_dataloader.py +index b4fe986..887f1da 100644 +--- a/scripts/tf_cnn_benchmarks/ssd_dataloader.py ++++ b/scripts/tf_cnn_benchmarks/ssd_dataloader.py +@@ -105,17 +105,17 @@ def calc_iou_tensor(boxes1, boxes2): + b2_left, b2_top, b2_right, b2_bottom = tf.split(boxes2, 4, axis=1) + + # Shape of intersect_* (N, M) +- intersect_left = tf.maximum(b1_left, tf.transpose(b2_left)) +- intersect_top = tf.maximum(b1_top, tf.transpose(b2_top)) +- intersect_right = tf.minimum(b1_right, tf.transpose(b2_right)) +- intersect_bottom = tf.minimum(b1_bottom, tf.transpose(b2_bottom)) ++ intersect_left = tf.maximum(b1_left, tf.transpose(a=b2_left)) ++ intersect_top = tf.maximum(b1_top, tf.transpose(a=b2_top)) ++ intersect_right = tf.minimum(b1_right, tf.transpose(a=b2_right)) ++ intersect_bottom = tf.minimum(b1_bottom, tf.transpose(a=b2_bottom)) + + boxes1_area = (b1_right - b1_left) * (b1_bottom - b1_top) + boxes2_area = (b2_right - b2_left) * (b2_bottom - b2_top) + + intersect = tf.multiply(tf.maximum((intersect_right - intersect_left), 0), + tf.maximum((intersect_bottom - intersect_top), 0)) +- union = boxes1_area + tf.transpose(boxes2_area) - intersect ++ union = boxes1_area + tf.transpose(a=boxes2_area) - intersect + iou = intersect / union + + return iou +@@ -155,18 +155,18 @@ def ssd_parse_example_proto(example_serialized): + raw_shape: [height, width, 3]. 
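calc_iou_tensor above builds a dense [num_boxes1, num_boxes2] IoU matrix by broadcasting per-box coordinates against their transposes; the TF2 change in the hunk is only the explicit a= keyword on tf.transpose. The sketch below is not part of the patch and assumes the same left/top/right/bottom box layout as the hunk; it omits the tf.where guard the original uses for a zero union.

import tensorflow as tf

def pairwise_iou(boxes1, boxes2):
    # boxes1: [N, 4], boxes2: [M, 4]; returns an [N, M] IoU matrix.
    l1, t1, r1, b1 = tf.split(boxes1, 4, axis=1)
    l2, t2, r2, b2 = tf.split(boxes2, 4, axis=1)
    # Broadcast [N, 1] against [1, M] to get all pairwise intersections.
    inter_l = tf.maximum(l1, tf.transpose(a=l2))
    inter_t = tf.maximum(t1, tf.transpose(a=t2))
    inter_r = tf.minimum(r1, tf.transpose(a=r2))
    inter_b = tf.minimum(b1, tf.transpose(a=b2))
    inter = tf.maximum(inter_r - inter_l, 0.0) * tf.maximum(inter_b - inter_t, 0.0)
    area1 = (r1 - l1) * (b1 - t1)
    area2 = (r2 - l2) * (b2 - t2)
    union = area1 + tf.transpose(a=area2) - inter
    return inter / union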
+ """ + feature_map = { +- 'image/encoded': tf.FixedLenFeature( ++ 'image/encoded': tf.io.FixedLenFeature( + (), dtype=tf.string, default_value=''), +- 'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''), +- 'image/height': tf.FixedLenFeature((), tf.int64, default_value=1), +- 'image/width': tf.FixedLenFeature((), tf.int64, default_value=1), +- 'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32), +- 'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32), +- 'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32), +- 'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32), +- 'image/object/class/label': tf.VarLenFeature(dtype=tf.int64), ++ 'image/source_id': tf.io.FixedLenFeature((), tf.string, default_value=''), ++ 'image/height': tf.io.FixedLenFeature((), tf.int64, default_value=1), ++ 'image/width': tf.io.FixedLenFeature((), tf.int64, default_value=1), ++ 'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32), ++ 'image/object/bbox/ymin': tf.io.VarLenFeature(dtype=tf.float32), ++ 'image/object/bbox/xmax': tf.io.VarLenFeature(dtype=tf.float32), ++ 'image/object/bbox/ymax': tf.io.VarLenFeature(dtype=tf.float32), ++ 'image/object/class/label': tf.io.VarLenFeature(dtype=tf.int64), + } +- features = tf.parse_single_example(example_serialized, feature_map) ++ features = tf.io.parse_single_example(serialized=example_serialized, features=feature_map) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 1) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 1) +@@ -214,21 +214,21 @@ def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape): + cropped_classes: class labels for objects in the cropped region. + """ + +- num_boxes = tf.shape(boxes)[0] ++ num_boxes = tf.shape(input=boxes)[0] + + def no_crop_check(): +- return (tf.random_uniform(shape=(), minval=0, maxval=1, dtype=tf.float32) ++ return (tf.random.uniform(shape=(), minval=0, maxval=1, dtype=tf.float32) + < ssd_constants.P_NO_CROP_PER_PASS) + + def no_crop_proposal(): + return ( + tf.ones((), tf.bool), +- tf.convert_to_tensor([0, 0, 1, 1], dtype=tf.float32), ++ tf.convert_to_tensor(value=[0, 0, 1, 1], dtype=tf.float32), + tf.ones((num_boxes,), tf.bool), + ) + + def crop_proposal(): +- rand_vec = lambda minval, maxval: tf.random_uniform( ++ rand_vec = lambda minval, maxval: tf.random.uniform( + shape=(ssd_constants.NUM_CROP_PASSES, 1), minval=minval, maxval=maxval, + dtype=tf.float32) + +@@ -240,14 +240,14 @@ def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape): + + ltrb = tf.concat([left, top, right, bottom], axis=1) + +- min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0] ++ min_iou = tf.random.shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0] + ious = calc_iou_tensor(ltrb, boxes) + + # discard any bboxes whose center not in the cropped image + xc, yc = [tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :], + (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)] + +- masks = tf.reduce_all(tf.stack([ ++ masks = tf.reduce_all(input_tensor=tf.stack([ + tf.greater(xc, tf.tile(left, (1, num_boxes))), + tf.less(xc, tf.tile(right, (1, num_boxes))), + tf.greater(yc, tf.tile(top, (1, num_boxes))), +@@ -257,22 +257,22 @@ def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape): + # Checks of whether a crop is valid. 
+ valid_aspect = tf.logical_and(tf.less(height/width, 2), + tf.less(width/height, 2)) +- valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1, keepdims=True) +- valid_masks = tf.reduce_any(masks, axis=1, keepdims=True) ++ valid_ious = tf.reduce_all(input_tensor=tf.greater(ious, min_iou), axis=1, keepdims=True) ++ valid_masks = tf.reduce_any(input_tensor=masks, axis=1, keepdims=True) + +- valid_all = tf.cast(tf.reduce_all(tf.concat( ++ valid_all = tf.cast(tf.reduce_all(input_tensor=tf.concat( + [valid_aspect, valid_ious, valid_masks], axis=1), axis=1), tf.int32) + + # One indexed, as zero is needed for the case of no matches. + index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32) + + # Either one-hot, or zeros if there is no valid crop. +- selection = tf.equal(tf.reduce_max(index * valid_all), index) ++ selection = tf.equal(tf.reduce_max(input_tensor=index * valid_all), index) + +- use_crop = tf.reduce_any(selection) +- output_ltrb = tf.reduce_sum(tf.multiply(ltrb, tf.tile(tf.cast( ++ use_crop = tf.reduce_any(input_tensor=selection) ++ output_ltrb = tf.reduce_sum(input_tensor=tf.multiply(ltrb, tf.tile(tf.cast( + selection, tf.float32)[:, tf.newaxis], (1, 4))), axis=0) +- output_masks = tf.reduce_any(tf.logical_and(masks, tf.tile( ++ output_masks = tf.reduce_any(input_tensor=tf.logical_and(masks, tf.tile( + selection[:, tf.newaxis], (1, num_boxes))), axis=0) + + return use_crop, output_ltrb, output_masks +@@ -290,7 +290,7 @@ def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape): + loop_vars=[tf.zeros((), tf.bool), tf.zeros((4,), tf.float32), tf.zeros((num_boxes,), tf.bool)], + ) + +- filtered_boxes = tf.boolean_mask(boxes, box_masks, axis=0) ++ filtered_boxes = tf.boolean_mask(tensor=boxes, mask=box_masks, axis=0) + + mlperf.logger.log(key=mlperf.tags.NUM_CROPPING_ITERATIONS, + value=ssd_constants.NUM_CROP_PASSES) +@@ -330,19 +330,19 @@ def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape): + image_buffer, crop_window, channels=3) + + # Resize converts image dtype from uint8 to float32, without rescaling values. 
+- resized_image = tf.image.resize_images( ++ resized_image = tf.image.resize( + cropped_image, [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE]) + mlperf.logger.log(key=mlperf.tags.INPUT_SIZE, + value=ssd_constants.IMAGE_SIZE) + +- cropped_classes = tf.boolean_mask(classes, box_masks, axis=0) ++ cropped_classes = tf.boolean_mask(tensor=classes, mask=box_masks, axis=0) + + return resized_image, cropped_boxes, cropped_classes + + + def color_jitter(image, brightness=0, contrast=0, saturation=0, hue=0): + """Distort the color of the image.""" +- with tf.name_scope('distort_color'): ++ with tf.compat.v1.name_scope('distort_color'): + if brightness > 0: + image = tf.image.random_brightness(image, max_delta=brightness) + if contrast > 0: +@@ -392,7 +392,7 @@ class Encoder(object): + + self.default_boxes = DefaultBoxes()('ltrb') + self.default_boxes = box_list.BoxList( +- tf.convert_to_tensor(self.default_boxes)) ++ tf.convert_to_tensor(value=self.default_boxes)) + self.assigner = target_assigner.TargetAssigner( + similarity_calc, matcher, box_coder) + +@@ -401,5 +401,5 @@ class Encoder(object): + encoded_classes, _, encoded_boxes, _, matches = self.assigner.assign( + self.default_boxes, target_boxes, gt_labels) + num_matched_boxes = tf.reduce_sum( +- tf.cast(tf.not_equal(matches, -1), tf.float32)) ++ input_tensor=tf.cast(tf.not_equal(matches, -1), tf.float32)) + return encoded_classes, encoded_boxes, num_matched_boxes diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/tensorflow_models_tf2.0.patch b/models/object_detection/tensorflow/ssd-resnet34/inference/tensorflow_models_tf2.0.patch new file mode 100644 index 000000000..1708244fd --- /dev/null +++ b/models/object_detection/tensorflow/ssd-resnet34/inference/tensorflow_models_tf2.0.patch @@ -0,0 +1,22 @@ +diff --git a/research/object_detection/matchers/bipartite_matcher.py b/research/object_detection/matchers/bipartite_matcher.py +index 56cff85a..6ffc91df 100644 +--- a/research/object_detection/matchers/bipartite_matcher.py ++++ b/research/object_detection/matchers/bipartite_matcher.py +@@ -12,15 +12,14 @@ + # See the License for the specific language governing permissions and + # limitations under the License. + # ============================================================================== +- ++pass + """Bipartite matcher implementation.""" + + import tensorflow as tf + +-from tensorflow.contrib.image.python.ops import image_ops ++from tensorflow.python.ops import image_ops + from object_detection.core import matcher + +- + class GreedyBipartiteMatcher(matcher.Matcher): + """Wraps a Tensorflow greedy bipartite matcher.""" + diff --git a/models/object_detection/tensorflow/ssd-resnet34/training/__init__.py b/models/object_detection/tensorflow/ssd-resnet34/training/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/object_detection/tensorflow/ssd-resnet34/training/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/object_detection/tensorflow/ssd-resnet34/training/fp32/__init__.py b/models/object_detection/tensorflow/ssd-resnet34/training/fp32/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/object_detection/tensorflow/ssd-resnet34/training/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/object_detection/tensorflow/ssd-resnet34/training/fp32/benchmark_v1.13.diff b/models/object_detection/tensorflow/ssd-resnet34/training/fp32/benchmark_v1.13.diff deleted file mode 100644 index 9c082c9a6..000000000 --- a/models/object_detection/tensorflow/ssd-resnet34/training/fp32/benchmark_v1.13.diff +++ /dev/null @@ -1,26 +0,0 @@ -diff --git a/scripts/tf_cnn_benchmarks/datasets.py b/scripts/tf_cnn_benchmarks/datasets.py -index 58c0f0d..46c5322 100644 ---- a/scripts/tf_cnn_benchmarks/datasets.py -+++ b/scripts/tf_cnn_benchmarks/datasets.py -@@ -52,7 +52,7 @@ class Dataset(object): - self._num_classes = num_classes - - def tf_record_pattern(self, subset): -- return os.path.join(self.data_dir, '%s-*-of-*' % subset) -+ return os.path.join(self.data_dir, '*%s*-*-of-*' % subset) - - def reader(self): - return tf.TFRecordReader() -diff --git a/scripts/tf_cnn_benchmarks/preprocessing.py b/scripts/tf_cnn_benchmarks/preprocessing.py -index a6ceb7c..3456c52 100644 ---- a/scripts/tf_cnn_benchmarks/preprocessing.py -+++ b/scripts/tf_cnn_benchmarks/preprocessing.py -@@ -1017,7 +1017,7 @@ class COCOPreprocessor(BaseImagePreprocessor): - subset, - train, - datasets_repeat_cached_sample, -- num_threads=None, -+ num_threads=28, - datasets_use_caching=False, - datasets_parallel_interleave_cycle_length=None, - datasets_sloppy_parallel_interleave=False, diff --git a/models/object_detection/tensorflow/ssd_vgg16/__init__.py b/models/object_detection/tensorflow/ssd_vgg16/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/object_detection/tensorflow/ssd_vgg16/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/__init__.py b/models/object_detection/tensorflow/ssd_vgg16/inference/__init__.py deleted file mode 100644 index fd96ab7af..000000000 --- a/models/object_detection/tensorflow/ssd_vgg16/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/anchor_manipulator.py b/models/object_detection/tensorflow/ssd_vgg16/inference/anchor_manipulator.py deleted file mode 100644 index 35e26af07..000000000 --- a/models/object_detection/tensorflow/ssd_vgg16/inference/anchor_manipulator.py +++ /dev/null @@ -1,365 +0,0 @@ -# Copyright 2018 Changan Wang - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import math - -import tensorflow as tf -import numpy as np - -from tensorflow.contrib.image.python.ops import image_ops - - -def areas(gt_bboxes): - with tf.name_scope('bboxes_areas', values=[gt_bboxes]): - ymin, xmin, ymax, xmax = tf.split(gt_bboxes, 4, axis=1) - return (xmax - xmin) * (ymax - ymin) - - -def intersection(gt_bboxes, default_bboxes): - with tf.name_scope('bboxes_intersection', values=[gt_bboxes, default_bboxes]): - # num_anchors x 1 - ymin, xmin, ymax, xmax = tf.split(gt_bboxes, 4, axis=1) - # 1 x num_anchors - gt_ymin, gt_xmin, gt_ymax, gt_xmax = [tf.transpose(b, perm=[1, 0]) for b in tf.split(default_bboxes, 4, axis=1)] - # broadcast here to generate the full matrix - int_ymin = tf.maximum(ymin, gt_ymin) - int_xmin = tf.maximum(xmin, gt_xmin) - int_ymax = tf.minimum(ymax, gt_ymax) - int_xmax = tf.minimum(xmax, gt_xmax) - h = tf.maximum(int_ymax - int_ymin, 0.) - w = tf.maximum(int_xmax - int_xmin, 0.) 
- - return h * w - - -def iou_matrix(gt_bboxes, default_bboxes): - with tf.name_scope('iou_matrix', values=[gt_bboxes, default_bboxes]): - inter_vol = intersection(gt_bboxes, default_bboxes) - # broadcast - union_vol = areas(gt_bboxes) + tf.transpose(areas(default_bboxes), perm=[1, 0]) - inter_vol - - return tf.where(tf.equal(union_vol, 0.0), - tf.zeros_like(inter_vol), tf.truediv(inter_vol, union_vol)) - - -def do_dual_max_match(overlap_matrix, low_thres, high_thres, ignore_between=True, gt_max_first=True): - ''' - overlap_matrix: num_gt * num_anchors - ''' - with tf.name_scope('dual_max_match', values=[overlap_matrix]): - # first match from anchors' side - anchors_to_gt = tf.argmax(overlap_matrix, axis=0) - # the matching degree - match_values = tf.reduce_max(overlap_matrix, axis=0) - - # positive_mask = tf.greater(match_values, high_thres) - less_mask = tf.less(match_values, low_thres) - between_mask = tf.logical_and(tf.less(match_values, high_thres), tf.greater_equal(match_values, low_thres)) - negative_mask = less_mask if ignore_between else between_mask - ignore_mask = between_mask if ignore_between else less_mask - # fill all negative positions with -1, all ignore positions is -2 - match_indices = tf.where(negative_mask, -1 * tf.ones_like(anchors_to_gt), anchors_to_gt) - match_indices = tf.where(ignore_mask, -2 * tf.ones_like(match_indices), match_indices) - - # negtive values has no effect in tf.one_hot, that means all zeros along that axis - # so all positive match positions in anchors_to_gt_mask is 1, all others are 0 - anchors_to_gt_mask = tf.one_hot(tf.clip_by_value(match_indices, -1, tf.cast(tf.shape(overlap_matrix)[0], tf.int64)), - tf.shape(overlap_matrix)[0], on_value=1, off_value=0, axis=0, dtype=tf.int32) - # match from ground truth's side - gt_to_anchors = tf.argmax(overlap_matrix, axis=1) - - if gt_max_first: - # the max match from ground truth's side has higher priority - left_gt_to_anchors_mask = tf.one_hot(gt_to_anchors, tf.shape( - overlap_matrix)[1], on_value=1, off_value=0, axis=1, dtype=tf.int32) - else: - # the max match from anchors' side has higher priority - # use match result from ground truth's side only when the the matching degree from anchors' side is lower than position threshold - left_gt_to_anchors_mask = tf.cast(tf.logical_and(tf.reduce_max(anchors_to_gt_mask, axis=1, keep_dims=True) < 1, - tf.one_hot(gt_to_anchors, tf.shape(overlap_matrix)[1], - on_value=True, off_value=False, axis=1, dtype=tf.bool) - ), tf.int64) - # can not use left_gt_to_anchors_mask here, because there are many ground truthes match to one anchor, we should pick the highest one even when we are merging matching from ground truth side - left_gt_to_anchors_scores = overlap_matrix * tf.to_float(left_gt_to_anchors_mask) - # merge matching results from ground truth's side with the original matching results from anchors' side - # then select all the overlap score of those matching pairs - selected_scores = tf.gather_nd(overlap_matrix, tf.stack([tf.where(tf.reduce_max(left_gt_to_anchors_mask, axis=0) > 0, - tf.argmax(left_gt_to_anchors_scores, axis=0), - anchors_to_gt), - tf.range(tf.cast(tf.shape(overlap_matrix)[1], tf.int64))], axis=1)) - # return the matching results for both foreground anchors and background anchors, also with overlap scores - return tf.where(tf.reduce_max(left_gt_to_anchors_mask, axis=0) > 0, - tf.argmax(left_gt_to_anchors_scores, axis=0), - match_indices), selected_scores - -# def save_anchors(bboxes, labels, anchors_point): -# if not hasattr(save_image_with_bbox, 
"counter"): -# save_image_with_bbox.counter = 0 # it doesn't exist yet, so initialize it -# save_image_with_bbox.counter += 1 - -# np.save('./debug/bboxes_{}.npy'.format(save_image_with_bbox.counter), np.copy(bboxes)) -# np.save('./debug/labels_{}.npy'.format(save_image_with_bbox.counter), np.copy(labels)) -# np.save('./debug/anchors_{}.npy'.format(save_image_with_bbox.counter), np.copy(anchors_point)) -# return save_image_with_bbox.counter - - -class AnchorEncoder(object): - def __init__(self, allowed_borders, positive_threshold, ignore_threshold, prior_scaling, clip=False): - super(AnchorEncoder, self).__init__() - self._all_anchors = None - self._allowed_borders = allowed_borders - self._positive_threshold = positive_threshold - self._ignore_threshold = ignore_threshold - self._prior_scaling = prior_scaling - self._clip = clip - - def center2point(self, center_y, center_x, height, width): - return center_y - height / 2., center_x - width / 2., center_y + height / 2., center_x + width / 2., - - def point2center(self, ymin, xmin, ymax, xmax): - height, width = (ymax - ymin), (xmax - xmin) - return ymin + height / 2., xmin + width / 2., height, width - - def encode_all_anchors(self, labels, bboxes, all_anchors, all_num_anchors_depth, all_num_anchors_spatial, debug=False): - # y, x, h, w are all in range [0, 1] relative to the original image size - # shape info: - # y_on_image, x_on_image: layers_shapes[0] * layers_shapes[1] - # h_on_image, w_on_image: num_anchors - assert (len(all_num_anchors_depth) == len(all_num_anchors_spatial)) and ( - len(all_num_anchors_depth) == len(all_anchors)), 'inconsist num layers for anchors.' - with tf.name_scope('encode_all_anchors'): - num_layers = len(all_num_anchors_depth) - list_anchors_ymin = [] - list_anchors_xmin = [] - list_anchors_ymax = [] - list_anchors_xmax = [] - tiled_allowed_borders = [] - for ind, anchor in enumerate(all_anchors): - anchors_ymin_, anchors_xmin_, anchors_ymax_, anchors_xmax_ = self.center2point( - anchor[0], anchor[1], anchor[2], anchor[3]) - - list_anchors_ymin.append(tf.reshape(anchors_ymin_, [-1])) - list_anchors_xmin.append(tf.reshape(anchors_xmin_, [-1])) - list_anchors_ymax.append(tf.reshape(anchors_ymax_, [-1])) - list_anchors_xmax.append(tf.reshape(anchors_xmax_, [-1])) - - tiled_allowed_borders.extend([self._allowed_borders[ind]] * - all_num_anchors_depth[ind] * all_num_anchors_spatial[ind]) - - anchors_ymin = tf.concat(list_anchors_ymin, 0, name='concat_ymin') - anchors_xmin = tf.concat(list_anchors_xmin, 0, name='concat_xmin') - anchors_ymax = tf.concat(list_anchors_ymax, 0, name='concat_ymax') - anchors_xmax = tf.concat(list_anchors_xmax, 0, name='concat_xmax') - - if self._clip: - anchors_ymin = tf.clip_by_value(anchors_ymin, 0., 1.) - anchors_xmin = tf.clip_by_value(anchors_xmin, 0., 1.) - anchors_ymax = tf.clip_by_value(anchors_ymax, 0., 1.) - anchors_xmax = tf.clip_by_value(anchors_xmax, 0., 1.) - - anchor_allowed_borders = tf.stack(tiled_allowed_borders, 0, name='concat_allowed_borders') - - inside_mask = tf.logical_and(tf.logical_and(anchors_ymin > -anchor_allowed_borders * 1., - anchors_xmin > -anchor_allowed_borders * 1.), - tf.logical_and(anchors_ymax < (1. + anchor_allowed_borders * 1.), - anchors_xmax < (1. 
+ anchor_allowed_borders * 1.))) - - anchors_point = tf.stack([anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax], axis=-1) - - # save_anchors_op = tf.py_func(save_anchors, - # [bboxes, - # labels, - # anchors_point], - # tf.int64, stateful=True) - - # with tf.control_dependencies([save_anchors_op]): - overlap_matrix = iou_matrix(bboxes, anchors_point) * tf.cast(tf.expand_dims(inside_mask, 0), tf.float32) - matched_gt, gt_scores = do_dual_max_match(overlap_matrix, self._ignore_threshold, self._positive_threshold) - # get all positive matching positions - matched_gt_mask = matched_gt > -1 - matched_indices = tf.clip_by_value(matched_gt, 0, tf.int64.max) - # the labels here maybe chaos at those non-positive positions - gt_labels = tf.gather(labels, matched_indices) - # filter the invalid labels - gt_labels = gt_labels * tf.cast(matched_gt_mask, tf.int64) - # set those ignored positions to -1 - gt_labels = gt_labels + (-1 * tf.cast(matched_gt < -1, tf.int64)) - - gt_ymin, gt_xmin, gt_ymax, gt_xmax = tf.unstack(tf.gather(bboxes, matched_indices), 4, axis=-1) - - # transform to center / size. - gt_cy, gt_cx, gt_h, gt_w = self.point2center(gt_ymin, gt_xmin, gt_ymax, gt_xmax) - anchor_cy, anchor_cx, anchor_h, anchor_w = self.point2center( - anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax) - # encode features. - # the prior_scaling (in fact is 5 and 10) is use for balance the regression loss of center and with(or height) - gt_cy = (gt_cy - anchor_cy) / anchor_h / self._prior_scaling[0] - gt_cx = (gt_cx - anchor_cx) / anchor_w / self._prior_scaling[1] - gt_h = tf.log(gt_h / anchor_h) / self._prior_scaling[2] - gt_w = tf.log(gt_w / anchor_w) / self._prior_scaling[3] - # now gt_localizations is our regression object, but also maybe chaos at those non-positive positions - if debug: - gt_targets = tf.stack([anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax], axis=-1) - else: - gt_targets = tf.stack([gt_cy, gt_cx, gt_h, gt_w], axis=-1) - # set all targets of non-positive positions to 0 - gt_targets = tf.expand_dims(tf.cast(matched_gt_mask, tf.float32), -1) * gt_targets - self._all_anchors = (anchor_cy, anchor_cx, anchor_h, anchor_w) - return gt_targets, gt_labels, gt_scores - - # return a list, of which each is: - # shape: [feature_h, feature_w, num_anchors, 4] - # order: ymin, xmin, ymax, xmax - def decode_all_anchors(self, pred_location, num_anchors_per_layer): - assert self._all_anchors is not None, 'no anchors to decode.' - with tf.name_scope('decode_all_anchors', values=[pred_location]): - anchor_cy, anchor_cx, anchor_h, anchor_w = self._all_anchors - - pred_h = tf.exp(pred_location[:, -2] * self._prior_scaling[2]) * anchor_h - pred_w = tf.exp(pred_location[:, -1] * self._prior_scaling[3]) * anchor_w - pred_cy = pred_location[:, 0] * self._prior_scaling[0] * anchor_h + anchor_cy - pred_cx = pred_location[:, 1] * self._prior_scaling[1] * anchor_w + anchor_cx - - return tf.split(tf.stack(self.center2point(pred_cy, pred_cx, pred_h, pred_w), axis=-1), num_anchors_per_layer, axis=0) - - def ext_decode_all_anchors(self, pred_location, all_anchors, all_num_anchors_depth, all_num_anchors_spatial): - assert (len(all_num_anchors_depth) == len(all_num_anchors_spatial)) and ( - len(all_num_anchors_depth) == len(all_anchors)), 'inconsist num layers for anchors.' 
- with tf.name_scope('ext_decode_all_anchors', values=[pred_location]): - num_anchors_per_layer = [] - for ind in range(len(all_anchors)): - num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind]) - - num_layers = len(all_num_anchors_depth) - list_anchors_ymin = [] - list_anchors_xmin = [] - list_anchors_ymax = [] - list_anchors_xmax = [] - tiled_allowed_borders = [] - for ind, anchor in enumerate(all_anchors): - anchors_ymin_, anchors_xmin_, anchors_ymax_, anchors_xmax_ = self.center2point( - anchor[0], anchor[1], anchor[2], anchor[3]) - - list_anchors_ymin.append(tf.reshape(anchors_ymin_, [-1])) - list_anchors_xmin.append(tf.reshape(anchors_xmin_, [-1])) - list_anchors_ymax.append(tf.reshape(anchors_ymax_, [-1])) - list_anchors_xmax.append(tf.reshape(anchors_xmax_, [-1])) - - anchors_ymin = tf.concat(list_anchors_ymin, 0, name='concat_ymin') - anchors_xmin = tf.concat(list_anchors_xmin, 0, name='concat_xmin') - anchors_ymax = tf.concat(list_anchors_ymax, 0, name='concat_ymax') - anchors_xmax = tf.concat(list_anchors_xmax, 0, name='concat_xmax') - - anchor_cy, anchor_cx, anchor_h, anchor_w = self.point2center( - anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax) - - pred_h = tf.exp(pred_location[:, -2] * self._prior_scaling[2]) * anchor_h - pred_w = tf.exp(pred_location[:, -1] * self._prior_scaling[3]) * anchor_w - pred_cy = pred_location[:, 0] * self._prior_scaling[0] * anchor_h + anchor_cy - pred_cx = pred_location[:, 1] * self._prior_scaling[1] * anchor_w + anchor_cx - - return tf.split(tf.stack(self.center2point(pred_cy, pred_cx, pred_h, pred_w), axis=-1), num_anchors_per_layer, axis=0) - - -class AnchorCreator(object): - def __init__(self, img_shape, layers_shapes, anchor_scales, extra_anchor_scales, anchor_ratios, layer_steps): - super(AnchorCreator, self).__init__() - # img_shape -> (height, width) - self._img_shape = img_shape - self._layers_shapes = layers_shapes - self._anchor_scales = anchor_scales - self._extra_anchor_scales = extra_anchor_scales - self._anchor_ratios = anchor_ratios - self._layer_steps = layer_steps - self._anchor_offset = [0.5] * len(self._layers_shapes) - - def get_layer_anchors(self, layer_shape, anchor_scale, extra_anchor_scale, anchor_ratio, layer_step, offset=0.5): - ''' assume layer_shape[0] = 6, layer_shape[1] = 5 - x_on_layer = [[0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4]] - y_on_layer = [[0, 0, 0, 0, 0], - [1, 1, 1, 1, 1], - [2, 2, 2, 2, 2], - [3, 3, 3, 3, 3], - [4, 4, 4, 4, 4], - [5, 5, 5, 5, 5]] - ''' - with tf.name_scope('get_layer_anchors'): - x_on_layer, y_on_layer = tf.meshgrid(tf.range(layer_shape[1]), tf.range(layer_shape[0])) - - y_on_image = (tf.cast(y_on_layer, tf.float32) + offset) * layer_step / self._img_shape[0] - x_on_image = (tf.cast(x_on_layer, tf.float32) + offset) * layer_step / self._img_shape[1] - - num_anchors_along_depth = len(anchor_scale) * len(anchor_ratio) + len(extra_anchor_scale) - num_anchors_along_spatial = layer_shape[1] * layer_shape[0] - - list_h_on_image = [] - list_w_on_image = [] - - global_index = 0 - # for square anchors - for _, scale in enumerate(extra_anchor_scale): - list_h_on_image.append(scale) - list_w_on_image.append(scale) - global_index += 1 - # for other aspect ratio anchors - for scale_index, scale in enumerate(anchor_scale): - for ratio_index, ratio in enumerate(anchor_ratio): - list_h_on_image.append(scale / math.sqrt(ratio)) - list_w_on_image.append(scale * math.sqrt(ratio)) - global_index += 
1 - # shape info: - # y_on_image, x_on_image: layers_shapes[0] * layers_shapes[1] - # h_on_image, w_on_image: num_anchors_along_depth - return tf.expand_dims(y_on_image, axis=-1), tf.expand_dims(x_on_image, axis=-1), \ - tf.constant(list_h_on_image, dtype=tf.float32), \ - tf.constant(list_w_on_image, dtype=tf.float32), num_anchors_along_depth, num_anchors_along_spatial - - def get_all_anchors(self): - all_anchors = [] - all_num_anchors_depth = [] - all_num_anchors_spatial = [] - for layer_index, layer_shape in enumerate(self._layers_shapes): - anchors_this_layer = self.get_layer_anchors(layer_shape, - self._anchor_scales[layer_index], - self._extra_anchor_scales[layer_index], - self._anchor_ratios[layer_index], - self._layer_steps[layer_index], - self._anchor_offset[layer_index]) - all_anchors.append(anchors_this_layer[:-2]) - all_num_anchors_depth.append(anchors_this_layer[-2]) - all_num_anchors_spatial.append(anchors_this_layer[-1]) - return all_anchors, all_num_anchors_depth, all_num_anchors_spatial diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/eval_ssd.py b/models/object_detection/tensorflow/ssd_vgg16/inference/eval_ssd.py deleted file mode 100644 index 477200916..000000000 --- a/models/object_detection/tensorflow/ssd_vgg16/inference/eval_ssd.py +++ /dev/null @@ -1,319 +0,0 @@ -# Copyright 2018 Changan Wang - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import numpy as np -import time -from argparse import ArgumentParser -import sys -from google.protobuf import text_format -import tensorflow as tf - -from dataset import dataset_common -from preprocessing import ssd_preprocessing -import anchor_manipulator - -SSD_VGG16_IMAGE_SIZE = 300 -NUM_CLASSES = 81 -NEGATIVE_RATIO = 1.0 -SELECT_THRESHOLD = 0.1 -MATCH_THRESHOLD = 0.5 -NEG_THRESHOLD = 0.5 -DATA_FORMAT = 'channels_last' -NUM_READERS = 10 -NUM_PREPROCESSING_THREADS = 28 - - -def input_fn(dataset_pattern='val-*', batch_size=1, data_location=None): - out_shape = [SSD_VGG16_IMAGE_SIZE] * 2 - anchor_creator = anchor_manipulator.AnchorCreator(out_shape, - layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), - (1, 1)], - anchor_scales=[(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), - (0.9,)], - extra_anchor_scales=[(0.1414,), (0.2739,), (0.4541,), (0.6315,), - (0.8078,), (0.9836,)], - anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333), - (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), - (1., 2., .5), (1., 2., .5)], - layer_steps=[8, 16, 32, 64, 100, 300]) - all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors() - - num_anchors_per_layer = [] - for ind in range(len(all_anchors)): - num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind]) - - anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders=[1.0] * 6, - positive_threshold=MATCH_THRESHOLD, - ignore_threshold=NEG_THRESHOLD, - prior_scaling=[0.1, 0.1, 0.2, 0.2]) - - def image_preprocessing_fn(image_, labels_, bboxes_): - return ssd_preprocessing.preprocess_image(image_, labels_, - bboxes_, out_shape, - is_training=False, - data_format=DATA_FORMAT, - output_rgb=False) - - def anchor_encoder_fn(glabels_, gbboxes_): - return anchor_encoder_decoder.encode_all_anchors(glabels_, gbboxes_, - all_anchors, - all_num_anchors_depth, - all_num_anchors_spatial) - - image, filename, shape, loc_targets, cls_targets, match_scores = \ - dataset_common.slim_get_batch(NUM_CLASSES, - batch_size, - 'val', - os.path.join( - data_location, - dataset_pattern), - NUM_READERS, - NUM_PREPROCESSING_THREADS, - image_preprocessing_fn, - anchor_encoder_fn, - num_epochs=1, - is_training=False) - return image, filename, shape - - -class EvaluateSSDModel(): - def __init__(self): - - arg_parser = ArgumentParser(description='Parse args') - - arg_parser.add_argument('-b', "--batch-size", - help="Specify the batch size. 
If this " - "parameter is not specified or is -1, the " - "largest ideal batch size for the model will " - "be used.", - dest="batch_size", type=int, default=1) - - arg_parser.add_argument('-e', "--num-inter-threads", - help='The number of inter-thread.', - dest='num_inter_threads', type=int, default=0) - - arg_parser.add_argument('-a', "--num-intra-threads", - help='The number of intra-thread.', - dest='num_intra_threads', type=int, default=0) - - arg_parser.add_argument('--data-num-inter-threads', dest='data_num_inter_threads', - help='number threads across operators', - type=int, default=21) - - arg_parser.add_argument('--data-num-intra-threads', dest='data_num_intra_threads', - help='number threads for data layer operator', - type=int, default=28) - - arg_parser.add_argument('--kmp-blocktime', dest='kmp_blocktime', - help='number of kmp blocktime', - type=int, default=1) - - arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') - - arg_parser.add_argument('-d', "--data-location", - help='Specify the location of the data. ' - 'If this parameter is not specified, ' - 'the benchmark will use random/dummy data.', - dest="data_location", default=None) - - arg_parser.add_argument('-r', "--accuracy-only", - help='For accuracy measurement only.', - dest='accuracy_only', action='store_true') - - arg_parser.add_argument("--warmup-steps", type=int, default=10, - help="number of warmup steps") - - arg_parser.add_argument("--steps", type=int, default=50, - help="number of steps") - - self.args = arg_parser.parse_args() - - os.environ["KMP_BLOCKTIME"] = str(self.args.kmp_blocktime) - - def eval(self): - - data_config = tf.ConfigProto() - data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads - data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads - data_config.use_per_session_threads = 1 - - infer_config = tf.ConfigProto() - infer_config.inter_op_parallelism_threads = self.args.num_inter_threads # self.args.num_inter_threads - infer_config.intra_op_parallelism_threads = self.args.num_intra_threads # self.args.num_intra_threads - infer_config.use_per_session_threads = 1 - - data_graph = tf.Graph() - with data_graph.as_default(): - if self.args.data_location: # real data - image, filename, shape = \ - input_fn(dataset_pattern='val-*', batch_size=self.args.batch_size, - data_location=self.args.data_location) - else: # dummy data - input_shape = [self.args.batch_size, SSD_VGG16_IMAGE_SIZE, SSD_VGG16_IMAGE_SIZE, 3] - image = tf.random.uniform(input_shape, -123.68, 151.06, dtype=tf.float32, name='synthetic_images') - - infer_graph = tf.Graph() - model_file = self.args.input_graph - with infer_graph.as_default(): - graph_def = tf.GraphDef() - file_ext = os.path.splitext(model_file)[1] - with open(model_file, "rb") as f: - if file_ext == '.pbtxt': - text_format.Merge(f.read(), graph_def) - else: - graph_def.ParseFromString(f.read()) - tf.import_graph_def(graph_def, name='') - - # Define input and output Tensors for inference graph - output_names = ["ExpandDims"] - for i in range(1, 160): - output_names.append("ExpandDims_" + str(i)) - - input_operation = infer_graph.get_operation_by_name("input") - output_operations = [] - for name in output_names: - output_operations.append(infer_graph.get_operation_by_name(name).outputs[0]) - - infer_sess = tf.Session(graph=infer_graph, config=infer_config) - - if not self.args.accuracy_only: # benchmark - step = 0 - total_steps = self.args.warmup_steps + 
self.args.steps - - total_images = 0 - total_duration = 0 - - if not self.args.data_location: # inference with dummy data - print("Inference with dummy data") - data_sess = tf.Session(graph=data_graph, config=data_config) - - while step < total_steps: - step += 1 - image_np = data_sess.run(image) - start_time = time.time() - - infer_sess.run(output_operations, {input_operation.outputs[0]: image_np}) - duration = time.time() - start_time - - if step > self.args.warmup_steps: - total_duration += duration - total_images += self.args.batch_size - print('Iteration %d: %.6f sec' % (step, duration)) - sys.stdout.flush() - - else: # benchmark with real data - print("Inference with real data") - with data_graph.as_default(): - with tf.train.MonitoredTrainingSession(config=data_config) as data_sess: - while not data_sess.should_stop() and step < total_steps: - step += 1 - start_time = time.time() - image_np, _, _ = data_sess.run([image, filename, shape]) - infer_sess.run(output_operations, {input_operation.outputs[0]: image_np}) - duration = time.time() - start_time - - if step > self.args.warmup_steps: - total_duration += duration - total_images += self.args.batch_size - print('Iteration %d: %.6f sec' % (step, duration)) - sys.stdout.flush() - - print('Batch size = %d' % self.args.batch_size) - print('Throughput: %.3f images/sec' % (total_images / total_duration)) - if (self.args.batch_size == 1): - latency = (total_duration / total_images) * 1000 - print('Latency: %.3f ms' % (latency)) - - else: # accuracy only - results = [] - filenames = [] - shapes = [] - total_processed_images = 0 - with data_graph.as_default(): - with tf.train.MonitoredTrainingSession(config=data_config) as data_sess: - while not data_sess.should_stop(): - image_np, filename_np, shape_np = data_sess.run([image, filename, shape]) - total_processed_images += self.args.batch_size - predict = infer_sess.run(output_operations, {input_operation.outputs[0]: image_np}) - if (total_processed_images % 30 == 0): - print("Predicting results for {} images...".format(total_processed_images)) - sys.stdout.flush() - results.append(predict) - filenames.append(filename_np[0]) - shapes.append(shape_np[0]) - - log_dir = os.path.join('./', 'logs') - # if it doesn't exist, create. - if not os.path.exists(log_dir): - os.makedirs(log_dir) - for class_ind in range(1, NUM_CLASSES): - with open(os.path.join(log_dir, 'results_{}.txt'.format(class_ind)), 'wt') as f: - for image_ind, pred in enumerate(results): - shape = shapes[image_ind] - filename = filenames[image_ind] - # parsing prediction results and calculate bbox - scores = pred[(class_ind * 2) - 2][0] - bboxes = pred[(class_ind * 2) - 1][0] - bboxes[:, 0] = (bboxes[:, 0] * shape[0]).astype(np.int32, copy=False) + 1 - bboxes[:, 1] = (bboxes[:, 1] * shape[1]).astype(np.int32, copy=False) + 1 - bboxes[:, 2] = (bboxes[:, 2] * shape[0]).astype(np.int32, copy=False) + 1 - bboxes[:, 3] = (bboxes[:, 3] * shape[1]).astype(np.int32, copy=False) + 1 - - valid_mask = np.logical_and((bboxes[:, 2] - bboxes[:, 0] > 0), - (bboxes[:, 3] - bboxes[:, 1] > 0)) - - for det_ind in range(valid_mask.shape[0]): - if not valid_mask[det_ind]: - continue - f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 
- format(filename.decode('utf8')[:-4], scores[det_ind], - bboxes[det_ind, 1], bboxes[det_ind, 0], - bboxes[det_ind, 3], bboxes[det_ind, 2])) - - coco_eval = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "validate_ssd_vgg16.py") - cmd_prefix = "python " + coco_eval - cmd_prefix += " --detections_path ./logs" - cmd_prefix += " --annotations_file {}/instances_val2017.json".format(self.args.data_location) - cmd = cmd_prefix - os.system(cmd) - - -if __name__ == "__main__": - obj = EvaluateSSDModel() - obj.eval() diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py b/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py deleted file mode 100755 index 6847658d4..000000000 --- a/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py +++ /dev/null @@ -1,212 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -import argparse -import os -import json -import numpy as np -from tqdm import tqdm -import tensorflow as tf -from convert_tfrecords import ImageCoder, _process_image, _int64_feature, _float_feature, _bytes_feature, _bytes_list_feature - - -def load_annotation_data(annotations_filename): - - # Load annotation data - with open(annotations_filename, 'r') as annotations_file: - data = json.load(annotations_file) - - # Create map of category IDs to category names - category_map = {} - for category_datum in data['categories']: - category_map[category_datum['id']] = category_datum['name'] - - # Create map of file IDs to annotation data - annotation_map = {} - for annotation_datum in data['annotations']: - image_id = annotation_datum['image_id'] - if (image_id not in annotation_map): - annotation_map[image_id] = [] - - # Add annotation datum for current image ID - annotation_map[image_id].append(annotation_datum) - - # Create map of file IDs to image data - image_map = {} - for image_datum in data['images']: - image_id = image_datum['id'] - if (image_id in annotation_map): - image_map[image_id] = image_datum - - return image_map, annotation_map, category_map - - -def get_annotation_data(image_data, annotation_data, category_map): - - LABEL_MAP = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, - 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, - 23: 22, 24: 23, 25: 24, 27: 25, 28: 26, 31: 27, 32: 28, 33: 29, 34: 30, 35: 31, - 36: 32, 37: 33, 38: 34, 39: 35, 40: 36, 41: 37, 42: 38, 43: 39, 44: 40, 46: 41, - 47: 42, 48: 43, 49: 44, 50: 45, 51: 46, 52: 47, 53: 48, 54: 49, 55: 50, 56: 51, - 57: 52, 58: 53, 59: 54, 60: 55, 61: 56, 62: 57, 63: 58, 64: 59, 65: 60, 67: 61, - 70: 62, 72: 63, 73: 64, 74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70, 80: 71, - 81: 72, 82: 73, 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80} - - # Retrieve image width and height - image_width = image_data['width'] - image_height = image_data['height'] - - bboxes = [] - labels = [] - 
label_names = [] - difficult = [] - truncated = [] - for annotation_datum in annotation_data: - # Scale bounding box coordinates - # COCO bounding boxes are [x, y, width, height] but https://github.com/HiKapok/SSD.TensorFlow.git expects [ymin, xmin, ymax, xmax] - bbox = annotation_datum['bbox'] - ymin = bbox[1] / image_height - xmin = bbox[0] / image_width - ymax = (bbox[1] + bbox[3]) / image_height - xmax = (bbox[0] + bbox[2]) / image_width - bboxes.append([ymin, xmin, ymax, xmax]) - - labels.append(LABEL_MAP[annotation_datum['category_id']]) - label_names.append(category_map[annotation_datum['category_id']].encode('ascii')) - - # Append difficult and truncated flags - difficult.append(0) - truncated.append(0) - - return bboxes, labels, label_names, difficult, truncated - - -def get_record(filename, buffer, width, height, bboxes, labels, label_names, difficult, truncated): - - CHANNEL_COUNT = 3 - IMAGE_FORMAT = 'JPEG' - - # Extract bounding box coordinates - ymin = [] - xmin = [] - ymax = [] - xmax = [] - for bbox in bboxes: - ymin.append(bbox[0]) - xmin.append(bbox[1]) - ymax.append(bbox[2]) - xmax.append(bbox[3]) - - # Create record features - features = { - 'image/width': _int64_feature(width), - 'image/height': _int64_feature(height), - 'image/channels': _int64_feature(CHANNEL_COUNT), - 'image/shape': _int64_feature([height, width, CHANNEL_COUNT]), - 'image/object/bbox/xmin': _float_feature(xmin), - 'image/object/bbox/xmax': _float_feature(xmax), - 'image/object/bbox/ymin': _float_feature(ymin), - 'image/object/bbox/ymax': _float_feature(ymax), - 'image/object/bbox/label': _int64_feature(labels), - 'image/object/bbox/label_text': _bytes_list_feature(label_names), - 'image/object/bbox/difficult': _int64_feature(difficult), - 'image/object/bbox/truncated': _int64_feature(truncated), - 'image/format': _bytes_feature(IMAGE_FORMAT), - 'image/filename': _bytes_feature(filename.encode('utf8')), - 'image/encoded': _bytes_feature(buffer)} - - return tf.train.Example(features=tf.train.Features(feature=features)) - - -def check_for_link(value): - """ - Throws an error if the specified path is a link. os.islink returns - True for sym links. For files, we also look at the number of links in - os.stat() to determine if it's a hard link. - """ - if os.path.islink(value) or \ - (os.path.isfile(value) and os.stat(value).st_nlink > 1): - raise argparse.ArgumentTypeError("{} cannot be a link.".format(value)) - - -def check_valid_file_or_folder(value): - """verifies filename exists and isn't a link""" - if value is not None: - if not os.path.isfile(value) and not os.path.isdir(value): - raise argparse.ArgumentTypeError("{} does not exist or is not a file/folder.". 
- format(value)) - check_for_link(value) - return value - - -def main(): - - RECORDS_PER_FILE = 1024 - RECORD_FILENAME_FORMAT = '%s-%.5d-of-%.5d' - - parser = argparse.ArgumentParser() - parser.add_argument('--image_path', type=check_valid_file_or_folder, - required=True, help='path to the input validation image files') - parser.add_argument('--annotations_file', type=check_valid_file_or_folder, - required=True, help='name of the input validation annotations file') - parser.add_argument('--output_prefix', type=str, required=True, help='prefix of the output TensorFlow record files') - parser.add_argument('--output_path', type=check_valid_file_or_folder, required=True, - help='path to the output TensorFlow record files') - - args = parser.parse_args() - - # Load annotation data - image_map, annotation_map, category_map = load_annotation_data(args.annotations_file) - - # Create output path if necessary - if (not os.path.exists(args.output_path)): - os.makedirs(args.output_path) - - # Create image coder - image_coder = ImageCoder() - - record_file_index = 0 - record_file_count = np.ceil(len(image_map) / RECORDS_PER_FILE).astype(int) - for index, image_id in tqdm(enumerate(image_map), desc='Generating', total=len(image_map), unit=' file'): - # Create record writer - if (index % RECORDS_PER_FILE == 0): - output_filename = os.path.join(args.output_path, RECORD_FILENAME_FORMAT % - (args.output_prefix, record_file_index, record_file_count)) - writer = tf.python_io.TFRecordWriter(output_filename) - record_file_index += 1 - - # Extract image data from current image file - image_filename = image_map[image_id]['file_name'] - image_buffer, _, _ = _process_image(os.path.join(args.image_path, image_filename), image_coder) - - # Retrieve annotation data associated with current image file - bboxes, labels, label_names, difficult, truncated = get_annotation_data( - image_map[image_id], annotation_map[image_id], category_map) - - # Write TF record for current image file - image_width, image_height = image_map[image_id]['width'], image_map[image_id]['height'] - record = get_record(image_filename, image_buffer, image_width, image_height, - bboxes, labels, label_names, difficult, truncated) - writer.write(record.SerializeToString()) - - -if __name__ == '__main__': - - main() diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/validate_ssd_vgg16.py b/models/object_detection/tensorflow/ssd_vgg16/inference/validate_ssd_vgg16.py deleted file mode 100644 index 74a0dbcdb..000000000 --- a/models/object_detection/tensorflow/ssd_vgg16/inference/validate_ssd_vgg16.py +++ /dev/null @@ -1,113 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
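# --- Illustrative sketch of the bounding-box conversion described in the removed
# generate_coco_records.py above: COCO annotations store [x, y, width, height] in
# pixels, while the record writer expects normalized [ymin, xmin, ymax, xmax].
# The function name and example values are hypothetical.
def coco_bbox_to_normalized_corners(bbox, image_width, image_height):
    x, y, w, h = bbox
    ymin = y / image_height
    xmin = x / image_width
    ymax = (y + h) / image_height
    xmax = (x + w) / image_width
    return [ymin, xmin, ymax, xmax]

# Example: a 100x50-pixel box at (20, 40) in a 640x480 image.
print(coco_bbox_to_normalized_corners([20, 40, 100, 50], 640, 480))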
-# - -# -import argparse -import os -import json -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - - -def convert_detection(label, detection): - - ID_INDEX = 0 - SCORE_INDEX = 1 - XMIN_INDEX = 2 - YMIN_INDEX = 3 - XMAX_INDEX = 4 - YMAX_INDEX = 5 - LABEL_MAP = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, - 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, - 22: 23, 23: 24, 24: 25, 25: 27, 26: 28, 27: 31, 28: 32, 29: 33, 30: 34, 31: 35, - 32: 36, 33: 37, 34: 38, 35: 39, 36: 40, 37: 41, 38: 42, 39: 43, 40: 44, 41: 46, - 42: 47, 43: 48, 44: 49, 45: 50, 46: 51, 47: 52, 48: 53, 49: 54, 50: 55, 51: 56, - 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62, 58: 63, 59: 64, 60: 65, 61: 67, - 62: 70, 63: 72, 64: 73, 65: 74, 66: 75, 67: 76, 68: 77, 69: 78, 70: 79, 71: 80, - 72: 81, 73: 82, 74: 84, 75: 85, 76: 86, 77: 87, 78: 88, 79: 89, 80: 90} - - # Extract image ID and bounding box score from detection - image_id = int(detection[ID_INDEX]) - score = float(detection[SCORE_INDEX]) - - # Convert bounding box coordinates [xmin, ymin, xmax, ymax] to [x, y, width, height] - x = float(detection[XMIN_INDEX]) - y = float(detection[YMIN_INDEX]) - width = float(detection[XMAX_INDEX]) - x - height = float(detection[YMAX_INDEX]) - y - bbox = [x, y, width, height] - - return {'category_id': LABEL_MAP[label], 'image_id': image_id, 'score': score, 'bbox': bbox} - - -def generate_results_file(detections_path, results_filename): - - DETECTIONS_EXTENSION = '.txt' - - # Retrieve detections filenames - filenames = [filename for filename in os.listdir(detections_path) if filename.endswith(DETECTIONS_EXTENSION)] - - results = [] - for filename in filenames: - # Read detections from current file - with open(os.path.join(detections_path, filename), 'r') as detections_file: - lines = detections_file.readlines() - - # Convert detections from current file - label = int(os.path.splitext(filename)[0].split('_')[1]) - for line in lines: - results.append(convert_detection(label, line.strip().split())) - - # Write results to file - with open(os.path.join(detections_path, results_filename), 'w') as results_file: - json.dump(results, results_file) - - -def main(): - - RESULTS_FILENAME = 'results.json' - ANNOTATION_TYPE = 'bbox' - - parser = argparse.ArgumentParser() - parser.add_argument('--detections_path', type=str, required=True, - help='path to the input detected bounding box files') - parser.add_argument('--annotations_file', type=str, required=True, - help='name of the input validation annotations file') - - args = parser.parse_args() - - # Generate COCO results file - print('Generating COCO results...') - generate_results_file(args.detections_path, RESULTS_FILENAME) - - # Create COCO instance - cocoGt = COCO(args.annotations_file) - - # Load COCO results - cocoDt = cocoGt.loadRes(os.path.join(args.detections_path, RESULTS_FILENAME)) - - # Evaluate results - cocoEval = COCOeval(cocoGt, cocoDt, ANNOTATION_TYPE) - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - -if __name__ == '__main__': - - main() diff --git a/models/recommendation/__init__.py b/models/recommendation/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/recommendation/__init__.py +++ b/models/recommendation/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/recommendation/tensorflow/__init__.py b/models/recommendation/tensorflow/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/recommendation/tensorflow/__init__.py +++ 
b/models/recommendation/tensorflow/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/recommendation/tensorflow/ncf/__init__.py b/models/recommendation/tensorflow/ncf/__init__.py deleted file mode 100644 index c4fdb7d61..000000000 --- a/models/recommendation/tensorflow/ncf/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/recommendation/tensorflow/ncf/inference/__init__.py b/models/recommendation/tensorflow/ncf/inference/__init__.py deleted file mode 100644 index c4fdb7d61..000000000 --- a/models/recommendation/tensorflow/ncf/inference/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/recommendation/tensorflow/ncf/inference/fp32/__init__.py b/models/recommendation/tensorflow/ncf/inference/fp32/__init__.py deleted file mode 100644 index c4fdb7d61..000000000 --- a/models/recommendation/tensorflow/ncf/inference/fp32/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# diff --git a/models/recommendation/tensorflow/ncf/inference/fp32/ncf_main.py b/models/recommendation/tensorflow/ncf/inference/fp32/ncf_main.py deleted file mode 100644 index 338a2b00f..000000000 --- a/models/recommendation/tensorflow/ncf/inference/fp32/ncf_main.py +++ /dev/null @@ -1,594 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# - -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""NCF framework to train and evaluate the NeuMF model. -The NeuMF model assembles both MF and MLP models under the NCF framework. Check -`neumf_model.py` for more details about the models. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import gc -import heapq -import math -import multiprocessing -import os -import signal -import typing -import time -from datetime import datetime - -# pylint: disable=g-bad-import-order -import numpy as np -from absl import app as absl_app -from absl import flags -import tensorflow as tf -# pylint: enable=g-bad-import-order - -from official.datasets import movielens -from official.recommendation import constants as rconst -from official.recommendation import data_preprocessing -from official.recommendation import neumf_model -from official.utils.flags import core as flags_core -from official.utils.logs import hooks_helper -from official.utils.logs import logger -from official.utils.misc import distribution_utils -from official.utils.misc import model_helpers - -_TOP_K = 10 # Top-k list for evaluation -# keys for evaluation metrics -_HR_KEY = "HR" -_NDCG_KEY = "NDCG" - - -class LoggerHook(tf.train.SessionRunHook): - """ Logs runtime. 
""" - - def begin(self): - self._step = -1 - self._displayed_steps = 0 - self._total_recommendations_per_sec = 0 - self._total_duration = 0 - - def before_run(self, run_context): - self._step += 1 - self._start_time = time.time() - - def after_run(self, run_context, run_values): - duration = time.time() - self._start_time - # Display benchmarking metrics - display_every = 100 if FLAGS.dataset == 'ml-1m' else 50000 - if self._step != 0 and self._step % display_every == 0: - recommendations_per_sec = FLAGS.batch_size / duration - self._displayed_steps += 1 - self._total_recommendations_per_sec += recommendations_per_sec - self._total_duration += duration - format_str = ('%s: step %d, %.1f recommendations/sec, %.5f msec/batch') - print(format_str % (datetime.now(), self._step, recommendations_per_sec, duration * 1000)) - - def end(self, run_context): - print('Average recommendations/sec across %d steps: %.1f (%.5f msec/batch)' % - (self._step, self._total_recommendations_per_sec / self._displayed_steps, - (self._total_duration * 1000) / self._displayed_steps)) - - -def get_hit_rate_and_ndcg(predicted_scores_by_user, items_by_user, top_k=_TOP_K, - match_mlperf=False): - """Returns the hit rate and the normalized DCG for evaluation. - `predicted_scores_by_user` and `items_by_user` are parallel NumPy arrays with - shape (num_users, num_items) such that `predicted_scores_by_user[i, j]` is the - predicted score that user `i` would rate item `items_by_user[i][j]`. - `items_by_user[i, 0]` is the item that user `i` interacted with, while - `items_by_user[i, 1:] are items that user `i` did not interact with. The goal - of the NCF model to give a high score for `predicted_scores_by_user[i, 0]` - compared to `predicted_scores_by_user[i, 1:]`, and the returned HR and NDCG - will be higher the more successful the model is at this goal. - If `match_mlperf` is True, then the HR and NDCG computations are done in a - slightly unusual way to match the MLPerf reference implementation. - Specifically, if `items_by_user[i, :]` contains duplicate items, it will be - treated as if the item only appeared once. Effectively, for duplicate items in - a row, the predicted score for all but one of the items will be set to - -infinity - For example, suppose we have that following inputs: - predicted_scores_by_user: [[ 2, 3, 3], - [ 5, 4, 4]] - items_by_user: [[10, 20, 20], - [30, 40, 40]] - top_k: 2 - Then with match_mlperf=True, the HR would be 2/2 = 1.0. With - match_mlperf=False, the HR would be 1/2 = 0.5. This is because each user has - predicted scores for only 2 unique items: 10 and 20 for the first user, and 30 - and 40 for the second. Therefore, with match_mlperf=True, it's guarenteed the - first item's score is in the top 2. With match_mlperf=False, this function - would compute the first user's first item is not in the top 2, because item 20 - has a higher score, and item 20 occurs twice. - Args: - predicted_scores_by_user: 2D Numpy array of the predicted scores. - `predicted_scores_by_user[i, j]` is the predicted score that user `i` - would rate item `items_by_user[i][j]`. - items_by_user: 2d numpy array of the item IDs. For user `i`, - `items_by_user[i][0]` is the itme that user `i` interacted with, while - `predicted_scores_by_user[i, 1:] are items that user `i` did not interact - with. - top_k: Only consider the highest rated `top_k` items per user. The HR and - NDCG for that user will only be nonzero if the predicted score for that - user's first item is in the `top_k` top scores. 
- match_mlperf: If True, compute HR and NDCG slightly differently to match the - MLPerf reference implementation. - Returns: - (hr, ndcg) tuple of floats, averaged across all users. - """ - num_users = predicted_scores_by_user.shape[0] - zero_indices = np.zeros((num_users, 1), dtype=np.int32) - - if match_mlperf: - predicted_scores_by_user = predicted_scores_by_user.copy() - items_by_user = items_by_user.copy() - - # For each user, sort the items and predictions by increasing item number. - # We use mergesort since it's the only stable sort, which we need to be - # equivalent to the MLPerf reference implementation. - sorted_items_indices = items_by_user.argsort(kind="mergesort") - sorted_items = items_by_user[ - np.arange(num_users)[:, np.newaxis], sorted_items_indices] - sorted_predictions = predicted_scores_by_user[ - np.arange(num_users)[:, np.newaxis], sorted_items_indices] - - # For items that occur more than once in a user's row, set the predicted - # score of the subsequent occurrences to -infinity, which effectively - # removes them from the array. - diffs = sorted_items[:, :-1] - sorted_items[:, 1:] - diffs = np.concatenate( - [np.ones((diffs.shape[0], 1), dtype=diffs.dtype), diffs], axis=1) - predicted_scores_by_user = np.where(diffs, sorted_predictions, -np.inf) - - # After this block, `zero_indices` will be a (num_users, 1) shaped array - # indicating, for each user, the index of item of value 0 in - # `sorted_items_indices`. This item is the one we want to check if it is in - # the top_k items. - zero_indices = np.array(np.where(sorted_items_indices == 0)) - assert np.array_equal(zero_indices[0, :], np.arange(num_users)) - zero_indices = zero_indices[1, :, np.newaxis] - - # NumPy has an np.argparition() method, however log(1000) is so small that - # sorting the whole array is simpler and fast enough. - top_indicies = np.argsort(predicted_scores_by_user, axis=1)[:, -top_k:] - top_indicies = np.flip(top_indicies, axis=1) - - # Both HR and NDCG vectorized computation takes advantage of the fact that if - # the positive example for a user is not in the top k, that index does not - # appear. That is to say: hit_ind.shape[0] <= num_users - hit_ind = np.argwhere(np.equal(top_indicies, zero_indices)) - hr = hit_ind.shape[0] / num_users - ndcg = np.sum(np.log(2) / np.log(hit_ind[:, 1] + 2)) / num_users - return hr, ndcg - - -def evaluate_model(estimator, ncf_dataset, pred_input_fn): - # type: (tf.estimator.Estimator, prepare.NCFDataset, typing.Callable) -> dict - """Model evaluation with HR and NDCG metrics. - The evaluation protocol is to rank the test interacted item (truth items) - among the randomly chosen 999 items that are not interacted by the user. - The performance of the ranked list is judged by Hit Ratio (HR) and Normalized - Discounted Cumulative Gain (NDCG). - For evaluation, the ranked list is truncated at 10 for both metrics. As such, - the HR intuitively measures whether the test item is present on the top-10 - list, and the NDCG accounts for the position of the hit by assigning higher - scores to hits at top ranks. Both metrics are calculated for each test user, - and the average scores are reported. - Args: - estimator: The Estimator. - ncf_dataset: An NCFDataSet object, which contains the information about - test/eval dataset, such as: - num_users: How many unique users are in the eval set. - test_data: The points which are used for consistent evaluation. These - are already included in the pred_input_fn. - pred_input_fn: The input function for the test data. 
- Returns: - eval_results: A dict of evaluation results for benchmark logging. - eval_results = { - _HR_KEY: hr, - _NDCG_KEY: ndcg, - tf.GraphKeys.GLOBAL_STEP: global_step - } - where hr is an integer indicating the average HR scores across all users, - ndcg is an integer representing the average NDCG scores across all users, - and global_step is the global step - """ - - tf.logging.info("Computing predictions for eval set...") - - # OpenMP settings - # os.environ["OMP_NUM_THREADS"] = "11" - if not os.environ.get("KMP_BLOCKTIME"): - os.environ["KMP_BLOCKTIME"] = "1" - if not os.environ.get("KMP_SETTINGS"): - os.environ["KMP_SETTINGS"] = "1" - if not os.environ.get("KMP_AFFINITY"): - os.environ["KMP_AFFINITY"] = "granularity=fine,noverbose,compact,1,0" - - # Get predictions - prediction_hooks = None if FLAGS.accuracy_only else [LoggerHook()] - predictions = estimator.predict(input_fn=pred_input_fn, - yield_single_examples=False, - hooks=prediction_hooks) - predictions = list(predictions) - - prediction_batches = [p[movielens.RATING_COLUMN] for p in predictions] - item_batches = [p[movielens.ITEM_COLUMN] for p in predictions] - - # Reshape the predicted scores and items. Each user takes one row. - prediction_with_padding = np.concatenate(prediction_batches, axis=0) - predicted_scores_by_user = prediction_with_padding[ - :ncf_dataset.num_users * (1 + rconst.NUM_EVAL_NEGATIVES)]\ - .reshape(ncf_dataset.num_users, -1) - item_with_padding = np.concatenate(item_batches, axis=0) - items_by_user = item_with_padding[ - :ncf_dataset.num_users * (1 + rconst.NUM_EVAL_NEGATIVES)]\ - .reshape(ncf_dataset.num_users, -1) - - tf.logging.info("Computing metrics...") - - hr, ndcg = get_hit_rate_and_ndcg(predicted_scores_by_user, items_by_user, - match_mlperf=FLAGS.ml_perf) - - global_step = estimator.get_variable_value(tf.GraphKeys.GLOBAL_STEP) - eval_results = { - _HR_KEY: hr, - _NDCG_KEY: ndcg, - tf.GraphKeys.GLOBAL_STEP: global_step - } - - return eval_results - - -def construct_estimator(num_gpus, model_dir, params, batch_size, - eval_batch_size): - """Construct either an Estimator or TPUEstimator for NCF. - Args: - num_gpus: The number of gpus (Used to select distribution strategy) - model_dir: The model directory for the estimator - params: The params dict for the estimator - batch_size: The mini-batch size for training. - eval_batch_size: The batch size used during evaluation. - Returns: - An Estimator or TPUEstimator. 
- """ - - if params["use_tpu"]: - tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( - tpu=params["tpu"], - zone=params["tpu_zone"], - project=params["tpu_gcp_project"], - ) - - tpu_config = tf.contrib.tpu.TPUConfig( - iterations_per_loop=100, - num_shards=8) - - run_config = tf.contrib.tpu.RunConfig( - cluster=tpu_cluster_resolver, - model_dir=model_dir, - session_config=tf.ConfigProto( - allow_soft_placement=True, log_device_placement=False), - tpu_config=tpu_config) - - tpu_params = {k: v for k, v in params.items() if k != "batch_size"} - - train_estimator = tf.contrib.tpu.TPUEstimator( - model_fn=neumf_model.neumf_model_fn, - use_tpu=True, - train_batch_size=batch_size, - params=tpu_params, - config=run_config) - - eval_estimator = tf.contrib.tpu.TPUEstimator( - model_fn=neumf_model.neumf_model_fn, - use_tpu=False, - train_batch_size=1, - predict_batch_size=eval_batch_size, - params=tpu_params, - config=run_config) - - return train_estimator, eval_estimator - - distribution = distribution_utils.get_distribution_strategy(num_gpus=num_gpus) - cpu_session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False, - inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads, - intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads) - - run_config = tf.estimator.RunConfig(train_distribute=distribution, - session_config=cpu_session_config) - params["eval_batch_size"] = eval_batch_size - estimator = tf.estimator.Estimator(model_fn=neumf_model.neumf_model_fn, - model_dir=model_dir, config=run_config, - params=params) - return estimator, estimator - - -def main(_): - with logger.benchmark_context(FLAGS): - run_ncf(FLAGS) - - -def serving_input_receiver_fn(): - inputs = { - 'user_id': tf.placeholder(dtype=tf.int32, shape=[None], name='user_id'), - 'item_id': tf.placeholder(dtype=tf.uint16, shape=[None], name='item_id'), - } - return tf.estimator.export.ServingInputReceiver(inputs, inputs) - - -def run_ncf(_): - """Run NCF training and eval loop.""" - if FLAGS.download_if_missing: - movielens.download(FLAGS.dataset, FLAGS.data_dir) - - num_gpus = flags_core.get_num_gpus(FLAGS) - batch_size = distribution_utils.per_device_batch_size( - int(FLAGS.batch_size), num_gpus) - eval_batch_size = int(FLAGS.eval_batch_size or FLAGS.batch_size) - ncf_dataset = data_preprocessing.instantiate_pipeline( - dataset=FLAGS.dataset, data_dir=FLAGS.data_dir, - batch_size=batch_size, - eval_batch_size=eval_batch_size, - num_neg=FLAGS.num_neg, - epochs_per_cycle=FLAGS.epochs_between_evals, - match_mlperf=FLAGS.ml_perf) - - model_helpers.apply_clean(flags.FLAGS) - - train_estimator, eval_estimator = construct_estimator( - num_gpus=num_gpus, model_dir=FLAGS.model_dir, params={ - "batch_size": batch_size, - "learning_rate": FLAGS.learning_rate, - "num_users": ncf_dataset.num_users, - "num_items": ncf_dataset.num_items, - "mf_dim": FLAGS.num_factors, - "model_layers": [int(layer) for layer in FLAGS.layers], - "mf_regularization": FLAGS.mf_regularization, - "mlp_reg_layers": [float(reg) for reg in FLAGS.mlp_regularization], - "use_tpu": FLAGS.tpu is not None, - "tpu": FLAGS.tpu, - "tpu_zone": FLAGS.tpu_zone, - "tpu_gcp_project": FLAGS.tpu_gcp_project, - }, batch_size=flags.FLAGS.batch_size, eval_batch_size=eval_batch_size) - - # Create hooks that log information about the training and metric values - train_hooks = hooks_helper.get_train_hooks( - FLAGS.hooks, - model_dir=FLAGS.model_dir, - batch_size=FLAGS.batch_size # for ExamplesPerSecondHook - ) - run_params = { - 
"batch_size": FLAGS.batch_size, - "eval_batch_size": eval_batch_size, - "number_factors": FLAGS.num_factors, - "hr_threshold": FLAGS.hr_threshold, - "train_epochs": FLAGS.train_epochs, - } - benchmark_logger = logger.get_benchmark_logger() - benchmark_logger.log_run_info( - model_name="recommendation", - dataset_name=FLAGS.dataset, - run_params=run_params, - test_id=FLAGS.benchmark_test_id) - - approx_train_steps = int(ncf_dataset.num_train_positives - * (1 + FLAGS.num_neg) // FLAGS.batch_size) - pred_input_fn = data_preprocessing.make_pred_input_fn(ncf_dataset=ncf_dataset) - - total_training_cycle = 1 if FLAGS.inference_only else FLAGS.train_epochs // FLAGS.epochs_between_evals - for cycle_index in range(total_training_cycle): - tf.logging.info("Starting a training cycle: {}/{}".format( - cycle_index + 1, total_training_cycle)) - - if not FLAGS.inference_only: - # Train the model - train_input_fn, train_record_dir, batch_count = \ - data_preprocessing.make_train_input_fn(ncf_dataset=ncf_dataset) - - if np.abs(approx_train_steps - batch_count) > 1: - tf.logging.warning( - "Estimated ({}) and reported ({}) number of batches differ by more " - "than one".format(approx_train_steps, batch_count)) - train_estimator.train(input_fn=train_input_fn, hooks=train_hooks, - steps=batch_count) - tf.gfile.DeleteRecursively(train_record_dir) - - # Evaluate the model - eval_results = evaluate_model( - eval_estimator, ncf_dataset, pred_input_fn) - - # Benchmark the evaluation results - benchmark_logger.log_evaluation_result(eval_results) - # Log the HR and NDCG results. - hr = eval_results[_HR_KEY] - ndcg = eval_results[_NDCG_KEY] - tf.logging.fatal( - "Iteration {}: HR = {:.4f}, NDCG = {:.4f}".format( - cycle_index + 1, hr, ndcg)) - - # Export SavedModel - if FLAGS.export_savedmodel: - eval_estimator.export_savedmodel(FLAGS.model_dir, serving_input_receiver_fn) - print("SavedModel successfully exported to: {}/".format( - FLAGS.model_dir)) - - # Some of the NumPy vector math can be quite large and likes to stay in - # memory for a while. - gc.collect() - - # If some evaluation threshold is met - if model_helpers.past_stop_threshold(FLAGS.hr_threshold, hr): - break - - # Clear the session explicitly to avoid session delete error - tf.keras.backend.clear_session() - - -def define_ncf_flags(): - """Add flags for running ncf_main.""" - # Add common flags - flags_core.define_base(export_dir=False) - flags_core.define_performance( - num_parallel_calls=False, - inter_op=True, - intra_op=True, - synthetic_data=False, - max_train_steps=False, - dtype=False, - all_reduce_alg=False - ) - flags_core.define_device(tpu=True) - flags_core.define_benchmark() - - flags.adopt_module_key_flags(flags_core) - - flags_core.set_defaults( - model_dir="/tmp/ncf/", - data_dir="/tmp/movielens-data/", - train_epochs=2, - batch_size=256, - hooks=None, - tpu=None - ) - - # Add ncf-specific flags - flags.DEFINE_enum( - name="dataset", default="ml-1m", - enum_values=["ml-1m", "ml-20m"], case_sensitive=False, - help=flags_core.help_wrap( - "Dataset to be trained and evaluated.")) - - flags.DEFINE_boolean( - name="download_if_missing", default=True, help=flags_core.help_wrap( - "Download data to data_dir if it is not already present.")) - - flags.DEFINE_string( - name="eval_batch_size", default=None, help=flags_core.help_wrap( - "The batch size used for evaluation. This should generally be larger" - "than the training batch size as the lack of back propagation during" - "evaluation can allow for larger batch sizes to fit in memory. 
If not" - "specified, the training batch size (--batch_size) will be used.")) - - flags.DEFINE_integer( - name="num_factors", default=8, - help=flags_core.help_wrap("The Embedding size of MF model.")) - - # Set the default as a list of strings to be consistent with input arguments - flags.DEFINE_list( - name="layers", default=["64", "32", "16", "8"], - help=flags_core.help_wrap( - "The sizes of hidden layers for MLP. Example " - "to specify different sizes of MLP layers: --layers=32,16,8,4")) - - flags.DEFINE_float( - name="mf_regularization", default=0., - help=flags_core.help_wrap( - "The regularization factor for MF embeddings. The factor is used by " - "regularizer which allows to apply penalties on layer parameters or " - "layer activity during optimization.")) - - flags.DEFINE_list( - name="mlp_regularization", default=["0.", "0.", "0.", "0."], - help=flags_core.help_wrap( - "The regularization factor for each MLP layer. See mf_regularization " - "help for more info about regularization factor.")) - - flags.DEFINE_integer( - name="num_neg", default=4, - help=flags_core.help_wrap( - "The Number of negative instances to pair with a positive instance.")) - - flags.DEFINE_float( - name="learning_rate", default=0.001, - help=flags_core.help_wrap("The learning rate.")) - - flags.DEFINE_float( - name="hr_threshold", default=None, - help=flags_core.help_wrap( - "If passed, training will stop when the evaluation metric HR is " - "greater than or equal to hr_threshold. For dataset ml-1m, the " - "desired hr_threshold is 0.68 which is the result from the paper; " - "For dataset ml-20m, the threshold can be set as 0.95 which is " - "achieved by MLPerf implementation.")) - - flags.DEFINE_bool( - name="ml_perf", default=None, - help=flags_core.help_wrap( - "If set, changes the behavior of the model slightly to match the " - "MLPerf reference implementations here: \n" - "https://github.com/mlperf/reference/tree/master/recommendation/" - "pytorch\n" - "The two changes are:\n" - "1. When computing the HR and NDCG during evaluation, remove " - "duplicate user-item pairs before the computation. This results in " - "better HRs and NDCGs.\n" - "2. Use a different soring algorithm when sorting the input data, " - "which performs better due to the fact the sorting algorithms are " - "not stable.")) - - flags.DEFINE_bool( - name="inference_only", default=False, - help=flags_core.help_wrap( - "If set, runs only the forward pass.")) - - flags.DEFINE_bool( - name="accuracy_only", default=False, - help=flags_core.help_wrap( - "If set, only accuracy (i.e. no performance benchmarking) " - "metrics are computed.")) - - flags.DEFINE_bool( - name="benchmark_only", default=True, - help=flags_core.help_wrap( - "If set, only performance benchmarking (i.e. 
no accuracy) " - "metrics are computed.")) - - flags.DEFINE_bool( - name="export_savedmodel", default=False, - help=flags_core.help_wrap( - "If set, the model is exported in serving-compatible format to " - "the model_dir.")) - - -if __name__ == "__main__": - tf.logging.set_verbosity(tf.logging.FATAL) - define_ncf_flags() - FLAGS = flags.FLAGS - absl_app.run(main) diff --git a/models/recommendation/tensorflow/wide_deep/inference/fp32/wide_deep_inference.py b/models/recommendation/tensorflow/wide_deep/inference/fp32/wide_deep_inference.py index 3c2ae8750..85f1c3bd2 100644 --- a/models/recommendation/tensorflow/wide_deep/inference/fp32/wide_deep_inference.py +++ b/models/recommendation/tensorflow/wide_deep/inference/fp32/wide_deep_inference.py @@ -142,37 +142,38 @@ def build_estimator(model_dir, model_type): # Create a tf.estimator.RunConfig to ensure the model is run on CPU, which # trains faster than GPU for this model. run_config = tf.estimator.RunConfig().replace( - session_config=tf.ConfigProto(device_count={'GPU': 0})) + session_config=tf.compat.v1.ConfigProto(device_count={'GPU': 0})) if model_type == 'wide': return tf.estimator.LinearClassifier( model_dir=model_dir, feature_columns=wide_columns, - config=run_config) + config=run_config, loss_reduction=tf.compat.v1.losses.Reduction.SUM) elif model_type == 'deep': return tf.estimator.DNNClassifier( model_dir=model_dir, feature_columns=deep_columns, hidden_units=hidden_units, - config=run_config) + config=run_config, loss_reduction=tf.compat.v1.losses.Reduction.SUM) else: return tf.estimator.DNNLinearCombinedClassifier( model_dir=model_dir, linear_feature_columns=wide_columns, dnn_feature_columns=deep_columns, dnn_hidden_units=hidden_units, - config=run_config) + config=run_config, loss_reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE) + #config=run_config, loss_reduction=tf.compat.v1.losses.Reduction.SUM) def input_fn(data_file, num_epochs, shuffle, batch_size): """Generate an input function for the Estimator.""" - assert tf.gfile.Exists(data_file), ( + assert tf.io.gfile.exists(data_file), ( '%s not found. Please make sure you have run data_download.py and ' 'set the --data_dir argument to the correct path.' 
% data_file) def parse_csv(value): print('Parsing', data_file) - columns = tf.decode_csv(value, record_defaults=_CSV_COLUMN_DEFAULTS) + columns = tf.io.decode_csv(records=value, record_defaults=_CSV_COLUMN_DEFAULTS) features = dict(zip(_CSV_COLUMNS, columns)) labels = features.pop('income_bracket') return features, tf.equal(labels, '>50K') @@ -230,9 +231,9 @@ def eval_input_fn(): print('%s: %s' % (key, results[key])) main_end = time.time() E2Eduration = main_end - main_start - print('End-to-End duration is %s', E2Eduration) + print ('End-to-End duration is %s', E2Eduration) evaluate_duration = main_end - inference_start - print('Evaluation duration is %s', evaluate_duration) + print ('Evaluation duration is %s', evaluate_duration) if flags.batch_size == 1: print('Latency is: %s', E2Eduration / num_records) @@ -261,5 +262,5 @@ def __init__(self): if __name__ == '__main__': main_start = time.time() - tf.logging.set_verbosity(tf.logging.INFO) + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) main(argv=sys.argv) diff --git a/models/recommendation/tensorflow/wide_deep_large_ds/__init__.py b/models/recommendation/tensorflow/wide_deep_large_ds/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/recommendation/tensorflow/wide_deep_large_ds/__init__.py +++ b/models/recommendation/tensorflow/wide_deep_large_ds/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/recommendation/tensorflow/wide_deep_large_ds/dataset/featurecolumn_graph_optimization.py b/models/recommendation/tensorflow/wide_deep_large_ds/dataset/featurecolumn_graph_optimization.py index 4a6ab9f86..9233cb646 100644 --- a/models/recommendation/tensorflow/wide_deep_large_ds/dataset/featurecolumn_graph_optimization.py +++ b/models/recommendation/tensorflow/wide_deep_large_ds/dataset/featurecolumn_graph_optimization.py @@ -19,7 +19,7 @@ # '''This script optimizes feature columns in the model by removing error handling and redundant nodes. 
Flag wide_and_deep_large_ds should be enabled for the additional -optimization for wide_and_deep_large_ds_model which involves fusion of categorical +optimization for wide_and_deep_large_ds_model which involves fusion of categorical and numeric columns''' from __future__ import division @@ -49,7 +49,7 @@ dest='enable_column_fusion', default=False) args = parser.parse_args() output_nodes = args.output_nodes.split(",") -output_nodes = ["import/" + str(i) for i in output_nodes] +output_nodes = ["import/"+str(i) for i in output_nodes] graph = ops.Graph() graph_def = graph_pb2.GraphDef() old_graph_def = graph_pb2.GraphDef() @@ -65,8 +65,7 @@ tf.import_graph_def(graph_def) old_graph_def = graph.as_graph_def() name_node_dict = dict() - # This method optimizes tf.embedding_column and tf.categorical_column_with_hash_bucket - + #This method optimizes tf.embedding_column and tf.categorical_column_with_hash_bucket def optimize_categorical_embedding_with_hash_bucket(nodename, gatherfound): if ':' in nodename: nodename = nodename.split(':')[0] @@ -79,9 +78,9 @@ def optimize_categorical_embedding_with_hash_bucket(nodename, gatherfound): if res: node.input[1] = res if "embedding" in node.input[0]: - embedding_column_weights_list.append(node.input[0] + ":0") + embedding_column_weights_list.append(node.input[0]+":0") else: - categorical_column_weights_list.append(node.input[0] + ":0") + categorical_column_weights_list.append(node.input[0]+":0") return node.name for inputname in node.input: res = optimize_categorical_embedding_with_hash_bucket(inputname, gatherfound) @@ -89,7 +88,7 @@ def optimize_categorical_embedding_with_hash_bucket(nodename, gatherfound): return res return None - # This method optimizes tf.feature_column.bucketized_column + #This method optimizes tf.feature_column.bucketized_column def optimize_bucketized_column(nodename, gatherfound): if ':' in nodename: nodename = nodename.split(':')[0] @@ -108,7 +107,7 @@ def optimize_bucketized_column(nodename, gatherfound): return res return None - # This method optimizes tf.feature_column.crossed_column + #This method optimizes tf.feature_column.crossed_column def optimize_crossed_column(nodename, gatherfound): if ':' in nodename: nodename = nodename.split(':')[0] @@ -122,18 +121,18 @@ def optimize_crossed_column(nodename, gatherfound): "GatherV2" in node.input[0]: return node.name elif gatherfound[0] == 2 and node.op == "GatherV2" and "Unique" in node.input[1] and \ - "Identity" not in node.input[0]: + "Identity" not in node.input[0]: res = optimize_crossed_column(node.input[1], gatherfound) if res: - node.input[1] = res + ":1" + node.input[1] = res+":1" return node.name - if gatherfound[0] != 2 and node.op == "GatherV2" and "Unique" in node.input[1]: + if gatherfound[0] != 2 and node.op == "GatherV2" and "Unique" in node.input[1]: gatherfound[0] = 1 res = optimize_crossed_column(node.input[1], gatherfound) if res: node.input[1] = res return node.name - elif gatherfound[0] == 2 and node.op == "Mul" and "GatherV2" in node.input[0]: + elif gatherfound[0] == 2 and node.op == "Mul" and "GatherV2" in node.input[0]: res = optimize_crossed_column(node.input[0], gatherfound) if res: node.input[0] = res @@ -187,7 +186,7 @@ def optimize_categorical_with_voc_list(nodename, gatherfound): return res return None - # This method optimizes tf.feature_column.numeric_column + #This method optimizes tf.feature_column.numeric_column def optimize_numeric(nodename): if ':' in nodename: nodename = nodename.split(':')[0] @@ -198,21 +197,21 @@ def 
optimize_numeric(nodename): return node.input[0] '''This method does model specific optimization(wide_deep_large_ds). It fuses 26 categorical, - embedding weights to one constant and expects fused normalized inputs to the - numeric and hashed inputs to categorical placeholders. It also replaces gatherv2 + embedding weights to one constant and expects fused normalized inputs to the + numeric and hashed inputs to categorical placeholders. It also replaces gatherv2 with gathernd to gather weights from fused weights constant''' def fuse_categorical_numeric_columns(): - new_categorical_placeholder = tf.placeholder(tf.int64, shape=(None, None), + new_categorical_placeholder = tf.compat.v1.placeholder(tf.int64, shape=(None, None), name='new_categorical_placeholder') - new_numeric_placeholder = tf.placeholder(tf.float32, + new_numeric_placeholder = tf.compat.v1.placeholder(tf.float32, shape=(None, None), name='new_numeric_placeholder') categorical_column_weights_list.sort() embedding_column_weights_list.sort() sess = session.Session() categorical_weights_constant, embedding_weights_constant = [], [] - list_of_indices = [i for i in range(1, 11)] + [0] + \ - [i for i in range(12, 19)] + [11] + \ + list_of_indices = [i for i in range(1, 11)]+[0] + \ + [i for i in range(12, 19)]+[11] + \ [i for i in range(19, 26)] with sess.as_default(): for i in list_of_indices: @@ -236,17 +235,18 @@ def fuse_categorical_numeric_columns(): new_categorical_placeholder, name='gather_embedding_weights') embedding_reshape = tf.reshape(batch_gather_op_embedding, - shape=[-1, 32 * 26], + shape=[-1, 32*26], name='embedding_reshape') real_div_input_tens_list = [embedding_reshape, new_numeric_placeholder] new_concat_node = tf.concat(real_div_input_tens_list, name='new_concat_node', axis=1) concat_tensor = graph.get_tensor_by_name("new_concat_node:0") + '''Parsing all the nodes of graph and identifying feature columns to optimize ''' for node in old_graph_def.node: nodename = node.name if node.op == "ConcatV2" and "dnn/input_from_feature_columns" in nodename and \ - "input_layer/concat" in nodename: + "input_layer/concat" in nodename: dnn_concat_node = node elif node.op == "AddN" and "weighted_sum_no_bias" in nodename: weightsumnobias_node = node @@ -254,7 +254,7 @@ def fuse_categorical_numeric_columns(): gatherfound = [0] try: for i, inputname in enumerate(weightsumnobias_node.input): - if 'weighted_by' not in inputname and '_X_' not in inputname: + if 'weighted_by' not in inputname and '_X_' not in inputname: gatherfound[0] = 0 res = optimize_categorical_with_voc_list(weightsumnobias_node.input[i], gatherfound) if res: @@ -284,7 +284,7 @@ def fuse_categorical_numeric_columns(): for i, inputname in enumerate(dnn_concat_node.input): if '_embedding' in inputname and 'shared_embedding' not in inputname \ - and 'weighted_by' not in inputname and '_X_' not in inputname: + and 'weighted_by' not in inputname and '_X_' not in inputname: gatherfound[0] = 0 res = optimize_categorical_with_voc_list(dnn_concat_node.input[i], gatherfound) if res: @@ -308,7 +308,7 @@ def fuse_categorical_numeric_columns(): dnn_concat_node.input[i] = res2 else: gatherfound[0] = 0 - # shared_embedding + #shared_embedding res = optimize_crossed_column(dnn_concat_node.input[i], gatherfound) if res: dnn_concat_node.input[i] = res @@ -320,16 +320,17 @@ def fuse_categorical_numeric_columns(): node.input[1] = "new_numeric_placeholder:0" elif node.op == "BiasAdd" and "linear_model/weighted_sum" in node.name: node.input[0] = "Sum:0" - elif node.op == "MatMul" and 
"hiddenlayer_0/MatMul" in node.name: + elif node.op == "MatMul" and "hiddenlayer_0/MatMul" in node.name: node.input[0] = "new_concat_node:0" except Exception as e: + print(e) print('--------------------------------------------------------------------------') print("Cannot optimize the given graph. The given graph might be an optimized one") print('--------------------------------------------------------------------------') - sys.exit() + sys.exit() -new_graph_def = tf.GraphDef() -new_graph_def = tf.graph_util.extract_sub_graph( +new_graph_def = tf.compat.v1.GraphDef() +new_graph_def = tf.compat.v1.graph_util.extract_sub_graph( old_graph_def, output_nodes ) diff --git a/models/recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py b/models/recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py index c4ef32b1d..eddc2f65b 100644 --- a/models/recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py +++ b/models/recommendation/tensorflow/wide_deep_large_ds/dataset/preprocess_csv_tfrecords.py @@ -61,72 +61,72 @@ if file_ext != ".tfrecords": output_file = output_file + ".tfrecords" -output_file = "{}_{}".format(in_filename, output_file) +output_file = "{}_{}".format(in_filename,output_file) csv = pandas.read_csv(eval_csv_file, header=None) -if len(csv.columns) == 39: +if len(csv.columns)==39: dataset_type = 'test' else: dataset_type = 'eval' -fill_na_dict = {} -if dataset_type == 'test': - for i in range(0, 13): - fill_na_dict[i] = 0.0 - for i in range(13, 39): - fill_na_dict[i] = "" +fill_na_dict = {} +if dataset_type=='test': + for i in range(0,13): + fill_na_dict[i]=0.0 + for i in range(13,39): + fill_na_dict[i]="" else: - for i in range(1, 14): - fill_na_dict[i] = 0.0 - for i in range(14, 40): - fill_na_dict[i] = "" -csv = csv.fillna(value=fill_na_dict).values + for i in range(1,14): + fill_na_dict[i]=0.0 + for i in range(14,40): + fill_na_dict[i]="" +csv=csv.fillna(value=fill_na_dict).values numeric_feature_names = ["numeric_1"] string_feature_names = ["string_1"] -LABEL_COLUMN = ["clicked"] -CATEGORICAL_COLUMNS1 = ["C" + str(i) + "_embedding" for i in range(1, 27)] -NUMERIC_COLUMNS1 = ["I" + str(i) for i in range(1, 14)] -if dataset_type == 'eval': +LABEL_COLUMN =["clicked"] +CATEGORICAL_COLUMNS1 = ["C"+str(i)+"_embedding" for i in range(1, 27)] +NUMERIC_COLUMNS1 = ["I"+str(i) for i in range(1, 14)] +if dataset_type=='eval': DATA_COLUMNS = LABEL_COLUMN + NUMERIC_COLUMNS1 + CATEGORICAL_COLUMNS1 else: DATA_COLUMNS = NUMERIC_COLUMNS1 + CATEGORICAL_COLUMNS1 -CATEGORICAL_COLUMNS2 = ["C" + str(i) + "_embedding" for i in range(1, 27)] -NUMERIC_COLUMNS2 = ["I" + str(i) for i in range(1, 14)] +CATEGORICAL_COLUMNS2 = ["C"+str(i)+"_embedding" for i in range(1, 27)] +NUMERIC_COLUMNS2 = ["I"+str(i) for i in range(1, 14)] CATEGORICAL_COLUMNS1.sort() NUMERIC_COLUMNS1.sort() no_of_rows = 0 with open(eval_csv_file, 'r') as f: if not os.path.isfile(train_csv_file): - nums = [line.strip('\n\r').split(',') for line in f.readlines()] - else: + nums=[line.strip('\n\r').split(',') for line in f.readlines()] + else: f1 = open(train_csv_file, 'r') - nums = [line.strip('\n\r').split(',') for line in f.readlines( - )] + [line.strip('\n\t').split(',') for line in f1.readlines()] + nums=[line.strip('\n\r').split(',') for line in f.readlines( + )]+[line.strip('\n\t').split(',') for line in f1.readlines()] numpy_arr = np.array(nums) - numpy_arr[numpy_arr == ''] = '0' - min_list, max_list, range_list = [], [], [] + numpy_arr[numpy_arr=='']='0' + 
min_list,max_list,range_list = [],[],[] for i in range(len(DATA_COLUMNS)): if DATA_COLUMNS[i] in NUMERIC_COLUMNS1: - col_min = numpy_arr[:, i].astype(np.float32).min() - col_max = numpy_arr[:, i].astype(np.float32).max() + col_min = numpy_arr[:,i].astype(np.float32).min() + col_max = numpy_arr[:,i].astype(np.float32).max() min_list.append(col_min) max_list.append(col_max) - range_list.append(col_max - col_min) + range_list.append(col_max-col_min) if os.path.isfile(train_csv_file): f1.close() - print('min list', min_list) - print('max list', max_list) - print('range list', range_list) + print('min list',min_list) + print('max list',max_list) + print('range list',range_list) with tf.python_io.TFRecordWriter(output_file) as writer: print('*****Processing data******') for row in csv: - no_of_rows = no_of_rows + 1 + no_of_rows = no_of_rows+1 if dataset_type == 'eval': unnormalized_vals = np.array(row[1:14]) else: unnormalized_vals = np.array(row[0:13]) - normalized_vals = (unnormalized_vals - min_list) / range_list + normalized_vals = (unnormalized_vals-min_list)/range_list if dataset_type == 'eval': new_categorical_dict = dict(zip(CATEGORICAL_COLUMNS2, row[14:40])) else: @@ -151,4 +151,4 @@ writer.write(example.SerializeToString()) print('Total number of rows ', no_of_rows) -print('Generated output file name :' + output_file) +print('Generated output file name :'+output_file) diff --git a/models/recommendation/tensorflow/wide_deep_large_ds/inference/__init__.py b/models/recommendation/tensorflow/wide_deep_large_ds/inference/__init__.py index c4fdb7d61..f730c0c1c 100644 --- a/models/recommendation/tensorflow/wide_deep_large_ds/inference/__init__.py +++ b/models/recommendation/tensorflow/wide_deep_large_ds/inference/__init__.py @@ -17,3 +17,4 @@ # # + diff --git a/models/recommendation/tensorflow/wide_deep_large_ds/inference/inference.py b/models/recommendation/tensorflow/wide_deep_large_ds/inference/inference.py index 0ac060197..d71cc7f67 100755 --- a/models/recommendation/tensorflow/wide_deep_large_ds/inference/inference.py +++ b/models/recommendation/tensorflow/wide_deep_large_ds/inference/inference.py @@ -16,8 +16,6 @@ # limitations under the License. 
# -# - from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -44,7 +42,6 @@ def str2bool(v): else: return False - parser = argparse.ArgumentParser() parser.add_argument('--input_graph', type=str, help='file name for graph', @@ -73,30 +70,21 @@ def str2bool(v): required=False, default=None, dest='num_omp_threads') -parser.add_argument('--kmp_blocktime', type=str, - help='KMP_BLOCKTIME value', - required=False, - default=None, - dest='kmp_blocktime') parser.add_argument("--accuracy_only", type=str2bool, nargs='?', const=True, default=False, dest='compute_accuracy', required=False, help="Enable accuracy calculation") args = parser.parse_args() -if args.kmp_blocktime: - os.environ["KMP_BLOCKTIME"] = args.kmp_blocktime -os.environ["KMP_SETTINGS"] = "1" if args.num_omp_threads: os.environ["OMP_NUM_THREADS"] = args.num_omp_threads - output_probabilities_node = 'import/import/head/predictions/probabilities' -probabilities_node = 'import/' + output_probabilities_node + ':0' +probabilities_node = 'import/'+output_probabilities_node+':0' placeholder_name = 'import/new_numeric_placeholder' categorical_placeholder = 'import/new_categorical_placeholder' -config = tf.ConfigProto(log_device_placement=False, +config = tf.compat.v1.ConfigProto(log_device_placement=False, inter_op_parallelism_threads=args.num_inter_threads, intra_op_parallelism_threads=args.num_intra_threads) graph = ops.Graph() @@ -111,17 +99,18 @@ def str2bool(v): else: graph_def.ParseFromString(f.read()) with graph.as_default(): - tf.import_graph_def(graph_def) + tf.import_graph_def(graph_def) numeric_feature_names = ["numeric_1"] string_feature_names = ["string_1"] if args.compute_accuracy: full_features_names = numeric_feature_names + string_feature_names + ["label"] - feature_datatypes = [tf.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)] + [tf.FixedLenSequenceFeature( - [], tf.int64, default_value=0, allow_missing=True)] + [tf.FixedLenSequenceFeature([], tf.int64, default_value=0, allow_missing=True)] + feature_datatypes = [tf.io.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature( + [], tf.int64, default_value=0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature([], tf.int64, default_value=0, allow_missing=True)] else: full_features_names = numeric_feature_names + string_feature_names - feature_datatypes = [tf.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)] + [tf.FixedLenSequenceFeature( - [], tf.int64, default_value=0, allow_missing=True)] + feature_datatypes = [tf.io.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature( + [], tf.int64, default_value=0, allow_missing=True)] + def input_fn(data_file, num_epochs, shuffle, batch_size): @@ -129,7 +118,7 @@ def input_fn(data_file, num_epochs, shuffle, batch_size): def _parse_function(proto): f = collections.OrderedDict( zip(full_features_names, feature_datatypes)) - parsed_features = tf.parse_example(proto, f) + parsed_features = tf.io.parse_example(proto, f) parsed_feature_vals_num = [tf.reshape( parsed_features["numeric_1"], shape=[-1, 13])] parsed_feature_vals_str = [tf.reshape( @@ -146,53 +135,53 @@ def _parse_function(proto): dataset = dataset.shuffle(buffer_size=20000) dataset = dataset.batch(batch_size) dataset = dataset.map(_parse_function, num_parallel_calls=28) - dataset = dataset.prefetch(batch_size * 10) + dataset = 
dataset.prefetch(batch_size*10) return dataset data_file = args.data_location -no_of_test_samples = sum(1 for _ in tf.python_io.tf_record_iterator(data_file)) -no_of_batches = math.ceil(float(no_of_test_samples) / batch_size) -placeholder_list = ['import/new_numeric_placeholder:0', 'import/new_categorical_placeholder:0'] +no_of_test_samples = sum(1 for _ in tf.compat.v1.python_io.tf_record_iterator(data_file)) +no_of_batches = math.ceil(float(no_of_test_samples)/batch_size) +placeholder_list = ['import/new_numeric_placeholder:0','import/new_categorical_placeholder:0'] input_tensor = [graph.get_tensor_by_name(name) for name in placeholder_list] output_name = "import/head/predictions/probabilities" -output_tensor = graph.get_tensor_by_name("import/" + output_name + ":0") +output_tensor = graph.get_tensor_by_name("import/" + output_name + ":0" ) correctly_predicted = 0 total_infer_consume = 0.0 warm_iter = 100 features_list = [] -with tf.Session(config=config, graph=graph) as sess: - res_dataset = input_fn(data_file, 1, False, batch_size) - iterator = res_dataset.make_one_shot_iterator() - next_element = iterator.get_next() - for i in range(int(no_of_batches)): - batch = sess.run(next_element) - features = batch[0:3] - features_list.append(features) - -with tf.Session(config=config, graph=graph) as sess1: - i = 0 - while True: - if i >= no_of_batches: - break - if i > warm_iter: - inference_start = time.time() - logistic = sess1.run(output_tensor, dict(zip(input_tensor, features_list[i][0:2]))) - if i > warm_iter: - infer_time = time.time() - inference_start - total_infer_consume += infer_time - if args.compute_accuracy: - predicted_labels = np.argmax(logistic, 1) - correctly_predicted = correctly_predicted + np.sum(features_list[i][2] == predicted_labels) - - i = i + 1 - inference_end = time.time() +with tf.compat.v1.Session(config=config, graph=graph) as sess: + res_dataset = input_fn(data_file, 1, False, batch_size) + iterator = tf.compat.v1.data.make_one_shot_iterator(res_dataset) + next_element = iterator.get_next() + for i in range(int(no_of_batches)): + batch=sess.run(next_element) + features=batch[0:3] + features_list.append(features) + +with tf.compat.v1.Session(config=config, graph=graph) as sess1: + i=0 + while True: + if i >= no_of_batches: + break + if i > warm_iter: + inference_start = time.time() + logistic = sess1.run(output_tensor, dict(zip(input_tensor, features_list[i][0:2]))) + if i > warm_iter: + infer_time = time.time() - inference_start + total_infer_consume += infer_time + if args.compute_accuracy: + predicted_labels = np.argmax(logistic,1) + correctly_predicted=correctly_predicted+np.sum(features_list[i][2] == predicted_labels) + + i=i+1 + inference_end = time.time() if args.compute_accuracy: accuracy = ( - float(correctly_predicted) / float(no_of_test_samples)) + float(correctly_predicted)/float(no_of_test_samples)) evaluate_duration = total_infer_consume -latency = (1000 * batch_size * float(evaluate_duration) / float(no_of_test_samples - warm_iter * batch_size)) -throughput = (no_of_test_samples - warm_iter * batch_size) / evaluate_duration +latency = (1000 * batch_size* float(evaluate_duration)/float(no_of_test_samples - warm_iter*batch_size)) +throughput = (no_of_test_samples - warm_iter * batch_size)/evaluate_duration print('--------------------------------------------------') print('Total test records : ', no_of_test_samples) @@ -202,6 +191,6 @@ def _parse_function(proto): print('Classification accuracy (%) : ', round((accuracy * 100), 4)) print('No of correct 
predictions : ', int(correctly_predicted)) print('Inference duration (seconds) : ', round(evaluate_duration, 4)) -print('Average Latency (ms/batch) : ', round(latency, 4)) +print('Average Latency (ms/batch) : ', round(latency,4)) print('Throughput is (records/sec) : ', round(throughput, 3)) print('--------------------------------------------------') diff --git a/models/recommendation/tensorflow/wide_deep_large_ds/inference/parallel_inference.py b/models/recommendation/tensorflow/wide_deep_large_ds/inference/parallel_inference.py new file mode 100755 index 000000000..e1f25fcd1 --- /dev/null +++ b/models/recommendation/tensorflow/wide_deep_large_ds/inference/parallel_inference.py @@ -0,0 +1,235 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import sys +import os +import numpy as np +import argparse +import collections +import time +import math +import json +import datetime + +import tensorflow as tf +tf.compat.v1.disable_v2_behavior() +from tensorflow.python.framework import graph_util +from tensorflow.python.framework import ops +from tensorflow.core.framework import graph_pb2 +from google.protobuf import text_format + + +def str2bool(v): + if v.lower() in ('true'): + return True + else: + return False + +parser = argparse.ArgumentParser() +parser.add_argument('--input_graph', type=str, + help='file name for graph', + dest='input_graph', + required=True) +parser.add_argument('--data_location', type=str, + help='full path of data file', + dest='data_location', + required=True) +parser.add_argument('--batch_size', type=int, + help='batch size for inference.Default is 512', + default=512, + dest='batch_size') +parser.add_argument('--num_intra_threads', type=int, + help='number of threads for an operator', + required=False, + default=28, + dest='num_intra_threads') +parser.add_argument('--num_inter_threads', type=int, + help='number of threads across operators', + required=False, + default=2, + dest='num_inter_threads') +parser.add_argument('--num_omp_threads', type=str, + help='number of threads to use', + required=False, + default=None, + dest='num_omp_threads') +parser.add_argument('--num_parallel_batches', type=int, + help='number of parallel batches', + required=False, + default=1, + dest='num_parallel_batches') +parser.add_argument("--accuracy_only", type=str2bool, + nargs='?', const=True, default=False, + dest='compute_accuracy', required=False, + help="Enable accuracy calculation") + +args = parser.parse_args() +if args.num_omp_threads: + os.environ["OMP_NUM_THREADS"] = args.num_omp_threads +num_parallel_batches = args.num_parallel_batches +output_probabilities_node = 'import/import/head/predictions/probabilities' +while_probabilities_node = 'while/import/'+output_probabilities_node+':0' +while_softmax_operation = 'while/import/'+output_probabilities_node 
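+# The pruned graph_def is re-imported inside the tf.while_loop body built
+# further below, so its tensors pick up an extra "while/import/" name prefix;
+# the while_* names above address the probabilities output inside that loop scope.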
+placeholder_name = 'import/new_numeric_placeholder' +categorical_placeholder = 'import/new_categorical_placeholder' + +config = tf.compat.v1.ConfigProto(log_device_placement=False, + inter_op_parallelism_threads=args.num_inter_threads, + intra_op_parallelism_threads=args.num_intra_threads) +graph = ops.Graph() +graph_def = graph_pb2.GraphDef() + +filename, file_ext = os.path.splitext(args.input_graph) + +batch_size = args.batch_size +with open(args.input_graph, "rb") as f: + if file_ext == ".pbtxt": + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + +numeric_feature_names = ["numeric_1"] +string_feature_names = ["string_1"] +if args.compute_accuracy: + full_features_names = numeric_feature_names + string_feature_names + ["label"] + feature_datatypes = [tf.io.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature( + [], tf.int64, default_value=0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature([], tf.int64, default_value=0, allow_missing=True)] +else: + full_features_names = numeric_feature_names + string_feature_names + feature_datatypes = [tf.io.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature( + [], tf.int64, default_value=0, allow_missing=True)] + + + +def input_fn(data_file, num_epochs, shuffle, batch_size): + """Generate an input function for the Estimator.""" + def _parse_function(proto): + f = collections.OrderedDict( + zip(full_features_names, feature_datatypes)) + parsed_features = tf.io.parse_example(proto, f) + parsed_feature_vals_num = [tf.reshape( + parsed_features["numeric_1"], shape=[-1, 13])] + parsed_feature_vals_str = [tf.reshape( + parsed_features["string_1"], shape=[-1, 2]) for i in string_feature_names] + parsed_feature_vals = parsed_feature_vals_num + parsed_feature_vals_str + if args.compute_accuracy: + parsed_feature_vals_label = [tf.reshape(parsed_features[i], shape=[-1]) for i in ["label"]] + parsed_feature_vals = parsed_feature_vals + parsed_feature_vals_label + return parsed_feature_vals + + # Extract lines from input files using the Dataset API. 
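+    # The parsed dataset is consumed through a one-shot iterator; its
+    # IteratorGetNext outputs are later wired directly into the GatherNd and
+    # ConcatV2 nodes of the imported graph, replacing the feed-dict placeholders.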
+ dataset = tf.data.TFRecordDataset([data_file]) + if shuffle: + dataset = dataset.shuffle(buffer_size=20000) + dataset = dataset.batch(batch_size) + dataset = dataset.map(_parse_function, num_parallel_calls=28) + dataset = dataset.cache() + dataset = dataset.prefetch(1) + return dataset + + +data_file = args.data_location +no_of_test_samples = sum(1 for _ in tf.compat.v1.python_io.tf_record_iterator(data_file)) +no_of_batches = math.ceil(float(no_of_test_samples)/batch_size) +with graph.as_default(): + tf.import_graph_def(graph_def) + res_dataset = input_fn(data_file, 1, False, batch_size) + iterator = tf.compat.v1.data.make_one_shot_iterator(res_dataset) + next_element = iterator.get_next() + iterator_names = [i.name.split(':')[1] for i in next_element] + placeholder_expandims = {} + full_nodes = [] + old_graph_def = graph.as_graph_def() + for node in old_graph_def.node: + k = node.name + if k == "IteratorGetNext": + iterator_node = node + elif (node.op == "GatherNd" or node.op == 'ConcatV2') and (placeholder_name in node.input[1] or categorical_placeholder in node.input[1]): + if node.op == 'GatherNd' and node.name == 'import/gather_categorical_weights': + gather_categorical_node = node + elif node.op == 'GatherNd' and node.name == 'import/gather_embedding_weights': + gather_embedding_node = node + elif node.op == 'ConcatV2': + concat_node = node + + gather_categorical_node.input[1] = iterator_node.name+":1" + gather_embedding_node.input[1] = iterator_node.name+":1" + concat_node.input[1] = iterator_node.name+":0" + + +new_graph_def = tf.compat.v1.GraphDef() +new_graph_def = tf.compat.v1.graph_util.extract_sub_graph( + old_graph_def, + [output_probabilities_node] +) +tf.compat.v1.reset_default_graph() +graph = ops.Graph() + +with graph.as_default(): + i = tf.constant(0) + arr = tf.TensorArray(dtype=tf.int32, size=2000, dynamic_size=True) + def _body(i, arr): + tf.import_graph_def(new_graph_def) + output_tensor = graph.get_tensor_by_name(while_probabilities_node) + if args.compute_accuracy: + labels_tensor = graph.get_tensor_by_name("while/import/IteratorGetNext:2") + predicted_labels = tf.argmax(output_tensor,1,output_type=tf.int64) + correctly_predicted_bool = tf.equal(predicted_labels, labels_tensor) + num_correct_predictions_batch = tf.reduce_sum(tf.cast(correctly_predicted_bool, tf.int32)) + else: + predicted_labels = tf.argmax(output_tensor,1,output_type=tf.int32) + num_correct_predictions_batch = tf.reduce_sum(predicted_labels) + arr = arr.write(i, num_correct_predictions_batch) + i = tf.add(i, 1) + return i, arr + i, arr = tf.compat.v2.while_loop(cond=lambda i, x: i < int(no_of_batches), body=_body, loop_vars=[i, arr], parallel_iterations=num_parallel_batches) + array_gather = arr.gather(tf.range(0, int(no_of_batches), delta=1, dtype=None, name='range')) + +with tf.compat.v1.Session(config=config, graph=graph) as sess: + inference_start = time.time() + try: + num_correct_predictions_batch = sess.run(array_gather) + except Exception as e: + print('--------------------------------------------------') + print("Exception during execution of model: ",e) + print('--------------------------------------------------') + sys.exit() + total_num_correct_predictions = num_correct_predictions_batch.sum(axis=0) + inference_end = time.time() +if args.compute_accuracy: + accuracy = (float(total_num_correct_predictions)/float(no_of_test_samples)) +evaluate_duration = inference_end - inference_start +latency = (1000 * float(batch_size * num_parallel_batches) * float(evaluate_duration) / 
float(no_of_test_samples)) + +throughput = no_of_test_samples/evaluate_duration +print('--------------------------------------------------') +print('Total test records : ', no_of_test_samples) +print('Batch size is : ', batch_size) +print('Number of batches : ', int(no_of_batches)) +if args.compute_accuracy: + print('Classification accuracy (%) : ', round((accuracy * 100), 4)) + print('No of correct predictions : ', int(total_num_correct_predictions)) +print('Inference duration (seconds) : ', round(evaluate_duration, 4)) +print('Average Latency (ms/batch) : ', round(latency,4)) +print('Throughput is (records/sec) : ', round(throughput, 3)) +print('--------------------------------------------------') diff --git a/models/recommendation/tensorflow/wide_deep_large_ds/training/train.py b/models/recommendation/tensorflow/wide_deep_large_ds/training/train.py index 15892d820..50e17a2c8 100644 --- a/models/recommendation/tensorflow/wide_deep_large_ds/training/train.py +++ b/models/recommendation/tensorflow/wide_deep_large_ds/training/train.py @@ -29,10 +29,10 @@ from os import path import sys # Set to INFO for tracking training, default is WARN. ERROR for least messages -tf.logging.set_verbosity(tf.logging.INFO) +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) print("Using TensorFlow version %s" % (tf.__version__)) -CONTINUOUS_COLUMNS = ["I" + str(i) for i in range(1, 14)] # 1-13 inclusive -CATEGORICAL_COLUMNS = ["C" + str(i) for i in range(1, 27)] # 1-26 inclusive +CONTINUOUS_COLUMNS = ["I"+str(i) for i in range(1, 14)] # 1-13 inclusive +CATEGORICAL_COLUMNS = ["C"+str(i) for i in range(1, 27)] # 1-26 inclusive LABEL_COLUMN = ["clicked"] TRAIN_DATA_COLUMNS = LABEL_COLUMN + CONTINUOUS_COLUMNS + CATEGORICAL_COLUMNS FEATURE_COLUMNS = CONTINUOUS_COLUMNS + CATEGORICAL_COLUMNS @@ -40,13 +40,13 @@ def generate_input_fn(filename, batch_size, num_epochs): def parse_csv(value): - tf.logging.info('Parsing {}'.format(filename)) + tf.compat.v1.logging.info('Parsing {}'.format(filename)) cont_defaults = [[0.0] for i in range(1, 14)] cate_defaults = [[" "] for i in range(1, 27)] label_defaults = [[0]] column_headers = TRAIN_DATA_COLUMNS record_defaults = label_defaults + cont_defaults + cate_defaults - columns = tf.decode_csv(value, record_defaults=record_defaults) + columns = tf.io.decode_csv(value, record_defaults=record_defaults) all_columns = collections.OrderedDict(zip(column_headers, columns)) labels = all_columns.pop(LABEL_COLUMN[0]) features = all_columns @@ -63,12 +63,12 @@ def parse_csv(value): return dataset -def build_feature_cols(train_file_path, test_file_path): +def build_feature_cols(train_file_path,test_file_path): # compute statistics(min,max,range) of train dataset print('****Computing statistics of train dataset*****') with open(train_file_path, 'r') as f, open(test_file_path, 'r') as f1: nums = [line.strip('\n').split(',') for line in f.readlines( - )] + [line.strip('\n').split(',') for line in f1.readlines()] + )]+[line.strip('\n').split(',') for line in f1.readlines()] numpy_arr = np.array(nums) mins_list, max_list, range_list = [], [], [] for i in range(len(TRAIN_DATA_COLUMNS)): @@ -77,14 +77,14 @@ def build_feature_cols(train_file_path, test_file_path): col_max = numpy_arr[:, i].astype(np.float32).max() mins_list.append(col_min) max_list.append(col_max) - range_list.append(col_max - col_min) + range_list.append(col_max-col_min) def numeric_column_normalized(column_name, normalizer_fn): return tf.feature_column.numeric_column(column_name, normalizer_fn=normalizer_fn) def 
make_minmaxscaler(min, range): def minmaxscaler(col): - return (col - min) / range + return (col - min)/range return minmaxscaler deep_columns = [] for i in range(len(CONTINUOUS_COLUMNS)): @@ -136,12 +136,12 @@ def build_model(model_type, model_dir, wide_columns, deep_columns): return m -def build_estimator(model_type='WIDE_AND_DEEP', model_dir=None, train_file_path=None, test_file_path=None): +def build_estimator(model_type='WIDE_AND_DEEP', model_dir=None, train_file_path=None,test_file_path=None): if model_dir is None: model_dir = 'models/model_' + model_type + '_' + str(int(time.time())) print("Model directory = %s" % model_dir) - wide_columns, deep_columns = build_feature_cols(train_file_path, test_file_path) + wide_columns, deep_columns = build_feature_cols(train_file_path,test_file_path) m = build_model(model_type, model_dir, wide_columns, deep_columns) print('estimator built') return m @@ -156,18 +156,18 @@ def column_to_dtype(column): """ - This function maps input columns (feature_placeholders) to - tensors that can be inputted into the graph + This function maps input columns (feature_placeholders) to + tensors that can be inputted into the graph (similar in purpose to the output of our input functions) In this particular case, we need to accomodate the sparse fields (strings) - so we have to do a slight modification to expand their dimensions, + so we have to do a slight modification to expand their dimensions, just like in the input functions """ def serving_input_fn(): feature_placeholders = { - column: tf.placeholder(column_to_dtype(column), [None]) + column: tf.compat.v1.placeholder(column_to_dtype(column), [None]) for column in FEATURE_COLUMNS } # DNNCombinedLinearClassifier expects rank 2 Tensors, @@ -185,38 +185,38 @@ def serving_input_fn(): def train_and_eval(): print("Begin training and evaluation") - train_file = args.data_location + '/train.csv' - test_file = args.data_location + '/eval.csv' + train_file = args.data_location+'/train.csv' + test_file = args.data_location+'/eval.csv' if (not path.exists(train_file)) or (not path.exists(test_file)): print('------------------------------------------------------------------------------------------') print("train.csv or eval.csv does not exist in the given data_location. 
Please provide valid path") print('------------------------------------------------------------------------------------------') - sys.exit() + sys.exit() no_of_training_examples = sum(1 for line in open(train_file)) no_of_test_examples = sum(1 for line in open(test_file)) batch_size = args.batch_size if args.steps == 0: no_of_epochs = 10 train_steps = math.ceil( - (float(no_of_epochs) * no_of_training_examples) / batch_size) + (float(no_of_epochs)*no_of_training_examples)/batch_size) else: no_of_epochs = math.ceil( - (float(batch_size) * args.steps) / no_of_training_examples) + (float(batch_size)*args.steps)/no_of_training_examples) train_steps = args.steps - test_steps = math.ceil(float(no_of_test_examples) / batch_size) + test_steps = math.ceil(float(no_of_test_examples)/batch_size) model_type = 'WIDE_AND_DEEP' model_dir = 'model_' + model_type + '_' + str(int(time.time())) print("Saving model checkpoints to " + model_dir) export_dir = model_dir + '/exports' m = build_estimator(model_type, model_dir, train_file, test_file) m.train(input_fn=lambda: generate_input_fn( - train_file, batch_size, int(no_of_epochs)), steps=int(train_steps)) + train_file, batch_size, int(no_of_epochs)),steps=int(train_steps)) print('fit done') results = m.evaluate(input_fn=lambda: generate_input_fn( test_file, batch_size, 1), steps=test_steps) print('evaluate done') - export_folder = m.export_savedmodel( + export_folder = m.export_saved_model( export_dir, serving_input_fn ) @@ -252,4 +252,4 @@ def get_arg_parser(): main_start = time.time() train_and_eval() main_end = time.time() - print("Total time:", main_end - main_start) + print("Total time:", main_end-main_start) diff --git a/benchmarks/face_detection_and_alignment/tensorflow/__init__.py b/models/reinforcement/__init__.py similarity index 93% rename from benchmarks/face_detection_and_alignment/tensorflow/__init__.py rename to models/reinforcement/__init__.py index fd96ab7af..5e15e5a12 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/__init__.py +++ b/models/reinforcement/__init__.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2019 Intel Corporation +# Copyright (c) 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,4 +16,3 @@ # limitations under the License. # -# diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/__init__.py b/models/reinforcement/tensorflow/__init__.py similarity index 93% rename from benchmarks/face_detection_and_alignment/tensorflow/mtcc/__init__.py rename to models/reinforcement/tensorflow/__init__.py index fd96ab7af..5e15e5a12 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/__init__.py +++ b/models/reinforcement/tensorflow/__init__.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2019 Intel Corporation +# Copyright (c) 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,4 +16,3 @@ # limitations under the License. 
# -# diff --git a/models/reinforcement/tensorflow/minigo/__init__.py b/models/reinforcement/tensorflow/minigo/__init__.py new file mode 100644 index 000000000..199f25228 --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/__init__.py @@ -0,0 +1,17 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/models/reinforcement/tensorflow/minigo/training/__init__.py b/models/reinforcement/tensorflow/minigo/training/__init__.py new file mode 100644 index 000000000..199f25228 --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/__init__.py @@ -0,0 +1,17 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/models/reinforcement/tensorflow/minigo/training/fp32/avoid-repeated-clone-multinode.patch b/models/reinforcement/tensorflow/minigo/training/fp32/avoid-repeated-clone-multinode.patch new file mode 100644 index 000000000..48291bb09 --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/fp32/avoid-repeated-clone-multinode.patch @@ -0,0 +1,50 @@ +diff --git a/cc/configure_tensorflow.sh b/cc/configure_tensorflow.sh +index 2ea9789..f335b94 100755 +--- a/cc/configure_tensorflow.sh ++++ b/cc/configure_tensorflow.sh +@@ -20,26 +20,28 @@ fi + + # TODO(tommadams): we should probably switch to Clang at some point. + +-if [ -d "${tmp_dir}" ]; then +- pushd "${tmp_dir}" +-else ++if [ ! 
-d "${tmp_dir}" ]; then + echo "Cloning tensorflow to ${tmp_dir}" + git clone https://github.com/tensorflow/tensorflow "${tmp_dir}" +- cp cc/tf_int8_fusion.patch "${tmp_dir}" ++fi + +- pushd "${tmp_dir}" ++pushd "${tmp_dir}" + +- cherry_pick_tag="02c111ab4269ab73a506164e4b54ba996d28a8cf" +- prev_tag="8be9158c7a701d933bbe532f5d54df17f47a4284" ++git reset --hard ++git clean -fd + +- git diff "${prev_tag}" "${cherry_pick_tag}" > sample.patch ++cp ../tf_int8_fusion.patch "${tmp_dir}" + +- commit_tag="961bb02b882a8bb921e5be1c09c34b51fffd25dc" +- echo "Checking out ${commit_tag}" +- git checkout "${commit_tag}" +- git apply sample.patch +- git apply tf_int8_fusion.patch +-fi ++cherry_pick_tag="02c111ab4269ab73a506164e4b54ba996d28a8cf" ++prev_tag="8be9158c7a701d933bbe532f5d54df17f47a4284" ++ ++git diff "${prev_tag}" "${cherry_pick_tag}" > sample.patch ++ ++commit_tag="961bb02b882a8bb921e5be1c09c34b51fffd25dc" ++echo "Checking out ${commit_tag}" ++git checkout "${commit_tag}" ++git apply sample.patch ++git apply tf_int8_fusion.patch + + # Run the TensorFlow configuration script, setting reasonable values for most + # of the options. +-- +1.8.3.1 + diff --git a/models/reinforcement/tensorflow/minigo/training/fp32/avoid-repeated-clone-singlenode.patch b/models/reinforcement/tensorflow/minigo/training/fp32/avoid-repeated-clone-singlenode.patch new file mode 100644 index 000000000..3d6939e8d --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/fp32/avoid-repeated-clone-singlenode.patch @@ -0,0 +1,49 @@ +diff --git a/cc/configure_tensorflow.sh b/cc/configure_tensorflow.sh +index 3bc9cea..2a5db5c 100755 +--- a/cc/configure_tensorflow.sh ++++ b/cc/configure_tensorflow.sh +@@ -20,25 +20,27 @@ fi + + # TODO(tommadams): we should probably switch to Clang at some point. + +-if [ -d "${tmp_dir}" ]; then +- pushd "${tmp_dir}" +-else +- echo "Cloning tensorflow to ${tmp_dir}" ++if [ ! -d "${tmp_dir}" ]; then + git clone https://github.com/tensorflow/tensorflow "${tmp_dir}" ++ echo "Cloning tensorflow to ${tmp_dir}" ++fi + +- pushd "${tmp_dir}" ++pushd "${tmp_dir}" + +- cherry_pick_tag="02c111ab4269ab73a506164e4b54ba996d28a8cf" +- prev_tag="8be9158c7a701d933bbe532f5d54df17f47a4284" ++git reset --hard ++git clean -fd + +- git diff "${prev_tag}" "${cherry_pick_tag}" > sample.patch ++cherry_pick_tag="02c111ab4269ab73a506164e4b54ba996d28a8cf" ++prev_tag="8be9158c7a701d933bbe532f5d54df17f47a4284" ++ ++git diff "${prev_tag}" "${cherry_pick_tag}" > sample.patch ++ ++commit_tag="961bb02b882a8bb921e5be1c09c34b51fffd25dc" ++echo "Checking out ${commit_tag}" ++git checkout "${commit_tag}" ++git apply sample.patch ++cp ${script_dir}/../ml_perf/tools/tensorflow_quantization/graph_transforms/fuse_quantized_convolution.cc ${tmp_dir}/tensorflow/tools/graph_transforms/ + +- commit_tag="961bb02b882a8bb921e5be1c09c34b51fffd25dc" +- echo "Checking out ${commit_tag}" +- git checkout "${commit_tag}" +- git apply sample.patch +- cp ${script_dir}/../ml_perf/tools/tensorflow_quantization/graph_transforms/fuse_quantized_convolution.cc ${tmp_dir}/tensorflow/tools/graph_transforms/ +-fi + + # Run the TensorFlow configuration script, setting reasonable values for most + # of the options. 
+-- +1.8.3.1 + diff --git a/models/reinforcement/tensorflow/minigo/training/fp32/bazel-clean-large-scale.patch b/models/reinforcement/tensorflow/minigo/training/fp32/bazel-clean-large-scale.patch new file mode 100644 index 000000000..e2182168a --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/fp32/bazel-clean-large-scale.patch @@ -0,0 +1,16 @@ +diff --git a/cc/configure_tensorflow.sh b/cc/configure_tensorflow.sh +index f335b94..1e714e2 100755 +--- a/cc/configure_tensorflow.sh ++++ b/cc/configure_tensorflow.sh +@@ -50,6 +50,8 @@ cc_opt_flags="${CC_OPT_FLAGS:--march=native}" + + PYTHON_BIN_PATH=`which python` + ++bazel clean ++ + CC_OPT_FLAGS="${cc_opt_flags}" \ + PYTHON_BIN_PATH=${PYTHON_BIN_PATH} \ + USE_DEFAULT_PYTHON_LIB_PATH="${USE_DEFAULT_PYTHON_LIB_PATH:-1}" \ +-- +1.8.3.1 + diff --git a/models/reinforcement/tensorflow/minigo/training/fp32/bazel-clean-single-node.patch b/models/reinforcement/tensorflow/minigo/training/fp32/bazel-clean-single-node.patch new file mode 100644 index 000000000..531832029 --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/fp32/bazel-clean-single-node.patch @@ -0,0 +1,16 @@ +diff --git a/cc/configure_tensorflow.sh b/cc/configure_tensorflow.sh +index 2a5db5c..854218f 100755 +--- a/cc/configure_tensorflow.sh ++++ b/cc/configure_tensorflow.sh +@@ -49,6 +49,8 @@ cc_opt_flags="${CC_OPT_FLAGS:--march=native}" + + PYTHON_BIN_PATH=`which python` + ++bazel clean ++ + CC_OPT_FLAGS="${cc_opt_flags}" \ + PYTHON_BIN_PATH=${PYTHON_BIN_PATH} \ + USE_DEFAULT_PYTHON_LIB_PATH="${USE_DEFAULT_PYTHON_LIB_PATH:-1}" \ +-- +1.8.3.1 + diff --git a/models/reinforcement/tensorflow/minigo/training/fp32/get-data.patch b/models/reinforcement/tensorflow/minigo/training/fp32/get-data.patch new file mode 100644 index 000000000..f76030729 --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/fp32/get-data.patch @@ -0,0 +1,40 @@ +diff --git a/ml_perf/get_data.py b/ml_perf/get_data.py +index 680209f..6dd745b 100644 +--- a/ml_perf/get_data.py ++++ b/ml_perf/get_data.py +@@ -49,24 +49,14 @@ def freeze_graph(path): + + + def main(unused_argv): +- try: +- for d in ['checkpoint', 'target']: +- # Pull the required training checkpoints and models from GCS. +- src = os.path.join(FLAGS.src_dir, d, N) +- dst = os.path.join(FLAGS.dst_dir, d) +- utils.ensure_dir_exists(dst) +- utils.wait(utils.checked_run('gsutil', '-m', 'cp', '-r', src, dst)) +- +- # Freeze the target model. +- freeze_graph(os.path.join(FLAGS.dst_dir, 'target', N, 'target')) +- +- # Freeze the training checkpoint models. +- pattern = os.path.join(FLAGS.dst_dir, 'checkpoint', N, 'work_dir', '*.index') +- for path in glob.glob(pattern): +- freeze_graph(os.path.splitext(path)[0]) +- +- finally: +- asyncio.get_event_loop().close() ++ # Freeze the target model. ++ freeze_graph(os.path.join(FLAGS.dst_dir, 'target', N, 'target')) ++ ++ # Freeze the training checkpoint models. 
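++    # Every *.index checkpoint found under work_dir is frozen to a .pb next to
++    # it; checkpoints are expected to be staged locally under FLAGS.dst_dir
++    # rather than pulled from GCS.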
++ pattern = os.path.join(FLAGS.dst_dir, 'checkpoint', N, 'work_dir', '*.index') ++ for path in glob.glob(pattern): ++ freeze_graph(os.path.splitext(path)[0]) ++ + + + if __name__ == '__main__': +-- +2.25.0 + diff --git a/models/reinforcement/tensorflow/minigo/training/fp32/large-scale-no-bg.patch b/models/reinforcement/tensorflow/minigo/training/fp32/large-scale-no-bg.patch new file mode 100644 index 000000000..bab494e33 --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/fp32/large-scale-no-bg.patch @@ -0,0 +1,16 @@ +diff --git a/run_minigo_mn.sh b/run_minigo_mn.sh +index 06e0633..63c713f 100755 +--- a/run_minigo_mn.sh ++++ b/run_minigo_mn.sh +@@ -29,7 +29,7 @@ BOARD_SIZE=9 python3 ml_perf/reference_implementation.py \ + `ml_perf/hostlist.sh |head -n $PLAY_NODES_PLUS_ONE|tail -n $PLAY_NODES|awk '/./{print "--selfplay_node="$0}'` \ + `ml_perf/hostlist.sh |tail -n $TRAIN_NODES|awk '/./{print "--train_node="$0}'` \ + `ml_perf/hostlist.sh |tail -n $TRAIN_PLUS_EVAL_NODES|head -n $EVAL_NODES |awk '/./{print "--eval_node="$0}'` \ +- --setup_train_workers=True &> train_workers.log & ++ --setup_train_workers=True > train_workers.log + + echo "BOARD_SIZE=9 python3 ml_perf/reference_implementation.py" + # Run training loop +-- +1.8.3.1 + diff --git a/models/reinforcement/tensorflow/minigo/training/fp32/minigo_mlperf.patch b/models/reinforcement/tensorflow/minigo/training/fp32/minigo_mlperf.patch new file mode 100644 index 000000000..4e3286d99 --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/fp32/minigo_mlperf.patch @@ -0,0 +1,2445 @@ +diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md +index 21f086b..7a630d3 100644 +--- a/CONTRIBUTING.md ++++ b/CONTRIBUTING.md +@@ -27,5 +27,3 @@ information on using pull requests. + * We format all our python code with + [autopep8](https://pypi.python.org/pypi/autopep8). If you use Vim, check + out [Vim-CodeFmt](https://github.com/google/vim-codefmt). +-* We use [pylint](https://www.pylint.org/) to check the syntax of our code. +- Not all of our code yet passes pylint, but we're working on it! +diff --git a/WORKSPACE b/WORKSPACE +index cabf233..1977a70 100644 +--- a/WORKSPACE ++++ b/WORKSPACE +@@ -12,11 +12,8 @@ http_archive( + + http_archive( + name = "com_google_absl", +- strip_prefix = "abseil-cpp-666fc1266bccfd8e6eaaa084e7b42580bb8eb199", +- urls = [ +- "http://mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/666fc1266bccfd8e6eaaa084e7b42580bb8eb199.tar.gz", +- "https://github.com/abseil/abseil-cpp/archive/666fc1266bccfd8e6eaaa084e7b42580bb8eb199.tar.gz", +- ], ++ strip_prefix = "abseil-cpp-93dfcf74cb5fccae3da07897d8613ae6cab958a0", ++ urls = ["https://github.com/abseil/abseil-cpp/archive/93dfcf74cb5fccae3da07897d8613ae6cab958a0.tar.gz"], + ) + + http_archive( +diff --git a/build.sh b/build.sh +new file mode 100755 +index 0000000..1f682cc +--- /dev/null ++++ b/build.sh +@@ -0,0 +1,4 @@ ++#!/bin/bash ++. 
./set_avx2_build ++bazel build --incompatible_remove_native_http_archive=false -c opt --verbose_failures --define=tf=1 --define=board_size=9 $BAZEL_BUILD_OPTS cc:selfplay cc:eval ++ +diff --git a/cc/configure_tensorflow.sh b/cc/configure_tensorflow.sh +index 8b39178..3bc9cea 100755 +--- a/cc/configure_tensorflow.sh ++++ b/cc/configure_tensorflow.sh +@@ -4,109 +4,99 @@ set -e + + script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + dst_dir="${script_dir}/tensorflow" +-tmp_dir="/tmp/minigo_tf" +-tmp_pkg_dir="/tmp/tensorflow_pkg" ++tmp_dir="${script_dir}/minigo_tf" ++tmp_pkg_dir="${script_dir}/tensorflow_pkg" + +-rm -rfd ${tmp_dir} + rm -rfd ${tmp_pkg_dir} +-mkdir -p ${tmp_dir} + + rm -rf ${dst_dir}/* + mkdir -p ${dst_dir} + ++if [ -d "${script_dir}/../ml_perf/tools" ]; then ++ echo "Intel AI tools exist." ++else ++ git clone https://github.com/IntelAI/tools.git ${script_dir}/../ml_perf/tools/ ++fi ++ + # TODO(tommadams): we should probably switch to Clang at some point. +-commit_tag="v1.11.0" + +-echo "Cloning tensorflow to ${tmp_dir}" +-git clone https://github.com/tensorflow/tensorflow "${tmp_dir}" ++if [ -d "${tmp_dir}" ]; then ++ pushd "${tmp_dir}" ++else ++ echo "Cloning tensorflow to ${tmp_dir}" ++ git clone https://github.com/tensorflow/tensorflow "${tmp_dir}" ++ ++ pushd "${tmp_dir}" + +-pushd "${tmp_dir}" ++ cherry_pick_tag="02c111ab4269ab73a506164e4b54ba996d28a8cf" ++ prev_tag="8be9158c7a701d933bbe532f5d54df17f47a4284" + +-echo "Checking out ${commit_tag}" +-git checkout "${commit_tag}" ++ git diff "${prev_tag}" "${cherry_pick_tag}" > sample.patch ++ ++ commit_tag="961bb02b882a8bb921e5be1c09c34b51fffd25dc" ++ echo "Checking out ${commit_tag}" ++ git checkout "${commit_tag}" ++ git apply sample.patch ++ cp ${script_dir}/../ml_perf/tools/tensorflow_quantization/graph_transforms/fuse_quantized_convolution.cc ${tmp_dir}/tensorflow/tools/graph_transforms/ ++fi + + # Run the TensorFlow configuration script, setting reasonable values for most + # of the options. + echo "Configuring tensorflow" + cc_opt_flags="${CC_OPT_FLAGS:--march=native}" + ++PYTHON_BIN_PATH=`which python` ++ + CC_OPT_FLAGS="${cc_opt_flags}" \ +-TF_NEED_JEMALLOC=${TF_NEED_JEMALLOC:-1} \ +-TF_NEED_GCP=${TF_NEED_GCP:-1} \ ++PYTHON_BIN_PATH=${PYTHON_BIN_PATH} \ ++USE_DEFAULT_PYTHON_LIB_PATH="${USE_DEFAULT_PYTHON_LIB_PATH:-1}" \ ++TF_NEED_JEMALLOC=${TF_NEED_JEMALLOC:-0} \ ++TF_NEED_GCP=${TF_NEED_GCP:-0} \ + TF_NEED_HDFS=${TF_NEED_HDFS:-0} \ + TF_NEED_S3=${TF_NEED_S3:-0} \ + TF_NEED_KAFKA=${TF_NEED_KAFKA:-0} \ +-TF_NEED_CUDA=${TF_NEED_CUDA:-1} \ ++TF_NEED_CUDA=${TF_NEED_CUDA:-0} \ + TF_NEED_GDR=${TF_NEED_GDR:-0} \ + TF_NEED_VERBS=${TF_NEED_VERBS:-0} \ + TF_NEED_OPENCL_SYCL=${TF_NEED_OPENCL_SYCL:-0} \ ++TF_NEED_ROCM=${TF_NEED_ROCM:-0} \ + TF_CUDA_CLANG=${TF_CUDA_CLANG:-0} \ ++TF_DOWNLOAD_CLANG=${TF_DOWNLOAD_CLANG:-0} \ + TF_NEED_TENSORRT=${TF_NEED_TENSORRT:-0} \ + TF_NEED_MPI=${TF_NEED_MPI:-0} \ + TF_SET_ANDROID_WORKSPACE=${TF_SET_ANDROID_WORKSPACE:-0} \ + TF_NCCL_VERSION=${TF_NCCL_VERSION:-1.3} \ ++TF_ENABLE_XLA=${TF_ENABLE_XLA:-0} \ + ./configure + ++. 
${script_dir}/../set_avx2_build ++BAZEL_OPTS="-c opt --config=mkl \ ++ --action_env=PATH \ ++ --action_env=LD_LIBRARY_PATH \ ++ $BAZEL_BUILD_OPTS \ ++ --copt=-DINTEL_MKLDNN" + echo "Building tensorflow package" +-bazel build -c opt --config=opt --copt="${cc_opt_flags}" //tensorflow/tools/pip_package:build_pip_package ++bazel build -s $BAZEL_OPTS //tensorflow/tools/pip_package:build_pip_package + bazel-bin/tensorflow/tools/pip_package/build_pip_package ${tmp_pkg_dir} + + echo "Tensorflow built-ish" + echo "Unpacking tensorflow package..." + unzip -q ${tmp_pkg_dir}/tensorflow-*.whl -d ${tmp_dir} + ++ + echo "Copying tensor flow headers to ${dst_dir}" + cp -r ${tmp_dir}/tensorflow-*.data/purelib/tensorflow/include/* "${dst_dir}" +- + echo "Building tensorflow libraries" + +-# Add a custom BUILD target for the gRPC runtime. +-# TODO(tommadams): Remove this once the gRPC runtime is linked in to TensorFlow. +-cat <> tensorflow/BUILD +- +-tf_cc_shared_object( +- name = "libgrpc_runtime.so", +- linkopts = select({ +- "//tensorflow:darwin": [ +- "-Wl,-exported_symbols_list", # This line must be directly followed by the exported_symbols.lds file +- "\$(location //tensorflow:tf_exported_symbols.lds)", +- ], +- "//tensorflow:windows": [], +- "//conditions:default": [ +- "-z defs", +- "-Wl,--version-script", # This line must be directly followed by the version_script.lds file +- "\$(location //tensorflow:tf_version_script.lds)", +- ], +- }), +- deps = [ +- "//tensorflow:tf_exported_symbols.lds", +- "//tensorflow:tf_version_script.lds", +- "//tensorflow/core/distributed_runtime/rpc:grpc_runtime", +- ] +-) +-EOF +- +-bazel build -c opt --config=opt --copt="${cc_opt_flags}" \ +- //tensorflow:libgrpc_runtime.so \ ++bazel build -s $BAZEL_OPTS \ + //tensorflow:libtensorflow_cc.so \ + //tensorflow:libtensorflow_framework.so + + echo "Copying tensorflow libraries to ${dst_dir}" +-cp bazel-bin/tensorflow/{libgrpc_runtime,libtensorflow_*}.so "${dst_dir}" +- +-echo "Building toco" +-bazel build -c opt --config=opt --copt="${cc_opt_flags}" //tensorflow/contrib/lite/toco:toco +-cp bazel-bin/tensorflow/contrib/lite/toco/toco "${dst_dir}" +- +-echo "Building TF Lite" +- +-./tensorflow/contrib/lite/tools/make/download_dependencies.sh +-make -j $(nproc) -f tensorflow/contrib/lite/tools/make/Makefile +-cp tensorflow/contrib/lite/tools/make/gen/linux_x86_64/lib/libtensorflow-lite.a $dst_dir/libtensorflow_lite.a +-for dir in contrib/lite contrib/lite/kernels contrib/lite/profiling contrib/lite/schema; do +- mkdir -p $dst_dir/tensorflow/$dir +- cp tensorflow/$dir/*.h $dst_dir/tensorflow/$dir/ +-done +-cp -r tensorflow/contrib/lite/tools/make/downloads/flatbuffers/include/flatbuffers $dst_dir/ ++cp bazel-bin/tensorflow/libtensorflow_*.so "${dst_dir}" ++cp bazel-bin/tensorflow/libtensorflow_*.so.1 "${dst_dir}" ++ ++cp `find ${tmp_dir} |grep libiomp5.so` ${dst_dir} ++cp `find ${tmp_dir} |grep libmklml_intel.so` ${dst_dir} + + popd +diff --git a/cc/dual_net/tf_dual_net.cc b/cc/dual_net/tf_dual_net.cc +index a400cc2..3bee107 100644 +--- a/cc/dual_net/tf_dual_net.cc ++++ b/cc/dual_net/tf_dual_net.cc +@@ -58,6 +58,9 @@ class TfDualNet : public DualNet { + public: + TfWorker(const GraphDef& graph_def) : batch_capacity_(0) { + SessionOptions options; ++ options.config.set_intra_op_parallelism_threads(1); ++ options.config.set_inter_op_parallelism_threads(0); ++ options.config.set_use_per_session_threads(false); + options.config.mutable_gpu_options()->set_allow_growth(true); + session_.reset(NewSession(options)); + 
TF_CHECK_OK(session_->Create(graph_def)); +diff --git a/cc/eval.cc b/cc/eval.cc +index bde9011..525c840 100644 +--- a/cc/eval.cc ++++ b/cc/eval.cc +@@ -68,6 +68,7 @@ DEFINE_string(model, "", + "engine=lite, the model should be .tflite flatbuffer."); + DEFINE_string(model_two, "", "Descriptor for the second model"); + DEFINE_int32(parallel_games, 32, "Number of games to play in parallel."); ++DEFINE_int32(instance_id, 0, "Unique id with multi-instance."); + + // Output flags. + DEFINE_string(output_bigtable, "", +@@ -170,7 +171,10 @@ class Evaluator { + ParseOptionsFromFlags(&game_options_, &player_options_); + + int num_games = FLAGS_parallel_games; +- for (int thread_id = 0; thread_id < num_games; ++thread_id) { ++ int instance_id = FLAGS_instance_id; ++ int thread_id_begin = instance_id*num_games; ++ for (int thread_id = thread_id_begin; ++ thread_id < thread_id_begin+num_games; ++thread_id) { + bool swap_models = (thread_id & 1) != 0; + threads_.emplace_back(std::bind(&Evaluator::ThreadRun, this, thread_id, + swap_models ? &model_b : &model_a, +diff --git a/cc/selfplay.cc b/cc/selfplay.cc +index a3d4d9e..9d3cfc0 100644 +--- a/cc/selfplay.cc ++++ b/cc/selfplay.cc +@@ -119,6 +119,7 @@ DEFINE_int32(parallel_games, 32, "Number of games to play in parallel."); + DEFINE_int32(num_games, 0, + "Total number of games to play. Defaults to parallel_games. " + "Only one of num_games and run_forever must be set."); ++DEFINE_int32(instance_id, 0, "Unique id with multi-instance."); + + // Output flags. + DEFINE_string(output_dir, "", +@@ -244,7 +245,10 @@ class SelfPlayer { + batcher_ = + absl::make_unique(std::move(model_factory)); + } +- for (int i = 0; i < FLAGS_parallel_games; ++i) { ++ int instance_id = FLAGS_instance_id; ++ int thread_id_begin = instance_id * FLAGS_parallel_games; ++ for (int i = thread_id_begin; ++ i < thread_id_begin+FLAGS_parallel_games; ++i) { + threads_.emplace_back(std::bind(&SelfPlayer::ThreadRun, this, i)); + } + for (auto& t : threads_) { +diff --git a/common.py b/common.py +new file mode 100644 +index 0000000..37516cf +--- /dev/null ++++ b/common.py +@@ -0,0 +1,31 @@ ++import os ++ ++class Config(): ++ def __init__(self, tf_root): ++ self.demo_dir = os.path.join(tf_root, 'demo') ++ self.demo_tmp_dir = os.path.join(tf_root, '../demo_tmp') ++ ++ self.pb_dir = os.path.join(self.demo_dir, 'pb') ++ if not os.path.exists(self.pb_dir): ++ os.makedirs(self.pb_dir) ++ self.fp32_optimized_graph = os.path.join(self.pb_dir, 'freezed_resnet50_opt.pb') ++ self.int8_graph = os.path.join(self.pb_dir, 'int8_resnet50.pb') ++ self.int8_graph_logged = os.path.join(self.pb_dir, 'int8_resnet50_logged.pb') ++ self.int8_graph_freese = os.path.join(self.pb_dir, 'int8_resnet50_freese.pb') ++ self.int8_graph_final = os.path.join(self.pb_dir, 'int8_resnet50_final.pb') ++ ++ self.accuracy_script = os.path.join(self.demo_dir, 'accuracy.py') ++ self.benchmark_script = os.path.join(self.demo_dir, 'benchmark.py') ++ self.quantize_script = os.path.join(self.demo_dir, 'quantize_graph.py') ++ ++ self.min_max_log = os.path.join(self.demo_dir, 'min_max.log') ++ ++ ++ input_names = 'input' ++ output_names = 'predict' ++ ++ def set_fp32_graph(self, pb): ++ self.fp32_original_graph = pb ++ ++ def set_dataset(self, ds): ++ self.imagenet_data = ds +diff --git a/dual_net.py b/dual_net.py +index edf946d..83ddcba 100644 +--- a/dual_net.py ++++ b/dual_net.py +@@ -36,6 +36,15 @@ import features as features_lib + import go + import symmetries + ++import horovod.tensorflow as hvd ++ ++from tensorflow.python.framework 
import dtypes ++from tensorflow.core.framework import graph_pb2 ++from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference ++from tensorflow.tools.graph_transforms import TransformGraph ++from ml_perf.utils import * ++ ++import quantize_graph + + flags.DEFINE_integer('train_batch_size', 256, + 'Batch size to use for train/eval evaluation. For GPU ' +@@ -120,6 +129,18 @@ flags.DEFINE_integer( + flags.DEFINE_integer( + 'keep_checkpoint_max', default=5, help='Number of checkpoints to keep.') + ++flags.DEFINE_integer( ++ 'num_inter_threads', default=0, ++ help=('Number of inter threads.')) ++ ++flags.DEFINE_integer( ++ 'num_intra_threads', default=0, ++ help=('Number of intra threads.')) ++ ++flags.DEFINE_bool( ++ 'dist_train', default=False, ++ help=('Using distributed training or not.')) ++ + flags.DEFINE_bool( + 'use_random_symmetry', True, + help='If true random symmetries be used when doing inference.') +@@ -157,7 +178,9 @@ class DualNetwork(): + self.save_file = save_file + self.inference_input = None + self.inference_output = None +- config = tf.ConfigProto() ++ config = tf.ConfigProto( ++ intra_op_parallelism_threads=FLAGS.num_intra_threads, ++ inter_op_parallelism_threads=FLAGS.num_inter_threads) + config.gpu_options.allow_growth = True + self.sess = tf.Session(graph=tf.Graph(), config=config) + self.initialize_graph() +@@ -273,6 +296,8 @@ def model_fn(features, labels, mode, params): + + optimizer = tf.train.MomentumOptimizer( + learning_rate, params['sgd_momentum']) ++ if(params['dist_train']): ++ optimizer = hvd.DistributedOptimizer(optimizer) + if params['use_tpu']: + optimizer = tpu_optimizer.CrossShardOptimizer(optimizer) + with tf.control_dependencies(update_ops): +@@ -428,7 +453,8 @@ def model_inference_fn(features, training, params): + + def mg_res_layer(inputs): + residual = residual_inner(inputs) +- output = mg_activation(inputs + residual) ++ fixed = tf.math.add_n([inputs, residual]) ++ output = mg_activation(fixed) + return output + + def mg_squeeze_excitation_layer(inputs): +@@ -538,15 +564,26 @@ def get_estimator(): + + + def _get_nontpu_estimator(): +- session_config = tf.ConfigProto() ++ session_config = tf.ConfigProto( ++ intra_op_parallelism_threads=FLAGS.num_intra_threads, ++ inter_op_parallelism_threads=FLAGS.num_inter_threads) + session_config.gpu_options.allow_growth = True ++ model_dir = None ++ if(not FLAGS.dist_train) or (hvd.rank()==0): ++ model_dir = FLAGS.work_dir ++ step_count_steps = 50 ++ summary_steps = FLAGS.summary_steps ++ else: ++ step_count_steps = 1000000 ++ summary_steps = 1000000 + run_config = tf.estimator.RunConfig( +- save_summary_steps=FLAGS.summary_steps, ++ log_step_count_steps = step_count_steps, ++ save_summary_steps=summary_steps, + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + session_config=session_config) + return tf.estimator.Estimator( + model_fn, +- model_dir=FLAGS.work_dir, ++ model_dir=model_dir, + config=run_config, + params=FLAGS.flag_values_dict()) + +@@ -618,14 +655,85 @@ def export_model(model_path): + print("Copying {} to {}".format(filename, destination_path)) + tf.gfile.Copy(filename, destination_path) + ++def generate_min_max_log(log_graph_file, tf_records, log_file): ++ cmd = 'numactl -N 0 -l python3 produce_min_max_log.py' ++ cmd += ' --input_graph={0}'.format(log_graph_file) ++ cmd += ' --data_location={0}'.format(tf_records) ++ cmd += ' --num_steps={0}'.format(FLAGS.quantize_test_steps) ++ cmd += ' --batch_size={0}'.format(FLAGS.quantize_test_batch_size) ++ cmd += ' 
--random_rotation={0}'.format(FLAGS.random_rotation) ++ cmd += ' 2> {0}'.format(log_file) ++ print(cmd) ++ subprocess.call(cmd, shell=True) ++ ++def quantization(opt_graph, model_path, tf_records, eval_min_max_every_epoch): ++ # first_quantize ++ rewriter = quantize_graph.GraphRewriter(opt_graph, 'eightbit', None, None, True, [], []) ++ first_quantize_graph = rewriter.rewrite(["policy_output", "value_output"]) ++ ++ if eval_min_max_every_epoch: ++ # insert_min_max_log ++ transform = 'insert_logging(op=RequantizationRange, show_name=true, message="__requant_min_max:")' ++ log_graph = TransformGraph(first_quantize_graph, ["pos_tensor"], ++ ["policy_output", "value_output"], [transform]) ++ with tf.gfile.FastGFile(model_path + '_for_min_max.pb', 'wb') as f: ++ f.write(log_graph.SerializeToString()) ++ ++ # generate_min_max_log ++ with logged_timer('minmax time'): ++ generate_min_max_log(model_path + '_for_min_max.pb', tf_records, model_path + 'log.txt') ++ ++ # apply_calibration ++ transform = 'freeze_requantization_ranges(min_max_log_file="{0}")'.format(model_path + 'log.txt') ++ calibration_graph = TransformGraph(first_quantize_graph, ["pos_tensor"], ++ ["policy_output", "value_output"], [transform]) ++ ++ # fuse_requantize ++ transform = 'fuse_quantized_conv_and_requantize strip_unused_nodes' ++ output_graph = TransformGraph(calibration_graph, ["pos_tensor"], ++ ["policy_output", "value_output"], [transform]) ++ return output_graph ++ ++def optimize_graph(input_graph, model_path, quantizing_graph, tf_records, eval_min_max_every_epoch): ++ fp32_graph = graph_pb2.GraphDef() ++ with tf.gfile.Open(input_graph, "rb") as read_f: ++ data = read_f.read() ++ fp32_graph.ParseFromString(data) ++ ++ opt_graph = optimize_for_inference( ++ fp32_graph, ++ ["pos_tensor"], ++ ["policy_output", "value_output"], ++ dtypes.float32.as_datatype_enum, ++ False) ++ ++ if(quantizing_graph): ++ output_graph = quantization(opt_graph, model_path, tf_records, eval_min_max_every_epoch) ++ else: ++ output_graph = opt_graph ++ ++ with tf.gfile.GFile(model_path + '.pb', 'wb') as write_f: ++ write_f.write(output_graph.SerializeToString()) ++ ++def get_input_tensor(graph): ++ return graph.get_tensor_by_name('pos_tensor:0') ++def get_output_tensor(graph): ++ policy_output = graph.get_tensor_by_name('policy_output:0') ++ value_output = graph.get_tensor_by_name('value_output:0') ++ return policy_output, value_output + + def freeze_graph(model_path): + n = DualNetwork(model_path) + out_graph = tf.graph_util.convert_variables_to_constants( + n.sess, n.sess.graph.as_graph_def(), ["policy_output", "value_output"]) ++ output_graph_def = optimize_for_inference( ++ out_graph, ++ ["pos_tensor"], ++ ["policy_output", "value_output"], ++ dtypes.float32.as_datatype_enum, ++ False) + with tf.gfile.GFile(model_path + '.pb', 'wb') as f: +- f.write(out_graph.SerializeToString()) +- ++ f.write(output_graph_def.SerializeToString()) + + def freeze_graph_tpu(model_path): + """Custom freeze_graph implementation for Cloud TPU.""" +diff --git a/ml_perf/eval_models.py b/ml_perf/eval_models.py +index 74702e4..b552f42 100644 +--- a/ml_perf/eval_models.py ++++ b/ml_perf/eval_models.py +@@ -23,7 +23,7 @@ import os + from absl import app + from reference_implementation import evaluate_model, wait + from rl_loop import fsdb +- ++import ml_perf.mlp_log as mll + + def load_train_times(): + models = [] +@@ -43,10 +43,17 @@ def main(unused_argv): + target = 'tf,' + os.path.join(fsdb.models_dir(), 'target.pb') + models = load_train_times() + for i, (timestamp, 
name, path) in enumerate(models): ++ mll.eval_start(i) + winrate = wait(evaluate_model(path, target, sgf_dir, i + 1)) ++ mll.eval_stop(i) ++ mll.eval_accuracy(i, winrate) + if winrate >= 0.50: + print('Model {} beat target after {}s'.format(name, timestamp)) +- break ++ mll.eval_result(i, timestamp) ++ mll.run_stop('success') ++ return ++ mll.eval_result(i, 0) ++ mll.run_stop('aborted') + + + if __name__ == '__main__': +diff --git a/ml_perf/execute.py b/ml_perf/execute.py +new file mode 100644 +index 0000000..00a6bed +--- /dev/null ++++ b/ml_perf/execute.py +@@ -0,0 +1,69 @@ ++# Copyright 2019 Google LLC ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++ ++"""Run the command in multi-instance mode ++ ++If there is a --seed parameter from input, change seed to generate randomness among instances ++ ++Args: ++ num_instance: the number of instance needed to start ++""" ++ ++import sys ++sys.path.insert(0, '.') # nopep8 ++ ++import asyncio ++from ml_perf.utils import * ++ ++from absl import app, flags ++ ++flags.DEFINE_integer('num_instance', 1, 'Number of instances for selfplay') ++ ++FLAGS = flags.FLAGS ++ ++# Self-play a number of games. ++async def do_execute_mi(): ++ ++ num_instance = FLAGS.num_instance ++ ++ start_copy = False ++ arg_list = [] ++ for arg in sys.argv: ++ if start_copy: ++ arg_list.append(arg) ++ if arg == '--': ++ start_copy = True ++ ++ if num_instance > 1: ++ result_list = checked_run_mi( ++ num_instance, ++ *arg_list ++ ) ++ for result in result_list: ++ # TODO needs to be more generic ++ print ('\n'.join(result.split('\n')[-7:])) ++ else: ++ result = checked_run( ++ *arg_list ++ ) ++ print (result) ++ ++def main(unused_argv): ++ try: ++ wait(do_execute_mi()) ++ finally: ++ asyncio.get_event_loop().close() ++ ++if __name__ == '__main__': ++ app.run(main) +diff --git a/ml_perf/flags/9.mn/architecture.flags b/ml_perf/flags/9.mn/architecture.flags +new file mode 100644 +index 0000000..ec2abf4 +--- /dev/null ++++ b/ml_perf/flags/9.mn/architecture.flags +@@ -0,0 +1,7 @@ ++# architecture.flags: Flags that control the model architecture. ++ ++--conv_width=32 ++--fc_width=64 ++--trunk_layers=9 ++--value_cost_weight=0.25 ++--summary_steps=64 +diff --git a/ml_perf/flags/9.mn/bootstrap.flags b/ml_perf/flags/9.mn/bootstrap.flags +new file mode 100644 +index 0000000..0283a92 +--- /dev/null ++++ b/ml_perf/flags/9.mn/bootstrap.flags +@@ -0,0 +1,9 @@ ++# bootstrap.flags ++# Flags for the first bootstrap round of selfplay. ++ ++--flagfile=ml_perf/flags/9.mn/selfplay.flags ++ ++# Don't perform holdout for the first bootstrap round. 
++--holdout_pct=0 ++ ++--num_readouts=20 +diff --git a/ml_perf/flags/9.mn/bootstrap_mi.flags b/ml_perf/flags/9.mn/bootstrap_mi.flags +new file mode 100644 +index 0000000..f4da7c1 +--- /dev/null ++++ b/ml_perf/flags/9.mn/bootstrap_mi.flags +@@ -0,0 +1,3 @@ ++--num_games=8192 ++--parallel_games=4 ++--multi_instance=True +diff --git a/ml_perf/flags/9.mn/eval.flags b/ml_perf/flags/9.mn/eval.flags +new file mode 100644 +index 0000000..f07d715 +--- /dev/null ++++ b/ml_perf/flags/9.mn/eval.flags +@@ -0,0 +1,6 @@ ++# eval.flags: Flags for playing eval games. ++ ++--flagfile=ml_perf/flags/9.mn/selfplay.flags ++ ++# Play fewer games for eval than selfplay. ++--parallel_games=1 +diff --git a/ml_perf/flags/9.mn/eval_mi.flags b/ml_perf/flags/9.mn/eval_mi.flags +new file mode 100644 +index 0000000..00e960c +--- /dev/null ++++ b/ml_perf/flags/9.mn/eval_mi.flags +@@ -0,0 +1,3 @@ ++--num_games=100 ++--parallel_games=1 ++--multi_instance=True +diff --git a/ml_perf/flags/9.mn/rl_loop.flags b/ml_perf/flags/9.mn/rl_loop.flags +new file mode 100644 +index 0000000..0f85640 +--- /dev/null ++++ b/ml_perf/flags/9.mn/rl_loop.flags +@@ -0,0 +1,11 @@ ++# rl_loop.flags: Flags for the reinforcement learning loop. ++ ++--flags_dir=ml_perf/flags/9.mn/ ++--checkpoint_dir=ml_perf/checkpoint/9/ ++ ++--iterations=30 ++--gating_win_rate=0.49 ++--window_size=10 ++--engine=tf ++--parallel_post_train=2 ++--train_instance_per_numa=2 +diff --git a/ml_perf/flags/9.mn/selfplay.flags b/ml_perf/flags/9.mn/selfplay.flags +new file mode 100644 +index 0000000..7084ec0 +--- /dev/null ++++ b/ml_perf/flags/9.mn/selfplay.flags +@@ -0,0 +1,14 @@ ++# selfplay.flags: Flags for selfplay. ++ ++# This flagfile also serves as the base for the boostrap & eval stages of ++# the RL loop. ++ ++--num_readouts=240 ++--value_init_penalty=0.2 ++--holdout_pct=0.03 ++--disable_resign_pct=0.1 ++--resign_threshold=-0.99 ++ ++# Device-specific selfplay flags. ++--parallel_games=2 ++--virtual_losses=8 +diff --git a/ml_perf/flags/9.mn/selfplay_mi.flags b/ml_perf/flags/9.mn/selfplay_mi.flags +new file mode 100644 +index 0000000..43df7a2 +--- /dev/null ++++ b/ml_perf/flags/9.mn/selfplay_mi.flags +@@ -0,0 +1,3 @@ ++--num_games=4096 ++--parallel_games=2 ++--multi_instance=True +diff --git a/ml_perf/flags/9.mn/train.flags b/ml_perf/flags/9.mn/train.flags +new file mode 100644 +index 0000000..19f8b9d +--- /dev/null ++++ b/ml_perf/flags/9.mn/train.flags +@@ -0,0 +1,15 @@ ++# train.flags: Flags for training. ++ ++--flagfile=ml_perf/flags/9.mn/architecture.flags ++ ++--shuffle_buffer_size=10000 ++--filter_amount=0.5 ++ ++# Device specific hyperparameters re: batch size and LR schedules. ++--train_batch_size=8192 ++--lr_rates=0.32 ++--lr_rates=0.032 ++--lr_rates=0.0032 ++--lr_boundaries=12500 ++--lr_boundaries=18750 ++--l2_strength=0.0001 +diff --git a/ml_perf/flags/9.mn/validate.flags b/ml_perf/flags/9.mn/validate.flags +new file mode 100644 +index 0000000..de4f22d +--- /dev/null ++++ b/ml_perf/flags/9.mn/validate.flags +@@ -0,0 +1,7 @@ ++# validate.flags Flags for validation. ++ ++--flagfile=ml_perf/flags/9.mn/architecture.flags ++ ++--examples_to_validate=256 ++--train_batch_size=64 ++--summary_steps=2 +diff --git a/ml_perf/flags/9/bootstrap.flags b/ml_perf/flags/9/bootstrap.flags +index 4e7341e..29c66d6 100644 +--- a/ml_perf/flags/9/bootstrap.flags ++++ b/ml_perf/flags/9/bootstrap.flags +@@ -6,5 +6,4 @@ + # Don't perform holdout for the first bootstrap round. 
+ --holdout_pct=0 + +---num_games=8192 + --num_readouts=20 +diff --git a/ml_perf/flags/9/bootstrap_mi.flags b/ml_perf/flags/9/bootstrap_mi.flags +new file mode 100644 +index 0000000..f4da7c1 +--- /dev/null ++++ b/ml_perf/flags/9/bootstrap_mi.flags +@@ -0,0 +1,3 @@ ++--num_games=8192 ++--parallel_games=4 ++--multi_instance=True +diff --git a/ml_perf/flags/9/eval.flags b/ml_perf/flags/9/eval.flags +index aecf855..9f8759e 100644 +--- a/ml_perf/flags/9/eval.flags ++++ b/ml_perf/flags/9/eval.flags +@@ -3,5 +3,4 @@ + --flagfile=ml_perf/flags/9/selfplay.flags + + # Play fewer games for eval than selfplay. +---num_games=100 +---parallel_games=100 ++--parallel_games=1 +diff --git a/ml_perf/flags/9/eval_mi.flags b/ml_perf/flags/9/eval_mi.flags +new file mode 100644 +index 0000000..00e960c +--- /dev/null ++++ b/ml_perf/flags/9/eval_mi.flags +@@ -0,0 +1,3 @@ ++--num_games=100 ++--parallel_games=1 ++--multi_instance=True +diff --git a/ml_perf/flags/9/rl_loop.flags b/ml_perf/flags/9/rl_loop.flags +index c6b6dc2..4b8dc29 100644 +--- a/ml_perf/flags/9/rl_loop.flags ++++ b/ml_perf/flags/9/rl_loop.flags +@@ -3,8 +3,8 @@ + --flags_dir=ml_perf/flags/9/ + --checkpoint_dir=ml_perf/checkpoint/9/ + +---iterations=50 ++--iterations=30 + --gating_win_rate=0.49 + --window_size=10 + --engine=tf +---parallel_post_train=true ++--train_instance_per_numa=2 +diff --git a/ml_perf/flags/9/selfplay.flags b/ml_perf/flags/9/selfplay.flags +index 3d8d64c..26e58ef 100644 +--- a/ml_perf/flags/9/selfplay.flags ++++ b/ml_perf/flags/9/selfplay.flags +@@ -3,7 +3,6 @@ + # This flagfile also serves as the base for the boostrap & eval stages of + # the RL loop. + +---num_games=4096 + --num_readouts=240 + --value_init_penalty=0.2 + --holdout_pct=0.03 +@@ -11,5 +10,5 @@ + --resign_threshold=-0.99 + + # Device-specific selfplay flags. +---parallel_games=2048 ++--parallel_games=8 + --virtual_losses=8 +diff --git a/ml_perf/flags/9/selfplay_mi.flags b/ml_perf/flags/9/selfplay_mi.flags +new file mode 100644 +index 0000000..b1db0e8 +--- /dev/null ++++ b/ml_perf/flags/9/selfplay_mi.flags +@@ -0,0 +1,3 @@ ++--num_games=4096 ++--parallel_games=8 ++--multi_instance=True +diff --git a/ml_perf/flags/9/train.flags b/ml_perf/flags/9/train.flags +index aa1a3cf..a65044d 100644 +--- a/ml_perf/flags/9/train.flags ++++ b/ml_perf/flags/9/train.flags +@@ -6,10 +6,10 @@ + --filter_amount=0.5 + + # Device specific hyperparameters re: batch size and LR schedules. +---train_batch_size=4096 +---lr_rates=0.16 +---lr_rates=0.016 +---lr_rates=0.0016 +---lr_boundaries=25000 +---lr_boundaries=37500 ++--train_batch_size=8192 ++--lr_rates=0.32 ++--lr_rates=0.032 ++--lr_rates=0.0032 ++--lr_boundaries=12500 ++--lr_boundaries=18750 + --l2_strength=0.0001 +diff --git a/ml_perf/hostlist.sh b/ml_perf/hostlist.sh +new file mode 100755 +index 0000000..94465f2 +--- /dev/null ++++ b/ml_perf/hostlist.sh +@@ -0,0 +1,3 @@ ++# generate a list of host ip or hostname ++# one ip/hostname per line ++cat $HOSTLIST.txt +diff --git a/ml_perf/mlp_log.py b/ml_perf/mlp_log.py +new file mode 100644 +index 0000000..501baf1 +--- /dev/null ++++ b/ml_perf/mlp_log.py +@@ -0,0 +1,118 @@ ++# Copyright 2019 MLBenchmark Group. All Rights Reserved. ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. 
++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++# ============================================================================== ++ ++"""Utilities for compliance logging.""" ++ ++import logging ++import time ++import inspect ++import sys ++ ++def init_start(): ++ log('init_start', caller_depth=3) ++ ++def init_stop(): ++ log('init_stop', caller_depth=3) ++ ++def run_start(): ++ log('run_start', caller_depth=3) ++ ++def run_stop(status): ++ assert status == 'success' or status == 'aborted' ++ log('run_stop', ++ meta_data = {'status':status}, ++ caller_depth=3) ++ ++def block_start(epoch, count): ++ log('block_start', ++ meta_data = {'first_epoch_num':epoch, ++ 'epoch_count':count}, ++ caller_depth=3) ++ ++def block_stop(epoch): ++ log('block_stop', ++ meta_data = {'first_epoch_num':epoch}, ++ caller_depth=3) ++ ++def epoch_start(epoch): ++ log('epoch_start', ++ meta_data = {'epoch_num':epoch}, ++ caller_depth=3) ++ ++def epoch_stop(epoch): ++ log('epoch_stop', ++ meta_data = {'epoch_num':epoch}, ++ caller_depth=3) ++ ++def eval_start(epoch): ++ log('eval_start', ++ meta_data = {'epoch_num':epoch}, ++ caller_depth=3) ++ ++def eval_stop(epoch): ++ log('eval_stop', ++ meta_data = {'epoch_num':epoch}, ++ caller_depth=3) ++ ++def eval_accuracy(epoch, accuracy): ++ log('eval_accuracy', ++ val = '{}'.format(accuracy), ++ meta_data = {'epoch_num':epoch}, ++ caller_depth=3) ++ ++def global_batch_size(batch_size): ++ log('global_batch_size', ++ val = '{}'.format(batch_size), ++ caller_depth=3) ++ ++def lr_rates(rates): ++ log('opt_base_learning_rate', ++ val = '{}'.format(rates), ++ caller_depth=3) ++ ++def lr_boundaries(boundaries): ++ log('opt_learning_rate_decay_boundary_steps', ++ val = '{}'.format(boundaries), ++ caller_depth=3) ++ ++def save_model(iteration): ++ log('save_model', ++ meta_data = {'iteration':iteration}, ++ caller_depth=3) ++ ++def eval_result(iteration, timestamp): ++ log('eval_result', ++ meta_data = {'iteration':iteration, 'timestamp':timestamp}, ++ caller_depth=3) ++ ++def log(key, val='null', meta_data = None, caller_depth=2): ++ filename, lineno = get_caller(caller_depth) ++ meta_dict = {'lineno': lineno, 'file': filename} ++ if meta_data != None: ++ meta_dict.update(meta_data) ++ meta_string = '{}'.format(meta_dict) ++ print(':::MLL %f %s: {"value": %s, "metadata": %s}'%(time.time(), key, val, meta_string), file=sys.stderr) ++ ++def get_caller(stack_index=2, root_dir=None): ++ ''' Returns file.py:lineno of your caller. A stack_index of 2 will provide ++ the caller of the function calling this function. Notice that stack_index ++ of 2 or more will fail if called from global scope. ''' ++ caller = inspect.getframeinfo(inspect.stack()[stack_index][0]) ++ ++ # Trim the filenames for readability. 
++ filename = caller.filename ++ if root_dir is not None: ++ filename = re.sub("^" + root_dir + "/", "", filename) ++ return (filename, caller.lineno) +diff --git a/ml_perf/reference_implementation.py b/ml_perf/reference_implementation.py +index 1ca724e..e04d873 100644 +--- a/ml_perf/reference_implementation.py ++++ b/ml_perf/reference_implementation.py +@@ -26,17 +26,23 @@ import random + import re + import shutil + import subprocess ++import functools + import tensorflow as tf + import time ++import copy ++import multiprocessing as mp + from ml_perf.utils import * ++import ml_perf.mlp_log as mll + + from absl import app, flags + from rl_loop import example_buffer, fsdb +-from tensorflow import gfile ++import dual_net ++ ++from tensorflow.python.platform import gfile + + N = int(os.environ.get('BOARD_SIZE', 19)) + +-flags.DEFINE_string('checkpoint_dir', 'ml_perf/checkpoint/{}'.format(N), ++flags.DEFINE_string('checkpoint_dir', None, + 'The checkpoint directory specify a start model and a set ' + 'of golden chunks used to start training. If not ' + 'specified, will start from scratch.') +@@ -58,15 +64,33 @@ flags.DEFINE_string('flags_dir', None, + + flags.DEFINE_integer('window_size', 10, + 'Maximum number of recent selfplay rounds to train on.') ++flags.DEFINE_integer('golden_chunk_split', 16, ++ 'Golden chunk of each selfplay is splited to accelerate write golden chunk') + +-flags.DEFINE_boolean('parallel_post_train', False, +- 'If true, run the post-training stages (eval, validation ' +- '& selfplay) in parallel.') ++flags.DEFINE_integer('parallel_post_train', 0, ++ '0: run the post-training stages in serial mode' ++ '1: run the post-training stages (eval, validation ' ++ '& selfplay) in parallel.' ++ '2: run the post-train stage in pipeline mode.') + + flags.DEFINE_string('engine', 'tf', 'The engine to use for selfplay.') + +-FLAGS = flags.FLAGS ++flags.DEFINE_integer('physical_cores', None, 'The number of cores for each node.') ++flags.DEFINE_integer('virtual_cores', None, 'The number of SMT for each node.') ++flags.DEFINE_integer('numa_cores', None, 'The number of core for each numa node.') ++flags.DEFINE_integer('train_instance_per_numa', 2, 'The number of instance for each numa node.') ++ ++flags.DEFINE_multi_string('train_node', [], 'The node:core list for training') ++flags.DEFINE_multi_string('eval_node', [], 'The node list for evaluation') ++flags.DEFINE_multi_string('selfplay_node', [], 'The node list for selfplay.') + ++flags.DEFINE_bool('quantization', True, 'Using Int8 if true.') ++flags.DEFINE_bool('eval_min_max_every_epoch', True, 'Genereting min max log every epoch if true.') ++flags.DEFINE_boolean('random_rotation', True, 'Do random rotation when running for min&max log.') ++flags.DEFINE_integer('quantize_test_steps', 5, 'The steps to run for min&max log.') ++flags.DEFINE_integer('quantize_test_batch_size', 16, 'The batch size for running inference for min&max log.') ++ ++FLAGS = flags.FLAGS + + class State: + """State data used in each iteration of the RL loop. 
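
For orientation, the compliance-logging helpers added in ml_perf/mlp_log.py above are thin wrappers around a single log() call that writes ":::MLL <timestamp> <key>: ..." lines to stderr, tagged with the caller's file and line number. The following is only a minimal usage sketch of the call order the patched RL loop follows; it assumes the script runs from the minigo root so that ml_perf.mlp_log is importable, and the epoch count and win rate are placeholder values, not the benchmark's.

    import ml_perf.mlp_log as mll

    def tiny_run():
        # Mirror of the call order used by the patched RL loop: one init/run
        # pair around the whole run, one epoch pair per RL iteration, and an
        # accuracy record plus run_stop('success'|'aborted') at the end.
        mll.init_start()
        mll.init_stop()
        mll.run_start()
        for epoch in range(2):          # placeholder for FLAGS.iterations
            mll.epoch_start(epoch)
            # ... selfplay / train / post_train would run here ...
            mll.epoch_stop(epoch)
        mll.eval_accuracy(0, 0.51)      # placeholder win rate
        mll.run_stop('success')

    if __name__ == '__main__':
        tiny_run()
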
+@@ -133,17 +157,15 @@ class WinStats: + pattern = '\s*(\S+)' + '\s+(\d+)' * 8 + match = re.search(pattern, line) + if match is None: +- raise ValueError('Can\t parse line "{}"'.format(line)) ++ raise ValueError('Can\'t parse line "{}"'.format(line)) + self.model_name = match.group(1) + raw_stats = [float(x) for x in match.groups()[1:]] + self.black_wins = ColorWinStats(*raw_stats[:4]) + self.white_wins = ColorWinStats(*raw_stats[4:]) + self.total_wins = self.black_wins.total + self.white_wins.total + +- + def initialize_from_checkpoint(state): + """Initialize the reinforcement learning loop from a checkpoint.""" +- + # The checkpoint's work_dir should contain the most recently trained model. + model_paths = glob.glob(os.path.join(FLAGS.checkpoint_dir, + 'work_dir/model.ckpt-*.pb')) +@@ -152,18 +174,19 @@ def initialize_from_checkpoint(state): + 'got [{}]'.format(', '.join(model_paths))) + start_model_path = model_paths[0] + +- # Copy the latest trained model into the models directory and use it on the +- # first round of selfplay. +- state.best_model_name = 'checkpoint' +- shutil.copy(start_model_path, +- os.path.join(fsdb.models_dir(), state.best_model_name + '.pb')) +- + # Copy the training chunks. + golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks') + for basename in os.listdir(golden_chunks_dir): + path = os.path.join(golden_chunks_dir, basename) + shutil.copy(path, fsdb.golden_chunk_dir()) + ++ # Copy the latest trained model into the models directory and use it on the ++ # first round of selfplay. ++ state.best_model_name = 'checkpoint' ++ best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name) ++ ++ dual_net.optimize_graph(start_model_path, best_model_path, FLAGS.quantization, fsdb.golden_chunk_dir()+'/*.zz', FLAGS.eval_min_max_every_epoch) ++ + # Copy the training files. + work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir') + for basename in os.listdir(work_dir): +@@ -171,22 +194,72 @@ def initialize_from_checkpoint(state): + shutil.copy(path, fsdb.working_dir()) + + ++ + def parse_win_stats_table(stats_str, num_lines): + result = [] + lines = stats_str.split('\n') +- while True: +- # Find the start of the win stats table. +- assert len(lines) > 1 +- if 'Black' in lines[0] and 'White' in lines[0] and 'm.lmt.' in lines[1]: +- break +- lines = lines[1:] +- +- # Parse the expected number of lines from the table. +- for line in lines[2:2 + num_lines]: +- result.append(WinStats(line)) + +- return result ++ while True: ++ while True: ++ # Find the start of the win stats table. ++ if len(lines) == 0: ++ return result ++ if 'Black' in lines[0] and 'White' in lines[0] and 'm.lmt.' in lines[1]: ++ break ++ lines = lines[1:] ++ ++ # Parse the expected number of lines from the table. 
++ for line in lines[2:2 + num_lines]: ++ stat = WinStats(line) ++ for s in result: ++ if s.model_name == stat.model_name: ++ s.black_wins.total += stat.black_wins.total ++ s.white_wins.total += stat.white_wins.total ++ s.total_wins += stat.total_wins ++ stat = None ++ break ++ if stat != None: ++ result.append(stat) ++ lines = lines[2 + num_lines:] ++ ++def extract_multi_instance(cmd): ++ cmd_list = flags.FlagValues().read_flags_from_files(cmd) ++ new_cmd_list = [] ++ multi_instance = False ++ num_instance = 0 ++ num_games = 0 ++ parallel_games = 0 ++ ++ for arg in cmd_list: ++ argsplit = arg.split('=', 1) ++ flag = argsplit[0] ++ if flag == '--multi_instance': ++ if argsplit[1] == 'True': ++ multi_instance = True ++ else: ++ multi_instance = False ++ elif flag == '--num_games': ++ num_games = int(argsplit[1]) ++ elif flag == '--parallel_games': ++ parallel_games = int(argsplit[1]) ++ ++ if multi_instance: ++ if num_games % parallel_games != 0: ++ logging.error('Error num_games must be multiply of %d', parallel_games) ++ raise RuntimeError('incompatible num_games/parallel_games combination') ++ num_instance = num_games//parallel_games ++ ++ for arg in cmd_list: ++ argsplit = arg.split('=', 1) ++ flag = argsplit[0] ++ if flag == '--multi_instance': ++ pass ++ elif multi_instance and flag == '--num_games': ++ pass ++ else: ++ new_cmd_list.append(arg) + ++ return multi_instance, num_instance, new_cmd_list + + async def run(*cmd): + """Run the given subprocess command in a coroutine. +@@ -214,8 +287,35 @@ async def run(*cmd): + # Split stdout into lines. + return stdout.split('\n') + ++async def run_distributed(genvs, num_instance, hosts, proclists, numa_nodes, ++ seed, *cmd): ++ """Run the given subprocess command in a coroutine. ++ ++ Args: ++ *cmd: the command to run and its arguments. ++ ++ Returns: ++ The output that the command wrote to stdout as a list of strings, one line ++ per element (stderr output is piped to stdout). + +-def get_golden_chunk_records(): ++ Raises: ++ RuntimeError: if the command returns a non-zero result. ++ """ ++ ++ stdout = await checked_run_distributed(genvs, num_instance, hosts, proclists, ++ numa_nodes, seed, fsdb.mpi_log_dir(), *cmd) ++ ++ log_path = os.path.join(FLAGS.base_dir, get_cmd_name(cmd) + '.log') ++ with gfile.Open(log_path, 'a') as f: ++ f.write(expand_cmd_str(cmd)) ++ f.write('\n') ++ f.write(stdout) ++ f.write('\n') ++ ++ # Split stdout into lines. ++ return stdout.split('\n') ++ ++def get_golden_chunk_records(window_size): + """Return up to num_records of golden chunks to train on. + + Returns: +@@ -223,9 +323,17 @@ def get_golden_chunk_records(): + """ + + pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz') +- return sorted(tf.gfile.Glob(pattern), reverse=True)[:FLAGS.window_size] ++ if window_size > FLAGS.golden_chunk_split * FLAGS.window_size: ++ window_size = FLAGS.golden_chunk_split * FLAGS.window_size ++ return sorted(tf.gfile.Glob(pattern), reverse=True)[:window_size] + + ++def gen_golden_chunk(files, state): ++ buffer = example_buffer.ExampleBuffer(sampling_frac=1.0) ++ buffer.parallel_fill(files[1], threads=1) ++ buffer.flush(os.path.join(fsdb.golden_chunk_dir(), ++ state.output_model_name + '-{}.tfrecord.zz'.format(files[0]))) ++ + # Self-play a number of games. + async def selfplay(state, flagfile='selfplay'): + """Run selfplay and write a training chunk to the fsdb golden_chunk_dir. 
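
As a sanity check on the bookkeeping in extract_multi_instance() above: the instance count is simply num_games divided by parallel_games, with a hard error when the division is not exact. A standalone sketch of that arithmetic follows (hypothetical helper name, no absl flagfile parsing).

    def split_instances(num_games, parallel_games, multi_instance=True):
        # Same rule as extract_multi_instance(): each instance plays
        # `parallel_games` games, so num_games must divide evenly.
        if not multi_instance:
            return 1
        if num_games % parallel_games != 0:
            raise RuntimeError('incompatible num_games/parallel_games combination')
        return num_games // parallel_games

    # With the values from ml_perf/flags/9/selfplay_mi.flags
    # (--num_games=4096 --parallel_games=8) this yields 512 selfplay instances.
    assert split_instances(4096, 8) == 512
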
+@@ -239,39 +347,80 @@ async def selfplay(state, flagfile='selfplay'): + output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name) + holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name) + +- lines = await run( +- 'bazel-bin/cc/selfplay', +- '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)), +- '--model={}'.format(state.best_model_path), +- '--output_dir={}'.format(output_dir), +- '--holdout_dir={}'.format(holdout_dir), +- '--seed={}'.format(state.seed)) +- result = '\n'.join(lines[-6:]) +- logging.info(result) +- stats = parse_win_stats_table(result, 1)[0] +- num_games = stats.total_wins +- logging.info('Black won %0.3f, white won %0.3f', +- stats.black_wins.total / num_games, +- stats.white_wins.total / num_games) +- +- # Write examples to a single record. +- pattern = os.path.join(output_dir, '*', '*.zz') +- random.seed(state.seed) +- tf.set_random_seed(state.seed) +- np.random.seed(state.seed) +- # TODO(tommadams): This method of generating one golden chunk per generation +- # is sub-optimal because each chunk gets reused multiple times for training, +- # introducing bias. Instead, a fresh dataset should be uniformly sampled out +- # of *all* games in the training window before the start of each training run. +- buffer = example_buffer.ExampleBuffer(sampling_frac=1.0) +- +- # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not +- # so. +- logging.info('Writing golden chunk from "{}"'.format(pattern)) +- buffer.parallel_fill(tf.gfile.Glob(pattern)) +- buffer.flush(os.path.join(fsdb.golden_chunk_dir(), +- state.output_model_name + '.tfrecord.zz')) +- ++ multi_instance, num_instance, flag_list = extract_multi_instance( ++ ['--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir, flagfile))]) ++ sp_cmd = ['bazel-bin/cc/selfplay', ++ '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)), ++ '--model={}'.format(state.best_model_path), ++ '--output_dir={}'.format(output_dir), ++ '--holdout_dir={}'.format(holdout_dir)] ++ if not multi_instance: ++ lines = await run( ++ *sp_cmd, ++ '--seed={}'.format(state.seed)) ++ else: ++ if FLAGS.selfplay_node == []: ++ # run selfplay locally ++ lines = await run( ++ 'python3', 'ml_perf/execute.py', ++ '--num_instance={}'.format(num_instance), ++ '--', ++ *sp_cmd, ++ '--seed={}'.format(state.seed)) ++ else: ++ with logged_timer('selfplay mn'): ++ # run one selfplay instance per host ++ lines = await run_distributed( ++ ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'], ++ num_instance, FLAGS.selfplay_node, None, None, state.seed, ++ *sp_cmd) ++ ++ result = '\n'.join(lines) ++ with logged_timer('parse win stats'): ++ stats = parse_win_stats_table(result, 1)[0] ++ num_games = stats.total_wins ++ black_total = stats.black_wins.total ++ white_total = stats.white_wins.total ++ ++ logging.info('Black won %0.3f, white won %0.3f', ++ black_total / num_games, ++ white_total / num_games) ++ bias = abs(white_total - black_total)/num_games ++ logging.info('Black total %d, white total %d, total games %d, bias %0.3f.', ++ black_total, white_total, num_games, bias) ++ ++ with logged_timer('generate golden chunk'): ++ # Write examples to a single record. 
++ pattern = os.path.join(output_dir, '*', '*.zz') ++ files = tf.gfile.Glob(pattern) ++ ++ random.seed(state.seed) ++ tf.set_random_seed(state.seed) ++ np.random.seed(state.seed) ++ ++ # TODO(tommadams): This method of generating one golden chunk per generation ++ # is sub-optimal because each chunk gets reused multiple times for training, ++ # introducing bias. Instead, a fresh dataset should be uniformly sampled out ++ # of *all* games in the training window before the start of each training run. ++ ++ # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not ++ # so. ++ logging.info('Writing golden chunk from "{}"'.format(pattern)) ++ threads = FLAGS.golden_chunk_split ++ file_list = [] ++ files_number = len(files) ++ chunk_size = files_number // threads ++ ++ # split files into N seperate parts ++ for i in range(threads): ++ if i == threads - 1: ++ file_list += [[i, files[chunk_size * i :]]] ++ else: ++ file_list += [[i, files[chunk_size * i : chunk_size * (i + 1)]]] ++ pool = mp.Pool(threads) ++ pool.map(functools.partial(gen_golden_chunk, state=state), file_list) ++ ++ return bias + + async def train(state, tf_records): + """Run training and write a new model to the fsdb models_dir. +@@ -280,15 +429,66 @@ async def train(state, tf_records): + state: the RL loop State instance. + tf_records: a list of paths to TensorFlow records to train on. + """ ++ train_node = FLAGS.train_node ++ num_node = len(train_node) ++ if num_node == 0: ++ dist_train = False ++ else: ++ dist_train = True ++ ++ if dist_train: ++ intra_threads = FLAGS.numa_cores // FLAGS.train_instance_per_numa - 1 ++ numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores ++ instance_per_node = numa_per_node * FLAGS.train_instance_per_numa ++ ++ mpi_async_progress = '' ++ for i in range(numa_per_node): ++ for j in range(FLAGS.train_instance_per_numa): ++ if (not i==0) or (not j==0): ++ mpi_async_progress += ',' ++ mpi_async_progress += '{}'.format(i * FLAGS.numa_cores + j) ++ else: ++ intra_threads = FLAGS.physical_cores + + model_path = os.path.join(fsdb.models_dir(), state.train_model_name) +- await run( +- 'python3', 'train.py', *tf_records, ++ cmd = ['python3', 'train.py', *tf_records, + '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')), + '--work_dir={}'.format(fsdb.working_dir()), + '--export_path={}'.format(model_path), + '--training_seed={}'.format(state.seed), +- '--freeze=true') ++ '--freeze=True', ++ '--num_inter_threads=1', ++ '--num_intra_threads={}'.format(intra_threads)] ++ ++ if(dist_train): ++ genvs = ['HOROVOD_FUSION_THRESHOLD=134217728', ++ 'KMP_BLOCKTIME=0', ++ 'KMP_HW_SUBSET=1T', ++ 'OMP_BIND_PROC=true', ++ 'I_MPI_ASYNC_PROGRESS_PIN=' + mpi_async_progress, ++ 'OMP_NUM_THREADS={}'.format(intra_threads)] ++ hosts = [] ++ proclists = [] ++ numa_nodes = [] ++ for node in range(num_node): ++ # add all instance to the list ++ for numa in range(numa_per_node): ++ for instance in range(FLAGS.train_instance_per_numa): ++ hosts += [train_node[node]] ++ proclist = numa * FLAGS.numa_cores + FLAGS.train_instance_per_numa + instance * intra_threads ++ proclists += ['{}'.format(proclist)] ++ numa_nodes += ['{}'.format(numa)] ++ ++ lines = await run_distributed(genvs, 1, hosts, proclists, numa_nodes, None, *cmd, '--dist_train=True') ++ else: ++ lines = await run(*cmd) ++ print('\n'.join(lines), file=sys.stderr) ++ ++def post_train(state): ++ model_path = os.path.join(fsdb.models_dir(), state.train_model_name) ++ dual_net.optimize_graph(model_path + '.pb', model_path, 
FLAGS.quantization, fsdb.golden_chunk_dir()+'/*.zz', FLAGS.eval_min_max_every_epoch) ++ mll.save_model(state.iter_num-1) ++ + # Append the time elapsed from when the RL was started to when this model + # was trained. + elapsed = time.time() - state.start_time +@@ -315,7 +515,7 @@ async def validate(state, holdout_glob): + '--work_dir={}'.format(fsdb.working_dir())) + + +-async def evaluate_model(eval_model_path, target_model_path, sgf_dir, seed): ++async def evaluate_model(eval_model_path, target_model_path, sgf_dir, seed, flagfile='eval'): + """Evaluate one model against a target. + + Args: +@@ -328,20 +528,49 @@ async def evaluate_model(eval_model_path, target_model_path, sgf_dir, seed): + The win-rate of eval_model against target_model in the range [0, 1]. + """ + +- lines = await run( +- 'bazel-bin/cc/eval', +- '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'eval.flags')), +- '--model={}'.format(eval_model_path), +- '--model_two={}'.format(target_model_path), +- '--sgf_dir={}'.format(sgf_dir), +- '--seed={}'.format(seed)) +- result = '\n'.join(lines[-7:]) ++ multi_instance, num_instance, flag_list = extract_multi_instance( ++ ['--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir, flagfile))]) ++ eval_cmd = ['bazel-bin/cc/eval', ++ '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)), ++ '--model={}'.format(eval_model_path), ++ '--model_two={}'.format(target_model_path), ++ '--sgf_dir={}'.format(sgf_dir)] ++ if not multi_instance: ++ lines = await run(*eval_cmd, '--seed={}'.format(seed)) ++ else: ++ if FLAGS.eval_node == []: ++ # run eval locally ++ lines = await run( ++ 'python3', 'ml_perf/execute.py', ++ '--num_instance={}'.format(num_instance), ++ '--', ++ *eval_cmd, ++ '--seed={}'.format(seed)) ++ else: ++ # run one selfplay instance per host ++ lines = await run_distributed( ++ ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'], ++ num_instance, FLAGS.eval_node, None, None, seed, ++ *eval_cmd) ++ result = '\n'.join(lines) + logging.info(result) + eval_stats, target_stats = parse_win_stats_table(result, 2) + num_games = eval_stats.total_wins + target_stats.total_wins + win_rate = eval_stats.total_wins / num_games ++ eval_total = eval_stats.total_wins ++ black_total = eval_stats.black_wins.total ++ white_total = eval_stats.white_wins.total ++ ++ if eval_total != 0: ++ bias = abs(white_total - black_total) / eval_total ++ else: ++ # by definition bias = 0.0 if eval model win zero games ++ bias = 0.0 + logging.info('Win rate %s vs %s: %.3f', eval_stats.model_name, + target_stats.model_name, win_rate) ++ logging.info('Black total %d, white total %d, eval total %d, bias %0.3f.', ++ black_total, white_total, eval_total, bias) ++ + return win_rate + + +@@ -357,24 +586,45 @@ async def evaluate_trained_model(state): + os.path.join(fsdb.eval_dir(), state.train_model_name), state.seed) + + ++async def evaluate_target_model(state): ++ sgf_dir = os.path.join(fsdb.eval_dir(), 'target') ++ target = 'tf,' + os.path.join(fsdb.models_dir(), 'target.pb') ++ return await evaluate_model( ++ state.train_model_path, target, sgf_dir, state.iter_num) ++ + def rl_loop(): + """The main reinforcement learning (RL) loop.""" + ++ # The 'window_size' reflect the split of golden chunk after selfplay ++ # basically each selfplay generate N golden chunks instead of one to ++ # accelerate write golden chunks (N determined by FLAGS.golden_chunk_slit). ++ # Yet this make effective_window_size dynamic. It should increase by N-1 ++ # to keep the effective window size not change. 
Then increase by N if no big ++ # chunk left. Until it reach FLAGS.window_size * FLAGS.golden_chunk_split ++ ++ window_size = 0 ++ big_chunk_remaining = 0 ++ + state = State() + +- if FLAGS.checkpoint_dir: ++ if FLAGS.checkpoint_dir != None: + # Start from a partially trained model. + initialize_from_checkpoint(state) ++ window_size = len(get_golden_chunk_records(FLAGS.window_size)) ++ big_chunk_remaining = window_size + else: + # Play the first round of selfplay games with a fake model that returns + # random noise. We do this instead of playing multiple games using a single + # model bootstrapped with random noise to avoid any initial bias. ++ mll.epoch_start(state.iter_num) + wait(selfplay(state, 'bootstrap')) ++ window_size += FLAGS.golden_chunk_split + + # Train a real model from the random selfplay games. +- tf_records = get_golden_chunk_records() ++ tf_records = get_golden_chunk_records(window_size) + state.iter_num += 1 + wait(train(state, tf_records)) ++ post_train(state) + + # Select the newly trained model as the best. + state.best_model_name = state.train_model_name +@@ -382,45 +632,124 @@ def rl_loop(): + + # Run selfplay using the new model. + wait(selfplay(state)) ++ window_size += FLAGS.golden_chunk_split ++ mll.epoch_stop(state.iter_num - 1) + ++ first_iter = True ++ state_copy = None ++ model_win_rate = -1.0 + # Now start the full training loop. + while state.iter_num <= FLAGS.iterations: +- # Build holdout glob before incrementing the iteration number because we +- # want to run validation on the previous generation. +- holdout_glob = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num, +- '*') +- +- # Train on shuffled game data from recent selfplay rounds. +- tf_records = get_golden_chunk_records() +- state.iter_num += 1 +- wait(train(state, tf_records)) +- +- if FLAGS.parallel_post_train: +- # Run eval, validation & selfplay in parallel. +- model_win_rate, _, _ = wait([ +- evaluate_trained_model(state), +- validate(state, holdout_glob), +- selfplay(state)]) ++ with logged_timer('iteration time {}'.format(state.iter_num)): ++ mll.epoch_start(state.iter_num) ++ # Build holdout glob before incrementing the iteration number because we ++ # want to run validation on the previous generation. ++ holdout_glob = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num, ++ '*') ++ ++ # Train on shuffled game data from recent selfplay rounds. ++ tf_records = get_golden_chunk_records(window_size) ++ ++ if FLAGS.parallel_post_train == 0: ++ state.iter_num += 1 ++ wait(train(state, tf_records)) ++ post_train(state) ++ # Run eval, validation & selfplay sequentially. ++ wait(selfplay(state)) ++ model_win_rate = wait(evaluate_trained_model(state)) ++ if model_win_rate >= FLAGS.gating_win_rate: ++ # Promote the trained model to the best model and increment the generation ++ # number. ++ state.best_model_name = state.train_model_name ++ state.gen_num += 1 ++ mll.epoch_stop(state.iter_num - 1) ++ # ^ compensate iter_num += 1 above ++ ++ if FLAGS.parallel_post_train == 1: ++ state.iter_num += 1 ++ wait([train(state, tf_records), ++ selfplay(state)]) ++ post_train(state) ++ # Run eval, validation & selfplay in parallel. ++ model_win_rate = wait(evaluate_trained_model(state)) ++ if model_win_rate >= FLAGS.gating_win_rate: ++ # Promote the trained model to the best model and increment the generation ++ # number. 
++ state.best_model_name = state.train_model_name ++ state.gen_num += 1 ++ mll.epoch_stop(state.iter_num - 1) ++ # ^ compensate iter_num += 1 above ++ ++ if FLAGS.parallel_post_train == 2: ++ state_copy = copy.copy(state) ++ state.iter_num += 1 ++ # run training and evaluation/validation/selfplay in parallel ++ # this is software pipeline-ish parallelism ++ # start train[iter] ++ # | start valiation[iter-1] ++ # | wait for validation ++ # | if not first time start evaluation[iter-1] ++ # | if not first time wait for evaluation ++ # | if not first time check for promotion ++ # | start selfplay[iter] ++ # | wait selfplay ++ # wait train ++ train_handle = asyncio.gather(train(state, tf_records), return_exceptions=True) ++ if not first_iter: ++ post_train(state_copy) ++ model_win_rate = wait(evaluate_trained_model(state_copy)) ++ if model_win_rate >= FLAGS.gating_win_rate: ++ # Promote the trained model to the best model ++ state.best_model_name = state_copy.train_model_name ++ mll.epoch_stop(state.iter_num - 1 - 1) ++ # ^---^-- compensate iter_num += 1 above ++ # +-- it is actually last iteration ++ else: ++ first_iter = False ++ wait(selfplay(state)) ++ asyncio.get_event_loop().run_until_complete(train_handle) ++ if not first_iter: ++ if model_win_rate >= FLAGS.gating_win_rate: ++ # Increment the generation number. ++ train_model_name_before = state.train_model_name ++ state.gen_num += 1 ++ ++ # Output dependency: ++ # In parallel post train mode 1, there is output dependence between ++ # evaluation of iteration i (gen_num++) and train of iteration i+1 ++ # (use gen_num for export model path). In parallel post train mode ++ # 2 (this mode), the evluation of iteration i is postponed to ++ # iteration i+1 after the training started, thus train of iteration ++ # i+1 won't generate correct model name when promotion needs to ++ # happen. This part fix up the model name when evaluation decides ++ # there's a promotion ++ train_model_name_after = state.train_model_name ++ model_paths = glob.glob(os.path.join(fsdb.models_dir(), '{}.*'.format(train_model_name_before))) ++ for model in model_paths: ++ logging.info('moving {} --> {}'.format(model, ++ train_model_name_after.join(model.rsplit(train_model_name_before, 1)))) ++ shutil.copy(model, train_model_name_after.join(model.rsplit(train_model_name_before, 1))) ++ ++ if big_chunk_remaining > 0: ++ window_size += FLAGS.golden_chunk_split - 1 ++ big_chunk_remaining -= 1 + else: +- # Run eval, validation & selfplay sequentially. +- model_win_rate = wait(evaluate_trained_model(state)) +- wait(validate(state, holdout_glob)) +- wait(selfplay(state)) +- +- if model_win_rate >= FLAGS.gating_win_rate: +- # Promote the trained model to the best model and increment the generation +- # number. 
+- state.best_model_name = state.train_model_name +- state.gen_num += 1 ++ window_size += FLAGS.golden_chunk_split + ++ # after the main loop, if parallel_post_train = 2 ++ # needs to print epoch_stop for last epoch ++ if FLAGS.parallel_post_train == 2: ++ mll.epoch_stop(state.iter_num - 1) + + def main(unused_argv): + """Run the reinforcement learning loop.""" + ++ mll.init_start() + print('Wiping dir %s' % FLAGS.base_dir, flush=True) + shutil.rmtree(FLAGS.base_dir, ignore_errors=True) + dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(), +- fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()] ++ fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir(), ++ fsdb.mpi_log_dir()] + for d in dirs: + ensure_dir_exists(d); + +@@ -440,8 +769,14 @@ def main(unused_argv): + for handler in logging.getLogger().handlers: + handler.setFormatter(formatter) + ++ logging.info('Selfplay nodes = {}'.format(FLAGS.selfplay_node)) ++ logging.info('Train nodes = {}'.format(FLAGS.train_node)) ++ logging.info('Eval nodes = {}'.format(FLAGS.eval_node)) ++ + with logged_timer('Total time'): + try: ++ mll.init_stop() ++ mll.run_start() + rl_loop() + finally: + asyncio.get_event_loop().close() +diff --git a/ml_perf/utils.py b/ml_perf/utils.py +index 8e6b7c6..bf30f75 100644 +--- a/ml_perf/utils.py ++++ b/ml_perf/utils.py +@@ -20,18 +20,45 @@ sys.path.insert(0, '.') # nopep8 + import asyncio + import logging + import os ++import multiprocessing ++import subprocess ++import fcntl + + from absl import flags + from utils import * + + + def expand_cmd_str(cmd): +- return ' '.join(flags.FlagValues().read_flags_from_files(cmd)) ++ result = ' '.join(flags.FlagValues().read_flags_from_files(cmd)) ++ if cmd[0] == 'mpiexec' or cmd[0] == 'mpirun': ++ result = ' \\\n-host '.join(result.split(' -host ')) ++ # avoid buffer too big to block I/O ++ return result[:8192] + + + def get_cmd_name(cmd): + if cmd[0] == 'python' or cmd[0] == 'python3': + path = cmd[1] ++ for index in range(len(cmd)): ++ if cmd[index] == 'bazel-bin/cc/selfplay': ++ path = cmd[index] ++ break ++ if cmd[index] == 'bazel-bin/cc/eval': ++ path = cmd[index] ++ break ++ elif cmd[0] == 'mpirun' or cmd[0] == 'mpiexec': ++ for index in range(len(cmd)): ++ if cmd[index] == 'train.py': ++ path = cmd[index] ++ break ++ if cmd[index] == 'bazel-bin/cc/selfplay': ++ path = cmd[index] ++ break ++ if cmd[index] == 'bazel-bin/cc/eval': ++ path = cmd[index] ++ break ++ if cmd[index] == 'python' or cmd[index] == 'python3': ++ path = cmd[index+1] + else: + path = cmd[0] + return os.path.splitext(os.path.basename(path))[0] +@@ -73,6 +100,127 @@ async def checked_run(*cmd): + + return stdout + ++async def checked_run_distributed(genvs, num_instance, hosts, proclists, numa_nodes, seed, log_path, *cmd): ++ mpi_cmd = ['mpiexec', ++ '-outfile-pattern', ++ '{}/out-{}-{}-%r.txt'.format(log_path, get_cmd_name(cmd), seed)] ++ for genv in genvs: ++ mpi_cmd = mpi_cmd + ['-genv', genv] ++ num_nodes = len(hosts) ++ instance_per_node = num_instance // num_nodes ++ instance_remaining = num_instance - num_nodes * instance_per_node ++ for index in range(num_nodes): ++ if index < instance_remaining: ++ instance_to_launch = instance_per_node + 1 ++ else: ++ instance_to_launch = instance_per_node ++ ++ if index > 0: ++ mpi_cmd = mpi_cmd + [':'] ++ mpi_cmd = mpi_cmd + ['-host', hosts[index]] ++ ++ if proclists != None: ++ mpi_cmd = mpi_cmd + ['-env', 'KMP_AFFINITY=granularity=fine,compact,1,{}'.format(proclists[index])] ++ ++ if numa_nodes != None: ++ mpi_cmd = mpi_cmd + 
['numactl', '-l', '-N', numa_nodes[index]] ++ ++ if num_instance > 1: ++ mpi_cmd = mpi_cmd + ['python3', 'ml_perf/execute.py', ++ '--num_instance={}'.format(instance_to_launch), ++ '--'] ++ mpi_cmd = mpi_cmd + [*cmd] ++ ++ if seed != None: ++ # ensure different seed for different node ++ mpi_cmd = mpi_cmd + ['--seed={}'.format(seed + index*1023779831)] ++ ++ result = await checked_run(*mpi_cmd) ++ for index in range(num_nodes): ++ filename = '{}/out-{}-{}-{}.txt'.format(log_path, get_cmd_name(cmd), seed, ++ index) ++ outfile = open(filename, 'r') ++ result += outfile.read() ++ outfile.close() ++ return result ++ ++def checked_run_mi(num_instance, *cmd): ++ name = get_cmd_name(cmd) ++ logging.debug('Running %s*%d: %s', name, num_instance, expand_cmd_str(cmd)) ++ num_parallel_instance = int(multiprocessing.cpu_count()) ++ procs=[None]*num_parallel_instance ++ results = [""]*num_parallel_instance ++ result_list = [] ++ ++ cur_instance = 0 ++ # add new proc into procs ++ while cur_instance < num_instance or not all ( ++ proc is None for proc in procs): ++ if None in procs and cur_instance < num_instance: ++ index = procs.index(None) ++ subproc_cmd = [ ++ 'OMP_NUM_THREADS=1', ++ 'KMP_AFFINITY=granularity=fine,proclist=[{}],explicit'.format( ++ ','.join(str(i) for i in list(range( ++ index, index+1)))), ++ *cmd, ++ '--instance_id={}'.format(cur_instance), ++ ] ++ subproc_cmd = ' '.join(subproc_cmd) ++ if (cur_instance == 0): ++ logging.debug("subproc_cmd = {}".format(subproc_cmd)) ++ procs[index] = subprocess.Popen(subproc_cmd, shell=True, ++ stdout=subprocess.PIPE, ++ stderr=subprocess.STDOUT) ++ ++ proc_count = 0 ++ for i in range(num_parallel_instance): ++ if procs[i] != None: ++ proc_count += 1 ++ logging.debug('started instance {} in proc {}. proc count = {}'.format( ++ cur_instance, index, proc_count)) ++ ++ # change stdout of the process to non-blocking ++ # this is for collect output in a single thread ++ flags = fcntl.fcntl(procs[index].stdout, fcntl.F_GETFL) ++ fcntl.fcntl(procs[index].stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK) ++ ++ cur_instance += 1 ++ for index in range(num_parallel_instance): ++ if procs[index] != None: ++ # collect proc output ++ while True: ++ try: ++ line = procs[index].stdout.readline() ++ if line == b'': ++ break ++ results[index] = results[index] + line.decode() ++ except IOError: ++ break ++ ++ ret_val = procs[index].poll() ++ if ret_val == None: ++ continue ++ elif ret_val != 0: ++ logging.info(results[index]) ++ raise RuntimeError( ++ 'Non-zero return code (%d) executing %s' % ( ++ ret_val, subproc_cmd)) ++ ++ if index == 0: ++ logging.debug(results[index]) ++ result_list.append(results[index]) ++ results[index] = "" ++ procs[index] = None ++ ++ proc_count = 0 ++ for i in range(num_parallel_instance): ++ if procs[i] != None: ++ proc_count += 1 ++ logging.debug('proc {} finished. proc count = {}'.format( ++ index, proc_count)) ++ time.sleep(0.001) # avoid busy loop ++ return result_list + + def wait(aws): + """Waits for all of the awaitable objects (e.g. coroutines) in aws to finish. 
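
The checked_run_mi() helper above keeps one pinned subprocess per instance and drains all of their stdout pipes from a single loop; the key trick is flipping each pipe to non-blocking mode with fcntl so a quiet child never stalls the collector. Below is a self-contained sketch of that pattern for one child, using os.read on the raw descriptor instead of the buffered readline; the shell command is only a placeholder.

    import fcntl
    import os
    import subprocess
    import time

    proc = subprocess.Popen('echo hello; sleep 1; echo done', shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # Switch the child's stdout to non-blocking mode, as checked_run_mi() does.
    fd = proc.stdout.fileno()
    fcntl.fcntl(fd, fcntl.F_SETFL, fcntl.fcntl(fd, fcntl.F_GETFL) | os.O_NONBLOCK)

    collected = b''
    while True:
        try:
            chunk = os.read(fd, 4096)       # returns immediately, never blocks
            if chunk:
                collected += chunk
                continue
            break                           # b'' means EOF: child closed stdout
        except BlockingIOError:             # no data ready right now
            if proc.poll() is not None:
                break                       # child exited and pipe is drained
            time.sleep(0.001)               # avoid a busy loop
    proc.wait()
    print(collected.decode(), end='')
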
+diff --git a/preprocessing.py b/preprocessing.py +index 595db38..d5a99a6 100644 +--- a/preprocessing.py ++++ b/preprocessing.py +@@ -26,6 +26,9 @@ import symmetries + import numpy as np + import tensorflow as tf + ++import horovod.tensorflow as hvd ++from tensorflow.python.data.experimental.ops import optimization ++ + TF_RECORD_CONFIG = tf.python_io.TFRecordOptions( + tf.python_io.TFRecordCompressionType.ZLIB) + +@@ -84,11 +87,11 @@ def batch_parse_tf_example(batch_size, example_batch): + 'outcome': tf.FixedLenFeature([], tf.float32), + } + parsed = tf.parse_example(example_batch, features) +- x = tf.decode_raw(parsed['x'], tf.uint8) ++ x = tf.io.decode_raw(parsed['x'], tf.uint8) + x = tf.cast(x, tf.float32) + x = tf.reshape(x, [batch_size, go.N, go.N, + features_lib.NEW_FEATURES_PLANES]) +- pi = tf.decode_raw(parsed['pi'], tf.float32) ++ pi = tf.io.decode_raw(parsed['pi'], tf.float32) + pi = tf.reshape(pi, [batch_size, go.N * go.N + 1]) + outcome = parsed['outcome'] + outcome.set_shape([batch_size]) +@@ -98,7 +101,7 @@ def batch_parse_tf_example(batch_size, example_batch): + def read_tf_records(batch_size, tf_records, num_repeats=1, + shuffle_records=True, shuffle_examples=True, + shuffle_buffer_size=None, interleave=True, +- filter_amount=1.0): ++ filter_amount=1.0, dist_train=False, seed = 0): + """ + Args: + batch_size: batch size to return +@@ -116,6 +119,9 @@ def read_tf_records(batch_size, tf_records, num_repeats=1, + raise ValueError("Must set shuffle buffer size if shuffling examples") + + tf_records = list(tf_records) ++ ++ random.seed(seed) ++ + if shuffle_records: + random.shuffle(tf_records) + record_list = tf.data.Dataset.from_tensor_slices(tf_records) +@@ -126,20 +132,29 @@ def read_tf_records(batch_size, tf_records, num_repeats=1, + buffer_size=8 * 1024 * 1024, + compression_type='ZLIB') + ++ if dist_train: ++ # no need to interleave in data parallelism ++ interleave = False ++ + if interleave: + # cycle_length = how many tfrecord files are read in parallel + # The idea is to shuffle both the order of the files being read, + # and the examples being read from the files. +- dataset = record_list.apply(tf.contrib.data.parallel_interleave( ++ dataset = record_list.apply(tf.data.experimental.parallel_interleave( + map_func, cycle_length=64, sloppy=True)) + else: + dataset = record_list.flat_map(map_func) + + if filter_amount < 1.0: + dataset = dataset.filter( +- lambda _: tf.random_uniform([]) < filter_amount) ++ lambda _: tf.random.uniform([], seed=seed) < filter_amount) ++ dataset = dataset.apply(optimization.optimize(["filter_with_random_uniform_fusion"])) ++ ++ if dist_train: ++ dataset = dataset.shard(hvd.size(), hvd.rank()) + + dataset = dataset.repeat(num_repeats) ++ + if shuffle_examples: + dataset = dataset.shuffle(buffer_size=shuffle_buffer_size) + +@@ -181,7 +196,8 @@ def _random_rotation_pure_tf(x_tensor, outcome_tensor): + def get_input_tensors(batch_size, tf_records, num_repeats=1, + shuffle_records=True, shuffle_examples=True, + shuffle_buffer_size=None, +- filter_amount=0.05, random_rotation=True): ++ filter_amount=0.05, random_rotation=True, ++ dist_train=False, seed = 0, make_one_shot = False): + """Read tf.Records and prepare them for ingestion by dual_net. + + See `read_tf_records` for parameter documentation. 
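
The dist_train branch added to read_tf_records() above shards the example stream across Horovod ranks with dataset.shard(hvd.size(), hvd.rank()), while train.py (later in this patch) divides train_batch_size by hvd.size() so the global batch stays at the flagfile value. A toy illustration of the shard semantics follows, assuming the TF 1.x API this repository targets and hard-coded stand-ins for the Horovod size and rank.

    import tensorflow as tf

    size, rank = 4, 1                       # stand-ins for hvd.size(), hvd.rank()
    dataset = tf.data.Dataset.range(16).shard(size, rank)
    next_elem = dataset.make_one_shot_iterator().get_next()

    vals = []
    with tf.Session() as sess:
        try:
            while True:
                vals.append(int(sess.run(next_elem)))
        except tf.errors.OutOfRangeError:
            pass
    # Rank 1 of 4 keeps every fourth element: [1, 5, 9, 13]
    print(vals)
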
+@@ -197,18 +213,22 @@ def get_input_tensors(batch_size, tf_records, num_repeats=1, + shuffle_examples=shuffle_examples, + shuffle_buffer_size=shuffle_buffer_size, + filter_amount=filter_amount, +- interleave=True) ++ interleave=True, ++ dist_train=dist_train, seed=seed) + dataset = dataset.filter(lambda t: tf.equal(tf.shape(t)[0], batch_size)) + dataset = dataset.map( + functools.partial(batch_parse_tf_example, batch_size)) + if random_rotation: + dataset = dataset.map(_random_rotation_pyfunc) + +- return dataset.make_one_shot_iterator().get_next() +- ++ dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE) ++ if make_one_shot: ++ return dataset.make_one_shot_iterator().get_next() ++ else: ++ return dataset + + def get_tpu_input_tensors(batch_size, tf_records, num_repeats=1, +- filter_amount=1, random_rotation=True): ++ filter_amount=1, random_rotation=True, seed=0): + # TPUs trains on sequential golden chunks to simplify preprocessing and + # reproducibility. + assert len(tf_records) < 101, "Use example_buffer to build a golden_chunk" +@@ -221,7 +241,7 @@ def get_tpu_input_tensors(batch_size, tf_records, num_repeats=1, + shuffle_examples=False, + shuffle_buffer_size=None, + filter_amount=filter_amount, +- interleave=False) ++ interleave=False, seed=seed) + dataset = dataset.filter(lambda t: tf.equal(tf.shape(t)[0], batch_size)) + dataset = dataset.map( + functools.partial(batch_parse_tf_example, batch_size)) +diff --git a/produce_min_max_log.py b/produce_min_max_log.py +new file mode 100644 +index 0000000..493ce38 +--- /dev/null ++++ b/produce_min_max_log.py +@@ -0,0 +1,94 @@ ++# Copyright 2019 Google LLC ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
++ ++#!/usr/bin/env python ++# encoding: utf-8 ++ ++import time ++import os ++ ++import tensorflow as tf ++from tensorflow.core.framework import graph_pb2 ++from tensorflow.python.platform import gfile ++ ++from absl import app, flags ++ ++import preprocessing ++import dual_net ++ ++ ++flags.DEFINE_string('input_graph', None, 'The path of input graph.') ++flags.DEFINE_string('data_location', None, 'The path of input data.') ++flags.DEFINE_integer('num_steps', 20, 'Number of eval steps.') ++flags.DEFINE_integer('batch_size', 20, 'eval batch size.') ++flags.DEFINE_boolean('random_rotation', True, 'Do random rotation if true.') ++ ++ ++FLAGS = flags.FLAGS ++ ++def run_graph(graph, tf_records): ++ ++ data_graph = tf.Graph() ++ with data_graph.as_default(): ++ features, labels = preprocessing.get_input_tensors( ++ FLAGS.batch_size, ++ tf_records, ++ shuffle_buffer_size=100000000, ++ random_rotation=FLAGS.random_rotation, seed=2, ++ dist_train=False, make_one_shot=True) ++ ++ infer_graph = tf.Graph() ++ with infer_graph.as_default(): ++ tf.import_graph_def(graph, name='') ++ ++ input_tensor = dual_net.get_input_tensor(infer_graph) ++ output_tensor = dual_net.get_output_tensor(infer_graph) ++ ++ config = tf.ConfigProto( ++ intra_op_parallelism_threads=FLAGS.num_intra_threads, ++ inter_op_parallelism_threads=FLAGS.num_inter_threads) ++ data_sess = tf.Session(graph=data_graph, config=config) ++ infer_sess = tf.Session(graph=infer_graph, config=config) ++ ++ elapsed = 0 ++ #with tf.contrib.tfprof.ProfileContext('/home/letiank/skx-8180/train_dir/minigo', trace_steps=range(70, 80), dump_steps=[110]): ++ for it in range(FLAGS.num_steps): ++ features_np = data_sess.run(features) ++ start_time = time.time() ++ infer_sess.run(output_tensor, feed_dict={input_tensor: features_np}) ++ elapsed += time.time() - start_time ++ ++def read_graph(input_graph): ++ if not gfile.Exists(input_graph): ++ print("Input graph file '" + input_graph + "' does not exist!") ++ exit(-1) ++ ++ input_graph_def = graph_pb2.GraphDef() ++ with gfile.Open(input_graph, "rb") as f: ++ data = f.read() ++ input_graph_def.ParseFromString(data) ++ ++ return input_graph_def ++ ++ ++def main(unused_argv): ++ """Run the reinforcement learning loop.""" ++ ++ graph = read_graph(FLAGS.input_graph) ++ tf_records = sorted(tf.gfile.Glob(FLAGS.data_location), reverse=True)[:1] ++ print(tf_records) ++ run_graph(graph, tf_records) ++ ++if __name__ == "__main__": ++ app.run(main) +diff --git a/requirements-colab.txt b/requirements-colab.txt +index febb463..f24b44d 100644 +--- a/requirements-colab.txt ++++ b/requirements-colab.txt +@@ -9,7 +9,6 @@ google.cloud.bigtable + #keras + #numpy>=1.14.0 + #protobuf +-pylint + sgf==0.5 + #six + #tqdm>=4.17 +diff --git a/requirements.txt b/requirements.txt +index dcb70ae..86cdf9c 100644 +--- a/requirements.txt ++++ b/requirements.txt +@@ -7,7 +7,6 @@ grpcio-tools + keras + numpy>=1.14.0 + protobuf +-pylint + sgf==0.5 + six + tqdm>=4.17 +diff --git a/rl_loop/example_buffer.py b/rl_loop/example_buffer.py +index 14c7cef..28c77fd 100644 +--- a/rl_loop/example_buffer.py ++++ b/rl_loop/example_buffer.py +@@ -92,9 +92,15 @@ class ExampleBuffer(): + if len(games) > max_games: + games = games[-max_games:] + +- with mp.Pool(threads) as pool: +- res = tqdm(pool.imap(self.func, games), total=len(games)) +- self.examples.extend(itertools.chain.from_iterable(res)) ++ if threads > 1: ++ with mp.Pool(threads) as pool: ++ res = tqdm(pool.imap(self.func, games), total=len(games)) ++ 
self.examples.extend(itertools.chain.from_iterable(res)) ++ else: ++ res = [] ++ for game in games: ++ res += [self.func(game)] ++ self.examples.extend(itertools.chain.from_iterable(res)) + print("Got", len(self.examples), "examples") + + def update(self, new_games): +diff --git a/rl_loop/fsdb.py b/rl_loop/fsdb.py +index ab9d107..442692c 100644 +--- a/rl_loop/fsdb.py ++++ b/rl_loop/fsdb.py +@@ -62,6 +62,7 @@ models_dir = _with_base('models') + selfplay_dir = _with_base('data', 'selfplay') + holdout_dir = _with_base('data', 'holdout') + sgf_dir = _with_base('sgf') ++mpi_log_dir = _with_base('mpi') + eval_dir = _with_base('sgf', 'eval') + golden_chunk_dir = _with_base('data', 'golden_chunks') + flags_path = _with_base('flags.txt') +diff --git a/run.sh b/run.sh +new file mode 100755 +index 0000000..7cc74e7 +--- /dev/null ++++ b/run.sh +@@ -0,0 +1,24 @@ ++#!/bin/bash ++NUMA_COUNT=`cat /proc/cpuinfo |grep physical\ id|sort -u |wc -l` ++VIRT_CORES=`cat /proc/cpuinfo |grep physical\ id|wc -l` ++NUMA_CORES=`cat /proc/cpuinfo |grep cpu\ cores|head -n 1|awk '//{print $4}'` ++PHY_CORES=$(expr $NUMA_CORES \* $NUMA_COUNT) ++ ++echo Physical cores = $PHY_CORES ++echo Virtual cores = $VIRT_CORES ++echo NUMA cores = $NUMA_CORES ++ ++export KMP_HW_SUBSET=2T ++echo KMP_HW_SUBSET = $KMP_HW_SUBSET ++ ++output_dir=${SCRATCH:-$(pwd)} ++echo Output to ${output_dir} ++ ++export KMP_BLOCKTIME=1 ++export KMP_AFFINITY=compact,granularity=fine ++export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/cc/tensorflow ++ulimit -u 760000 ++ ++export PYTHONPATH=$(pwd)/ml_perf/tools/tensorflow_quantization/quantization:$PYTHONPATH ++ ++./run_minigo.sh ${output_dir}/results/$(hostname) ml_perf/flags/9 $1 +diff --git a/run_minigo.sh b/run_minigo.sh +new file mode 100755 +index 0000000..d3cfab1 +--- /dev/null ++++ b/run_minigo.sh +@@ -0,0 +1,24 @@ ++#!/bin/bash ++BASE_DIR=$1 ++FLAG_DIR=$2 ++ ++NUMA_COUNT=`cat /proc/cpuinfo |grep physical\ id|sort -u |wc -l` ++VIRT_CORES=`cat /proc/cpuinfo |grep physical\ id|wc -l` ++NUMA_CORES=`cat /proc/cpuinfo |grep cpu\ cores|head -n 1|awk '//{print $4}'` ++PHY_CORES=$(expr $NUMA_CORES \* $NUMA_COUNT) ++ ++# Run training loop ++BOARD_SIZE=9 python3 ml_perf/reference_implementation.py \ ++ --base_dir=$BASE_DIR \ ++ --flagfile=$FLAG_DIR/rl_loop.flags \ ++ --physical_cores=$PHY_CORES \ ++ --virtual_cores=$VIRT_CORES \ ++ --numa_cores=$NUMA_CORES \ ++ --quantization=$3 \ ++ --train_node=localhost ++ ++# Once the training loop has finished, run model evaluation to find the ++# first trained model that's better than the target ++BOARD_SIZE=9 python3 ml_perf/eval_models.py \ ++ --base_dir=$BASE_DIR \ ++ --flags_dir=$FLAG_DIR +diff --git a/run_minigo_mn.sh b/run_minigo_mn.sh +new file mode 100755 +index 0000000..44df2ea +--- /dev/null ++++ b/run_minigo_mn.sh +@@ -0,0 +1,31 @@ ++#!/bin/bash ++BASE_DIR=$1 ++FLAG_DIR=$2 ++ ++NUMA_COUNT=`cat /proc/cpuinfo |grep physical\ id|sort -u |wc -l` ++VIRT_CORES=`cat /proc/cpuinfo |grep physical\ id|wc -l` ++NUMA_CORES=`cat /proc/cpuinfo |grep cpu\ cores|head -n 1|awk '//{print $4}'` ++PHY_CORES=$(expr $NUMA_CORES \* $NUMA_COUNT) ++ ++NUM_NODES=`ml_perf/hostlist.sh|wc -l` ++TRAIN_NODES=$3 ++PLAY_NODES=$(expr $NUM_NODES - $TRAIN_NODES) ++EVAL_NODES=$PLAY_NODES ++ ++# Run training loop ++BOARD_SIZE=9 python3 ml_perf/reference_implementation.py \ ++ --base_dir=$BASE_DIR \ ++ --flagfile=$FLAG_DIR/rl_loop.flags \ ++ --physical_cores=$PHY_CORES \ ++ --virtual_cores=$VIRT_CORES \ ++ --numa_cores=$NUMA_CORES \ ++ --quantization=$4 \ ++ `ml_perf/hostlist.sh |head -n $PLAY_NODES 
|awk '/./{print "--selfplay_node="$0}'` \ ++ `ml_perf/hostlist.sh |tail -n $TRAIN_NODES|awk '/./{print "--train_node="$0}'` \ ++ `ml_perf/hostlist.sh |head -n $EVAL_NODES |awk '/./{print "--eval_node="$0}'` ++ ++# Once the training loop has finished, run model evaluation to find the ++# first trained model that's better than the target ++BOARD_SIZE=9 python3 ml_perf/eval_models.py \ ++ --base_dir=$BASE_DIR \ ++ --flags_dir=$FLAG_DIR +diff --git a/run_mn.sh b/run_mn.sh +new file mode 100755 +index 0000000..a72b116 +--- /dev/null ++++ b/run_mn.sh +@@ -0,0 +1,24 @@ ++#!/bin/bash ++NUMA_COUNT=`cat /proc/cpuinfo |grep physical\ id|sort -u |wc -l` ++VIRT_CORES=`cat /proc/cpuinfo |grep physical\ id|wc -l` ++NUMA_CORES=`cat /proc/cpuinfo |grep cpu\ cores|head -n 1|awk '//{print $4}'` ++PHY_CORES=$(expr $NUMA_CORES \* $NUMA_COUNT) ++ ++echo Physical cores = $PHY_CORES ++echo Virtual cores = $VIRT_CORES ++echo NUMA cores = $NUMA_CORES ++ ++export KMP_HW_SUBSET=2T ++echo KMP_HW_SUBSET = $KMP_HW_SUBSET ++ ++output_dir=${SCRATCH:-$(pwd)} ++echo Output to ${output_dir} ++ ++export KMP_BLOCKTIME=1 ++export KMP_AFFINITY=compact,granularity=fine ++export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/cc/tensorflow ++ulimit -u 760000 ++ ++export PYTHONPATH=./ml_perf/tools/tensorflow_quantization/quantization:$PYTHONPATH ++ ++./run_minigo_mn.sh ${output_dir}/results/$(hostname) ml_perf/flags/9.mn $1 $2 +diff --git a/set_avx2_build b/set_avx2_build +new file mode 100755 +index 0000000..b60a540 +--- /dev/null ++++ b/set_avx2_build +@@ -0,0 +1,61 @@ ++#This file exports the bazel build opts for AVX2 platforms (broadwell and haswell). By setting -march=haswell and -mtune=broadwell, the binary will run on systems haswell and newer, but will be tuned for broadwell. ++ ++MIN_GCC_MAJOR_VERSION=5 ++MIN_GCC_MINOR_VERSION=3 ++MIN_GCC_REVISION=0 ++GCC_VERSION_STR=$(gcc -dumpversion) ++echo "GCC Version: ${GCC_VERSION_STR}" ++IFS='.' read -r -a GCC_VERSION <<< ${GCC_VERSION_STR} ++ ++if [ "${GCC_VERSION[0]}" -lt "${MIN_GCC_MAJOR_VERSION}" ] ; ++then ++ echo "Your MAJOR version of GCC is too old: ${GCC_VERSION_STR}; it must be at least ${MIN_GCC_MAJOR_VERSION}.${MIN_GCC_MINOR_VERSION}.${MIN_GCC_REVISION}" ++ return 1 ++ ++elif [ "${GCC_VERSION[0]}" -eq "${MIN_GCC_MAJOR_VERSION}" ] ; ++then ++ if [ "${GCC_VERSION[1]}" -lt "${MIN_GCC_MINOR_VERSION}" ] ; ++ then ++ echo "Your MINOR version of GCC is too old: ${GCC_VERSION_STR}; it must be at least ${MIN_GCC_MAJOR_VERSION}.${MIN_GCC_MINOR_VERSION}." 
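run.sh, run_mn.sh and run_minigo_mn.sh all derive the socket, physical-core and hyper-thread counts by grepping /proc/cpuinfo, then pin threads through KMP_HW_SUBSET/KMP_AFFINITY before launching the loop. A rough Python equivalent of that counting logic (Linux only, assuming the usual /proc/cpuinfo layout) is:

def count_cores(cpuinfo_path='/proc/cpuinfo'):
    # Mirrors the shell pipeline: sockets = unique "physical id" values,
    # virtual cores = number of "physical id" lines (one per logical CPU),
    # physical cores = "cpu cores" per socket * socket count.
    with open(cpuinfo_path) as f:
        lines = f.read().splitlines()
    physical_ids = [l.split(':')[1].strip() for l in lines if l.startswith('physical id')]
    cores_per_socket = next(int(l.split(':')[1]) for l in lines if l.startswith('cpu cores'))
    sockets = len(set(physical_ids))
    return {'sockets': sockets,
            'virtual_cores': len(physical_ids),
            'physical_cores': sockets * cores_per_socket}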
++ return 1 ++ fi ++fi ++ ++echo "GCC ${GCC_VERSION_STR}: OK" ++ ++#Don't use the C++11 ABI; use the old one ++#These two options should be equivalent to all the options commented out below ++BAZEL_BUILD_OPTS_BASIC="--cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0 \ ++ --copt=-march=haswell \ ++ --copt=-mtune=broadwell \ ++ --copt=-O3" ++BAZEL_SECURE_BUILD_OPTS="--copt=-Wformat \ ++ --copt=-Wformat-security \ ++ --copt=-fstack-protector \ ++ --copt=-fPIC \ ++ --copt=-fpic \ ++ --linkopt=-znoexecstack \ ++ --linkopt=-zrelro \ ++ --linkopt=-znow \ ++ --linkopt=-fstack-protector \ ++ --linkopt=-pie" ++ ++#basic build flags ++echo "exporting BAZEL_BUILD_OPTS_BASIC=${BAZEL_BUILD_OPTS_BASIC}" ++export BAZEL_BUILD_OPTS_BASIC="${BAZEL_BUILD_OPTS_BASIC}" ++ ++#secure build flags ++BAZEL_BUILD_OPTS="${BAZEL_BUILD_OPTS_BASIC} ${BAZEL_SECURE_BUILD_OPTS}" ++echo "exporting BAZEL_BUILD_OPTS=${BAZEL_BUILD_OPTS}" ++export BAZEL_BUILD_OPTS="${BAZEL_BUILD_OPTS}" ++ ++#basic mkl flags ++BAZEL_MKL_BUILD_OPTS_BASIC="--config=mkl ${BAZEL_BUILD_OPTS_BASIC}" ++echo "exporting BAZEL_MKL_BUILD_OPTS_BASIC=${BAZEL_MKL_BUILD_OPTS_BASIC}" ++export BAZEL_MKL_BUILD_OPTS_BASIC="${BAZEL_MKL_BUILD_OPTS_BASIC}" ++ ++#secure mkl flags ++BAZEL_SECURE_MKL_BUILD_OPTS="--config=mkl ${BAZEL_BUILD_OPTS}" ++echo "exporting BAZEL_SECURE_MKL_BUILD_OPTS=${BAZEL_SECURE_MKL_BUILD_OPTS}" ++export BAZEL_SECURE_MKL_BUILD_OPTS="${BAZEL_SECURE_MKL_BUILD_OPTS}" ++ +diff --git a/test.sh b/test.sh +index 5a2d7fa..45d4672 100755 +--- a/test.sh ++++ b/test.sh +@@ -25,11 +25,6 @@ + cd "$(dirname "$0")" + + lint_fail=0 +-python3 -m pylint *.py || { +- lint_fail=1 +- echo >&2 "--------------------------------------" +- echo >&2 "Py linting did not pass successfully!" +-} + + PYTHONPATH= BOARD_SIZE=9 python3 tests/run_tests.py || { + echo >&2 "--------------------------------------" +diff --git a/train.py b/train.py +index d6b24bc..2554826 100644 +--- a/train.py ++++ b/train.py +@@ -29,6 +29,9 @@ import dual_net + import preprocessing + import utils + ++import ml_perf.mlp_log as mll ++import horovod.tensorflow as hvd ++ + # See www.moderndescartes.com/essays/shuffle_viz for discussion on sizing + flags.DEFINE_integer('shuffle_buffer_size', 2000, + 'Size of buffer used to shuffle train examples.') +@@ -77,6 +80,8 @@ flags.declare_key_flag('work_dir') + flags.declare_key_flag('train_batch_size') + flags.declare_key_flag('num_tpu_cores') + flags.declare_key_flag('use_tpu') ++flags.declare_key_flag('dist_train') ++flags.declare_key_flag('training_seed') + + FLAGS = flags.FLAGS + +@@ -145,6 +150,8 @@ def train(*tf_records: "Records to train on"): + estimator = dual_net.get_estimator() + + effective_batch_size = FLAGS.train_batch_size ++ if FLAGS.dist_train: ++ effective_batch_size = int(FLAGS.train_batch_size/hvd.size()) + if FLAGS.use_tpu: + effective_batch_size *= FLAGS.num_tpu_cores + +@@ -172,14 +179,17 @@ def train(*tf_records: "Records to train on"): + else: + def _input_fn(): + return preprocessing.get_input_tensors( +- FLAGS.train_batch_size, ++ effective_batch_size, + tf_records, + filter_amount=FLAGS.filter_amount, + shuffle_buffer_size=FLAGS.shuffle_buffer_size, +- random_rotation=True) ++ random_rotation=True, seed=FLAGS.training_seed, ++ dist_train=FLAGS.dist_train) + + hooks = [UpdateRatioSessionHook(FLAGS.work_dir), + EchoStepCounterHook(output_dir=FLAGS.work_dir)] ++ if FLAGS.dist_train: ++ hooks.append(hvd.BroadcastGlobalVariablesHook(0)) + + steps = FLAGS.steps_to_train + logging.info("Training, steps = %s, batch = %s -> %s examples", +@@ -209,18 
+219,25 @@ def train(*tf_records: "Records to train on"): + + def main(argv): + """Train on examples and export the updated model weights.""" ++ if FLAGS.dist_train: ++ hvd.init() ++ mll.global_batch_size(FLAGS.train_batch_size) ++ mll.lr_rates(FLAGS.lr_rates) ++ mll.lr_boundaries(FLAGS.lr_boundaries) + tf_records = argv[1:] + logging.info("Training on %s records: %s to %s", + len(tf_records), tf_records[0], tf_records[-1]) + with utils.logged_timer("Training"): + train(*tf_records) +- if FLAGS.export_path: +- dual_net.export_model(FLAGS.export_path) +- if FLAGS.freeze: +- if FLAGS.use_tpu: +- dual_net.freeze_graph_tpu(FLAGS.export_path) +- else: +- dual_net.freeze_graph(FLAGS.export_path) ++ ++ if(not FLAGS.dist_train) or hvd.rank()==0: ++ if FLAGS.export_path: ++ dual_net.export_model(FLAGS.export_path) ++ if FLAGS.freeze: ++ if FLAGS.use_tpu: ++ dual_net.freeze_graph_tpu(FLAGS.export_path) ++ else: ++ dual_net.freeze_graph(FLAGS.export_path) + + + if __name__ == "__main__": diff --git a/models/reinforcement/tensorflow/minigo/training/fp32/minigo_mlperf_large_scale.patch b/models/reinforcement/tensorflow/minigo/training/fp32/minigo_mlperf_large_scale.patch new file mode 100644 index 000000000..062ed2acd --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/fp32/minigo_mlperf_large_scale.patch @@ -0,0 +1,5265 @@ +diff --git a/WORKSPACE b/WORKSPACE +index cabf233..1977a70 100644 +--- a/WORKSPACE ++++ b/WORKSPACE +@@ -12,11 +12,8 @@ http_archive( + + http_archive( + name = "com_google_absl", +- strip_prefix = "abseil-cpp-666fc1266bccfd8e6eaaa084e7b42580bb8eb199", +- urls = [ +- "http://mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/666fc1266bccfd8e6eaaa084e7b42580bb8eb199.tar.gz", +- "https://github.com/abseil/abseil-cpp/archive/666fc1266bccfd8e6eaaa084e7b42580bb8eb199.tar.gz", +- ], ++ strip_prefix = "abseil-cpp-93dfcf74cb5fccae3da07897d8613ae6cab958a0", ++ urls = ["https://github.com/abseil/abseil-cpp/archive/93dfcf74cb5fccae3da07897d8613ae6cab958a0.tar.gz"], + ) + + http_archive( +diff --git a/build.sh b/build.sh +new file mode 100755 +index 0000000..1f682cc +--- /dev/null ++++ b/build.sh +@@ -0,0 +1,4 @@ ++#!/bin/bash ++. ./set_avx2_build ++bazel build --incompatible_remove_native_http_archive=false -c opt --verbose_failures --define=tf=1 --define=board_size=9 $BAZEL_BUILD_OPTS cc:selfplay cc:eval ++ +diff --git a/cc/configure_tensorflow.sh b/cc/configure_tensorflow.sh +index 8b39178..2ea9789 100755 +--- a/cc/configure_tensorflow.sh ++++ b/cc/configure_tensorflow.sh +@@ -4,109 +4,100 @@ set -e + + script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + dst_dir="${script_dir}/tensorflow" +-tmp_dir="/tmp/minigo_tf" +-tmp_pkg_dir="/tmp/tensorflow_pkg" ++tmp_dir="${script_dir}/minigo_tf" ++tmp_pkg_dir="${script_dir}/tensorflow_pkg" + +-rm -rfd ${tmp_dir} + rm -rfd ${tmp_pkg_dir} +-mkdir -p ${tmp_dir} + + rm -rf ${dst_dir}/* + mkdir -p ${dst_dir} + ++if [ -d "${script_dir}/ml_perf/tools" ]; then ++ echo "Intel AI tools exist." ++else ++ git clone https://github.com/IntelAI/tools.git ${script_dir}/ml_perf/tools/ ++fi ++ + # TODO(tommadams): we should probably switch to Clang at some point. 
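The train.py hunks above follow the standard Horovod recipe for data-parallel training: initialize Horovod, divide the global batch size by hvd.size() so every worker consumes an equal shard, wrap the optimizer in hvd.DistributedOptimizer, broadcast the initial variables from rank 0, and let only rank 0 export and freeze the model. A condensed sketch of that pattern outside the Estimator plumbing used here (the momentum value is illustrative; the learning rate and batch size are the ones set in ml_perf/flags/9/train.flags):

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()

global_batch_size = 8192
per_worker_batch = global_batch_size // hvd.size()    # each worker sees its share

optimizer = tf.train.MomentumOptimizer(learning_rate=0.32, momentum=0.9)
optimizer = hvd.DistributedOptimizer(optimizer)        # allreduce-averaged gradients

hooks = [hvd.BroadcastGlobalVariablesHook(0)]          # sync initial weights from rank 0

# ... build the input_fn with per_worker_batch and train with `hooks` ...

if hvd.rank() == 0:
    pass  # only rank 0 exports and freezes the trained model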
+-commit_tag="v1.11.0" + +-echo "Cloning tensorflow to ${tmp_dir}" +-git clone https://github.com/tensorflow/tensorflow "${tmp_dir}" ++if [ -d "${tmp_dir}" ]; then ++ pushd "${tmp_dir}" ++else ++ echo "Cloning tensorflow to ${tmp_dir}" ++ git clone https://github.com/tensorflow/tensorflow "${tmp_dir}" ++ cp cc/tf_int8_fusion.patch "${tmp_dir}" ++ ++ pushd "${tmp_dir}" + +-pushd "${tmp_dir}" ++ cherry_pick_tag="02c111ab4269ab73a506164e4b54ba996d28a8cf" ++ prev_tag="8be9158c7a701d933bbe532f5d54df17f47a4284" + +-echo "Checking out ${commit_tag}" +-git checkout "${commit_tag}" ++ git diff "${prev_tag}" "${cherry_pick_tag}" > sample.patch ++ ++ commit_tag="961bb02b882a8bb921e5be1c09c34b51fffd25dc" ++ echo "Checking out ${commit_tag}" ++ git checkout "${commit_tag}" ++ git apply sample.patch ++ git apply tf_int8_fusion.patch ++fi + + # Run the TensorFlow configuration script, setting reasonable values for most + # of the options. + echo "Configuring tensorflow" + cc_opt_flags="${CC_OPT_FLAGS:--march=native}" + ++PYTHON_BIN_PATH=`which python` ++ + CC_OPT_FLAGS="${cc_opt_flags}" \ +-TF_NEED_JEMALLOC=${TF_NEED_JEMALLOC:-1} \ +-TF_NEED_GCP=${TF_NEED_GCP:-1} \ ++PYTHON_BIN_PATH=${PYTHON_BIN_PATH} \ ++USE_DEFAULT_PYTHON_LIB_PATH="${USE_DEFAULT_PYTHON_LIB_PATH:-1}" \ ++TF_NEED_JEMALLOC=${TF_NEED_JEMALLOC:-0} \ ++TF_NEED_GCP=${TF_NEED_GCP:-0} \ + TF_NEED_HDFS=${TF_NEED_HDFS:-0} \ + TF_NEED_S3=${TF_NEED_S3:-0} \ + TF_NEED_KAFKA=${TF_NEED_KAFKA:-0} \ +-TF_NEED_CUDA=${TF_NEED_CUDA:-1} \ ++TF_NEED_CUDA=${TF_NEED_CUDA:-0} \ + TF_NEED_GDR=${TF_NEED_GDR:-0} \ + TF_NEED_VERBS=${TF_NEED_VERBS:-0} \ + TF_NEED_OPENCL_SYCL=${TF_NEED_OPENCL_SYCL:-0} \ ++TF_NEED_ROCM=${TF_NEED_ROCM:-0} \ + TF_CUDA_CLANG=${TF_CUDA_CLANG:-0} \ ++TF_DOWNLOAD_CLANG=${TF_DOWNLOAD_CLANG:-0} \ + TF_NEED_TENSORRT=${TF_NEED_TENSORRT:-0} \ + TF_NEED_MPI=${TF_NEED_MPI:-0} \ + TF_SET_ANDROID_WORKSPACE=${TF_SET_ANDROID_WORKSPACE:-0} \ + TF_NCCL_VERSION=${TF_NCCL_VERSION:-1.3} \ ++TF_ENABLE_XLA=${TF_ENABLE_XLA:-0} \ + ./configure + ++. ${script_dir}/../set_avx2_build ++BAZEL_OPTS="-c opt --config=mkl \ ++ --action_env=PATH \ ++ --action_env=LD_LIBRARY_PATH \ ++ $BAZEL_BUILD_OPTS \ ++ --copt=-DINTEL_MKLDNN" + echo "Building tensorflow package" +-bazel build -c opt --config=opt --copt="${cc_opt_flags}" //tensorflow/tools/pip_package:build_pip_package ++bazel build -s $BAZEL_OPTS //tensorflow/tools/pip_package:build_pip_package + bazel-bin/tensorflow/tools/pip_package/build_pip_package ${tmp_pkg_dir} + + echo "Tensorflow built-ish" + echo "Unpacking tensorflow package..." + unzip -q ${tmp_pkg_dir}/tensorflow-*.whl -d ${tmp_dir} + ++ + echo "Copying tensor flow headers to ${dst_dir}" + cp -r ${tmp_dir}/tensorflow-*.data/purelib/tensorflow/include/* "${dst_dir}" +- + echo "Building tensorflow libraries" + +-# Add a custom BUILD target for the gRPC runtime. +-# TODO(tommadams): Remove this once the gRPC runtime is linked in to TensorFlow. 
+-cat <> tensorflow/BUILD +- +-tf_cc_shared_object( +- name = "libgrpc_runtime.so", +- linkopts = select({ +- "//tensorflow:darwin": [ +- "-Wl,-exported_symbols_list", # This line must be directly followed by the exported_symbols.lds file +- "\$(location //tensorflow:tf_exported_symbols.lds)", +- ], +- "//tensorflow:windows": [], +- "//conditions:default": [ +- "-z defs", +- "-Wl,--version-script", # This line must be directly followed by the version_script.lds file +- "\$(location //tensorflow:tf_version_script.lds)", +- ], +- }), +- deps = [ +- "//tensorflow:tf_exported_symbols.lds", +- "//tensorflow:tf_version_script.lds", +- "//tensorflow/core/distributed_runtime/rpc:grpc_runtime", +- ] +-) +-EOF +- +-bazel build -c opt --config=opt --copt="${cc_opt_flags}" \ +- //tensorflow:libgrpc_runtime.so \ ++bazel build -s $BAZEL_OPTS \ + //tensorflow:libtensorflow_cc.so \ + //tensorflow:libtensorflow_framework.so + + echo "Copying tensorflow libraries to ${dst_dir}" +-cp bazel-bin/tensorflow/{libgrpc_runtime,libtensorflow_*}.so "${dst_dir}" +- +-echo "Building toco" +-bazel build -c opt --config=opt --copt="${cc_opt_flags}" //tensorflow/contrib/lite/toco:toco +-cp bazel-bin/tensorflow/contrib/lite/toco/toco "${dst_dir}" +- +-echo "Building TF Lite" +- +-./tensorflow/contrib/lite/tools/make/download_dependencies.sh +-make -j $(nproc) -f tensorflow/contrib/lite/tools/make/Makefile +-cp tensorflow/contrib/lite/tools/make/gen/linux_x86_64/lib/libtensorflow-lite.a $dst_dir/libtensorflow_lite.a +-for dir in contrib/lite contrib/lite/kernels contrib/lite/profiling contrib/lite/schema; do +- mkdir -p $dst_dir/tensorflow/$dir +- cp tensorflow/$dir/*.h $dst_dir/tensorflow/$dir/ +-done +-cp -r tensorflow/contrib/lite/tools/make/downloads/flatbuffers/include/flatbuffers $dst_dir/ ++cp bazel-bin/tensorflow/libtensorflow_*.so "${dst_dir}" ++cp bazel-bin/tensorflow/libtensorflow_*.so.1 "${dst_dir}" ++ ++cp `find ${tmp_dir} |grep libiomp5.so` ${dst_dir} ++cp `find ${tmp_dir} |grep libmklml_intel.so` ${dst_dir} + + popd +diff --git a/cc/dual_net/tf_dual_net.cc b/cc/dual_net/tf_dual_net.cc +index a400cc2..3bee107 100644 +--- a/cc/dual_net/tf_dual_net.cc ++++ b/cc/dual_net/tf_dual_net.cc +@@ -58,6 +58,9 @@ class TfDualNet : public DualNet { + public: + TfWorker(const GraphDef& graph_def) : batch_capacity_(0) { + SessionOptions options; ++ options.config.set_intra_op_parallelism_threads(1); ++ options.config.set_inter_op_parallelism_threads(0); ++ options.config.set_use_per_session_threads(false); + options.config.mutable_gpu_options()->set_allow_growth(true); + session_.reset(NewSession(options)); + TF_CHECK_OK(session_->Create(graph_def)); +diff --git a/cc/eval.cc b/cc/eval.cc +index bde9011..525c840 100644 +--- a/cc/eval.cc ++++ b/cc/eval.cc +@@ -68,6 +68,7 @@ DEFINE_string(model, "", + "engine=lite, the model should be .tflite flatbuffer."); + DEFINE_string(model_two, "", "Descriptor for the second model"); + DEFINE_int32(parallel_games, 32, "Number of games to play in parallel."); ++DEFINE_int32(instance_id, 0, "Unique id with multi-instance."); + + // Output flags. 
+ DEFINE_string(output_bigtable, "", +@@ -170,7 +171,10 @@ class Evaluator { + ParseOptionsFromFlags(&game_options_, &player_options_); + + int num_games = FLAGS_parallel_games; +- for (int thread_id = 0; thread_id < num_games; ++thread_id) { ++ int instance_id = FLAGS_instance_id; ++ int thread_id_begin = instance_id*num_games; ++ for (int thread_id = thread_id_begin; ++ thread_id < thread_id_begin+num_games; ++thread_id) { + bool swap_models = (thread_id & 1) != 0; + threads_.emplace_back(std::bind(&Evaluator::ThreadRun, this, thread_id, + swap_models ? &model_b : &model_a, +diff --git a/cc/selfplay.cc b/cc/selfplay.cc +index a3d4d9e..9d3cfc0 100644 +--- a/cc/selfplay.cc ++++ b/cc/selfplay.cc +@@ -119,6 +119,7 @@ DEFINE_int32(parallel_games, 32, "Number of games to play in parallel."); + DEFINE_int32(num_games, 0, + "Total number of games to play. Defaults to parallel_games. " + "Only one of num_games and run_forever must be set."); ++DEFINE_int32(instance_id, 0, "Unique id with multi-instance."); + + // Output flags. + DEFINE_string(output_dir, "", +@@ -244,7 +245,10 @@ class SelfPlayer { + batcher_ = + absl::make_unique(std::move(model_factory)); + } +- for (int i = 0; i < FLAGS_parallel_games; ++i) { ++ int instance_id = FLAGS_instance_id; ++ int thread_id_begin = instance_id * FLAGS_parallel_games; ++ for (int i = thread_id_begin; ++ i < thread_id_begin+FLAGS_parallel_games; ++i) { + threads_.emplace_back(std::bind(&SelfPlayer::ThreadRun, this, i)); + } + for (auto& t : threads_) { +diff --git a/cc/tf_int8_fusion.patch b/cc/tf_int8_fusion.patch +new file mode 100644 +index 0000000..4d1b8e3 +--- /dev/null ++++ b/cc/tf_int8_fusion.patch +@@ -0,0 +1,422 @@ ++diff --git a/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc b/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc ++index 5aa2dd4..c860473 100644 ++--- a/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc +++++ b/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc ++@@ -1,11 +1,8 @@ ++ /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. ++- ++ Licensed under the Apache License, Version 2.0 (the "License"); ++ you may not use this file except in compliance with the License. ++ You may obtain a copy of the License at ++- ++ http://www.apache.org/licenses/LICENSE-2.0 ++- ++ Unless required by applicable law or agreed to in writing, software ++ distributed under the License is distributed on an "AS IS" BASIS, ++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
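Both cc/selfplay.cc and cc/eval.cc gain an --instance_id flag so that several selfplay or eval processes can run side by side on one host: each instance offsets its thread ids by instance_id * parallel_games and therefore owns a disjoint block of game ids. The same bookkeeping expressed in Python, purely for illustration:

def thread_id_range(instance_id, parallel_games):
    # Instance 0 owns ids [0, parallel_games), instance 1 the next block, etc.
    begin = instance_id * parallel_games
    return range(begin, begin + parallel_games)

# e.g. 4 instances x 16 parallel games -> ids 0-15, 16-31, 32-47, 48-63
blocks = [list(thread_id_range(i, 16)) for i in range(4)]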
++@@ -36,23 +33,28 @@ Status FuseQuantizedConvolutionAndRequantize( ++ GraphDef* output_graph_def) { ++ std::map node_map; ++ MapNamesToNodes(input_graph_def, &node_map); +++ bool is_perchannel = false; ++ GraphDef replaced_graph_def; ++ TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes( ++ input_graph_def, // clang-format off ++ ++- {"Requantize", +++ {"Requantize|RequantizePerChannel", ++ { ++ {"QuantizedConv2D|QuantizedConv2DWithBias|QuantizedConv2DWithRelu|" ++- "QuantizedConv2DWithBiasAndRelu|QuantizedConv2DWithBiasSumAndRelu"}, +++ "QuantizedConv2DWithBiasAndRelu|QuantizedConv2DWithBiasSumAndRelu|" +++ "QuantizedDepthwiseConv2DWithBiasAndRelu"}, ++ {"QuantizedConv2D|QuantizedConv2DWithBias|QuantizedConv2DWithRelu|" ++- "QuantizedConv2DWithBiasAndRelu|QuantizedConv2DWithBiasSumAndRelu"}, +++ "QuantizedConv2DWithBiasAndRelu|QuantizedConv2DWithBiasSumAndRelu|" +++ "QuantizedDepthwiseConv2DWithBiasAndRelu"}, ++ {"QuantizedConv2D|QuantizedConv2DWithBias|QuantizedConv2DWithRelu|" ++- "QuantizedConv2DWithBiasAndRelu|QuantizedConv2DWithBiasSumAndRelu"}, +++ "QuantizedConv2DWithBiasAndRelu|QuantizedConv2DWithBiasSumAndRelu|" +++ "QuantizedDepthwiseConv2DWithBiasAndRelu"}, ++ {"Const"}, ++ {"Const"} ++ } ++ }, // clang-format on */ ++- [&node_map](const NodeMatch& match, const std::set& input_nodes, +++ [&node_map, &is_perchannel](const NodeMatch& match, +++ const std::set& input_nodes, ++ const std::set& output_nodes, ++ std::vector* new_nodes) { ++ // TODO(mdfaijul/sheng): Current implementation assumed all ++@@ -61,13 +63,14 @@ Status FuseQuantizedConvolutionAndRequantize( ++ ++ // Find all the nodes we expect in the subgraph. ++ const NodeDef& requantize_node = match.node; ++- CHECK_EQ("Requantize", requantize_node.op()); ++ const NodeDef& quantized_conv2D_node = match.inputs[0].node; ++ const NodeDef& const_requantize_range_min_node = match.inputs[3].node; ++ CHECK_EQ("Const", const_requantize_range_min_node.op()); ++ const NodeDef& const_requantize_range_max_node = match.inputs[4].node; ++ CHECK_EQ("Const", const_requantize_range_max_node.op()); ++ +++ is_perchannel = ("RequantizePerChannel" == requantize_node.op()); +++ ++ string quantized_conv2D_op_name = quantized_conv2D_node.op(); ++ // Set up the new fused version of the convolution op. ++ NodeDef fused_conv; ++@@ -78,47 +81,177 @@ Status FuseQuantizedConvolutionAndRequantize( ++ "QuantizedConv2DWithBiasSumAndRelu") == 0) ++ n_input -= 1; // -1 since summand is moved after frozen min-max ++ ++- for (int i=0; i < n_input; i++) ++- AddNodeInput(quantized_conv2D_node.input(i), &fused_conv); ++- +++ string control_input; +++ string current_input; +++ for (int i=0; i < n_input; i++) { +++ current_input = quantized_conv2D_node.input(i); +++ if (current_input.length() > 0 && current_input[0] == '^') { +++ control_input = current_input; +++ } else { +++ AddNodeInput(current_input, &fused_conv); +++ } +++ } ++ AddNodeInput(const_requantize_range_min_node.name(), &fused_conv); ++ AddNodeInput(const_requantize_range_max_node.name(), &fused_conv); ++ ++- // Add additional inputs to ++- // QuantizedConv2DWithBiasSumAndReluAndRequantize +++ // Ensure QuantizedConv2DWithBiasSumAndReluAndRequantize receives +++ // integer summand. Because requantization fusion is registered +++ // for integer summand only. 
++ if (quantized_conv2D_op_name.compare( ++ "QuantizedConv2DWithBiasSumAndRelu") == 0) { ++- const NodeDef *in_requantize = node_map[node_map[ ++- quantized_conv2D_node.input(n_input)]->input(0)]; ++- string summand(in_requantize->name()); ++- string min_summand(in_requantize->name() + ":1"); ++- string max_summand(in_requantize->name() + ":2"); +++ const NodeDef *summand_node = node_map[quantized_conv2D_node.input( +++ n_input)]; +++ NodeDef* new_summand_node = nullptr; +++ NodeDef quantize_node; +++ if (summand_node->op() != "Dequantize") { +++ // Quantizing the summand. +++ // Add some common constants we need for reshaping inputs. +++ NodeDef reshape_dims; +++ reshape_dims.set_op("Const"); +++ reshape_dims.set_name(summand_node->name() + "/reshape_dims"); +++ SetNodeAttr("dtype", DT_INT32, &reshape_dims); +++ Tensor reshape_dims_tensor(DT_INT32, {1}); +++ reshape_dims_tensor.flat()(0) = -1; +++ SetNodeTensorAttr( +++ "value", reshape_dims_tensor, &reshape_dims); +++ AddNodeInput("^" + summand_node->name(), &reshape_dims); +++ +++ NodeDef reduction_dims; +++ reduction_dims.set_op("Const"); +++ reduction_dims.set_name(summand_node->name() + "/reduction_dims"); +++ SetNodeAttr("dtype", DT_INT32, &reduction_dims); +++ Tensor reduction_dims_tensor(DT_INT32, {1}); +++ reduction_dims_tensor.flat()(0) = 0; +++ SetNodeTensorAttr("value", reduction_dims_tensor, +++ &reduction_dims); +++ AddNodeInput("^" + summand_node->name(), &reduction_dims); +++ +++ NodeDef reshape_node; +++ reshape_node.set_op("Reshape"); +++ reshape_node.set_name(summand_node->name() + "/reshape"); +++ SetNodeAttr("T", DT_FLOAT, &reshape_node); +++ +++ NodeDef min_node; +++ min_node.set_op("Min"); +++ min_node.set_name(summand_node->name() + "/min"); +++ SetNodeAttr("T", DT_FLOAT, &min_node); +++ SetNodeAttr("keep_dims", false, &min_node); +++ AddNodeInput(reshape_node.name(), &min_node); +++ AddNodeInput(reduction_dims.name(), &min_node); +++ +++ NodeDef max_node; +++ max_node.set_op("Max"); +++ max_node.set_name(summand_node->name() + "/max"); +++ SetNodeAttr("T", DT_FLOAT, &max_node); +++ SetNodeAttr("keep_dims", false, &max_node); +++ AddNodeInput(reshape_node.name(), &max_node); +++ AddNodeInput(reduction_dims.name(), &max_node); +++ +++ // NodeDef quantize_node; +++ quantize_node.set_op("QuantizeV2"); +++ quantize_node.set_name(summand_node->name() + "/quantize"); +++ // Decide data type of quantize op +++ std::vector relu_ops = { +++ "Relu", +++ "Relu6" +++ }; +++ bool is_relu = std::find(relu_ops.begin(), relu_ops.end(), +++ summand_node->op()) != relu_ops.end(); +++ if (is_relu) +++ SetNodeAttr("T", DT_QUINT8, &quantize_node); +++ else +++ SetNodeAttr("T", DT_QINT8, &quantize_node); +++ SetNodeAttr("mode", "SCALED", &quantize_node); +++ +++ AddNodeInput(summand_node->name(), &reshape_node); +++ AddNodeInput(reshape_dims.name(), &reshape_node); +++ +++ AddNodeInput(summand_node->name(), &quantize_node); +++ AddNodeInput(min_node.name(), &quantize_node); +++ AddNodeInput(max_node.name(), &quantize_node); +++ +++ new_nodes->push_back(reshape_dims); +++ new_nodes->push_back(reduction_dims); +++ new_nodes->push_back(reshape_node); +++ new_nodes->push_back(min_node); +++ new_nodes->push_back(max_node); +++ new_nodes->push_back(quantize_node); +++ // Set the new summand node for fused_conv +++ new_summand_node = &quantize_node; +++ } else { +++ // If summand node is "Dequantize" then either "QuantizeV2" or +++ // "Requantize{PerChannel}" is feeding Dequantize op. Set new_summand_node +++ // as the input of summand node. 
+++ new_summand_node = const_cast(node_map[ +++ summand_node->input(0)]); +++ } +++ string summand(new_summand_node->name()); +++ string min_summand(new_summand_node->name() + ":1"); +++ string max_summand(new_summand_node->name() + ":2"); ++ AddNodeInput(summand, &fused_conv); ++ AddNodeInput(min_summand, &fused_conv); ++ AddNodeInput(max_summand, &fused_conv); ++ ++- // Signed version QuantizedConv2DWithBiasSumAndReluAndRequantize ++- // if Relu does not follow the convolution operation ++- std::vector signed_ops = { ++- "QuantizedConv2DWithBias", ++- "QuantizedConv2D" ++- }; ++- bool is_signed_summand = +++ DataType summand_type; +++ // New summand node should be QuantizeV2 or +++ // Requantize{PerChannel} +++ if (new_summand_node->op() == "QuantizeV2") { +++ TF_RETURN_IF_ERROR(GetNodeAttr(*new_summand_node, +++ "T", &summand_type)); +++ } else if (new_summand_node->op() == "RequantizePerChannel") { +++ TF_RETURN_IF_ERROR(GetNodeAttr(*new_summand_node, +++ "out_type", &summand_type)); +++ } else if (new_summand_node->op() == "Requantize") { +++ // Requantize op is Eigen kernel that does non-SCALED quantization +++ // and always maps into quint8. However, for MKLDNN fusion, which is +++ // SCALED quantization, the summand fused requantize op may have +++ // qint8 or quint8 as its output type. Therefore, it is needed to set +++ // the summand_type correctly. +++ std::vector signed_ops = { +++ "QuantizedConv2DWithBias", +++ "QuantizedConv2D" +++ }; +++ bool is_signed_summand = ++ std::find(signed_ops.begin(), signed_ops.end(), ++- node_map[in_requantize->input(0)]->op()) != signed_ops.end(); ++- if (is_signed_summand) { ++- fused_conv.set_op( ++- "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"); ++- SetNodeAttr("Tsummand", DT_QINT8, &fused_conv); +++ node_map[new_summand_node->input(0)]->op()) != signed_ops.end(); +++ summand_type = is_signed_summand ? 
DT_QINT8 : DT_QUINT8; ++ } else { ++- SetNodeAttr("Tsummand", DT_QUINT8, &fused_conv); +++ return Status(error::Code::FAILED_PRECONDITION, +++ "Fusion is not supported, a fix is required."); ++ } +++ SetNodeAttr("Tsummand", summand_type, &fused_conv); +++ // Decide whether signed version of +++ // QuantizedConv2DWithBiasSumAndReluAndRequantize or not +++ if (summand_type == DT_QINT8) +++ fused_conv.set_op( +++ "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"); ++ } ++- CopyNodeAttr(quantized_conv2D_node, "Tinput", "Tinput", &fused_conv); +++ +++ // Add control input to the very end of the input list +++ // of the newly fused op +++ if (control_input.length() > 0) +++ AddNodeInput(control_input, &fused_conv); +++ +++ CopyNodeAttr(quantized_conv2D_node, "Tinput", "Tinput", &fused_conv); ++ CopyNodeAttr(quantized_conv2D_node, "Tfilter", "Tfilter", &fused_conv); ++ CopyNodeAttr(quantized_conv2D_node, "strides", "strides", &fused_conv); ++ CopyNodeAttr(quantized_conv2D_node, "padding", "padding", &fused_conv); ++ +++ std::vector fused_quantized_bias_ops = { +++ "QuantizedConv2DWithBias", +++ "QuantizedConv2DWithBiasAndRelu", +++ "QuantizedDepthwiseConv2DWithBias", +++ "QuantizedDepthwiseConv2DWithBiasAndRelu", +++ "QuantizedConv2DWithBiasSumAndRelu", +++ }; +++ if (std::find(fused_quantized_bias_ops.begin(), +++ fused_quantized_bias_ops.end(), +++ quantized_conv2D_node.op()) != fused_quantized_bias_ops.end()) { +++ SetNodeAttr("Tbias", DT_FLOAT, &fused_conv); +++ } +++ if (HasNodeAttr(quantized_conv2D_node, "padding_list")) +++ CopyNodeAttr(quantized_conv2D_node, "padding_list", +++ "padding_list", &fused_conv); ++ // Copy dilation attribute if exsit in the orginal node ++ if (HasNodeAttr(quantized_conv2D_node, "dilations")) ++ CopyNodeAttr(quantized_conv2D_node, "dilations", ++@@ -135,30 +268,53 @@ Status FuseQuantizedConvolutionAndRequantize( ++ return Status::OK(); ++ }, ++ {}, &replaced_graph_def)); ++- ++- // Convert bias float -> int32 on replaced_graph_def ++- std::vector fused_requantized_bias_ops = { +++ +++ // After Requantize op fusion, fix attributes for nodes in the graph, if threre is +++ // some discrepency. And also quantize the bias (float -> int32) +++ // List of requantize fused ops that have biases. +++ std::vector fused_requantized_bias_ops = { ++ "QuantizedConv2DWithBiasAndRequantize", ++ "QuantizedConv2DWithBiasAndReluAndRequantize", ++ "QuantizedConv2DWithBiasSumAndReluAndRequantize", ++- "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize" ++- }; +++ "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize", +++ "QuantizedDepthwiseConv2DWithBiasAndRequantize", +++ "QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize" +++ }; +++ ++ node_map.clear(); ++ MapNamesToNodes(replaced_graph_def, &node_map); ++ for (auto& node_pair : node_map) { ++ const NodeDef *node = node_pair.second; +++ // An workaround to fix attributes of "Dequantize" op with non-perchannel +++ // quantization. "Dequantize" node should accept DT_QINT8 if the input node +++ // is "QuantizedConv2DAndRequantize" or +++ // "QuantizedConv2DWithBiasAndRequantize". 
+++ if (str_util::StartsWith(node->op(), "Dequantize")) { +++ std::string input_node_op = +++ node_map[NodeNameFromInput(node->input(0))]->op(); +++ if (str_util::StartsWith(input_node_op, +++ "QuantizedConv2DAndRequantize") || +++ str_util::StartsWith(input_node_op, +++ "QuantizedConv2DWithBiasAndRequantize")) { +++ SetNodeAttr("T", DT_QINT8, const_cast(node)); +++ SetNodeAttr("mode", "SCALED", const_cast(node)); +++ } +++ continue; +++ } +++ // Quantize bias to int32 if input min-max values are constants. +++ // This is guaranteed if the preceeding op is a fused requantize op. ++ bool is_fused_requantized_conv_op = ++- std::find(fused_requantized_bias_ops.begin(), ++- fused_requantized_bias_ops.end(), ++- node->op()) != fused_requantized_bias_ops.end(); +++ std::find(fused_requantized_bias_ops.begin(), +++ fused_requantized_bias_ops.end(), node->op()) +++ != fused_requantized_bias_ops.end(); ++ if (is_fused_requantized_conv_op) { ++- // If the op is not fed by Another Requantize op, ++- // then we coonvert bias as Int32 ++- string input_op = node_map[NodeNameFromInput(node->input(0))]->op(); ++- if (str_util::StartsWith(input_op, "QuantizedConv2D") && ++- str_util::EndsWith(input_op, "AndRequantize")) { +++ std::string preceeding_op = node_map[NodeNameFromInput( +++ node->input(0))]->op(); +++ if (str_util::StartsWith(preceeding_op, "Quantized") && +++ str_util::StrContains(preceeding_op, "Conv2D") && +++ str_util::EndsWith(preceeding_op, "AndRequantize")) { ++ NodeDef *bias_node = const_cast(node_map[NodeNameFromInput( ++- node->input(2))]); +++ node->input(2))]); ++ const NodeDef *min_input_node = node_map[NodeNameFromInput( ++ node_map[node->input(0)]->input(7))]; ++ const NodeDef *max_input_node = node_map[NodeNameFromInput( ++@@ -171,41 +327,50 @@ Status FuseQuantizedConvolutionAndRequantize( ++ GetNodeTensorAttr(*min_input_node, "value").flat()(0); ++ const float max_input = ++ GetNodeTensorAttr(*max_input_node, "value").flat()(0); ++- const float min_filter = ++- GetNodeTensorAttr(*min_filter_node, "value").flat()(0); ++- const float max_filter = ++- GetNodeTensorAttr(*max_filter_node, "value").flat()(0); ++- ++- TensorProto float_tensor_proto = bias_node->attr().at("value").tensor(); ++- Tensor float_tensor; ++- if(!float_tensor.FromProto(float_tensor_proto)) { ++- TF_RETURN_IF_ERROR(::tensorflow::errors::InvalidArgument( ++- "TensorProto object is not valid.")); ++- } ++- if (float_tensor.dtype() != DT_FLOAT) { ++- TF_RETURN_IF_ERROR(::tensorflow::errors::Unimplemented( ++- "Expected float tensor.")); ++- } ++- float *p_bias_float = float_tensor.flat().data(); +++ const Tensor& min_filter_tensor = +++ GetNodeTensorAttr(*min_filter_node, "value"); +++ const Tensor& max_filter_tensor = +++ GetNodeTensorAttr(*max_filter_node, "value"); +++ const float* min_filter = min_filter_tensor.flat().data(); +++ const float* max_filter = max_filter_tensor.flat().data(); +++ size_t num_scale_factors = min_filter_tensor.NumElements(); ++ ++- Tensor int32_tensor = Tensor(DT_QINT32, float_tensor.shape()); ++- qint32 *p_bias_int32 = int32_tensor.flat().data(); ++- ++- float bias_scale = 255.0 * 127.0 / ++- (std::max(std::abs(max_input), std::abs(min_input)) * ++- std::max(std::abs(max_filter), std::abs(min_filter))); ++- int64 nelems = float_tensor.NumElements(); ++- for (int64 n = 0; n < nelems; n++) ++- p_bias_int32[n] = (int32_t) (p_bias_float[n] * bias_scale); +++ TensorProto float_tensor_proto = +++ bias_node->attr().at("value").tensor(); +++ Tensor float_bias_tensor; +++ 
CHECK(float_bias_tensor.FromProto(float_tensor_proto)); +++ CHECK_EQ(float_bias_tensor.dtype(), DT_FLOAT); +++ float *float_bias = float_bias_tensor.flat().data(); ++ +++ Tensor int32_bias_tensor = Tensor(DT_QINT32, float_bias_tensor.shape()); +++ qint32 *int32_bias = int32_bias_tensor.flat().data(); +++ std::vector scales(num_scale_factors); +++ for (size_t i = 0; i < num_scale_factors; ++i) { +++ scales[i] = 255.0 * 127.0 / +++ (std::max(std::abs(max_input), std::abs(min_input)) * +++ std::max(std::abs(max_filter[i]), std::abs(min_filter[i]))); +++ } +++ int64 bias_length = float_bias_tensor.NumElements(); +++ if (num_scale_factors > 1) { +++ if (bias_length != num_scale_factors) +++ return Status(error::Code::FAILED_PRECONDITION, +++ "Number of filter output channels is not" +++ "equal to bias size"); +++ else { +++ for (int64 i = 0; i < bias_length; i++) +++ int32_bias[i] = (int32_t) (float_bias[i] * scales[i]); +++ } +++ } else { +++ for (int64 i = 0; i < bias_length; i++) +++ int32_bias[i] = (int32_t) (float_bias[i] * scales[0]); +++ } ++ bias_node->clear_attr(); ++ AttrValue attr_type; ++- attr_type.set_type(int32_tensor.dtype()); +++ attr_type.set_type(int32_bias_tensor.dtype()); ++ bias_node->mutable_attr()->insert({"dtype", attr_type}); ++- ++ AttrValue attr_tensor; ++ TensorProto* t = attr_tensor.mutable_tensor(); ++- int32_tensor.AsProtoTensorContent(t); +++ int32_bias_tensor.AsProtoTensorContent(t); ++ bias_node->mutable_attr()->insert({"value", attr_tensor}); ++ SetNodeAttr("Tbias", DT_QINT32, const_cast(node)); ++ } else { ++@@ -222,4 +387,4 @@ REGISTER_GRAPH_TRANSFORM("fuse_quantized_conv_and_requantize", ++ ++ } // namespace graph_transforms ++ } // namespace tensorflow ++-#endif // INTEL_MKL +++#endif // INTEL_MKL ++\ No newline at end of file +diff --git a/cluster/common.sh b/cluster/common.sh +old mode 100644 +new mode 100755 +diff --git a/cluster/minigui/run-local.sh b/cluster/minigui/run-local.sh +old mode 100755 +new mode 100644 +diff --git a/cluster/unset-common.sh b/cluster/unset-common.sh +old mode 100644 +new mode 100755 +diff --git a/cluster/utils.sh b/cluster/utils.sh +old mode 100644 +new mode 100755 +diff --git a/common.py b/common.py +new file mode 100644 +index 0000000..37516cf +--- /dev/null ++++ b/common.py +@@ -0,0 +1,31 @@ ++import os ++ ++class Config(): ++ def __init__(self, tf_root): ++ self.demo_dir = os.path.join(tf_root, 'demo') ++ self.demo_tmp_dir = os.path.join(tf_root, '../demo_tmp') ++ ++ self.pb_dir = os.path.join(self.demo_dir, 'pb') ++ if not os.path.exists(self.pb_dir): ++ os.makedirs(self.pb_dir) ++ self.fp32_optimized_graph = os.path.join(self.pb_dir, 'freezed_resnet50_opt.pb') ++ self.int8_graph = os.path.join(self.pb_dir, 'int8_resnet50.pb') ++ self.int8_graph_logged = os.path.join(self.pb_dir, 'int8_resnet50_logged.pb') ++ self.int8_graph_freese = os.path.join(self.pb_dir, 'int8_resnet50_freese.pb') ++ self.int8_graph_final = os.path.join(self.pb_dir, 'int8_resnet50_final.pb') ++ ++ self.accuracy_script = os.path.join(self.demo_dir, 'accuracy.py') ++ self.benchmark_script = os.path.join(self.demo_dir, 'benchmark.py') ++ self.quantize_script = os.path.join(self.demo_dir, 'quantize_graph.py') ++ ++ self.min_max_log = os.path.join(self.demo_dir, 'min_max.log') ++ ++ ++ input_names = 'input' ++ output_names = 'predict' ++ ++ def set_fp32_graph(self, pb): ++ self.fp32_original_graph = pb ++ ++ def set_dataset(self, ds): ++ self.imagenet_data = ds +diff --git a/dual_net.py b/dual_net.py +index edf946d..36dd3b8 100644 +--- a/dual_net.py 
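The bias handling in the fusion patch converts the float bias to qint32 with one scale per output channel, scale_i = 255 * 127 / (max(|min_input|, |max_input|) * max(|min_filter_i|, |max_filter_i|)), and falls back to a single scale when the filter min/max are scalars. A numpy sketch of that conversion:

import numpy as np

def quantize_bias(float_bias, min_input, max_input, min_filter, max_filter):
    # min_filter/max_filter: per-output-channel arrays for per-channel
    # quantization, or length-1 arrays for per-tensor quantization.
    float_bias = np.asarray(float_bias, dtype=np.float32)
    min_filter = np.atleast_1d(np.asarray(min_filter, dtype=np.float32))
    max_filter = np.atleast_1d(np.asarray(max_filter, dtype=np.float32))
    input_range = max(abs(min_input), abs(max_input))
    scales = 255.0 * 127.0 / (input_range * np.maximum(np.abs(min_filter), np.abs(max_filter)))
    if scales.size > 1 and scales.size != float_bias.size:
        raise ValueError("filter output channels must match bias length")
    return (float_bias * scales).astype(np.int32)      # stored as qint32 in the graph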
++++ b/dual_net.py +@@ -22,6 +22,7 @@ from absl import flags + import functools + import logging + import os.path ++import shutil + import time + import numpy as np + import random +@@ -36,6 +37,15 @@ import features as features_lib + import go + import symmetries + ++import horovod.tensorflow as hvd ++ ++from tensorflow.python.framework import dtypes ++from tensorflow.core.framework import graph_pb2 ++from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference ++from tensorflow.tools.graph_transforms import TransformGraph ++from ml_perf.utils import * ++ ++import quantize_graph + + flags.DEFINE_integer('train_batch_size', 256, + 'Batch size to use for train/eval evaluation. For GPU ' +@@ -120,6 +130,18 @@ flags.DEFINE_integer( + flags.DEFINE_integer( + 'keep_checkpoint_max', default=5, help='Number of checkpoints to keep.') + ++flags.DEFINE_integer( ++ 'num_inter_threads', default=0, ++ help=('Number of inter threads.')) ++ ++flags.DEFINE_integer( ++ 'num_intra_threads', default=0, ++ help=('Number of intra threads.')) ++ ++flags.DEFINE_bool( ++ 'dist_train', default=False, ++ help=('Using distributed training or not.')) ++ + flags.DEFINE_bool( + 'use_random_symmetry', True, + help='If true random symmetries be used when doing inference.') +@@ -157,7 +179,9 @@ class DualNetwork(): + self.save_file = save_file + self.inference_input = None + self.inference_output = None +- config = tf.ConfigProto() ++ config = tf.ConfigProto( ++ intra_op_parallelism_threads=FLAGS.num_intra_threads, ++ inter_op_parallelism_threads=FLAGS.num_inter_threads) + config.gpu_options.allow_growth = True + self.sess = tf.Session(graph=tf.Graph(), config=config) + self.initialize_graph() +@@ -273,6 +297,8 @@ def model_fn(features, labels, mode, params): + + optimizer = tf.train.MomentumOptimizer( + learning_rate, params['sgd_momentum']) ++ if(params['dist_train']): ++ optimizer = hvd.DistributedOptimizer(optimizer) + if params['use_tpu']: + optimizer = tpu_optimizer.CrossShardOptimizer(optimizer) + with tf.control_dependencies(update_ops): +@@ -376,6 +402,164 @@ def model_fn(features, labels, mode, params): + return tpu_estimator_spec.as_estimator_spec() + + ++def model_fn_new(features, labels, mode, params): ++ """ ++ Create the model for estimator api ++ Args: ++ features: tensor with shape ++ [BATCH_SIZE, go.N, go.N, features_lib.NEW_FEATURES_PLANES] ++ labels: dict from string to tensor with shape ++ 'pi_tensor': [BATCH_SIZE, go.N * go.N + 1] ++ 'value_tensor': [BATCH_SIZE] ++ mode: a tf.estimator.ModeKeys (batchnorm params update for TRAIN only) ++ params: A dictionary (Typically derived from the FLAGS object.) 
++ Returns: tf.estimator.EstimatorSpec with props ++ mode: same as mode arg ++ predictions: dict of tensors ++ 'policy': [BATCH_SIZE, go.N * go.N + 1] ++ 'value': [BATCH_SIZE] ++ loss: a single value tensor ++ train_op: train op ++ eval_metric_ops ++ return dict of tensors ++ logits: [BATCH_SIZE, go.N * go.N + 1] ++ """ ++ ++ policy_output, value_output, logits = model_inference_fn( ++ features, mode == tf.estimator.ModeKeys.TRAIN, params) ++ ++ # train ops ++ policy_cost = tf.reduce_mean( ++ tf.nn.softmax_cross_entropy_with_logits_v2( ++ logits=logits, labels=tf.stop_gradient(labels['pi_tensor']))) ++ ++ value_cost = params['value_cost_weight'] * tf.reduce_mean( ++ tf.square(value_output - labels['value_tensor'])) ++ ++ reg_vars = [v for v in tf.trainable_variables() ++ if 'bias' not in v.name and 'beta' not in v.name] ++ l2_cost = params['l2_strength'] * \ ++ tf.add_n([tf.nn.l2_loss(v) for v in reg_vars]) ++ ++ combined_cost = policy_cost + value_cost + l2_cost ++ ++ global_step = tf.train.get_or_create_global_step() ++ learning_rate = tf.train.piecewise_constant( ++ global_step, params['lr_boundaries'], params['lr_rates']) ++ update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) ++ ++ # Insert quantization ops if requested ++ if params['quantize']: ++ if mode == tf.estimator.ModeKeys.TRAIN: ++ tf.contrib.quantize.create_training_graph( ++ quant_delay=params['quant_delay']) ++ else: ++ tf.contrib.quantize.create_eval_graph() ++ ++ optimizer = tf.train.MomentumOptimizer( ++ learning_rate, params['sgd_momentum']) ++ if(params['dist_train']): ++ optimizer = hvd.DistributedOptimizer(optimizer) ++ if params['use_tpu']: ++ optimizer = tpu_optimizer.CrossShardOptimizer(optimizer) ++ with tf.control_dependencies(update_ops): ++ train_op = optimizer.minimize(combined_cost, global_step=global_step) ++ ++ # Computations to be executed on CPU, outside of the main TPU queues. ++ def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cost, ++ value_cost, l2_cost, combined_cost, step, ++ est_mode=tf.estimator.ModeKeys.TRAIN): ++ policy_entropy = -tf.reduce_mean(tf.reduce_sum( ++ policy_output * tf.log(policy_output), axis=1)) ++ # pi_tensor is one_hot when generated from sgfs (for supervised learning) ++ # and soft-max when using self-play records. argmax normalizes the two. 
++ policy_target_top_1 = tf.argmax(pi_tensor, axis=1) ++ ++ policy_output_in_top1 = tf.to_float( ++ tf.nn.in_top_k(policy_output, policy_target_top_1, k=1)) ++ policy_output_in_top3 = tf.to_float( ++ tf.nn.in_top_k(policy_output, policy_target_top_1, k=3)) ++ ++ policy_top_1_confidence = tf.reduce_max(policy_output, axis=1) ++ policy_target_top_1_confidence = tf.boolean_mask( ++ policy_output, ++ tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1])) ++ ++ value_cost_normalized = value_cost / params['value_cost_weight'] ++ ++ with tf.variable_scope("metrics"): ++ metric_ops = { ++ 'policy_cost': tf.metrics.mean(policy_cost), ++ 'value_cost': tf.metrics.mean(value_cost), ++ 'value_cost_normalized': tf.metrics.mean(value_cost_normalized), ++ 'l2_cost': tf.metrics.mean(l2_cost), ++ 'policy_entropy': tf.metrics.mean(policy_entropy), ++ 'combined_cost': tf.metrics.mean(combined_cost), ++ ++ 'policy_accuracy_top_1': tf.metrics.mean(policy_output_in_top1), ++ 'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3), ++ 'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence), ++ 'policy_target_top_1_confidence': tf.metrics.mean( ++ policy_target_top_1_confidence), ++ 'value_confidence': tf.metrics.mean(tf.abs(value_output)), ++ } ++ ++ if est_mode == tf.estimator.ModeKeys.EVAL: ++ return metric_ops ++ ++ # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps. ++ eval_step = tf.reduce_min(step) ++ ++ # Create summary ops so that they show up in SUMMARIES collection ++ # That way, they get logged automatically during training ++ summary_writer = summary.create_file_writer(FLAGS.work_dir) ++ with summary_writer.as_default(), \ ++ summary.record_summaries_every_n_global_steps( ++ params['summary_steps'], eval_step): ++ for metric_name, metric_op in metric_ops.items(): ++ summary.scalar(metric_name, metric_op[1], step=eval_step) ++ ++ # Reset metrics occasionally so that they are mean of recent batches. ++ reset_op = tf.variables_initializer(tf.local_variables("metrics")) ++ cond_reset_op = tf.cond( ++ tf.equal(eval_step % params['summary_steps'], tf.to_int64(1)), ++ lambda: reset_op, ++ lambda: tf.no_op()) ++ ++ return summary.all_summary_ops() + [cond_reset_op] ++ ++ metric_args = [ ++ policy_output, ++ value_output, ++ labels['pi_tensor'], ++ tf.reshape(policy_cost, [1]), ++ tf.reshape(value_cost, [1]), ++ tf.reshape(l2_cost, [1]), ++ tf.reshape(combined_cost, [1]), ++ tf.reshape(global_step, [1]), ++ ] ++ ++ predictions = { ++ 'policy_output': policy_output, ++ 'value_output': value_output, ++ } ++ ++ eval_metrics_only_fn = functools.partial( ++ eval_metrics_host_call_fn, est_mode=tf.estimator.ModeKeys.EVAL) ++ host_call_fn = functools.partial( ++ eval_metrics_host_call_fn, est_mode=tf.estimator.ModeKeys.TRAIN) ++ ++ tpu_estimator_spec = tpu_estimator.TPUEstimatorSpec( ++ mode=mode, ++ predictions=predictions, ++ loss=combined_cost, ++ train_op=train_op, ++ eval_metrics=(eval_metrics_only_fn, metric_args), ++ host_call=(host_call_fn, metric_args) ++ ) ++ return train_op ++ ++ + def model_inference_fn(features, training, params): + """Builds just the inference part of the model graph. 
+ +@@ -428,7 +612,8 @@ def model_inference_fn(features, training, params): + + def mg_res_layer(inputs): + residual = residual_inner(inputs) +- output = mg_activation(inputs + residual) ++ fixed = tf.math.add_n([inputs, residual]) ++ output = mg_activation(fixed) + return output + + def mg_squeeze_excitation_layer(inputs): +@@ -538,15 +723,26 @@ def get_estimator(): + + + def _get_nontpu_estimator(): +- session_config = tf.ConfigProto() ++ session_config = tf.ConfigProto( ++ intra_op_parallelism_threads=FLAGS.num_intra_threads, ++ inter_op_parallelism_threads=FLAGS.num_inter_threads) + session_config.gpu_options.allow_growth = True ++ model_dir = None ++ if(not FLAGS.dist_train) or (hvd.rank()==0): ++ model_dir = FLAGS.work_dir ++ step_count_steps = 50 ++ summary_steps = FLAGS.summary_steps ++ else: ++ step_count_steps = 1000000 ++ summary_steps = 1000000 + run_config = tf.estimator.RunConfig( +- save_summary_steps=FLAGS.summary_steps, ++ log_step_count_steps = step_count_steps, ++ save_summary_steps=summary_steps, + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + session_config=session_config) + return tf.estimator.Estimator( + model_fn, +- model_dir=FLAGS.work_dir, ++ model_dir=model_dir, + config=run_config, + params=FLAGS.flag_values_dict()) + +@@ -618,14 +814,97 @@ def export_model(model_path): + print("Copying {} to {}".format(filename, destination_path)) + tf.gfile.Copy(filename, destination_path) + ++def generate_min_max_log(log_graph_file, tf_records, log_file): ++ cmd = 'numactl -N 0 -l python3 produce_min_max_log.py' ++ cmd += ' --input_graph={0}'.format(log_graph_file) ++ cmd += ' --data_location={0}'.format(tf_records) ++ cmd += ' --num_steps={0}'.format(FLAGS.quantize_test_steps) ++ cmd += ' --batch_size={0}'.format(FLAGS.quantize_test_batch_size) ++ cmd += ' --random_rotation={0}'.format(FLAGS.random_rotation) ++ cmd += ' 2> {0}'.format(log_file) ++ print(cmd) ++ subprocess.call(cmd, shell=True) ++ ++def quantization(opt_graph, model_path, tf_records, eval_min_max_every_epoch): ++ # first_quantize ++ #rewriter = quantize_graph.GraphRewriter(opt_graph, 'eightbit', None, None, True, [], []) ++ rewriter = quantize_graph.GraphRewriter(opt_graph, 'eightbit', None, None, True) ++ first_quantize_graph = rewriter.rewrite(["policy_output", "value_output"]) ++ ++ if eval_min_max_every_epoch: ++ # insert_min_max_log ++ transform = 'insert_logging(op=RequantizationRange, show_name=true, message="__requant_min_max:")' ++ log_graph = TransformGraph(first_quantize_graph, ["pos_tensor"], ++ ["policy_output", "value_output"], [transform]) ++ with tf.gfile.FastGFile(model_path + '_for_min_max.pb', 'wb') as f: ++ f.write(log_graph.SerializeToString()) ++ ++ # generate_min_max_log ++ with logged_timer('minmax time'): ++ generate_min_max_log(model_path + '_for_min_max.pb', tf_records, model_path + 'log.txt') ++ shutil.copy(model_path + 'log.txt', os.path.join(os.path.dirname(model_path), 'lastlog.txt')) ++ else: ++ print('min max skipped') ++ ++ # apply_calibration ++ if eval_min_max_every_epoch: ++ transform = 'freeze_requantization_ranges(min_max_log_file="{0}")'.format(model_path + 'log.txt') ++ else: ++ transform = 'freeze_requantization_ranges(min_max_log_file="{0}")'.format(os.path.join(os.path.dirname(model_path), 'lastlog.txt')) ++ calibration_graph = TransformGraph(first_quantize_graph, ["pos_tensor"], ++ ["policy_output", "value_output"], [transform]) ++ ++ # fuse_requantize ++ transform = 'fuse_quantized_conv_and_requantize strip_unused_nodes' ++ output_graph = 
TransformGraph(calibration_graph, ["pos_tensor"], ++ ["policy_output", "value_output"], [transform]) ++ return output_graph ++ ++def optimize_graph(input_graph, model_path, quantizing_graph, tf_records, eval_min_max_every_epoch, freeze=False): ++ if freeze: ++ n = DualNetwork(model_path) ++ fp32_graph = tf.graph_util.convert_variables_to_constants( ++ n.sess, n.sess.graph.as_graph_def(), ["policy_output", "value_output"]) ++ else: ++ fp32_graph = graph_pb2.GraphDef() ++ with tf.gfile.Open(input_graph, "rb") as read_f: ++ weight = read_f.read() ++ fp32_graph.ParseFromString(weight) ++ ++ opt_graph = optimize_for_inference( ++ fp32_graph, ++ ["pos_tensor"], ++ ["policy_output", "value_output"], ++ dtypes.float32.as_datatype_enum, ++ False) ++ ++ if(quantizing_graph): ++ output_graph = quantization(opt_graph, model_path, tf_records, eval_min_max_every_epoch) ++ else: ++ output_graph = opt_graph ++ ++ with tf.gfile.GFile(model_path + '.pb', 'wb') as write_f: ++ write_f.write(output_graph.SerializeToString()) ++ ++def get_input_tensor(graph): ++ return graph.get_tensor_by_name('pos_tensor:0') ++def get_output_tensor(graph): ++ policy_output = graph.get_tensor_by_name('policy_output:0') ++ value_output = graph.get_tensor_by_name('value_output:0') ++ return policy_output, value_output + + def freeze_graph(model_path): + n = DualNetwork(model_path) + out_graph = tf.graph_util.convert_variables_to_constants( + n.sess, n.sess.graph.as_graph_def(), ["policy_output", "value_output"]) ++ output_graph_def = optimize_for_inference( ++ out_graph, ++ ["pos_tensor"], ++ ["policy_output", "value_output"], ++ dtypes.float32.as_datatype_enum, ++ False) + with tf.gfile.GFile(model_path + '.pb', 'wb') as f: +- f.write(out_graph.SerializeToString()) +- ++ f.write(output_graph_def.SerializeToString()) + + def freeze_graph_tpu(model_path): + """Custom freeze_graph implementation for Cloud TPU.""" +diff --git a/minigui/minigui-common.sh b/minigui/minigui-common.sh +old mode 100644 +new mode 100755 +diff --git a/minigui/unset-minigui-common.sh b/minigui/unset-minigui-common.sh +old mode 100644 +new mode 100755 +diff --git a/ml_perf/divide_golden_chunk.py b/ml_perf/divide_golden_chunk.py +new file mode 100644 +index 0000000..02e6e33 +--- /dev/null ++++ b/ml_perf/divide_golden_chunk.py +@@ -0,0 +1,73 @@ ++# Copyright 2018 Google LLC ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
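optimize_graph()/quantization() above implement a standard post-training int8 calibration flow: optimize the frozen FP32 graph for inference, rewrite it to eight-bit ops with quantize_graph, insert RequantizationRange logging, replay a few batches through produce_min_max_log.py to capture the ranges, freeze those ranges into the graph, and finally fuse the quantized convolution and requantize pairs. Reduced to the TF 1.x TransformGraph calls used here, the flow looks roughly like this:

from tensorflow.tools.graph_transforms import TransformGraph

INPUTS, OUTPUTS = ["pos_tensor"], ["policy_output", "value_output"]

def add_range_logging(eightbit_graph_def):
    # This graph is written to disk and replayed over calibration batches by
    # produce_min_max_log.py, which records the "__requant_min_max:" lines.
    return TransformGraph(eightbit_graph_def, INPUTS, OUTPUTS,
        ['insert_logging(op=RequantizationRange, show_name=true, message="__requant_min_max:")'])

def freeze_and_fuse(eightbit_graph_def, min_max_log):
    # Freeze the recorded requantization ranges, then fuse quantized
    # convolution + requantize and strip unused nodes.
    frozen = TransformGraph(eightbit_graph_def, INPUTS, OUTPUTS,
        ['freeze_requantization_ranges(min_max_log_file="{}")'.format(min_max_log)])
    return TransformGraph(frozen, INPUTS, OUTPUTS,
        ['fuse_quantized_conv_and_requantize strip_unused_nodes'])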
++ ++import os ++import random ++import functools ++import shutil ++ ++import numpy as np ++import tensorflow as tf ++import threading ++ ++from mpi4py import MPI ++from absl import app, flags ++from rl_loop import example_buffer ++ ++flags.DEFINE_string('read_path', '/tmp/minigo', ++ 'Path to the read origin data.') ++ ++flags.DEFINE_string('write_path', '/tmp/minigo/output', ++ 'Path to the read origin data.') ++ ++flags.DEFINE_integer('out_files_number', 2, ++ 'Num of files to produce.') ++ ++flags.DEFINE_integer('physical_cores', 56, ++ 'Num of cores.') ++ ++flags.DEFINE_integer('seed', 0, ++ 'Random seed.') ++ ++FLAGS = flags.FLAGS ++ ++ ++def main(unused_argv): ++ mpi_comm = MPI.COMM_WORLD ++ mpi_rank = mpi_comm.Get_rank() ++ mpi_size = mpi_comm.Get_size() ++ # avoid seed out of range ++ random.seed(FLAGS.seed % 1048576) ++ tf.set_random_seed(FLAGS.seed % 1048576) ++ np.random.seed(FLAGS.seed % 1048576) ++ ++ pattern = os.path.join(FLAGS.read_path, '*.zz') ++ files = tf.gfile.Glob(pattern) ++ ++ buffer = example_buffer.ExampleBuffer(sampling_frac=1.0) ++ example_num = buffer.parallel_fill(files, threads=FLAGS.physical_cores) ++ # make sure all nodes generate same number of examples ++ example_num = int(mpi_comm.allreduce(example_num, op=MPI.MIN)) ++ buffer.flush_new(FLAGS.write_path+'_{}'.format(mpi_rank), example_num, FLAGS.out_files_number, threads=1) ++ ++ shutil.rmtree('/tmp/minigo/home', ignore_errors=True) ++ ++if __name__ == '__main__': ++ app.run(main) ++ ++ ++ ++ ++ ++ +diff --git a/ml_perf/eval_models.py b/ml_perf/eval_models.py +index 74702e4..b552f42 100644 +--- a/ml_perf/eval_models.py ++++ b/ml_perf/eval_models.py +@@ -23,7 +23,7 @@ import os + from absl import app + from reference_implementation import evaluate_model, wait + from rl_loop import fsdb +- ++import ml_perf.mlp_log as mll + + def load_train_times(): + models = [] +@@ -43,10 +43,17 @@ def main(unused_argv): + target = 'tf,' + os.path.join(fsdb.models_dir(), 'target.pb') + models = load_train_times() + for i, (timestamp, name, path) in enumerate(models): ++ mll.eval_start(i) + winrate = wait(evaluate_model(path, target, sgf_dir, i + 1)) ++ mll.eval_stop(i) ++ mll.eval_accuracy(i, winrate) + if winrate >= 0.50: + print('Model {} beat target after {}s'.format(name, timestamp)) +- break ++ mll.eval_result(i, timestamp) ++ mll.run_stop('success') ++ return ++ mll.eval_result(i, 0) ++ mll.run_stop('aborted') + + + if __name__ == '__main__': +diff --git a/ml_perf/execute.py b/ml_perf/execute.py +new file mode 100644 +index 0000000..00a6bed +--- /dev/null ++++ b/ml_perf/execute.py +@@ -0,0 +1,69 @@ ++# Copyright 2019 Google LLC ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
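divide_golden_chunk.py runs one MPI rank per node, fills an example buffer from the local selfplay output, and then takes an allreduce with op=MPI.MIN over the per-rank example counts, so every rank flushes exactly the same number of examples and the resulting golden chunks stay balanced across nodes. The synchronization step in isolation:

from mpi4py import MPI

def common_example_count(local_count):
    # All ranks contribute their own count and receive the global minimum,
    # so no rank writes more examples than the smallest buffer holds.
    return int(MPI.COMM_WORLD.allreduce(local_count, op=MPI.MIN))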
++ ++"""Run the command in multi-instance mode ++ ++If there is a --seed parameter from input, change seed to generate randomness among instances ++ ++Args: ++ num_instance: the number of instance needed to start ++""" ++ ++import sys ++sys.path.insert(0, '.') # nopep8 ++ ++import asyncio ++from ml_perf.utils import * ++ ++from absl import app, flags ++ ++flags.DEFINE_integer('num_instance', 1, 'Number of instances for selfplay') ++ ++FLAGS = flags.FLAGS ++ ++# Self-play a number of games. ++async def do_execute_mi(): ++ ++ num_instance = FLAGS.num_instance ++ ++ start_copy = False ++ arg_list = [] ++ for arg in sys.argv: ++ if start_copy: ++ arg_list.append(arg) ++ if arg == '--': ++ start_copy = True ++ ++ if num_instance > 1: ++ result_list = checked_run_mi( ++ num_instance, ++ *arg_list ++ ) ++ for result in result_list: ++ # TODO needs to be more generic ++ print ('\n'.join(result.split('\n')[-7:])) ++ else: ++ result = checked_run( ++ *arg_list ++ ) ++ print (result) ++ ++def main(unused_argv): ++ try: ++ wait(do_execute_mi()) ++ finally: ++ asyncio.get_event_loop().close() ++ ++if __name__ == '__main__': ++ app.run(main) +diff --git a/ml_perf/flags/9.mn/architecture.flags b/ml_perf/flags/9.mn/architecture.flags +new file mode 100644 +index 0000000..ec2abf4 +--- /dev/null ++++ b/ml_perf/flags/9.mn/architecture.flags +@@ -0,0 +1,7 @@ ++# architecture.flags: Flags that control the model architecture. ++ ++--conv_width=32 ++--fc_width=64 ++--trunk_layers=9 ++--value_cost_weight=0.25 ++--summary_steps=64 +diff --git a/ml_perf/flags/9.mn/bootstrap.flags b/ml_perf/flags/9.mn/bootstrap.flags +new file mode 100644 +index 0000000..0283a92 +--- /dev/null ++++ b/ml_perf/flags/9.mn/bootstrap.flags +@@ -0,0 +1,9 @@ ++# bootstrap.flags ++# Flags for the first bootstrap round of selfplay. ++ ++--flagfile=ml_perf/flags/9.mn/selfplay.flags ++ ++# Don't perform holdout for the first bootstrap round. ++--holdout_pct=0 ++ ++--num_readouts=20 +diff --git a/ml_perf/flags/9.mn/bootstrap_mi.flags b/ml_perf/flags/9.mn/bootstrap_mi.flags +new file mode 100644 +index 0000000..f4da7c1 +--- /dev/null ++++ b/ml_perf/flags/9.mn/bootstrap_mi.flags +@@ -0,0 +1,3 @@ ++--num_games=8192 ++--parallel_games=4 ++--multi_instance=True +diff --git a/ml_perf/flags/9.mn/eval.flags b/ml_perf/flags/9.mn/eval.flags +new file mode 100644 +index 0000000..f07d715 +--- /dev/null ++++ b/ml_perf/flags/9.mn/eval.flags +@@ -0,0 +1,6 @@ ++# eval.flags: Flags for playing eval games. ++ ++--flagfile=ml_perf/flags/9.mn/selfplay.flags ++ ++# Play fewer games for eval than selfplay. ++--parallel_games=1 +diff --git a/ml_perf/flags/9.mn/eval_mi.flags b/ml_perf/flags/9.mn/eval_mi.flags +new file mode 100644 +index 0000000..00e960c +--- /dev/null ++++ b/ml_perf/flags/9.mn/eval_mi.flags +@@ -0,0 +1,3 @@ ++--num_games=100 ++--parallel_games=1 ++--multi_instance=True +diff --git a/ml_perf/flags/9.mn/rl_loop.flags b/ml_perf/flags/9.mn/rl_loop.flags +new file mode 100644 +index 0000000..c5f2b23 +--- /dev/null ++++ b/ml_perf/flags/9.mn/rl_loop.flags +@@ -0,0 +1,15 @@ ++# rl_loop.flags: Flags for the reinforcement learning loop. 
++
++--flags_dir=ml_perf/flags/9.mn/
++--checkpoint_dir=ml_perf/checkpoint/9/
++
++--iterations=30
++--gating_win_rate=0.49
++--window_size=10
++--engine=tf
++--parallel_post_train=3
++--train_instance_per_numa=2
++--eval_min_max_every_epoch=True
++--quantize_test_steps=1
++--quantize_test_batch_size=80
++
+diff --git a/ml_perf/flags/9.mn/selfplay.flags b/ml_perf/flags/9.mn/selfplay.flags
+new file mode 100644
+index 0000000..a1d5815
+--- /dev/null
++++ b/ml_perf/flags/9.mn/selfplay.flags
+@@ -0,0 +1,14 @@
++# selfplay.flags: Flags for selfplay.
++
++# This flagfile also serves as the base for the bootstrap & eval stages of
++# the RL loop.
++
++--num_readouts=240
++--value_init_penalty=0.2
++--holdout_pct=0.03
++--disable_resign_pct=0.1
++--resign_threshold=-0.99
++
++# Device-specific selfplay flags.
++--parallel_games=1
++--virtual_losses=8
+diff --git a/ml_perf/flags/9.mn/selfplay_mi.flags b/ml_perf/flags/9.mn/selfplay_mi.flags
+new file mode 100644
+index 0000000..4c63d58
+--- /dev/null
++++ b/ml_perf/flags/9.mn/selfplay_mi.flags
+@@ -0,0 +1,3 @@
++--num_games=4096
++--parallel_games=1
++--multi_instance=True
+diff --git a/ml_perf/flags/9.mn/train.flags b/ml_perf/flags/9.mn/train.flags
+new file mode 100644
+index 0000000..19f8b9d
+--- /dev/null
++++ b/ml_perf/flags/9.mn/train.flags
+@@ -0,0 +1,15 @@
++# train.flags: Flags for training.
++
++--flagfile=ml_perf/flags/9.mn/architecture.flags
++
++--shuffle_buffer_size=10000
++--filter_amount=0.5
++
++# Device specific hyperparameters re: batch size and LR schedules.
++--train_batch_size=8192
++--lr_rates=0.32
++--lr_rates=0.032
++--lr_rates=0.0032
++--lr_boundaries=12500
++--lr_boundaries=18750
++--l2_strength=0.0001
+diff --git a/ml_perf/flags/9.mn/validate.flags b/ml_perf/flags/9.mn/validate.flags
+new file mode 100644
+index 0000000..de4f22d
+--- /dev/null
++++ b/ml_perf/flags/9.mn/validate.flags
+@@ -0,0 +1,7 @@
++# validate.flags: Flags for validation.
++
++--flagfile=ml_perf/flags/9.mn/architecture.flags
++
++--examples_to_validate=256
++--train_batch_size=64
++--summary_steps=2
+diff --git a/ml_perf/flags/9/bootstrap.flags b/ml_perf/flags/9/bootstrap.flags
+index 4e7341e..29c66d6 100644
+--- a/ml_perf/flags/9/bootstrap.flags
++++ b/ml_perf/flags/9/bootstrap.flags
+@@ -6,5 +6,4 @@
+ # Don't perform holdout for the first bootstrap round.
+ --holdout_pct=0
+ 
+---num_games=8192
+ --num_readouts=20
+diff --git a/ml_perf/flags/9/bootstrap_mi.flags b/ml_perf/flags/9/bootstrap_mi.flags
+new file mode 100644
+index 0000000..f4da7c1
+--- /dev/null
++++ b/ml_perf/flags/9/bootstrap_mi.flags
+@@ -0,0 +1,3 @@
++--num_games=8192
++--parallel_games=4
++--multi_instance=True
+diff --git a/ml_perf/flags/9/eval.flags b/ml_perf/flags/9/eval.flags
+index aecf855..9f8759e 100644
+--- a/ml_perf/flags/9/eval.flags
++++ b/ml_perf/flags/9/eval.flags
+@@ -3,5 +3,4 @@
+ --flagfile=ml_perf/flags/9/selfplay.flags
+ 
+ # Play fewer games for eval than selfplay.
+---num_games=100 +---parallel_games=100 ++--parallel_games=1 +diff --git a/ml_perf/flags/9/eval_mi.flags b/ml_perf/flags/9/eval_mi.flags +new file mode 100644 +index 0000000..00e960c +--- /dev/null ++++ b/ml_perf/flags/9/eval_mi.flags +@@ -0,0 +1,3 @@ ++--num_games=100 ++--parallel_games=1 ++--multi_instance=True +diff --git a/ml_perf/flags/9/rl_loop.flags b/ml_perf/flags/9/rl_loop.flags +index c6b6dc2..4b8dc29 100644 +--- a/ml_perf/flags/9/rl_loop.flags ++++ b/ml_perf/flags/9/rl_loop.flags +@@ -3,8 +3,8 @@ + --flags_dir=ml_perf/flags/9/ + --checkpoint_dir=ml_perf/checkpoint/9/ + +---iterations=50 ++--iterations=30 + --gating_win_rate=0.49 + --window_size=10 + --engine=tf +---parallel_post_train=true ++--train_instance_per_numa=2 +diff --git a/ml_perf/flags/9/selfplay.flags b/ml_perf/flags/9/selfplay.flags +index 3d8d64c..5164768 100644 +--- a/ml_perf/flags/9/selfplay.flags ++++ b/ml_perf/flags/9/selfplay.flags +@@ -3,7 +3,6 @@ + # This flagfile also serves as the base for the boostrap & eval stages of + # the RL loop. + +---num_games=4096 + --num_readouts=240 + --value_init_penalty=0.2 + --holdout_pct=0.03 +@@ -11,5 +10,5 @@ + --resign_threshold=-0.99 + + # Device-specific selfplay flags. +---parallel_games=2048 ++--parallel_games=16 + --virtual_losses=8 +diff --git a/ml_perf/flags/9/selfplay_mi.flags b/ml_perf/flags/9/selfplay_mi.flags +new file mode 100644 +index 0000000..7b30dc8 +--- /dev/null ++++ b/ml_perf/flags/9/selfplay_mi.flags +@@ -0,0 +1,3 @@ ++--num_games=4096 ++--parallel_games=16 ++--multi_instance=True +diff --git a/ml_perf/flags/9/train.flags b/ml_perf/flags/9/train.flags +index aa1a3cf..a65044d 100644 +--- a/ml_perf/flags/9/train.flags ++++ b/ml_perf/flags/9/train.flags +@@ -6,10 +6,10 @@ + --filter_amount=0.5 + + # Device specific hyperparameters re: batch size and LR schedules. +---train_batch_size=4096 +---lr_rates=0.16 +---lr_rates=0.016 +---lr_rates=0.0016 +---lr_boundaries=25000 +---lr_boundaries=37500 ++--train_batch_size=8192 ++--lr_rates=0.32 ++--lr_rates=0.032 ++--lr_rates=0.0032 ++--lr_boundaries=12500 ++--lr_boundaries=18750 + --l2_strength=0.0001 +diff --git a/ml_perf/hostlist.sh b/ml_perf/hostlist.sh +new file mode 100755 +index 0000000..94465f2 +--- /dev/null ++++ b/ml_perf/hostlist.sh +@@ -0,0 +1,3 @@ ++# generate a list of host ip or hostname ++# one ip/hostname per line ++cat $HOSTLIST.txt +diff --git a/ml_perf/mlp_log.py b/ml_perf/mlp_log.py +new file mode 100644 +index 0000000..501baf1 +--- /dev/null ++++ b/ml_perf/mlp_log.py +@@ -0,0 +1,118 @@ ++# Copyright 2019 MLBenchmark Group. All Rights Reserved. ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
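A note on the ml_perf/flags/9/train.flags change above: the global batch size doubles (4096 to 8192) while every learning rate doubles (0.16/0.016/0.0016 to 0.32/0.032/0.0032) and every decay boundary halves (25000/37500 to 12500/18750), so the total number of training examples seen by the optimizer stays the same. This looks consistent with the usual linear learning-rate scaling rule for larger batches, although the patch itself does not state the rationale. A purely illustrative check of the arithmetic:

# Illustrative only: the old/new train.flags values differ by a factor of two.
old = {'batch': 4096, 'lr': [0.16, 0.016, 0.0016], 'boundaries': [25000, 37500]}
new = {'batch': 8192, 'lr': [0.32, 0.032, 0.0032], 'boundaries': [12500, 18750]}

assert new['batch'] == 2 * old['batch']
assert all(n == 2 * o for n, o in zip(new['lr'], old['lr']))
assert all(n == o // 2 for n, o in zip(new['boundaries'], old['boundaries']))
assert old['batch'] * old['boundaries'][-1] == new['batch'] * new['boundaries'][-1]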
++# ============================================================================== ++ ++"""Utilities for compliance logging.""" ++ ++import logging ++import time ++import inspect ++import sys ++ ++def init_start(): ++ log('init_start', caller_depth=3) ++ ++def init_stop(): ++ log('init_stop', caller_depth=3) ++ ++def run_start(): ++ log('run_start', caller_depth=3) ++ ++def run_stop(status): ++ assert status == 'success' or status == 'aborted' ++ log('run_stop', ++ meta_data = {'status':status}, ++ caller_depth=3) ++ ++def block_start(epoch, count): ++ log('block_start', ++ meta_data = {'first_epoch_num':epoch, ++ 'epoch_count':count}, ++ caller_depth=3) ++ ++def block_stop(epoch): ++ log('block_stop', ++ meta_data = {'first_epoch_num':epoch}, ++ caller_depth=3) ++ ++def epoch_start(epoch): ++ log('epoch_start', ++ meta_data = {'epoch_num':epoch}, ++ caller_depth=3) ++ ++def epoch_stop(epoch): ++ log('epoch_stop', ++ meta_data = {'epoch_num':epoch}, ++ caller_depth=3) ++ ++def eval_start(epoch): ++ log('eval_start', ++ meta_data = {'epoch_num':epoch}, ++ caller_depth=3) ++ ++def eval_stop(epoch): ++ log('eval_stop', ++ meta_data = {'epoch_num':epoch}, ++ caller_depth=3) ++ ++def eval_accuracy(epoch, accuracy): ++ log('eval_accuracy', ++ val = '{}'.format(accuracy), ++ meta_data = {'epoch_num':epoch}, ++ caller_depth=3) ++ ++def global_batch_size(batch_size): ++ log('global_batch_size', ++ val = '{}'.format(batch_size), ++ caller_depth=3) ++ ++def lr_rates(rates): ++ log('opt_base_learning_rate', ++ val = '{}'.format(rates), ++ caller_depth=3) ++ ++def lr_boundaries(boundaries): ++ log('opt_learning_rate_decay_boundary_steps', ++ val = '{}'.format(boundaries), ++ caller_depth=3) ++ ++def save_model(iteration): ++ log('save_model', ++ meta_data = {'iteration':iteration}, ++ caller_depth=3) ++ ++def eval_result(iteration, timestamp): ++ log('eval_result', ++ meta_data = {'iteration':iteration, 'timestamp':timestamp}, ++ caller_depth=3) ++ ++def log(key, val='null', meta_data = None, caller_depth=2): ++ filename, lineno = get_caller(caller_depth) ++ meta_dict = {'lineno': lineno, 'file': filename} ++ if meta_data != None: ++ meta_dict.update(meta_data) ++ meta_string = '{}'.format(meta_dict) ++ print(':::MLL %f %s: {"value": %s, "metadata": %s}'%(time.time(), key, val, meta_string), file=sys.stderr) ++ ++def get_caller(stack_index=2, root_dir=None): ++ ''' Returns file.py:lineno of your caller. A stack_index of 2 will provide ++ the caller of the function calling this function. Notice that stack_index ++ of 2 or more will fail if called from global scope. ''' ++ caller = inspect.getframeinfo(inspect.stack()[stack_index][0]) ++ ++ # Trim the filenames for readability. 
++ filename = caller.filename ++ if root_dir is not None: ++ filename = re.sub("^" + root_dir + "/", "", filename) ++ return (filename, caller.lineno) +diff --git a/ml_perf/reference_implementation.py b/ml_perf/reference_implementation.py +index 1ca724e..c3c9489 100644 +--- a/ml_perf/reference_implementation.py ++++ b/ml_perf/reference_implementation.py +@@ -26,17 +26,27 @@ import random + import re + import shutil + import subprocess ++import functools + import tensorflow as tf + import time ++import copy ++import multiprocessing as mp + from ml_perf.utils import * ++import ml_perf.mlp_log as mll ++ ++from fractions import gcd + + from absl import app, flags + from rl_loop import example_buffer, fsdb +-from tensorflow import gfile ++import dual_net ++ ++from tensorflow.python.platform import gfile ++ ++import socket + + N = int(os.environ.get('BOARD_SIZE', 19)) + +-flags.DEFINE_string('checkpoint_dir', 'ml_perf/checkpoint/{}'.format(N), ++flags.DEFINE_string('checkpoint_dir', None, + 'The checkpoint directory specify a start model and a set ' + 'of golden chunks used to start training. If not ' + 'specified, will start from scratch.') +@@ -58,15 +68,35 @@ flags.DEFINE_string('flags_dir', None, + + flags.DEFINE_integer('window_size', 10, + 'Maximum number of recent selfplay rounds to train on.') ++flags.DEFINE_integer('golden_chunk_split', 2, ++ 'Golden chunk of each selfplay is splited to accelerate write golden chunk') + +-flags.DEFINE_boolean('parallel_post_train', False, +- 'If true, run the post-training stages (eval, validation ' +- '& selfplay) in parallel.') ++flags.DEFINE_integer('parallel_post_train', 0, ++ '0: run the post-training stages in serial mode' ++ '1: run the post-training stages (eval, validation ' ++ '& selfplay) in parallel.' ++ '2: run the post-train stage in pipeline mode.') + + flags.DEFINE_string('engine', 'tf', 'The engine to use for selfplay.') + +-FLAGS = flags.FLAGS ++flags.DEFINE_integer('physical_cores', None, 'The number of cores for each node.') ++flags.DEFINE_integer('virtual_cores', None, 'The number of SMT for each node.') ++flags.DEFINE_integer('numa_cores', None, 'The number of core for each numa node.') ++flags.DEFINE_integer('train_instance_per_numa', 2, 'The number of instance for each numa node.') ++ ++flags.DEFINE_bool('setup_train_workers', False, 'True if setting up train workers.') ++ ++flags.DEFINE_multi_string('train_node', [], 'The node:core list for training') ++flags.DEFINE_multi_string('eval_node', [], 'The node list for evaluation') ++flags.DEFINE_multi_string('selfplay_node', [], 'The node list for selfplay.') + ++flags.DEFINE_bool('quantization', True, 'Using Int8 if true.') ++flags.DEFINE_bool('eval_min_max_every_epoch', True, 'Genereting min max log every epoch if true.') ++flags.DEFINE_boolean('random_rotation', True, 'Do random rotation when running for min&max log.') ++flags.DEFINE_integer('quantize_test_steps', 5, 'The steps to run for min&max log.') ++flags.DEFINE_integer('quantize_test_batch_size', 16, 'The batch size for running inference for min&max log.') ++ ++FLAGS = flags.FLAGS + + class State: + """State data used in each iteration of the RL loop. 
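The *_cores and *_node flags introduced above describe the machine topology that the rest of this file maps work onto: train_node, eval_node and selfplay_node list the hosts for each stage, while physical_cores, numa_cores and train_instance_per_numa determine how many training ranks and how many intra-op threads each host gets. The derived quantities below follow the formulas used later in train() and set_up_train(); the concrete numbers are only an example, assuming a hypothetical dual-socket node with 28 cores per socket:

# Hypothetical node: 2 sockets x 28 cores; formulas mirror train()/set_up_train().
physical_cores = 56
numa_cores = 28
train_instance_per_numa = 2    # default from the flag above

numa_per_node = physical_cores // numa_cores                  # 2 NUMA domains per node
instance_per_node = numa_per_node * train_instance_per_numa   # 4 training ranks per node
intra_threads = numa_cores // train_instance_per_numa - 1     # 13 intra-op threads per rank
print(numa_per_node, instance_per_node, intra_threads)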
+@@ -133,17 +163,15 @@ class WinStats: + pattern = '\s*(\S+)' + '\s+(\d+)' * 8 + match = re.search(pattern, line) + if match is None: +- raise ValueError('Can\t parse line "{}"'.format(line)) ++ raise ValueError('Can\'t parse line "{}"'.format(line)) + self.model_name = match.group(1) + raw_stats = [float(x) for x in match.groups()[1:]] + self.black_wins = ColorWinStats(*raw_stats[:4]) + self.white_wins = ColorWinStats(*raw_stats[4:]) + self.total_wins = self.black_wins.total + self.white_wins.total + +- +-def initialize_from_checkpoint(state): ++def initialize_from_checkpoint(state, out_files_number): + """Initialize the reinforcement learning loop from a checkpoint.""" +- + # The checkpoint's work_dir should contain the most recently trained model. + model_paths = glob.glob(os.path.join(FLAGS.checkpoint_dir, + 'work_dir/model.ckpt-*.pb')) +@@ -152,17 +180,20 @@ def initialize_from_checkpoint(state): + 'got [{}]'.format(', '.join(model_paths))) + start_model_path = model_paths[0] + ++ golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks') ++ for basename in os.listdir(golden_chunks_dir): ++ path = os.path.join(golden_chunks_dir, basename) ++ out_path = os.path.join(fsdb.golden_chunk_dir(), basename) ++ buffer = example_buffer.ExampleBuffer(sampling_frac=1.0) ++ example_num = buffer.parallel_fill(tf.gfile.Glob(path),FLAGS.physical_cores) ++ buffer.flush_new(out_path, example_num, out_files_number, 1)# FLAGS.physical_cores) ++ + # Copy the latest trained model into the models directory and use it on the + # first round of selfplay. + state.best_model_name = 'checkpoint' +- shutil.copy(start_model_path, +- os.path.join(fsdb.models_dir(), state.best_model_name + '.pb')) ++ best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name) + +- # Copy the training chunks. +- golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks') +- for basename in os.listdir(golden_chunks_dir): +- path = os.path.join(golden_chunks_dir, basename) +- shutil.copy(path, fsdb.golden_chunk_dir()) ++ dual_net.optimize_graph(start_model_path, best_model_path, FLAGS.quantization, fsdb.golden_chunk_dir()+'/*.zz*', FLAGS.eval_min_max_every_epoch) + + # Copy the training files. + work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir') +@@ -174,19 +205,68 @@ def initialize_from_checkpoint(state): + def parse_win_stats_table(stats_str, num_lines): + result = [] + lines = stats_str.split('\n') +- while True: +- # Find the start of the win stats table. +- assert len(lines) > 1 +- if 'Black' in lines[0] and 'White' in lines[0] and 'm.lmt.' in lines[1]: +- break +- lines = lines[1:] + +- # Parse the expected number of lines from the table. +- for line in lines[2:2 + num_lines]: +- result.append(WinStats(line)) +- +- return result ++ while True: ++ while True: ++ # Find the start of the win stats table. ++ if len(lines) == 0: ++ return result ++ if 'Black' in lines[0] and 'White' in lines[0] and 'm.lmt.' in lines[1]: ++ break ++ lines = lines[1:] ++ ++ # Parse the expected number of lines from the table. 
++ for line in lines[2:2 + num_lines]: ++ stat = WinStats(line) ++ for s in result: ++ if s.model_name == stat.model_name: ++ s.black_wins.total += stat.black_wins.total ++ s.white_wins.total += stat.white_wins.total ++ s.total_wins += stat.total_wins ++ stat = None ++ break ++ if stat != None: ++ result.append(stat) ++ lines = lines[2 + num_lines:] ++ ++def extract_multi_instance(cmd): ++ cmd_list = flags.FlagValues().read_flags_from_files(cmd) ++ new_cmd_list = [] ++ multi_instance = False ++ num_instance = 0 ++ num_games = 0 ++ parallel_games = 0 ++ ++ for arg in cmd_list: ++ argsplit = arg.split('=', 1) ++ flag = argsplit[0] ++ if flag == '--multi_instance': ++ if argsplit[1] == 'True': ++ multi_instance = True ++ else: ++ multi_instance = False ++ elif flag == '--num_games': ++ num_games = int(argsplit[1]) ++ elif flag == '--parallel_games': ++ parallel_games = int(argsplit[1]) ++ ++ if multi_instance: ++ if num_games % parallel_games != 0: ++ logging.error('Error num_games must be multiply of %d', parallel_games) ++ raise RuntimeError('incompatible num_games/parallel_games combination') ++ num_instance = num_games//parallel_games ++ ++ for arg in cmd_list: ++ argsplit = arg.split('=', 1) ++ flag = argsplit[0] ++ if flag == '--multi_instance': ++ pass ++ elif multi_instance and flag == '--num_games': ++ pass ++ else: ++ new_cmd_list.append(arg) + ++ return multi_instance, num_instance, new_cmd_list + + async def run(*cmd): + """Run the given subprocess command in a coroutine. +@@ -214,20 +294,55 @@ async def run(*cmd): + # Split stdout into lines. + return stdout.split('\n') + ++async def run_distributed(genvs, num_instance, hosts, proclists, numa_nodes, ++ seed, *cmd): ++ """Run the given subprocess command in a coroutine. + +-def get_golden_chunk_records(): ++ Args: ++ *cmd: the command to run and its arguments. ++ ++ Returns: ++ The output that the command wrote to stdout as a list of strings, one line ++ per element (stderr output is piped to stdout). ++ ++ Raises: ++ RuntimeError: if the command returns a non-zero result. ++ """ ++ ++ stdout = await checked_run_distributed(genvs, num_instance, hosts, proclists, ++ numa_nodes, seed, fsdb.mpi_log_dir(), *cmd) ++ ++ log_path = os.path.join(FLAGS.base_dir, get_cmd_name(cmd) + '.log') ++ with gfile.Open(log_path, 'a') as f: ++ f.write(expand_cmd_str(cmd)) ++ f.write('\n') ++ f.write(stdout) ++ f.write('\n') ++ ++ # Split stdout into lines. ++ return stdout.split('\n') ++ ++def get_golden_chunk_records(window_size): + """Return up to num_records of golden chunks to train on. + + Returns: + A list of golden chunks up to num_records in length, sorted by path. + """ + +- pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz') +- return sorted(tf.gfile.Glob(pattern), reverse=True)[:FLAGS.window_size] ++ pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz*') ++ #if window_size > FLAGS.golden_chunk_split * FLAGS.window_size: ++ # window_size = FLAGS.golden_chunk_split * FLAGS.window_size ++ return sorted(tf.gfile.Glob(pattern), reverse=True)[:window_size] ++ + ++def gen_golden_chunk(files, state): ++ buffer = example_buffer.ExampleBuffer(sampling_frac=1.0) ++ buffer.parallel_fill(files[1], threads=1) ++ buffer.flush(os.path.join(fsdb.golden_chunk_dir(), ++ state.output_model_name + '-{}.tfrecord.zz'.format(files[0]))) + + # Self-play a number of games. +-async def selfplay(state, flagfile='selfplay'): ++async def selfplay(state, flagfile='selfplay', post=True): + """Run selfplay and write a training chunk to the fsdb golden_chunk_dir. 
+ + Args: +@@ -235,43 +350,92 @@ async def selfplay(state, flagfile='selfplay'): + flagfile: the name of the flagfile to use for selfplay, either 'selfplay' + (the default) or 'boostrap'. + """ +- + output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name) + holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name) ++ output_dir = '/tmp/minigo' + output_dir ++ ++ multi_instance, num_instance, flag_list = extract_multi_instance( ++ ['--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir, flagfile))]) ++ sp_cmd = ['bazel-bin/cc/selfplay', ++ '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)), ++ '--model={}'.format(state.best_model_path), ++ '--output_dir={}'.format(output_dir), ++ '--holdout_dir={}'.format(holdout_dir)] ++ if not multi_instance: ++ lines = await run( ++ *sp_cmd, ++ '--seed={}'.format(state.seed)) ++ else: ++ if FLAGS.selfplay_node == []: ++ # run selfplay locally ++ lines = await run( ++ 'python3', 'ml_perf/execute.py', ++ '--num_instance={}'.format(num_instance), ++ '--', ++ *sp_cmd, ++ '--seed={}'.format(state.seed)) ++ else: ++ with logged_timer('selfplay mn'): ++ # run one selfplay instance per host ++ lines = await run_distributed( ++ ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'], ++ num_instance, FLAGS.selfplay_node, None, None, state.seed, ++ *sp_cmd) ++ ++ result = '\n'.join(lines) ++ bias = 0.0 ++ #with logged_timer('parse win stats'): ++ # stats = parse_win_stats_table(result, 1)[0] ++ # num_games = stats.total_wins ++ # black_total = stats.black_wins.total ++ # white_total = stats.white_wins.total ++ ++ # logging.info('Black won %0.3f, white won %0.3f', ++ # black_total / num_games, ++ # white_total / num_games) ++ # bias = abs(white_total - black_total)/num_games ++ # logging.info('Black total %d, white total %d, total games %d, bias %0.3f.', ++ # black_total, white_total, num_games, bias) ++ ++ if post: ++ await post_selfplay(state) ++ ++ return bias ++ ++# pack records into golden chunks ++async def post_selfplay(state, clean=False): ++ output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name) ++ output_dir = '/tmp/minigo' + output_dir + +- lines = await run( +- 'bazel-bin/cc/selfplay', +- '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)), +- '--model={}'.format(state.best_model_path), +- '--output_dir={}'.format(output_dir), +- '--holdout_dir={}'.format(holdout_dir), +- '--seed={}'.format(state.seed)) +- result = '\n'.join(lines[-6:]) +- logging.info(result) +- stats = parse_win_stats_table(result, 1)[0] +- num_games = stats.total_wins +- logging.info('Black won %0.3f, white won %0.3f', +- stats.black_wins.total / num_games, +- stats.white_wins.total / num_games) +- +- # Write examples to a single record. +- pattern = os.path.join(output_dir, '*', '*.zz') +- random.seed(state.seed) +- tf.set_random_seed(state.seed) +- np.random.seed(state.seed) +- # TODO(tommadams): This method of generating one golden chunk per generation +- # is sub-optimal because each chunk gets reused multiple times for training, +- # introducing bias. Instead, a fresh dataset should be uniformly sampled out +- # of *all* games in the training window before the start of each training run. +- buffer = example_buffer.ExampleBuffer(sampling_frac=1.0) +- +- # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not +- # so. 
+- logging.info('Writing golden chunk from "{}"'.format(pattern)) +- buffer.parallel_fill(tf.gfile.Glob(pattern)) +- buffer.flush(os.path.join(fsdb.golden_chunk_dir(), +- state.output_model_name + '.tfrecord.zz')) ++ if clean: ++ hosts = FLAGS.selfplay_node ++ if hosts == []: ++ hosts = ['localhost'] ++ cmd = ['rm', '-rf', '/tmp/minigo'] ++ lines = await run_distributed([], 1, hosts, None, None, 0, *cmd) + ++ else: ++ with logged_timer('generate golden chunk'): ++ # Write examples to a single record. ++ hosts = FLAGS.selfplay_node ++ if hosts == []: ++ hosts = ['localhost'] ++ num_instance = len(hosts) ++ numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores ++ train_instance_num = FLAGS.train_instance_per_numa * len(FLAGS.train_node) * numa_per_node ++ selfplay_node_num = len(hosts) ++ selfplay_num = selfplay_node_num ++ out_files_number = int(train_instance_num/gcd(train_instance_num, selfplay_num)) ++ ++ cmd = ['python3', 'ml_perf/divide_golden_chunk.py', ++ '--read_path={}'.format(output_dir + "/*"), ++ '--write_path={}'.format(os.path.join(fsdb.golden_chunk_dir(), state.output_model_name + '.tfrecord.zz')), ++ '--out_files_number={}'.format(out_files_number), ++ '--physical_cores={}'.format(FLAGS.physical_cores), ++ '--base_dir={}'.format(FLAGS.base_dir)] ++ lines = await run_distributed([], 1, hosts, None, None, state.seed, *cmd) ++ ++ print(lines) + + async def train(state, tf_records): + """Run training and write a new model to the fsdb models_dir. +@@ -280,15 +444,66 @@ async def train(state, tf_records): + state: the RL loop State instance. + tf_records: a list of paths to TensorFlow records to train on. + """ ++ train_node = FLAGS.train_node ++ num_node = len(train_node) ++ if num_node == 0: ++ dist_train = False ++ else: ++ dist_train = True ++ ++ if dist_train: ++ intra_threads = FLAGS.numa_cores // FLAGS.train_instance_per_numa - 1 ++ numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores ++ instance_per_node = numa_per_node * FLAGS.train_instance_per_numa ++ ++ mpi_async_progress = '' ++ for i in range(numa_per_node): ++ for j in range(FLAGS.train_instance_per_numa): ++ if (not i==0) or (not j==0): ++ mpi_async_progress += ',' ++ mpi_async_progress += '{}'.format(i * FLAGS.numa_cores + j) ++ else: ++ intra_threads = FLAGS.physical_cores + + model_path = os.path.join(fsdb.models_dir(), state.train_model_name) +- await run( +- 'python3', 'train.py', *tf_records, ++ cmd = ['python3', 'train.py', + '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')), + '--work_dir={}'.format(fsdb.working_dir()), + '--export_path={}'.format(model_path), ++ '--window_size={}'.format(FLAGS.window_size), ++ '--data_path={}'.format(fsdb.golden_chunk_dir()), + '--training_seed={}'.format(state.seed), +- '--freeze=true') ++ '--freeze=True', ++ '--num_inter_threads=1', ++ '--num_intra_threads={}'.format(intra_threads)] ++ ++ if(dist_train): ++ genvs = ['HOROVOD_FUSION_THRESHOLD=134217728', ++ 'KMP_BLOCKTIME=0', ++ 'KMP_HW_SUBSET=1T', ++ 'OMP_BIND_PROC=true', ++ 'I_MPI_ASYNC_PROGRESS_PIN=' + mpi_async_progress, ++ 'OMP_NUM_THREADS={}'.format(intra_threads)] ++ hosts = [] ++ proclists = [] ++ numa_nodes = [] ++ for node in range(num_node): ++ # add all instance to the list ++ for numa in range(numa_per_node): ++ for instance in range(FLAGS.train_instance_per_numa): ++ hosts += [train_node[node]] ++ proclist = numa * FLAGS.numa_cores + FLAGS.train_instance_per_numa + instance * intra_threads ++ proclists += ['{}'.format(proclist)] ++ numa_nodes += ['{}'.format(numa)] ++ ++ lines = 
await run_distributed(genvs, 1, hosts, proclists, numa_nodes, None, *cmd, '--dist_train=True') ++ else: ++ lines = await run(*cmd) ++ print('\n'.join(lines), file=sys.stderr) ++ ++def post_train(state): ++ mll.save_model(state.iter_num-1) ++ + # Append the time elapsed from when the RL was started to when this model + # was trained. + elapsed = time.time() - state.start_time +@@ -315,7 +530,7 @@ async def validate(state, holdout_glob): + '--work_dir={}'.format(fsdb.working_dir())) + + +-async def evaluate_model(eval_model_path, target_model_path, sgf_dir, seed): ++async def evaluate_model(eval_model_path, target_model_path, sgf_dir, seed, flagfile='eval', gating_win_rate=None): + """Evaluate one model against a target. + + Args: +@@ -328,24 +543,53 @@ async def evaluate_model(eval_model_path, target_model_path, sgf_dir, seed): + The win-rate of eval_model against target_model in the range [0, 1]. + """ + +- lines = await run( +- 'bazel-bin/cc/eval', +- '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'eval.flags')), +- '--model={}'.format(eval_model_path), +- '--model_two={}'.format(target_model_path), +- '--sgf_dir={}'.format(sgf_dir), +- '--seed={}'.format(seed)) +- result = '\n'.join(lines[-7:]) ++ multi_instance, num_instance, flag_list = extract_multi_instance( ++ ['--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir, flagfile))]) ++ eval_cmd = ['bazel-bin/cc/eval', ++ '--flagfile={}.flags'.format(os.path.join(FLAGS.flags_dir, flagfile)), ++ '--model={}'.format(eval_model_path), ++ '--model_two={}'.format(target_model_path), ++ '--sgf_dir={}'.format(sgf_dir)] ++ if not multi_instance: ++ lines = await run(*eval_cmd, '--seed={}'.format(seed)) ++ else: ++ if FLAGS.eval_node == []: ++ # run eval locally ++ lines = await run( ++ 'python3', 'ml_perf/execute.py', ++ '--num_instance={}'.format(num_instance), ++ '--', ++ *eval_cmd, ++ '--seed={}'.format(seed)) ++ else: ++ # run one selfplay instance per host ++ lines = await run_distributed( ++ ['LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow'], ++ num_instance, FLAGS.eval_node, None, None, seed, ++ *eval_cmd) ++ result = '\n'.join(lines) + logging.info(result) + eval_stats, target_stats = parse_win_stats_table(result, 2) + num_games = eval_stats.total_wins + target_stats.total_wins + win_rate = eval_stats.total_wins / num_games ++ eval_total = eval_stats.total_wins ++ black_total = eval_stats.black_wins.total ++ white_total = eval_stats.white_wins.total ++ ++ if eval_total != 0: ++ bias = abs(white_total - black_total) / eval_total ++ else: ++ # by definition bias = 0.0 if eval model win zero games ++ bias = 0.0 + logging.info('Win rate %s vs %s: %.3f', eval_stats.model_name, + target_stats.model_name, win_rate) ++ logging.info('Black total %d, white total %d, eval total %d, bias %0.3f.', ++ black_total, white_total, eval_total, bias) ++ + return win_rate + + +-async def evaluate_trained_model(state): ++async def evaluate_trained_model(state, gating_win_rate=None): + """Evaluate the most recently trained model against the current best model. 
+ + Args: +@@ -354,27 +598,151 @@ async def evaluate_trained_model(state): + + return await evaluate_model( + state.train_model_path, state.best_model_path, +- os.path.join(fsdb.eval_dir(), state.train_model_name), state.seed) ++ os.path.join(fsdb.eval_dir(), state.train_model_name), state.seed, gating_win_rate=gating_win_rate) ++ ++ ++async def evaluate_target_model(state, gating_win_rate=None): ++ sgf_dir = os.path.join(fsdb.eval_dir(), 'target') ++ target = 'tf,' + os.path.join(fsdb.models_dir(), 'target.pb') ++ return await evaluate_model( ++ state.train_model_path, target, sgf_dir, state.iter_num, gating_win_rate=gating_win_rate) + ++async def set_up_train(): ++ train_node = FLAGS.train_node ++ num_node = len(train_node) ++ if num_node == 0: ++ dist_train = False ++ else: ++ dist_train = True ++ ++ if dist_train: ++ numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores ++ instance_per_node = numa_per_node * FLAGS.train_instance_per_numa ++ intra_threads = FLAGS.numa_cores // FLAGS.train_instance_per_numa - 1 ++ ++ mpi_async_progress = '' ++ for i in range(numa_per_node): ++ for j in range(FLAGS.train_instance_per_numa): ++ if (not i==0) or (not j==0): ++ mpi_async_progress += ',' ++ mpi_async_progress += '{}'.format(i * FLAGS.numa_cores + j) ++ else: ++ intra_threads = FLAGS.physical_cores + +-def rl_loop(): ++ cmd = ['python3', 'train.py', ++ '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')), ++ '--work_dir={}'.format(fsdb.working_dir()), ++ '--window_size={}'.format(FLAGS.window_size), ++ '--data_path={}'.format(fsdb.golden_chunk_dir()), ++ '--training_seed={}'.format(0), ++ '--freeze=True', ++ '--quantization={}'.format(FLAGS.quantization), ++ '--quantize_test_steps={}'.format(FLAGS.quantize_test_steps), ++ '--quantize_test_batch_size={}'.format(FLAGS.quantize_test_batch_size), ++ '--random_rotation={}'.format(FLAGS.random_rotation), ++ '--eval_min_max_every_epoch={}'.format(FLAGS.eval_min_max_every_epoch), ++ '--host_addr={}'.format(FLAGS.train_node[0]), ++ '--num_inter_threads=1', ++ '--num_intra_threads={}'.format(intra_threads)] ++ ++ if(dist_train): ++ genvs = ['HOROVOD_FUSION_THRESHOLD=134217728', ++ 'KMP_BLOCKTIME=0', ++ 'KMP_HW_SUBSET=1T', ++ 'OMP_BIND_PROC=true', ++ 'I_MPI_ASYNC_PROGRESS_PIN=' + mpi_async_progress, ++ 'OMP_NUM_THREADS={}'.format(intra_threads)] ++ hosts = [] ++ proclists = [] ++ numa_nodes = [] ++ for node in range(num_node): ++ # add all instance to the list ++ for numa in range(numa_per_node): ++ for instance in range(FLAGS.train_instance_per_numa): ++ hosts += [train_node[node]] ++ proclist = numa * FLAGS.numa_cores + FLAGS.train_instance_per_numa + instance * intra_threads ++ proclists += ['{}'.format(proclist)] ++ numa_nodes += ['{}'.format(numa)] ++ ++ lines = await run_distributed(genvs, 1, hosts, proclists, numa_nodes, None, *cmd, '--dist_train=True') ++ else: ++ lines = run(*cmd) ++ print('\n'.join(lines), file=sys.stderr) ++ ++def init_socket(host): ++ count = 0 ++ while(True): ++ try: ++ if count == 0: ++ print("init_socket():") ++ reception = socket.socket() ++ addr = (host, 52175) ++ if count == 0: ++ print('connecting to server...') ++ reception.connect(addr) ++ print('worker: connection established.') ++ except: ++ count += 1 ++ if count % 10 == 0: ++ print("Error: server {} not found {} times ...\r".format(host, count)) ++ time.sleep(0.2) ++ else: ++ return reception ++ ++def train_begin(export_path): ++ reception = init_socket(FLAGS.train_node[0]) ++ reception.send(export_path.encode()) ++ return reception ++ ++def 
train_finished(reception): ++ reception.recv(1024) ++ reception.close() ++ ++def train_one_step(export_path): ++ reception = train_begin(export_path) ++ train_finished(reception) ++ ++def train_stop(): ++ reception = train_begin('stop training') ++ train_finished(reception) ++ ++def rl_loop(out_files_number): + """The main reinforcement learning (RL) loop.""" + ++ # The 'window_size' reflect the split of golden chunk after selfplay ++ # basically each selfplay generate N golden chunks instead of one to ++ # accelerate write golden chunks (N determined by FLAGS.golden_chunk_slit). ++ # Yet this make effective_window_size dynamic. It should increase by N-1 ++ # to keep the effective window size not change. Then increase by N if no big ++ # chunk left. Until it reach FLAGS.window_size * FLAGS.golden_chunk_split ++ ++ window_size = FLAGS.window_size ++ + state = State() + +- if FLAGS.checkpoint_dir: ++ if FLAGS.checkpoint_dir != None: + # Start from a partially trained model. +- initialize_from_checkpoint(state) ++ initialize_from_checkpoint(state, out_files_number) ++ window_size = len(get_golden_chunk_records(window_size)) ++ mll.init_stop() ++ mll.run_start() ++ state.start_time = time.time() + else: + # Play the first round of selfplay games with a fake model that returns + # random noise. We do this instead of playing multiple games using a single + # model bootstrapped with random noise to avoid any initial bias. ++ mll.init_stop() ++ mll.run_start() ++ state.start_time = time.time() ++ mll.epoch_start(state.iter_num) + wait(selfplay(state, 'bootstrap')) ++ window_size += FLAGS.golden_chunk_split + + # Train a real model from the random selfplay games. +- tf_records = get_golden_chunk_records() ++ tf_records = get_golden_chunk_records(window_size) + state.iter_num += 1 + wait(train(state, tf_records)) ++ post_train(state) + + # Select the newly trained model as the best. + state.best_model_name = state.train_model_name +@@ -382,45 +750,198 @@ def rl_loop(): + + # Run selfplay using the new model. + wait(selfplay(state)) ++ window_size += FLAGS.golden_chunk_split ++ mll.epoch_stop(state.iter_num - 1) ++ ++ first_iter = True ++ state_copy = None ++ model_win_rate = -1.0 + ++ socket.setdefaulttimeout(99999999) + # Now start the full training loop. + while state.iter_num <= FLAGS.iterations: +- # Build holdout glob before incrementing the iteration number because we +- # want to run validation on the previous generation. +- holdout_glob = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num, +- '*') +- +- # Train on shuffled game data from recent selfplay rounds. +- tf_records = get_golden_chunk_records() +- state.iter_num += 1 +- wait(train(state, tf_records)) ++ with logged_timer('iteration time {}'.format(state.iter_num)): ++ mll.epoch_start(state.iter_num) ++ # Build holdout glob before incrementing the iteration number because we ++ # want to run validation on the previous generation. ++ holdout_glob = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num, ++ '*') ++ ++ # Train on shuffled game data from recent selfplay rounds. ++ #tf_records = get_golden_chunk_records(window_size) ++ ++ if FLAGS.parallel_post_train == 0: ++ state.iter_num += 1 ++ train_one_step(os.path.join(fsdb.models_dir(), state.train_model_name)) ++ post_train(state) ++ # Run eval, validation & selfplay sequentially. 
++ wait(selfplay(state)) ++ model_win_rate = wait(evaluate_trained_model(state)) ++ if model_win_rate >= FLAGS.gating_win_rate: ++ # Promote the trained model to the best model and increment the generation ++ # number. ++ state.best_model_name = state.train_model_name ++ state.gen_num += 1 ++ mll.epoch_stop(state.iter_num - 1) ++ # ^ compensate iter_num += 1 above ++ ++ if FLAGS.parallel_post_train == 1: ++ state.iter_num += 1 ++ reception = train_begin(os.path.join(fsdb.models_dir(), state.train_model_name)) ++ wait(selfplay(state)) ++ train_finished(reception) ++ post_train(state) ++ # Run eval, validation & selfplay in parallel. ++ model_win_rate = wait(evaluate_trained_model(state)) ++ if model_win_rate >= FLAGS.gating_win_rate: ++ # Promote the trained model to the best model and increment the generation ++ # number. ++ state.best_model_name = state.train_model_name ++ state.gen_num += 1 ++ mll.epoch_stop(state.iter_num - 1) ++ # ^ compensate iter_num += 1 above ++ ++ if FLAGS.parallel_post_train == 2: ++ state_copy = copy.copy(state) ++ state.iter_num += 1 ++ # run training and evaluation/validation/selfplay in parallel ++ # this is software pipeline-ish parallelism ++ # start train[iter] ++ # | start valiation[iter-1] ++ # | wait for validation ++ # | if not first time start evaluation[iter-1] ++ # | if not first time wait for evaluation ++ # | if not first time check for promotion ++ # | start selfplay[iter] ++ # | wait selfplay ++ # wait train ++ reception = train_begin(os.path.join(fsdb.models_dir(), state.train_model_name)) ++ if not first_iter: ++ post_train(state_copy) ++ model_win_rate = wait(evaluate_trained_model(state_copy)) ++ if model_win_rate >= FLAGS.gating_win_rate: ++ # Promote the trained model to the best model ++ state.best_model_name = state_copy.train_model_name ++ mll.epoch_stop(state.iter_num - 1 - 1) ++ # ^---^-- compensate iter_num += 1 above ++ # +-- it is actually last iteration ++ else: ++ first_iter = False ++ wait(selfplay(state)) ++ train_finished(reception) ++ if not first_iter: ++ if model_win_rate >= FLAGS.gating_win_rate: ++ # Increment the generation number. ++ train_model_name_before = state.train_model_name ++ state.gen_num += 1 ++ ++ # Output dependency: ++ # In parallel post train mode 1, there is output dependence between ++ # evaluation of iteration i (gen_num++) and train of iteration i+1 ++ # (use gen_num for export model path). In parallel post train mode ++ # 2 (this mode), the evluation of iteration i is postponed to ++ # iteration i+1 after the training started, thus train of iteration ++ # i+1 won't generate correct model name when promotion needs to ++ # happen. 
This part fix up the model name when evaluation decides ++ # there's a promotion ++ train_model_name_after = state.train_model_name ++ model_paths = glob.glob(os.path.join(fsdb.models_dir(), '{}.*'.format(train_model_name_before))) ++ for model in model_paths: ++ logging.info('moving {} --> {}'.format(model, ++ train_model_name_after.join(model.rsplit(train_model_name_before, 1)))) ++ shutil.copy(model, train_model_name_after.join(model.rsplit(train_model_name_before, 1))) ++ ++ if FLAGS.parallel_post_train == 3: ++ state_copy = copy.copy(state) ++ state.iter_num += 1 ++ # run training and evaluation/validation/selfplay in parallel ++ # this is software pipeline-ish parallelism ++ # start train[iter] ++ # | start valiation[iter-1] ++ # | wait for validation ++ # | if not first time start evaluation[iter-1] ++ # | if not first time wait for evaluation ++ # | if not first time check for promotion ++ # | start selfplay[iter] ++ # | wait selfplay ++ # wait train ++ reception = train_begin(os.path.join(fsdb.models_dir(), state.train_model_name)) ++ #train_handle = asyncio.gather(train(state, tf_records), return_exceptions=True) ++ if not first_iter: ++ post_train(state_copy) ++ # predict the play model as promoted model ++ state_play = copy.copy(state_copy) ++ state_play.best_model_name = state_copy.train_model_name ++ model_win_rate, _ = wait([evaluate_trained_model(state_copy, FLAGS.gating_win_rate), ++ selfplay(state_play, post=False)]) ++ if model_win_rate >= FLAGS.gating_win_rate: ++ logging.info('promote model') ++ # Promote the trained model to the best model ++ state.best_model_name = state_copy.train_model_name ++ else: ++ logging.info('no promote model') ++ mll.epoch_stop(state.iter_num - 1 - 1) ++ # ^---^-- compensate iter_num += 1 above ++ # +-- it is actually last iteration ++ if model_win_rate >= FLAGS.gating_win_rate: ++ # prediction hit ++ wait(post_selfplay(state_play)) ++ else: ++ # prediction not hit ++ wait(selfplay(state)) ++ else: ++ first_iter = False ++ wait(selfplay(state)) ++ train_finished(reception) ++ if not first_iter: ++ if model_win_rate >= FLAGS.gating_win_rate: ++ # Increment the generation number. ++ train_model_name_before = state.train_model_name ++ state.gen_num += 1 ++ ++ # Output dependency: ++ # In parallel post train mode 1, there is output dependence between ++ # evaluation of iteration i (gen_num++) and train of iteration i+1 ++ # (use gen_num for export model path). In parallel post train mode ++ # 2 (this mode), the evluation of iteration i is postponed to ++ # iteration i+1 after the training started, thus train of iteration ++ # i+1 won't generate correct model name when promotion needs to ++ # happen. This part fix up the model name when evaluation decides ++ # there's a promotion ++ train_model_name_after = state.train_model_name ++ model_paths = glob.glob(os.path.join(fsdb.models_dir(), '{}.*'.format(train_model_name_before))) ++ for model in model_paths: ++ logging.info('moving {} --> {}'.format(model, ++ train_model_name_after.join(model.rsplit(train_model_name_before, 1)))) ++ shutil.copy(model, train_model_name_after.join(model.rsplit(train_model_name_before, 1))) ++ ++ # after the main loop, if parallel_post_train = 2 ++ # needs to print epoch_stop for last epoch ++ if FLAGS.parallel_post_train == 2 or FLAGS.parallel_post_train == 3: ++ mll.epoch_stop(state.iter_num - 1) ++ train_stop() + +- if FLAGS.parallel_post_train: +- # Run eval, validation & selfplay in parallel. 
+- model_win_rate, _, _ = wait([ +- evaluate_trained_model(state), +- validate(state, holdout_glob), +- selfplay(state)]) +- else: +- # Run eval, validation & selfplay sequentially. +- model_win_rate = wait(evaluate_trained_model(state)) +- wait(validate(state, holdout_glob)) +- wait(selfplay(state)) ++def main(unused_argv): ++ """Run the reinforcement learning loop.""" + +- if model_win_rate >= FLAGS.gating_win_rate: +- # Promote the trained model to the best model and increment the generation +- # number. +- state.best_model_name = state.train_model_name +- state.gen_num += 1 ++ numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores ++ train_instance_num = FLAGS.train_instance_per_numa * len(FLAGS.train_node) * numa_per_node ++ selfplay_node_num = max(len(FLAGS.selfplay_node), 1) ++ selfplay_num = selfplay_node_num ++ out_files_number = int(train_instance_num/gcd(train_instance_num, selfplay_num)*selfplay_node_num) + ++ FLAGS.window_size = out_files_number * FLAGS.window_size + +-def main(unused_argv): +- """Run the reinforcement learning loop.""" ++ if(FLAGS.setup_train_workers): ++ wait(set_up_train()) ++ return + ++ mll.init_start() + print('Wiping dir %s' % FLAGS.base_dir, flush=True) + shutil.rmtree(FLAGS.base_dir, ignore_errors=True) + dirs = [fsdb.models_dir(), fsdb.selfplay_dir(), fsdb.holdout_dir(), +- fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir()] ++ fsdb.eval_dir(), fsdb.golden_chunk_dir(), fsdb.working_dir(), ++ fsdb.mpi_log_dir()] + for d in dirs: + ensure_dir_exists(d); + +@@ -440,9 +961,13 @@ def main(unused_argv): + for handler in logging.getLogger().handlers: + handler.setFormatter(formatter) + ++ logging.info('Selfplay nodes = {}'.format(FLAGS.selfplay_node)) ++ logging.info('Train nodes = {}'.format(FLAGS.train_node)) ++ logging.info('Eval nodes = {}'.format(FLAGS.eval_node)) ++ + with logged_timer('Total time'): + try: +- rl_loop() ++ rl_loop(out_files_number) + finally: + asyncio.get_event_loop().close() + +diff --git a/ml_perf/repeat_run.sh b/ml_perf/repeat_run.sh +old mode 100644 +new mode 100755 +diff --git a/ml_perf/utils.py b/ml_perf/utils.py +index 8e6b7c6..a26be49 100644 +--- a/ml_perf/utils.py ++++ b/ml_perf/utils.py +@@ -20,18 +20,46 @@ sys.path.insert(0, '.') # nopep8 + import asyncio + import logging + import os ++import os.path ++import multiprocessing ++import subprocess ++import fcntl + + from absl import flags + from utils import * + + + def expand_cmd_str(cmd): +- return ' '.join(flags.FlagValues().read_flags_from_files(cmd)) ++ result = ' '.join(flags.FlagValues().read_flags_from_files(cmd)) ++ if cmd[0] == 'mpiexec' or cmd[0] == 'mpirun': ++ result = ' \\\n-host '.join(result.split(' -host ')) ++ # avoid buffer too big to block I/O ++ return result[:8192] + + + def get_cmd_name(cmd): + if cmd[0] == 'python' or cmd[0] == 'python3': + path = cmd[1] ++ for index in range(len(cmd)): ++ if cmd[index] == 'bazel-bin/cc/selfplay': ++ path = cmd[index] ++ break ++ if cmd[index] == 'bazel-bin/cc/eval': ++ path = cmd[index] ++ break ++ elif cmd[0] == 'mpirun' or cmd[0] == 'mpiexec': ++ for index in range(len(cmd)): ++ if cmd[index] == 'train.py': ++ path = cmd[index] ++ break ++ if cmd[index] == 'bazel-bin/cc/selfplay': ++ path = cmd[index] ++ break ++ if cmd[index] == 'bazel-bin/cc/eval': ++ path = cmd[index] ++ break ++ if cmd[index] == 'python' or cmd[index] == 'python3': ++ path = cmd[index+1] + else: + path = cmd[0] + return os.path.splitext(os.path.basename(path))[0] +@@ -73,6 +101,127 @@ async def checked_run(*cmd): + + return stdout + 
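checked_run_distributed(), added below, is the multi-node counterpart of checked_run(): it assembles a single mpiexec command with one ':'-separated section per host, optionally adds a KMP_AFFINITY pin list and a numactl binding for that host, and wraps the real command in ml_perf/execute.py (added earlier in this patch) whenever more than one instance is requested, so each host then fans its share out across local cores. For a hypothetical two-host selfplay launch (4096 games at 16 parallel games per instance gives 256 instances, 128 per host, per the 9/selfplay_mi.flags values above) the assembled command is roughly of the following shape; the host names, log directory and seed are placeholders:

# Approximate shape only; see checked_run_distributed() below for the real construction.
approx_cmd = [
    'mpiexec', '-outfile-pattern', '<mpi_log_dir>/out-selfplay-<seed>-%r.txt',
    '-genv', 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:cc/tensorflow',
    '-host', 'host-a',
    'python3', 'ml_perf/execute.py', '--num_instance=128', '--',
    'bazel-bin/cc/selfplay', '--flagfile=<selfplay.flags>', '--seed=<seed>',
    ':',
    '-host', 'host-b',
    'python3', 'ml_perf/execute.py', '--num_instance=128', '--',
    'bazel-bin/cc/selfplay', '--flagfile=<selfplay.flags>', '--seed=<seed + 1023779831>',
]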
++async def checked_run_distributed(genvs, num_instance, hosts, proclists, numa_nodes, seed, log_path, *cmd): ++ mpi_cmd = ['mpiexec', ++ '-outfile-pattern', ++ '{}/out-{}-{}-%r.txt'.format(log_path, get_cmd_name(cmd), seed)] ++ for genv in genvs: ++ mpi_cmd = mpi_cmd + ['-genv', genv] ++ num_nodes = len(hosts) ++ instance_per_node = num_instance // num_nodes ++ instance_remaining = num_instance - num_nodes * instance_per_node ++ for index in range(num_nodes): ++ if index < instance_remaining: ++ instance_to_launch = instance_per_node + 1 ++ else: ++ instance_to_launch = instance_per_node ++ ++ if index > 0: ++ mpi_cmd = mpi_cmd + [':'] ++ mpi_cmd = mpi_cmd + ['-host', hosts[index]] ++ ++ if proclists != None: ++ mpi_cmd = mpi_cmd + ['-env', 'KMP_AFFINITY=granularity=fine,compact,1,{}'.format(proclists[index])] ++ ++ if numa_nodes != None: ++ mpi_cmd = mpi_cmd + ['numactl', '-l', '-N', numa_nodes[index]] ++ ++ if num_instance > 1: ++ mpi_cmd = mpi_cmd + ['python3', 'ml_perf/execute.py', ++ '--num_instance={}'.format(instance_to_launch), ++ '--'] ++ mpi_cmd = mpi_cmd + [*cmd] ++ ++ if seed != None: ++ # ensure different seed for different node ++ mpi_cmd = mpi_cmd + ['--seed={}'.format(seed + index*1023779831)] ++ ++ result = await checked_run(*mpi_cmd) ++ for index in range(num_nodes): ++ filename = '{}/out-{}-{}-{}.txt'.format(log_path, get_cmd_name(cmd), seed, ++ index) ++ outfile = open(filename, 'r') ++ result += outfile.read() ++ outfile.close() ++ return result ++ ++def checked_run_mi(num_instance, *cmd): ++ name = get_cmd_name(cmd) ++ logging.debug('Running %s*%d: %s', name, num_instance, expand_cmd_str(cmd)) ++ num_parallel_instance = int(multiprocessing.cpu_count()) ++ procs=[None]*num_parallel_instance ++ results = [""]*num_parallel_instance ++ result_list = [] ++ ++ cur_instance = 0 ++ # add new proc into procs ++ while cur_instance < num_instance or not all ( ++ proc is None for proc in procs): ++ if None in procs and cur_instance < num_instance: ++ index = procs.index(None) ++ subproc_cmd = [ ++ 'OMP_NUM_THREADS=1', ++ 'KMP_AFFINITY=granularity=fine,proclist=[{}],explicit'.format( ++ ','.join(str(i) for i in list(range( ++ index, index+1)))), ++ *cmd, ++ '--instance_id={}'.format(cur_instance), ++ ] ++ subproc_cmd = ' '.join(subproc_cmd) ++ if (cur_instance == 0): ++ logging.debug("subproc_cmd = {}".format(subproc_cmd)) ++ procs[index] = subprocess.Popen(subproc_cmd, shell=True, ++ stdout=subprocess.PIPE, ++ stderr=subprocess.STDOUT) ++ ++ proc_count = 0 ++ for i in range(num_parallel_instance): ++ if procs[i] != None: ++ proc_count += 1 ++ logging.debug('started instance {} in proc {}. 
proc count = {}'.format( ++ cur_instance, index, proc_count)) ++ ++ # change stdout of the process to non-blocking ++ # this is for collect output in a single thread ++ flags = fcntl.fcntl(procs[index].stdout, fcntl.F_GETFL) ++ fcntl.fcntl(procs[index].stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK) ++ ++ cur_instance += 1 ++ for index in range(num_parallel_instance): ++ if procs[index] != None: ++ # collect proc output ++ while True: ++ try: ++ line = procs[index].stdout.readline() ++ if line == b'': ++ break ++ results[index] = results[index] + line.decode() ++ except IOError: ++ break ++ ++ ret_val = procs[index].poll() ++ if ret_val == None: ++ continue ++ elif ret_val != 0: ++ logging.info(results[index]) ++ raise RuntimeError( ++ 'Non-zero return code (%d) executing %s' % ( ++ ret_val, subproc_cmd)) ++ ++ if index == 0: ++ logging.debug(results[index]) ++ result_list.append(results[index]) ++ results[index] = "" ++ procs[index] = None ++ ++ proc_count = 0 ++ for i in range(num_parallel_instance): ++ if procs[i] != None: ++ proc_count += 1 ++ logging.debug('proc {} finished. proc count = {}'.format( ++ index, proc_count)) ++ time.sleep(0.001) # avoid busy loop ++ return result_list + + def wait(aws): + """Waits for all of the awaitable objects (e.g. coroutines) in aws to finish. +diff --git a/oneoffs/distillation.py b/oneoffs/distillation.py +old mode 100755 +new mode 100644 +diff --git a/oneoffs/embeddings.py b/oneoffs/embeddings.py +old mode 100755 +new mode 100644 +diff --git a/oneoffs/embeddings_graphs.py b/oneoffs/embeddings_graphs.py +old mode 100755 +new mode 100644 +diff --git a/oneoffs/l2_cost_by_var.py b/oneoffs/l2_cost_by_var.py +old mode 100755 +new mode 100644 +diff --git a/oneoffs/modelstats.sh b/oneoffs/modelstats.sh +old mode 100755 +new mode 100644 +diff --git a/oneoffs/training_curve.py b/oneoffs/training_curve.py +old mode 100755 +new mode 100644 +diff --git a/preprocessing.py b/preprocessing.py +index 595db38..bc134e2 100644 +--- a/preprocessing.py ++++ b/preprocessing.py +@@ -26,6 +26,9 @@ import symmetries + import numpy as np + import tensorflow as tf + ++import horovod.tensorflow as hvd ++from tensorflow.python.data.experimental.ops import optimization ++ + TF_RECORD_CONFIG = tf.python_io.TFRecordOptions( + tf.python_io.TFRecordCompressionType.ZLIB) + +@@ -84,11 +87,11 @@ def batch_parse_tf_example(batch_size, example_batch): + 'outcome': tf.FixedLenFeature([], tf.float32), + } + parsed = tf.parse_example(example_batch, features) +- x = tf.decode_raw(parsed['x'], tf.uint8) ++ x = tf.io.decode_raw(parsed['x'], tf.uint8) + x = tf.cast(x, tf.float32) + x = tf.reshape(x, [batch_size, go.N, go.N, + features_lib.NEW_FEATURES_PLANES]) +- pi = tf.decode_raw(parsed['pi'], tf.float32) ++ pi = tf.io.decode_raw(parsed['pi'], tf.float32) + pi = tf.reshape(pi, [batch_size, go.N * go.N + 1]) + outcome = parsed['outcome'] + outcome.set_shape([batch_size]) +@@ -98,7 +101,7 @@ def batch_parse_tf_example(batch_size, example_batch): + def read_tf_records(batch_size, tf_records, num_repeats=1, + shuffle_records=True, shuffle_examples=True, + shuffle_buffer_size=None, interleave=True, +- filter_amount=1.0): ++ filter_amount=1.0, dist_train=False, seed = 0): + """ + Args: + batch_size: batch size to return +@@ -116,10 +119,17 @@ def read_tf_records(batch_size, tf_records, num_repeats=1, + raise ValueError("Must set shuffle buffer size if shuffling examples") + + tf_records = list(tf_records) +- if shuffle_records: +- random.shuffle(tf_records) ++ ++ random.seed(seed) ++ ++ #if 
shuffle_records: ++ # random.shuffle(tf_records) ++ + record_list = tf.data.Dataset.from_tensor_slices(tf_records) + ++ if dist_train: ++ record_list = record_list.shard(hvd.size(), hvd.rank()) ++ + # compression_type here must agree with write_tf_examples + map_func = functools.partial( + tf.data.TFRecordDataset, +@@ -130,20 +140,85 @@ def read_tf_records(batch_size, tf_records, num_repeats=1, + # cycle_length = how many tfrecord files are read in parallel + # The idea is to shuffle both the order of the files being read, + # and the examples being read from the files. +- dataset = record_list.apply(tf.contrib.data.parallel_interleave( +- map_func, cycle_length=64, sloppy=True)) ++ dataset = record_list.apply(tf.data.experimental.parallel_interleave( ++ map_func, cycle_length=1000, sloppy=True)) + else: + dataset = record_list.flat_map(map_func) + + if filter_amount < 1.0: + dataset = dataset.filter( +- lambda _: tf.random_uniform([]) < filter_amount) ++ lambda _: tf.random.uniform([], seed=seed) < filter_amount) ++ dataset = dataset.apply(optimization.optimize(["filter_with_random_uniform_fusion"])) ++ ++ #if dist_train: ++ # dataset = dataset.shard(hvd.size(), hvd.rank()) + + dataset = dataset.repeat(num_repeats) ++ + if shuffle_examples: + dataset = dataset.shuffle(buffer_size=shuffle_buffer_size) + +- dataset = dataset.batch(batch_size) ++ dataset = dataset.batch(batch_size, drop_remainder=True) ++ return dataset ++ ++def read_tf_records_new(batch_size, tf_records, num_repeats=1, ++ shuffle_records=True, shuffle_examples=True, ++ shuffle_buffer_size=None, interleave=True, ++ filter_amount=1.0, dist_train=False, seed = 0): ++ """ ++ Args: ++ batch_size: batch size to return ++ tf_records: a list of tf_record filenames ++ num_repeats: how many times the data should be read (default: One) ++ shuffle_records: whether to shuffle the order of files read ++ shuffle_examples: whether to shuffle the tf.Examples ++ shuffle_buffer_size: how big of a buffer to fill before shuffling. ++ interleave: iwhether to interleave examples from multiple tf_records ++ filter_amount: what fraction of records to keep ++ Returns: ++ a tf dataset of batched tensors ++ """ ++ if shuffle_examples and not shuffle_buffer_size: ++ raise ValueError("Must set shuffle buffer size if shuffling examples") ++ ++ random.seed(seed) ++ ++ #if shuffle_records: ++ # random.shuffle(tf_records) ++ record_list = tf.data.Dataset.from_tensor_slices(tf_records) ++ ++ if dist_train: ++ record_list = record_list.shard(hvd.size(), hvd.rank()) ++ ++ # compression_type here must agree with write_tf_examples ++ map_func = functools.partial( ++ tf.data.TFRecordDataset, ++ buffer_size=8 * 1024 * 1024, ++ compression_type='ZLIB') ++ ++ if interleave: ++ # cycle_length = how many tfrecord files are read in parallel ++ # The idea is to shuffle both the order of the files being read, ++ # and the examples being read from the files. 
++ dataset = record_list.apply(tf.data.experimental.parallel_interleave( ++ map_func, cycle_length=1000, sloppy=True)) ++ else: ++ dataset = record_list.flat_map(map_func) ++ ++ if filter_amount < 1.0: ++ dataset = dataset.filter( ++ lambda _: tf.random.uniform([], seed=seed) < filter_amount) ++ dataset = dataset.apply(optimization.optimize(["filter_with_random_uniform_fusion"])) ++ ++ #if dist_train: ++ # dataset = dataset.shard(hvd.size(), hvd.rank()) ++ ++ dataset = dataset.repeat(num_repeats) ++ ++ if shuffle_examples: ++ dataset = dataset.shuffle(buffer_size=shuffle_buffer_size) ++ ++ dataset = dataset.batch(batch_size, drop_remainder=True) + return dataset + + +@@ -181,7 +256,8 @@ def _random_rotation_pure_tf(x_tensor, outcome_tensor): + def get_input_tensors(batch_size, tf_records, num_repeats=1, + shuffle_records=True, shuffle_examples=True, + shuffle_buffer_size=None, +- filter_amount=0.05, random_rotation=True): ++ filter_amount=0.05, random_rotation=True, ++ dist_train=False, seed = 0, make_one_shot = False): + """Read tf.Records and prepare them for ingestion by dual_net. + + See `read_tf_records` for parameter documentation. +@@ -197,18 +273,51 @@ def get_input_tensors(batch_size, tf_records, num_repeats=1, + shuffle_examples=shuffle_examples, + shuffle_buffer_size=shuffle_buffer_size, + filter_amount=filter_amount, +- interleave=True) ++ interleave=True, ++ dist_train=dist_train, seed=seed) + dataset = dataset.filter(lambda t: tf.equal(tf.shape(t)[0], batch_size)) + dataset = dataset.map( + functools.partial(batch_parse_tf_example, batch_size)) + if random_rotation: + dataset = dataset.map(_random_rotation_pyfunc) + +- return dataset.make_one_shot_iterator().get_next() ++ dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE) ++ if make_one_shot: ++ return dataset.make_one_shot_iterator().get_next() ++ else: ++ return dataset ++ ++def get_input_tensors_new(batch_size, tf_records, num_repeats=1, ++ shuffle_records=True, shuffle_examples=True, ++ shuffle_buffer_size=None, ++ filter_amount=0.05, random_rotation=True, ++ dist_train=False, seed = 0, make_one_shot = False): ++ """Read tf.Records and prepare them for ingestion by dual_net. ++ See `read_tf_records` for parameter documentation. ++ Returns a dict of tensors (see return value of batch_parse_tf_example) ++ """ ++ #print("Reading tf_records from {} inputs".format(len(tf_records))) ++ dataset = read_tf_records_new( ++ batch_size, ++ tf_records, ++ num_repeats=num_repeats, ++ shuffle_records=shuffle_records, ++ shuffle_examples=shuffle_examples, ++ shuffle_buffer_size=shuffle_buffer_size, ++ filter_amount=filter_amount, ++ interleave=True, ++ dist_train=dist_train, seed=seed) ++ dataset = dataset.filter(lambda t: tf.equal(tf.shape(t)[0], batch_size)) ++ dataset = dataset.map( ++ functools.partial(batch_parse_tf_example, batch_size)) ++ if random_rotation: ++ dataset = dataset.map(_random_rotation_pyfunc) + ++ dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE) ++ return dataset.make_initializable_iterator() + + def get_tpu_input_tensors(batch_size, tf_records, num_repeats=1, +- filter_amount=1, random_rotation=True): ++ filter_amount=1, random_rotation=True, seed=0): + # TPUs trains on sequential golden chunks to simplify preprocessing and + # reproducibility. 
+ assert len(tf_records) < 101, "Use example_buffer to build a golden_chunk" +@@ -221,7 +330,7 @@ def get_tpu_input_tensors(batch_size, tf_records, num_repeats=1, + shuffle_examples=False, + shuffle_buffer_size=None, + filter_amount=filter_amount, +- interleave=False) ++ interleave=False, seed=seed) + dataset = dataset.filter(lambda t: tf.equal(tf.shape(t)[0], batch_size)) + dataset = dataset.map( + functools.partial(batch_parse_tf_example, batch_size)) +diff --git a/produce_min_max_log.py b/produce_min_max_log.py +new file mode 100644 +index 0000000..493ce38 +--- /dev/null ++++ b/produce_min_max_log.py +@@ -0,0 +1,94 @@ ++# Copyright 2019 Google LLC ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++ ++#!/usr/bin/env python ++# encoding: utf-8 ++ ++import time ++import os ++ ++import tensorflow as tf ++from tensorflow.core.framework import graph_pb2 ++from tensorflow.python.platform import gfile ++ ++from absl import app, flags ++ ++import preprocessing ++import dual_net ++ ++ ++flags.DEFINE_string('input_graph', None, 'The path of input graph.') ++flags.DEFINE_string('data_location', None, 'The path of input data.') ++flags.DEFINE_integer('num_steps', 20, 'Number of eval steps.') ++flags.DEFINE_integer('batch_size', 20, 'eval batch size.') ++flags.DEFINE_boolean('random_rotation', True, 'Do random rotation if true.') ++ ++ ++FLAGS = flags.FLAGS ++ ++def run_graph(graph, tf_records): ++ ++ data_graph = tf.Graph() ++ with data_graph.as_default(): ++ features, labels = preprocessing.get_input_tensors( ++ FLAGS.batch_size, ++ tf_records, ++ shuffle_buffer_size=100000000, ++ random_rotation=FLAGS.random_rotation, seed=2, ++ dist_train=False, make_one_shot=True) ++ ++ infer_graph = tf.Graph() ++ with infer_graph.as_default(): ++ tf.import_graph_def(graph, name='') ++ ++ input_tensor = dual_net.get_input_tensor(infer_graph) ++ output_tensor = dual_net.get_output_tensor(infer_graph) ++ ++ config = tf.ConfigProto( ++ intra_op_parallelism_threads=FLAGS.num_intra_threads, ++ inter_op_parallelism_threads=FLAGS.num_inter_threads) ++ data_sess = tf.Session(graph=data_graph, config=config) ++ infer_sess = tf.Session(graph=infer_graph, config=config) ++ ++ elapsed = 0 ++ #with tf.contrib.tfprof.ProfileContext('/home/letiank/skx-8180/train_dir/minigo', trace_steps=range(70, 80), dump_steps=[110]): ++ for it in range(FLAGS.num_steps): ++ features_np = data_sess.run(features) ++ start_time = time.time() ++ infer_sess.run(output_tensor, feed_dict={input_tensor: features_np}) ++ elapsed += time.time() - start_time ++ ++def read_graph(input_graph): ++ if not gfile.Exists(input_graph): ++ print("Input graph file '" + input_graph + "' does not exist!") ++ exit(-1) ++ ++ input_graph_def = graph_pb2.GraphDef() ++ with gfile.Open(input_graph, "rb") as f: ++ data = f.read() ++ input_graph_def.ParseFromString(data) ++ ++ return input_graph_def ++ ++ ++def main(unused_argv): ++ """Run the reinforcement learning loop.""" ++ ++ graph = read_graph(FLAGS.input_graph) ++ tf_records = 
sorted(tf.gfile.Glob(FLAGS.data_location), reverse=True)[:1] ++ print(tf_records) ++ run_graph(graph, tf_records) ++ ++if __name__ == "__main__": ++ app.run(main) +diff --git a/quantize_graph.py b/quantize_graph.py +new file mode 100644 +index 0000000..4789825 +--- /dev/null ++++ b/quantize_graph.py +@@ -0,0 +1,1636 @@ ++# Copyright 2015 The TensorFlow Authors. All Rights Reserved. ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++# ============================================================================== ++r"""Transforms a float-trained graph into an equivalent quantized version. ++An example of command-line usage is: ++bazel build tensorflow/tools/quantization:quantize_graph \ ++&& bazel-bin/tensorflow/tools/quantization/quantize_graph \ ++--input=tensorflow_inception_graph.pb ++--output_node_names="softmax2" --print_nodes --output=/tmp/quantized_graph.pb \ ++--mode=eightbit --logtostderr ++To quantize for Intel CPU, add --intel_cpu_eightbitize=True. ++""" ++ ++from __future__ import absolute_import ++from __future__ import division ++from __future__ import print_function ++ ++import collections ++import re ++import numpy as np ++ ++from tensorflow.core.framework import attr_value_pb2 ++from tensorflow.core.framework import graph_pb2 ++from tensorflow.core.framework import node_def_pb2 ++from tensorflow.python.client import session ++from tensorflow.python.framework import constant_op ++from tensorflow.python.framework import dtypes ++from tensorflow.python.framework import graph_util ++from tensorflow.python.framework import importer ++from tensorflow.python.framework import ops ++from tensorflow.python.framework import tensor_shape ++from tensorflow.python.framework import tensor_util ++from tensorflow.python.ops import array_ops ++from tensorflow.python.platform import app ++from tensorflow.python.platform import flags as flags_lib ++from tensorflow.python.platform import gfile ++from google.protobuf import text_format ++ ++flags = flags_lib ++FLAGS = flags.FLAGS ++ ++flags.DEFINE_boolean("print_nodes", False, """Lists all nodes in the model.""") ++flags.DEFINE_string("input", "", """TensorFlow 'GraphDef' file to load.""") ++flags.DEFINE_string("output_node_names", "", ++ """Output node names, comma separated.""") ++flags.DEFINE_string("output", "", """File to save the output graph to.""") ++flags.DEFINE_integer("bitdepth", 8, ++ """How many bits to quantize the graph to.""") ++flags.DEFINE_string("mode", "round", ++ """What transformation to apply (round, quantize,""" ++ """ eightbit, weights, or weights_rounded).""") ++flags.DEFINE_string("test_input_dims", "1,224,224,3", ++ """The size of the input tensor to use when testing a""" ++ """ graph loaded from a file.""") ++flags.DEFINE_boolean("strip_redundant_quantization", True, ++ """Removes redundant dequantize/quantize pairs.""") ++flags.DEFINE_boolean("quantized_input", False, ++ "If true, assume Placeholders are quantized with values " ++ "covering [--quantized_input_min,--quantized_input_max]. 
" ++ "Only supported when --mode=eightbit") ++flags.DEFINE_float("quantized_input_min", 0, ++ "The minimum of the actual input range when " ++ "--quantized_input") ++flags.DEFINE_float("quantized_input_max", 1, ++ "The maximum of the actual input range when " ++ "--quantized_input") ++flags.DEFINE_float( ++ "quantized_fallback_min", None, ++ "The fallback 'min' value to use for layers which lack min-max " ++ "information. Note: this should be considered a coarse tool just good " ++ "enough for experimentation purposes, since graphs quantized in this way " ++ "would be very inaccurate.") ++flags.DEFINE_float( ++ "quantized_fallback_max", None, ++ "The fallback 'max' value to use for layers which lack min-max " ++ "information. Note: this should be considered a coarse tool just good " ++ "enough for experimentation purposes, since graphs quantized in this way " ++ "would be very inaccurate.") ++flags.DEFINE_boolean("input_binary", True, ++ """Input graph binary or text.""") ++flags.DEFINE_boolean("output_binary", True, ++ """Output graph binary or text.""") ++flags.DEFINE_boolean( ++ "intel_cpu_eightbitize", False, ++ "If true eightbitized graph will include fused quantized" ++ "nodes in the output_graph for Intel CPU.") ++ ++def print_input_nodes(current_node, nodes_map, indent, already_visited): ++ print(" " * indent + current_node.op + ":" + current_node.name) ++ already_visited[current_node.name] = True ++ for input_node_name in current_node.input: ++ if input_node_name in already_visited: ++ continue ++ input_node = nodes_map[input_node_name] ++ print_input_nodes(input_node, nodes_map, indent + 1, already_visited) ++ ++ ++def create_node(op, name, inputs): ++ new_node = node_def_pb2.NodeDef() ++ new_node.op = op ++ new_node.name = name ++ for input_name in inputs: ++ new_node.input.extend([input_name]) ++ return new_node ++ ++ ++def create_constant_node(name, value, dtype, shape=None): ++ node = create_node("Const", name, []) ++ set_attr_dtype(node, "dtype", dtype) ++ set_attr_tensor(node, "value", value, dtype, shape) ++ return node ++ ++ ++def copy_attr(node, key, attr_value): ++ try: ++ node.attr[key].CopyFrom(attr_value) ++ except KeyError: ++ pass ++ ++ ++def set_attr_dtype(node, key, value): ++ try: ++ node.attr[key].CopyFrom( ++ attr_value_pb2.AttrValue(type=value.as_datatype_enum)) ++ except KeyError: ++ pass ++ ++ ++def set_attr_shape(node, key, value): ++ try: ++ node.attr[key].CopyFrom( ++ attr_value_pb2.AttrValue(shape=tensor_shape.as_shape(value).as_proto())) ++ except KeyError: ++ pass ++ ++ ++def set_attr_tensor(node, key, value, dtype, shape=None): ++ try: ++ node.attr[key].CopyFrom( ++ attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( ++ value, dtype=dtype, shape=shape))) ++ except KeyError: ++ pass ++ ++ ++def set_attr_string(node, key, value): ++ try: ++ node.attr[key].CopyFrom(attr_value_pb2.AttrValue(s=value)) ++ except KeyError: ++ pass ++ ++ ++def set_attr_int_list(node, key, value): ++ list_value = attr_value_pb2.AttrValue.ListValue(i=value) ++ try: ++ node.attr[key].CopyFrom(attr_value_pb2.AttrValue(list=list_value)) ++ except KeyError: ++ pass ++ ++ ++def set_attr_bool(node, key, value): ++ try: ++ node.attr[key].CopyFrom(attr_value_pb2.AttrValue(b=value)) ++ except KeyError: ++ pass ++ ++ ++def set_attr_int(node, key, value): ++ try: ++ node.attr[key].CopyFrom(attr_value_pb2.AttrValue(i=value)) ++ except KeyError: ++ pass ++ ++ ++def set_attr_float(node, key, value): ++ try: ++ node.attr[key].CopyFrom(attr_value_pb2.AttrValue(f=value)) ++ except 
KeyError: ++ pass ++ ++ ++def node_name_from_input(node_name): ++ """Strips off ports and other decorations to get the underlying node name.""" ++ if node_name.startswith("^"): ++ node_name = node_name[1:] ++ m = re.search(r"(.*):\d+$", node_name) ++ if m: ++ node_name = m.group(1) ++ return node_name ++ ++ ++def ensure_tensor_name_has_port(node_name): ++ """Makes sure that a tensor name has :0 if no explicit port exists.""" ++ m = re.search(r"(.*):\d+$", node_name) ++ if m: ++ name_with_port = node_name ++ else: ++ name_with_port = node_name + ":0" ++ return name_with_port ++ ++ ++def unique_node_name_from_input(node_name): ++ """Replaces invalid characters in input names to get a unique node name.""" ++ return node_name.replace(":", "__port__").replace("^", "__hat__") ++ ++ ++def quantize_array(arr, num_buckets): ++ """Quantizes a numpy array. ++ This function maps each scalar in arr to the center of one of num_buckets ++ buckets. For instance, ++ quantize_array([0, 0.3, 0.6, 1], 2) => [0.25, 0.25, 0.75, 0.75] ++ Args: ++ arr: The numpy array to quantize. ++ num_buckets: The number of buckets to map "var" to. ++ Returns: ++ The quantized numpy array. ++ Raises: ++ ValueError: when num_buckets < 1. ++ """ ++ if num_buckets < 1: ++ raise ValueError("num_buckets must be >= 1") ++ arr_max = arr.max() ++ arr_min = arr.min() ++ if arr_max == arr_min: ++ return arr ++ bucket_width = (arr_max - arr_min) / num_buckets ++ # Map scalars to bucket indices. Take special care of max(arr). ++ bucket_indices = np.floor((arr - arr_min) / bucket_width) ++ bucket_indices[bucket_indices == num_buckets] = num_buckets - 1 ++ # Map each scalar to the center of a bucket. ++ arr = arr_min + bucket_width * (bucket_indices + 0.5) ++ return arr ++ ++ ++def quantize_weight_rounded(input_node): ++ """Returns a replacement node for input_node containing bucketed floats.""" ++ input_tensor = input_node.attr["value"].tensor ++ tensor_value = tensor_util.MakeNdarray(input_tensor) ++ shape = input_tensor.tensor_shape ++ # Currently, the parameter FLAGS.bitdepth is used to compute the ++ # number of buckets as 1 << FLAGS.bitdepth, meaning the number of ++ # buckets can only be a power of 2. ++ # This could be fixed by introducing a new parameter, num_buckets, ++ # which would allow for more flexibility in chosing the right model ++ # size/accuracy tradeoff. But I didn't want to add more parameters ++ # to this script than absolutely necessary. ++ num_buckets = 1 << FLAGS.bitdepth ++ tensor_value_rounded = quantize_array(tensor_value, num_buckets) ++ tensor_shape_list = tensor_util.TensorShapeProtoToList(shape) ++ return [ ++ create_constant_node( ++ input_node.name, ++ tensor_value_rounded, ++ dtypes.float32, ++ shape=tensor_shape_list) ++ ] ++ ++ ++def quantize_weight_eightbit(input_node, quantization_mode): ++ """Returns replacement nodes for input_node using the Dequantize op.""" ++ base_name = input_node.name + "_" ++ quint8_const_name = base_name + "quint8_const" ++ min_name = base_name + "min" ++ max_name = base_name + "max" ++ float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor) ++ min_value = np.min(float_tensor.flatten()) ++ max_value = np.max(float_tensor.flatten()) ++ # Make sure that the range includes zero. ++ if min_value > 0.0: ++ min_value = 0.0 ++ # min_value == max_value is a tricky case. It can occur for general ++ # tensors, and of course for scalars. The quantized ops cannot deal ++ # with this case, so we set max_value to something else. 
++ # It's a tricky question what is the numerically best solution to ++ # deal with this degeneracy. ++ # TODO(petewarden): Better use a tolerance than a hard comparison? ++ if min_value == max_value: ++ if abs(min_value) < 0.000001: ++ max_value = min_value + 1.0 ++ elif min_value > 0: ++ max_value = 2 * min_value ++ else: ++ max_value = min_value / 2.0 ++ ++ sess = session.Session() ++ with sess.as_default(): ++ quantize_op = array_ops.quantize_v2( ++ float_tensor, ++ min_value, ++ max_value, ++ dtypes.quint8, ++ mode=quantization_mode) ++ quint8_tensor = quantize_op[0].eval() ++ min_value = quantize_op[1].eval() ++ max_value = quantize_op[2].eval() ++ shape = tensor_util.TensorShapeProtoToList(input_node.attr["value"] ++ .tensor.tensor_shape) ++ quint8_const_node = create_constant_node( ++ quint8_const_name, quint8_tensor, dtypes.quint8, shape=shape) ++ min_node = create_constant_node(min_name, min_value, dtypes.float32) ++ max_node = create_constant_node(max_name, max_value, dtypes.float32) ++ dequantize_node = create_node("Dequantize", input_node.name, ++ [quint8_const_name, min_name, max_name]) ++ set_attr_dtype(dequantize_node, "T", dtypes.quint8) ++ set_attr_string(dequantize_node, "mode", quantization_mode) ++ return [quint8_const_node, min_node, max_node, dequantize_node] ++ ++# TODO(intel-tf): Current Intel-CPU quantized Conv2D and Matmul supports only ++# signed scaled mode of weight quantization. ++def intel_cpu_quantize_weight_eightbit(input_node, quantization_mode="SCALED"): ++ """Returns replacement of constant weight node. ++ This function creates (i) a quantized constant node, (ii) a float min node ++ (iii) a float max node, and (iv) a dequantize node.""" ++ base_name = input_node.name + "_" ++ qint8_const_name = base_name + "qint8_const" ++ min_name = base_name + "min" ++ max_name = base_name + "max" ++ float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor) ++ min_value = np.min(float_tensor.flatten()) ++ max_value = np.max(float_tensor.flatten()) ++ # Same processing of min-max as in quantize_weight_eightbit function. ++ if min_value > 0.0: ++ min_value = 0.0 ++ if min_value == max_value: ++ if abs(min_value) < 0.000001: ++ max_value = min_value + 1.0 ++ elif min_value > 0: ++ max_value = 2 * min_value ++ else: ++ max_value = min_value / 2.0 ++ ++ sess = session.Session() ++ with sess.as_default(): ++ quantize_op = array_ops.quantize_v2( ++ float_tensor, ++ min_value, ++ max_value, ++ dtypes.qint8, ++ mode=quantization_mode, ++ round_mode="HALF_TO_EVEN") ++ qint8_tensor = quantize_op[0].eval() ++ # Updated min-max values should be passed to the next feeding node. 
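# Note: quantize_v2 returns the range it actually used as its second and third
# outputs; in SCALED mode these can differ from the raw numpy min/max computed
# above, so the min/max Const nodes below are rebuilt from those outputs rather
# than from the original values.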
++ min_value = quantize_op[1].eval() ++ max_value = quantize_op[2].eval() ++ shape = tensor_util.TensorShapeProtoToList(input_node.attr["value"] ++ .tensor.tensor_shape) ++ qint8_const_node = create_constant_node( ++ qint8_const_name, qint8_tensor, ++ dtypes.qint8, ++ shape=shape) ++ min_node = create_constant_node(min_name, min_value, dtypes.float32) ++ max_node = create_constant_node(max_name, max_value, dtypes.float32) ++ dequantize_node = create_node("Dequantize", input_node.name, ++ [qint8_const_name, min_name, max_name]) ++ set_attr_dtype(dequantize_node, "T", dtypes.quint8) ++ set_attr_string(dequantize_node, "mode", b'SCALED') ++ return [qint8_const_node, min_node, max_node, dequantize_node] ++ ++EightbitizeRecursionState = collections.namedtuple( ++ "EightbitizeRecursionState", ++ ["already_visited", "output_node_stack", "merged_with_fake_quant"]) ++ ++ ++class GraphRewriter(object): ++ """Takes a float graph, and rewrites it in quantized form.""" ++ ++ def __init__(self, ++ input_graph, ++ mode, ++ quantized_input_range, ++ fallback_quantization_range=None, ++ intel_cpu_eightbitize=False): ++ """Sets up the class to rewrite a float graph. ++ Args: ++ input_graph: A float graph to transform. ++ mode: A string controlling how quantization is performed - ++ round, quantize, eightbit, or weights. ++ quantized_input_range: if set, assume the input is ++ quantized and represents the range ++ [quantized_input_range[0], quantized_input_range[1]] ++ fallback_quantization_range: if set, then for nodes where the quantization ++ range can't be inferred from the graph, use the range ++ [fallback_quantization_range[0], fallback_quantization_range[1]) instead ++ of using a RequantizationRange node in the graph. ++ Raises: ++ ValueError: Two nodes with the same name were found in the graph. ++ """ ++ self.input_graph = input_graph ++ self.nodes_map = self.create_nodes_map(input_graph) ++ self.output_graph = None ++ self.mode = mode ++ self.intel_cpu_eightbitize = intel_cpu_eightbitize ++ self.final_node_renames = {} ++ self.quantized_node_dict = {} ++ if quantized_input_range: ++ self.input_range = (quantized_input_range[0], quantized_input_range[1]) ++ if self.input_range[0] >= self.input_range[1]: ++ raise ValueError("Invalid quantized_input_range: [%s,%s]" % ++ self.input_range) ++ if self.mode != "eightbit": ++ raise ValueError( ++ "quantized_input_range can only be specified in eightbit mode") ++ else: ++ self.input_range = None ++ ++ if fallback_quantization_range: ++ self.fallback_quantization_range = [ ++ fallback_quantization_range[0], fallback_quantization_range[1] ++ ] ++ if (self.fallback_quantization_range[0] >= ++ self.fallback_quantization_range[1]): ++ raise ValueError("Invalid fallback_quantization_range: [%s,%s]" % ++ self.fallback_quantization_range) ++ if self.mode != "eightbit": ++ raise ValueError("fallback_quantization_range can only be " ++ "specified in eightbit mode") ++ else: ++ self.fallback_quantization_range = None ++ ++ # Data that is valid only during the recursive call to rewrite the graph. ++ self.state = None ++ ++ def create_nodes_map(self, graph): ++ """Builds a mapping of node names to their defs from the graph.""" ++ nodes_map = {} ++ for node in graph.node: ++ if node.name not in nodes_map.keys(): ++ nodes_map[node.name] = node ++ else: ++ raise ValueError("Duplicate node names detected.") ++ return nodes_map ++ ++ def rewrite(self, output_node_names): ++ """Triggers rewriting of the float graph. 
++ Args: ++ output_node_names: A list of names of the nodes that produce the final ++ results. ++ Returns: ++ A quantized version of the float graph. ++ """ ++ self.output_graph = graph_pb2.GraphDef() ++ output_nodes = [ ++ self.nodes_map[output_node_name] ++ for output_node_name in output_node_names ++ ] ++ if self.mode == "round": ++ self.already_visited = {} ++ for output_node in output_nodes: ++ self.round_nodes_recursively(output_node) ++ elif self.mode == "quantize": ++ self.already_visited = {} ++ self.already_quantized = {} ++ for output_node in output_nodes: ++ self.quantize_nodes_recursively(output_node) ++ elif self.mode == "eightbit": ++ # When function graph_util.remove_training_nodes removes ++ # "Identity" ops in the graph, it does not replace the ++ # control input properly, so the control input becomes ++ # the regular input. Disable this function until ++ # the bug is fixed. ++ self.set_input_graph(graph_util.remove_training_nodes( ++ self.input_graph, protected_nodes=output_node_names)) ++ output_nodes = [ ++ self.nodes_map[output_node_name] ++ for output_node_name in output_node_names ++ ] ++ ++ self.state = EightbitizeRecursionState( ++ already_visited={}, output_node_stack=[], merged_with_fake_quant={}) ++ ++ if self.intel_cpu_eightbitize: ++ # TODO(intel-tf): Enable fused quantized nodes for intel cpu. ++ for output_node in output_nodes: ++ # Initialize output_node_stack with the output node. ++ # Each element in the stack is a mutable list containing ++ # [parent_node, index_to_parent, quantization_flag, fusion_flag]. ++ # In the case of the root node, use the node itself as the parent. ++ self.state.output_node_stack.append( ++ [output_node, None, False, False]) ++ self.intel_cpu_eightbitize_nodes_recursively(output_node) ++ self.state.output_node_stack.pop() ++ else: ++ for output_node in output_nodes: ++ self.eightbitize_nodes_recursively(output_node) ++ ++ self.state = None ++ if self.input_range: ++ self.add_output_graph_node( ++ create_constant_node("quantized_input_min_value", self.input_range[ ++ 0], dtypes.float32, [])) ++ self.add_output_graph_node( ++ create_constant_node("quantized_input_max_value", self.input_range[ ++ 1], dtypes.float32, [])) ++ if self.fallback_quantization_range: ++ self.add_output_graph_node( ++ create_constant_node("fallback_quantization_min_value", ++ self.fallback_quantization_range[0], ++ dtypes.float32, [])) ++ self.add_output_graph_node( ++ create_constant_node("fallback_quantization_max_value", ++ self.fallback_quantization_range[1], ++ dtypes.float32, [])) ++ if True: ++ self.output_graph = self.remove_redundant_quantization( ++ self.output_graph) ++ self.remove_dead_nodes(output_node_names) ++ self.apply_final_node_renames() ++ elif self.mode == "weights": ++ self.output_graph = self.quantize_weights(self.input_graph, ++ b"MIN_COMBINED") ++ self.remove_dead_nodes(output_node_names) ++ elif self.mode == "weights_rounded": ++ self.output_graph = self.quantize_weights(self.input_graph, self.mode) ++ self.remove_dead_nodes(output_node_names) ++ else: ++ print("Bad mode - " + self.mode + ".") ++ return self.output_graph ++ ++ def round_nodes_recursively(self, current_node): ++ """The entry point for simple rounding quantization.""" ++ if (current_node.name in self.already_visited ++ ) and self.already_visited[current_node.name]: ++ return ++ self.already_visited[current_node.name] = True ++ for input_node_name in current_node.input: ++ input_node_name = node_name_from_input(input_node_name) ++ input_node = self.nodes_map[input_node_name] ++
self.round_nodes_recursively(input_node) ++ nodes_to_quantize = ["Conv2D", "BiasAdd", "MatMul"] ++ if any(current_node.op in s for s in nodes_to_quantize): ++ new_node = node_def_pb2.NodeDef() ++ new_node.CopyFrom(current_node) ++ new_node.name = current_node.name + "_original" ++ self.add_output_graph_node(new_node) ++ levels = 1 << FLAGS.bitdepth ++ constant_name = current_node.name + "_round_depth" ++ constant_tensor = constant_op.constant( ++ levels, dtype=dtypes.int32, name=constant_name) ++ constant_node = constant_tensor.op.node_def ++ self.add_output_graph_node(constant_node) ++ quantize_node = node_def_pb2.NodeDef() ++ quantize_node.op = "RoundToSteps" ++ quantize_node.name = current_node.name ++ quantize_node.input.extend([current_node.name + "_original"]) ++ quantize_node.input.extend([constant_node.name]) ++ self.add_output_graph_node(quantize_node) ++ else: ++ new_node = node_def_pb2.NodeDef() ++ new_node.CopyFrom(current_node) ++ self.add_output_graph_node(new_node) ++ ++ def quantize_nodes_recursively(self, current_node): ++ """The entry point for quantizing nodes to eight bit and back.""" ++ if self.already_visited[current_node.name]: ++ return ++ self.already_visited[current_node.name] = True ++ for input_node_name in current_node.input: ++ input_node_name = node_name_from_input(input_node_name) ++ input_node = self.nodes_map[input_node_name] ++ self.quantize_nodes_recursively(input_node) ++ nodes_to_quantize = ["Conv2D", "BiasAdd", "MatMul"] ++ if any(current_node.op in s for s in nodes_to_quantize): ++ for input_name in current_node.input: ++ input_name = node_name_from_input(input_name) ++ input_node = self.nodes_map[input_name] ++ self.quantize_node(input_node) ++ self.quantize_node(current_node) ++ else: ++ new_node = node_def_pb2.NodeDef() ++ new_node.CopyFrom(current_node) ++ self.add_output_graph_node(new_node) ++ ++ def quantize_node(self, input_node): ++ """Handles quantizing a single node.""" ++ input_name = input_node.name ++ if input_name in self.already_quantized: ++ return ++ self.already_quantized[input_name] = True ++ original_input_name = input_name + "_original" ++ reshape_name = input_name + "_reshape" ++ reshape_dims_name = input_name + "_reshape_dims" ++ max_name = input_name + "_max" ++ min_name = input_name + "_min" ++ dims_name = input_name + "_dims" ++ quantize_name = input_name + "_quantize" ++ dequantize_name = input_name ++ original_input_node = node_def_pb2.NodeDef() ++ original_input_node.CopyFrom(input_node) ++ original_input_node.name = original_input_name ++ self.add_output_graph_node(original_input_node) ++ reshape_dims_node = create_constant_node(reshape_dims_name, -1, ++ dtypes.int32, [1]) ++ self.add_output_graph_node(reshape_dims_node) ++ reshape_node = create_node("Reshape", reshape_name, ++ [original_input_name, reshape_dims_name]) ++ set_attr_dtype(reshape_node, "T", dtypes.float32) ++ self.add_output_graph_node(reshape_node) ++ dims_node = create_constant_node(dims_name, 0, dtypes.int32, [1]) ++ self.add_output_graph_node(dims_node) ++ max_node = create_node("Max", max_name, [reshape_name, dims_name]) ++ set_attr_dtype(max_node, "T", dtypes.float32) ++ set_attr_bool(max_node, "keep_dims", False) ++ self.add_output_graph_node(max_node) ++ min_node = create_node("Min", min_name, [reshape_name, dims_name]) ++ set_attr_dtype(min_node, "T", dtypes.float32) ++ set_attr_bool(min_node, "keep_dims", False) ++ self.add_output_graph_node(min_node) ++ quantize_node = create_node("Quantize", quantize_name, ++ [original_input_name, min_name, 
max_name]) ++ set_attr_dtype(quantize_node, "T", dtypes.quint8) ++ set_attr_string(quantize_node, "mode", b"MIN_FIRST") ++ self.add_output_graph_node(quantize_node) ++ dequantize_node = create_node("Dequantize", dequantize_name, ++ [quantize_name, min_name, max_name]) ++ set_attr_dtype(dequantize_node, "T", dtypes.quint8) ++ set_attr_string(dequantize_node, "mode", b"MIN_FIRST") ++ self.add_output_graph_node(dequantize_node) ++ ++ def should_merge_with_fake_quant_node(self): ++ """Should the current node merge with self.state.output_node_stack[-1]?""" ++ if not self.state.output_node_stack: ++ return False ++ top = self.state.output_node_stack[-1] ++ return top[1] == 0 and top[0].op in ["FakeQuantWithMinMaxVars"] ++ ++ def should_quantize_const(self, node): ++ if not self.state.output_node_stack: ++ return False ++ top = self.state.output_node_stack[-1] ++ if not top[2]: ++ return False ++ dtype = dtypes.as_dtype(node.attr["dtype"].type) ++ assert dtype == dtypes.float32, ( ++ "Failed to quantized constant %s of type %s" % (node.name, dtype)) ++ return True ++ ++ def eightbitize_nodes_recursively(self, current_node): ++ """The entry point for transforming a graph into full eight bit.""" ++ if current_node.name in self.state.already_visited: ++ if (self.should_merge_with_fake_quant_node() or ++ current_node.name in self.state.merged_with_fake_quant): ++ raise ValueError("Unsupported graph structure: output of node %s " ++ "is processed by a FakeQuant* node and should have " ++ "no other outputs.", current_node.name) ++ return ++ self.state.already_visited[current_node.name] = True ++ ++ for i, input_node_name in enumerate(current_node.input): ++ quantize_input = False ++ if current_node.op in ("MatMul", "Conv2D", "BiasAdd", "MaxPool", ++ "AvgPool", "Relu", "Relu6", ++ "BatchNormWithGlobalNormalization"): ++ quantize_input = True ++ elif current_node.op == "Concat" and i > 0: ++ quantize_input = ( ++ dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32) ++ elif current_node.op == "Reshape" and i == 0: ++ quantize_input = ( ++ dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32) ++ ++ self.state.output_node_stack.append((current_node, i, quantize_input)) ++ ++ input_node_name = node_name_from_input(input_node_name) ++ input_node = self.nodes_map[input_node_name] ++ self.eightbitize_nodes_recursively(input_node) ++ ++ self.state.output_node_stack.pop() ++ ++ if current_node.op == "MatMul": ++ self.eightbitize_mat_mul_node(current_node) ++ elif current_node.op == "Conv2D": ++ self.eightbitize_conv_node(current_node) ++ elif current_node.op == "BiasAdd": ++ self.eightbitize_bias_add_node(current_node) ++ elif current_node.op == "MaxPool" or current_node.op == "AvgPool": ++ self.eightbitize_single_input_tensor_node(current_node, ++ self.add_pool_function) ++ elif current_node.op == "Relu" or current_node.op == "Relu6": ++ self.eightbitize_single_input_tensor_node(current_node, ++ self.add_relu_function) ++ elif (current_node.op == "Concat" and ++ dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32): ++ self.eightbitize_concat_node(current_node) ++ elif current_node.op == "BatchNormWithGlobalNormalization": ++ self.eightbitize_batch_norm_node(current_node) ++ elif (current_node.op == "Reshape" and ++ dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32): ++ self.eightbitize_reshape_node(current_node) ++ elif (self.input_range and ++ current_node.op in ("Placeholder", "PlaceholderV2")): ++ self.eightbitize_placeholder_node(current_node) ++ elif 
current_node.op == "FakeQuantWithMinMaxVars": ++ # It will have been merged into the underlying node. ++ pass ++ elif current_node.op == "Const": ++ if self.should_quantize_const(current_node): ++ for n in quantize_weight_eightbit(current_node, b"MIN_FIRST"): ++ self.add_output_graph_node(n) ++ else: ++ new_node = node_def_pb2.NodeDef() ++ new_node.CopyFrom(current_node) ++ self.add_output_graph_node(new_node) ++ ++ ################################################################### ++ # Note: if more cases are added here, you may need to update the op ++ # name lists in the loop over children at the start of the function. ++ ################################################################### ++ else: ++ new_node = node_def_pb2.NodeDef() ++ new_node.CopyFrom(current_node) ++ self.add_output_graph_node(new_node) ++ ++ if (self.should_merge_with_fake_quant_node() and ++ current_node.name not in self.state.merged_with_fake_quant): ++ raise ValueError( ++ "FakeQuant* node %s failed to merge with node %s of type %s" % ++ (self.state.output_node_stack[-1][0], current_node.name, ++ current_node.op)) ++ ++ # TODO(intel-tf): Quantized Conv2D could be fused with few other succeeding ++ # ops. Current support is for BiasAdd and Relu. Future implementation will ++ # include: ++ # (i) Conv2D + {BiasAdd} + Relu + Add + Relu ++ # (ii) Conv2D + {BiasAdd} + Relu + Add ++ # (ii) Conv2D + {BiasAdd} + Add + Relu ++ # (iii) Conv2D + {BiasAdd} + Add ++ def intel_cpu_eightbitize_conv_node(self, original_node, bias_node=None, ++ bias_add_name=None, add_node_name=None, ++ relu_node_name=None): ++ """Replaces a Conv2D node with the eight bit equivalent sub-graph.""" ++ all_input_names = self.add_eightbit_prologue_nodes(original_node) ++ control_input_names = [] ++ real_input_names = [] ++ for input_name in all_input_names: ++ if input_name[0] == '^': ++ control_input_names.append(input_name) ++ else: ++ real_input_names.append(input_name) ++ ++ if bias_node and add_node_name and relu_node_name: ++ new_node = node_def_pb2.NodeDef() ++ new_node.CopyFrom(bias_node) ++ self.add_output_graph_node(new_node) ++ all_input_names = real_input_names[:2] + [bias_node.name] + \ ++ real_input_names[2:] + [add_node_name] + control_input_names ++ quantized_conv_name = original_node.name + "_eightbit_quantized_conv" ++ quantized_conv_node = create_node("QuantizedConv2DWithBiasSumAndRelu", ++ quantized_conv_name, all_input_names) ++ elif bias_node and (not add_node_name) and relu_node_name: ++ new_node = node_def_pb2.NodeDef() ++ new_node.CopyFrom(bias_node) ++ self.add_output_graph_node(new_node) ++ all_input_names = real_input_names[:2] + [bias_node.name] + \ ++ real_input_names[2:] + control_input_names ++ quantized_conv_name = original_node.name + "_eightbit_quantized_conv" ++ quantized_conv_node = create_node("QuantizedConv2DWithBiasAndRelu", ++ quantized_conv_name, all_input_names) ++ elif bias_node and bias_add_name and \ ++ (not add_node_name) and (not relu_node_name): ++ new_node = node_def_pb2.NodeDef() ++ new_node.CopyFrom(bias_node) ++ self.add_output_graph_node(new_node) ++ all_input_names = real_input_names[:2] + [bias_node.name] + \ ++ real_input_names[2:] + control_input_names ++ quantized_conv_name = original_node.name + "_eightbit_quantized_conv" ++ quantized_conv_node = create_node("QuantizedConv2DWithBias", ++ quantized_conv_name, all_input_names) ++ else: ++ quantized_conv_name = original_node.name + "_eightbit_quantized_conv" ++ quantized_conv_node = create_node("QuantizedConv2D", quantized_conv_name, ++ 
all_input_names) ++ copy_attr(quantized_conv_node, "strides", original_node.attr["strides"]) ++ copy_attr(quantized_conv_node, "padding", original_node.attr["padding"]) ++ copy_attr(quantized_conv_node, "dilations", original_node.attr["dilations"]) ++ set_attr_dtype(quantized_conv_node, "Tinput", dtypes.quint8) ++ set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) ++ set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) ++ self.add_output_graph_node(quantized_conv_node) ++ quantize_down_name = self.add_quantize_down_nodes(original_node, ++ quantized_conv_name) ++ if bias_node and relu_node_name: ++ self.add_dequantize_result_node(quantize_down_name, relu_node_name) ++ elif bias_node and bias_add_name and \ ++ (not add_node_name) and (not relu_node_name): ++ self.add_dequantize_result_node(quantize_down_name, bias_add_name) ++ else: ++ self.add_dequantize_result_node(quantize_down_name, original_node.name) ++ ++ # TODO(intel-tf): To check whether Conv2D is fed by relu directly or via ++ # pooling ops. This is required as intel cpu requires input tensor for Conv2D ++ # to be non-negative. ++ def intel_cpu_find_relu_recursively(self, current_node): ++ """Helper function to check if Conv2D is fed by Relu.""" ++ if current_node.op == "Relu" or current_node.op == "Relu6": ++ return True ++ else: ++ first_input_node_name = node_name_from_input(current_node.input[0]) ++ input_node = self.nodes_map[first_input_node_name] ++ if input_node.op in ("ConcatV2", "MaxPool", "AvgPool", "Relu", "Relu6"): ++ return self.intel_cpu_find_relu_recursively(input_node) ++ else: ++ return False ++ ++ # def intel_cpu_find_switch_input_any(self, current_node): ++ # should_quantize_concat = True ++ # for input_name in current_node.input: ++ # if self.nodes_map[node_name_from_input(input_name)].op == "Switch": ++ # should_quantize_concat = False ++ # break ++ # return should_quantize_concat ++ ++ # TODO(intel-tf): We leave the output graph partially quantized for ++ # intel cpu. Current quantization support is for Conv2D and its fusion. ++ # More quantized operations will be included as more implementations are ++ # completed. 
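As an illustration of the Conv2D fusion this TODO describes, here is a rough driver sketch (not part of the patch) that runs the Intel CPU path on a toy Relu -> Conv2D -> BiasAdd -> Relu graph. It assumes TensorFlow 1.x and that the remaining helpers of this file (set_input_graph, remove_dead_nodes, apply_final_node_renames, quantize_weights) are defined further down, as in the upstream TensorFlow quantize_graph tool:

import tensorflow as tf
import quantize_graph  # this file

# Build a tiny float graph whose Conv2D input is fed by a Relu, which is the
# precondition intel_cpu_find_relu_recursively checks before quantizing.
g = tf.Graph()
with g.as_default():
    x = tf.placeholder(tf.float32, [1, 4, 4, 3], name="input")
    w = tf.constant(0.1, shape=[3, 3, 3, 8], name="weights")
    conv = tf.nn.conv2d(tf.nn.relu(x), w, strides=[1, 1, 1, 1], padding="SAME")
    b = tf.constant(0.0, shape=[8], name="bias")
    out = tf.nn.relu(tf.nn.bias_add(conv, b), name="output")

rewriter = quantize_graph.GraphRewriter(
    g.as_graph_def(), mode="eightbit",
    quantized_input_range=None, intel_cpu_eightbitize=True)
quantized_def = rewriter.rewrite(["output"])
# The Conv2D + BiasAdd + Relu chain should now appear as a single
# QuantizedConv2DWithBiasAndRelu node surrounded by QuantizeV2 / Requantize /
# Dequantize glue, while unsupported ops are left in float.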
++ def intel_cpu_eightbitize_nodes_recursively(self, current_node): ++ """The entry point for transforming a graph into full eight bit.""" ++ if current_node.name in self.state.already_visited: ++ if (self.should_merge_with_fake_quant_node() or ++ current_node.name in self.state.merged_with_fake_quant): ++ raise ValueError("Unsupported graph structure: output of node %s " ++ "is processed by a FakeQuant* node and should have " ++ "no other outputs.", current_node.name) ++ return ++ self.state.already_visited[current_node.name] = True ++ quantize_input, should_quantize_conv, \ ++ fuse_with_conv = (False, False, False) ++ ++ if current_node.op == "Conv2D": ++ should_quantize_conv = self.intel_cpu_find_relu_recursively(current_node) ++ if current_node.op == "ConcatV2": ++ should_quantize_concat = not ('map/while' in current_node.name) ++ # should_quantize_concat = self.intel_cpu_find_switch_input_any(current_node) ++ ++ inputs = list(enumerate(current_node.input)) ++ if current_node.op in ("AddN", "Add"): ++ inputs = reversed(inputs) ++ ++ for i, input_node_name in inputs: ++ input_node_name = node_name_from_input(input_node_name) ++ input_node = self.nodes_map[input_node_name] ++ ++ if should_quantize_conv and i == 1 and input_node.op == "Const": ++ quantize_input = True ++ ++ self.state.output_node_stack.append([current_node, i, quantize_input, ++ fuse_with_conv]) ++ self.intel_cpu_eightbitize_nodes_recursively(input_node) ++ self.state.output_node_stack.pop() ++ ++ if current_node.op == "Conv2D" and should_quantize_conv and quantize_input: ++ # match pattern for fusion with bias and relu ++ grand_parent, parent = self.state.output_node_stack[-2:] ++ if parent[0].op == "BiasAdd" and \ ++ (grand_parent[0].op == "Relu" or grand_parent[0].op == "Relu6"): ++ self.state.output_node_stack[-2][3] = True # BiasAdd to be fused ++ self.state.output_node_stack[-3][3] = True # Relu to be fused ++ bias_node_name = node_name_from_input(parent[0].input[1]) ++ bias_node = self.nodes_map[bias_node_name] ++ self.intel_cpu_eightbitize_conv_node(current_node, bias_node, None, ++ None, grand_parent[0].name) ++ elif parent[0].op == "BiasAdd" and grand_parent[0].op in ("AddN", "Add"): ++ grand_grand_parent = self.state.output_node_stack[-3] ++ if grand_grand_parent[0].op in ("Relu", "Relu6") \ ++ and (not self.state.output_node_stack[-3][3]) \ ++ and (not self.state.output_node_stack[-4][3]): ++ self.state.output_node_stack[-2][3] = True # BiasAdd to be fused ++ self.state.output_node_stack[-3][3] = True # AddN to be fused ++ self.state.output_node_stack[-4][3] = True # Relu to be fused ++ bias_node_name = node_name_from_input(parent[0].input[1]) ++ bias_node = self.nodes_map[bias_node_name] ++ add_node_name = node_name_from_input(grand_parent[0].input[0]) ++ self.intel_cpu_eightbitize_conv_node(current_node, bias_node, None, ++ add_node_name, ++ grand_grand_parent[0].name) ++ elif (not self.state.output_node_stack[-2][3]): # Fuse BiasAdd then ++ self.state.output_node_stack[-2][3] = True # BiasAdd to be fused ++ bias_node_name = node_name_from_input(parent[0].input[1]) ++ bias_node = self.nodes_map[bias_node_name] ++ self.intel_cpu_eightbitize_conv_node(current_node, bias_node, ++ parent[0].name) ++ else: ++ self.intel_cpu_eightbitize_conv_node(current_node) ++ elif parent[0].op == "BiasAdd" and \ ++ (not self.state.output_node_stack[-2][3]): ++ self.state.output_node_stack[-2][3] = True # BiasAdd to be fused ++ bias_node_name = node_name_from_input(parent[0].input[1]) ++ bias_node = 
self.nodes_map[bias_node_name] ++ self.intel_cpu_eightbitize_conv_node(current_node, bias_node, ++ parent[0].name) ++ else: ++ self.intel_cpu_eightbitize_conv_node(current_node) ++ elif current_node.op == "BiasAdd" and \ ++ self.state.output_node_stack[-1][3] == True: ++ pass # This op is already processed by fused quantization ++ elif (current_node.op == "Relu" or current_node.op == "Relu6") \ ++ and self.state.output_node_stack[-1][3] == True: ++ pass # This op is already processed by fused quantization ++ elif current_node.op in ("AddN", "Add") and \ ++ self.state.output_node_stack[-1][3] == True: ++ pass # AddN op is already processed by fused quantization ++ elif current_node.op == "MaxPool" or current_node.op == "AvgPool": ++ self.eightbitize_single_input_tensor_node(current_node, ++ self.add_pool_function) ++ elif (current_node.op == "ConcatV2" and should_quantize_concat and ++ dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32): ++ self.eightbitize_concatv2_node(current_node) ++ elif current_node.op == "Const": ++ parent = self.state.output_node_stack[-1] ++ if parent[0].op == "Conv2D" and parent[2]: ++ for n in intel_cpu_quantize_weight_eightbit(current_node, b"SCALED"): ++ self.add_output_graph_node(n) ++ elif parent[0].op == "BiasAdd" and \ ++ self.state.output_node_stack[-2][3]: ++ pass # This constant is already processed by fused quantization ++ else: ++ new_node = node_def_pb2.NodeDef() ++ new_node.CopyFrom(current_node) ++ self.add_output_graph_node(new_node) ++ else: ++ new_node = node_def_pb2.NodeDef() ++ new_node.CopyFrom(current_node) ++ self.add_output_graph_node(new_node) ++ ++ if (self.should_merge_with_fake_quant_node() and ++ current_node.name not in self.state.merged_with_fake_quant): ++ raise ValueError( ++ "FakeQuant* node %s failed to merge with node %s of type %s" % ++ (self.state.output_node_stack[-1][0], current_node.name, ++ current_node.op)) ++ ++ def add_eightbit_prologue_nodes(self, original_node): ++ """Adds input conversion nodes to handle quantizing the underlying node.""" ++ namespace_prefix = original_node.name + "_eightbit" ++ ++ # Use the name of the first input as the control input name ++ # for reshape_dim and reduction_dim to solve the different frame issue ++ # in quantized graph ++ reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes( ++ namespace_prefix, node_name_from_input(original_node.input[0])) ++ input_names = [] ++ min_max_names = [] ++ for original_input_name in original_node.input: ++ # Do not quantize control input ++ if original_input_name[0] == '^': ++ continue ++ quantize_input_name, min_input_name, max_input_name = ( ++ self.eightbitize_input_to_node(namespace_prefix, original_input_name, ++ reshape_dims_name, ++ reduction_dims_name)) ++ input_names.append(quantize_input_name) ++ min_max_names.append(min_input_name) ++ min_max_names.append(max_input_name) ++ all_input_names = [] ++ all_input_names.extend(input_names) ++ all_input_names.extend(min_max_names) ++ ++ # add back control input name ++ for original_input_name in original_node.input: ++ if original_input_name[0] == '^': ++ all_input_names.append(original_input_name) ++ ++ return all_input_names ++ ++ def add_common_quantization_nodes(self, namespace_prefix, control_input_name=None): ++ """Builds constant nodes needed for quantization of inputs.""" ++ reshape_dims_name = namespace_prefix + "_reshape_dims" ++ reduction_dims_name = namespace_prefix + "_reduction_dims" ++ ++ reshape_dims_node = create_constant_node(reshape_dims_name, -1, ++
dtypes.int32, [1]) ++ if control_input_name: ++ reshape_dims_node.input.append("^" + control_input_name) ++ self.add_output_graph_node(reshape_dims_node) ++ reduction_dims_node = create_constant_node(reduction_dims_name, 0, ++ dtypes.int32, [1]) ++ if control_input_name: ++ reduction_dims_node.input.append("^" + control_input_name) ++ self.add_output_graph_node(reduction_dims_node) ++ return reshape_dims_name, reduction_dims_name ++ ++ def eightbitize_input_to_node(self, namespace_prefix, original_input_name, ++ reshape_dims_name, reduction_dims_name): ++ """Takes one float input to an op, and converts it to quantized form.""" ++ unique_input_name = unique_node_name_from_input(original_input_name) ++ if unique_input_name in self.quantized_node_dict: ++ quantized_tuple = self.quantized_node_dict[unique_input_name]; ++ return quantized_tuple[0], quantized_tuple[1], quantized_tuple[2] ++ ++ reshape_input_name = namespace_prefix + "_reshape_" + unique_input_name ++ min_input_name = namespace_prefix + "_min_" + unique_input_name ++ max_input_name = namespace_prefix + "_max_" + unique_input_name ++ quantize_input_name = namespace_prefix + "_quantize_" + unique_input_name ++ reshape_input_node = create_node("Reshape", reshape_input_name, ++ [original_input_name, reshape_dims_name]) ++ set_attr_dtype(reshape_input_node, "T", dtypes.float32) ++ self.add_output_graph_node(reshape_input_node) ++ min_input_node = create_node("Min", min_input_name, ++ [reshape_input_name, reduction_dims_name]) ++ set_attr_dtype(min_input_node, "T", dtypes.float32) ++ set_attr_bool(min_input_node, "keep_dims", False) ++ self.add_output_graph_node(min_input_node) ++ max_input_node = create_node("Max", max_input_name, ++ [reshape_input_name, reduction_dims_name]) ++ set_attr_dtype(max_input_node, "T", dtypes.float32) ++ set_attr_bool(max_input_node, "keep_dims", False) ++ self.add_output_graph_node(max_input_node) ++ quantize_input_node = create_node( ++ "QuantizeV2", quantize_input_name, ++ [original_input_name, min_input_name, max_input_name]) ++ set_attr_dtype(quantize_input_node, "T", dtypes.quint8) ++ set_attr_string(quantize_input_node, "mode", ++ b"SCALED" if self.intel_cpu_eightbitize else b"MIN_FIRST") ++ set_attr_string(quantize_input_node, "round_mode", ++ b"HALF_TO_EVEN" if self.intel_cpu_eightbitize ++ else b"HALF_AWAY_FROM_ZERO") ++ self.add_output_graph_node(quantize_input_node) ++ min_output_name = quantize_input_name + ":1" ++ max_output_name = quantize_input_name + ":2" ++ ++ self.quantized_node_dict[unique_input_name] = (quantize_input_name, ++ min_output_name, max_output_name) ++ return quantize_input_name, min_output_name, max_output_name ++ ++ def add_quantize_down_nodes(self, original_node, quantized_output_name): ++ quantized_outputs = [ ++ quantized_output_name, quantized_output_name + ":1", ++ quantized_output_name + ":2" ++ ] ++ min_max_inputs = None ++ if self.should_merge_with_fake_quant_node(): ++ # Use the inputs to the FakeQuantWithMinMaxVars node as the inputs to ++ # Requantize. ++ fake_quant_node = self.state.output_node_stack[-1][0] ++ min_max_inputs = [fake_quant_node.input[1], fake_quant_node.input[2]] ++ assert original_node.name not in self.state.merged_with_fake_quant ++ self.state.merged_with_fake_quant[original_node.name] = True ++ elif self.fallback_quantization_range: ++ min_max_inputs = [ ++ "fallback_quantization_min_value:0", ++ "fallback_quantization_max_value:0" ++ ] ++ else: ++ # Add a RequantizationRange node for finding the min and max values. 
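# Note: RequantizationRange inspects the qint32 result (via its min/max inputs)
# and reports the narrower range actually occupied by the values; the Requantize
# node created just below then uses that range to rescale the 32-bit accumulator
# output down to quint8.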
++ requant_range_node = create_node( ++ "RequantizationRange", original_node.name + "_eightbit_requant_range", ++ quantized_outputs) ++ set_attr_dtype(requant_range_node, "Tinput", dtypes.qint32) ++ self.add_output_graph_node(requant_range_node) ++ min_max_inputs = [ ++ requant_range_node.name + ":0", requant_range_node.name + ":1" ++ ] ++ requantize_node = create_node("Requantize", ++ original_node.name + "_eightbit_requantize", ++ quantized_outputs + min_max_inputs) ++ set_attr_dtype(requantize_node, "Tinput", dtypes.qint32) ++ set_attr_dtype(requantize_node, "out_type", dtypes.quint8) ++ self.add_output_graph_node(requantize_node) ++ return requantize_node.name ++ ++ def add_dequantize_result_node(self, ++ quantized_output_name, ++ original_node_name, ++ min_tensor_index=1): ++ min_max_inputs = [ ++ "%s:%s" % (quantized_output_name, min_tensor_index), ++ "%s:%s" % (quantized_output_name, (min_tensor_index + 1)) ++ ] ++ dequantize_name = original_node_name ++ if self.should_merge_with_fake_quant_node(): ++ fake_quant_node = self.state.output_node_stack[-1][0] ++ if original_node_name not in self.state.merged_with_fake_quant: ++ min_max_inputs = [fake_quant_node.input[1], fake_quant_node.input[2]] ++ self.state.merged_with_fake_quant[original_node_name] = True ++ dequantize_name = fake_quant_node.name ++ ++ dequantize_node = create_node( ++ "Dequantize", dequantize_name, ++ [quantized_output_name, min_max_inputs[0], min_max_inputs[1]]) ++ set_attr_dtype(dequantize_node, "T", dtypes.quint8) ++ set_attr_string(dequantize_node, "mode", b"MIN_FIRST") ++ self.add_output_graph_node(dequantize_node) ++ ++ def eightbitize_mat_mul_node(self, original_node): ++ """Replaces a MatMul node with the eight bit equivalent sub-graph.""" ++ quantized_mat_mul_name = original_node.name + "_eightbit_quantized_mat_mul" ++ all_input_names = self.add_eightbit_prologue_nodes(original_node) ++ quantized_mat_mul_node = create_node("QuantizedMatMul", ++ quantized_mat_mul_name, ++ all_input_names) ++ set_attr_dtype(quantized_mat_mul_node, "T1", dtypes.quint8) ++ set_attr_dtype(quantized_mat_mul_node, "T2", dtypes.quint8) ++ set_attr_dtype(quantized_mat_mul_node, "Toutput", dtypes.qint32) ++ copy_attr(quantized_mat_mul_node, "transpose_a", ++ original_node.attr["transpose_a"]) ++ copy_attr(quantized_mat_mul_node, "transpose_b", ++ original_node.attr["transpose_b"]) ++ self.add_output_graph_node(quantized_mat_mul_node) ++ quantize_down_name = self.add_quantize_down_nodes(original_node, ++ quantized_mat_mul_name) ++ self.add_dequantize_result_node(quantize_down_name, original_node.name) ++ ++ def eightbitize_conv_node(self, original_node): ++ """Replaces a Conv2D node with the eight bit equivalent sub-graph.""" ++ all_input_names = self.add_eightbit_prologue_nodes(original_node) ++ quantized_conv_name = original_node.name + "_eightbit_quantized_conv" ++ quantized_conv_node = create_node("QuantizedConv2D", quantized_conv_name, ++ all_input_names) ++ copy_attr(quantized_conv_node, "strides", original_node.attr["strides"]) ++ copy_attr(quantized_conv_node, "padding", original_node.attr["padding"]) ++ set_attr_dtype(quantized_conv_node, "Tinput", dtypes.quint8) ++ set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.quint8) ++ set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) ++ self.add_output_graph_node(quantized_conv_node) ++ quantize_down_name = self.add_quantize_down_nodes(original_node, ++ quantized_conv_name) ++ self.add_dequantize_result_node(quantize_down_name, original_node.name) ++ ++ def 
eightbitize_bias_add_node(self, original_node): ++ """Replaces a BiasAdd node with the eight bit equivalent sub-graph.""" ++ quantized_bias_add_name = ( ++ original_node.name + "_eightbit_quantized_bias_add") ++ all_input_names = self.add_eightbit_prologue_nodes(original_node) ++ quantized_bias_add_node = create_node("QuantizedBiasAdd", ++ quantized_bias_add_name, ++ all_input_names) ++ set_attr_dtype(quantized_bias_add_node, "T1", dtypes.quint8) ++ set_attr_dtype(quantized_bias_add_node, "T2", dtypes.quint8) ++ set_attr_dtype(quantized_bias_add_node, "out_type", dtypes.qint32) ++ self.add_output_graph_node(quantized_bias_add_node) ++ quantize_down_name = self.add_quantize_down_nodes(original_node, ++ quantized_bias_add_name) ++ self.add_dequantize_result_node(quantize_down_name, original_node.name) ++ ++ def eightbitize_single_input_tensor_node(self, original_node, ++ add_op_function): ++ """Replaces a single-tensor node with the eight bit equivalent sub-graph. ++ Converts a node like this: ++ Shape(f) Input(f) ++ | | ++ +--------v v ++ Operation ++ | ++ v ++ (f) ++ Into a quantized equivalent: ++ Input(f) ReshapeDims ++ +------v v-------------+ ++ | Reshape ++ | | ++ | | ReductionDims ++ | +-----+ | ++ | | +---c---------+ ++ | v v v v-------+ ++ | Min Max ++ | +----+ | ++ v v v--------+ ++ Quantize ++ | ++ v ++ QuantizedOperation ++ | | | ++ v v v ++ Dequantize ++ | ++ v ++ (f) ++ Args: ++ original_node: Float node to be converted. ++ add_op_function: Function to create the actual node. ++ Returns: ++ Subgraph representing the quantized version of the original node. ++ """ ++ quantized_op_name = original_node.name + "_eightbit_quantized" ++ quantized_op_type = "Quantized" + original_node.op ++ all_input_names = self.add_eightbit_prologue_nodes(original_node) ++ quantized_op_node = create_node(quantized_op_type, quantized_op_name, ++ all_input_names) ++ add_op_function(original_node, quantized_op_node) ++ self.add_output_graph_node(quantized_op_node) ++ self.add_dequantize_result_node(quantized_op_name, original_node.name) ++ ++ def add_pool_function(self, original_node, quantized_op_node): ++ set_attr_dtype(quantized_op_node, "T", dtypes.quint8) ++ copy_attr(quantized_op_node, "ksize", original_node.attr["ksize"]) ++ copy_attr(quantized_op_node, "strides", original_node.attr["strides"]) ++ copy_attr(quantized_op_node, "padding", original_node.attr["padding"]) ++ ++ def add_relu_function(self, unused_arg_node, quantized_op_node): ++ set_attr_dtype(quantized_op_node, "Tinput", dtypes.quint8) ++ ++ def eightbitize_concat_node(self, original_node): ++ """Replaces a Concat node with the eight bit equivalent sub-graph. ++ Converts a node like this: ++ Shape(f) Input0(f) Input1(f) ++ | | | ++ +--------v v v----------+ ++ Concat ++ | ++ v ++ (f) ++ Into a quantized equivalent: ++ Shape(f) Input0(f) ReshapeDims Input1(f) ++ | +------v v--------------+------------------v v------+ ++ | | Reshape Reshape | ++ | | | | | ++ | | | ReductionDims | | ++ | | +------+ | +--------+ | ++ | | | +---c---------+-----------c-----+ | | ++ | | +v v v v-------+---------v v v v+ | ++ | | Min Max Min Max | ++ | | +----+ | | +-----+ | ++ | v v v--------+ +----------v v v ++ | Quantize Quantize ++ | +------------------+ +----------------------+ ++ +-------------------------------+ | | ++ v v v ++ QuantizedConcat ++ | | | ++ v v v ++ Dequantize ++ | ++ v ++ (f) ++ Args: ++ original_node: Float node to be converted. ++ Returns: ++ Subgraph representing the quantized version of the original node. 
++ """ ++ namespace_prefix = original_node.name + "_eightbit" ++ quantized_concat_name = namespace_prefix + "_quantized_concat" ++ reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes( ++ namespace_prefix, node_name_from_input(original_node.input[1])) ++ shape_input_name = original_node.input[0] ++ original_inputs = original_node.input[1:] ++ input_names = [] ++ min_names = [] ++ max_names = [] ++ for original_input_name in original_inputs: ++ quantize_input_name, min_input_name, max_input_name = ( ++ self.eightbitize_input_to_node(namespace_prefix, original_input_name, ++ reshape_dims_name, ++ reduction_dims_name)) ++ input_names.append(quantize_input_name) ++ min_names.append(min_input_name) ++ max_names.append(max_input_name) ++ all_input_names = [shape_input_name] ++ all_input_names.extend(input_names) ++ all_input_names.extend(min_names) ++ all_input_names.extend(max_names) ++ quantized_concat_node = create_node("QuantizedConcat", ++ quantized_concat_name, all_input_names) ++ set_attr_int(quantized_concat_node, "N", len(original_inputs)) ++ set_attr_dtype(quantized_concat_node, "T", dtypes.quint8) ++ self.add_output_graph_node(quantized_concat_node) ++ self.add_dequantize_result_node(quantized_concat_name, original_node.name) ++ ++ def eightbitize_concatv2_node(self, original_node): ++ """ ++ Args: ++ original_node: Float node to be converted. ++ Returns: ++ Subgraph representing the quantized version of the original node. ++ """ ++ namespace_prefix = original_node.name + "_eightbit" ++ quantized_concat_name = namespace_prefix + "_quantized_concatv2" ++ reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes( ++ namespace_prefix, node_name_from_input(original_node.input[-1])) ++ num_input = len(original_node.input) ++ shape_input_name = original_node.input[num_input-1] ++ original_inputs = original_node.input[0:num_input-1] ++ input_names = [] ++ min_names = [] ++ max_names = [] ++ for original_input_name in original_inputs: ++ quantize_input_name, min_input_name, max_input_name = ( ++ self.eightbitize_input_to_node(namespace_prefix, original_input_name, ++ reshape_dims_name, ++ reduction_dims_name)) ++ input_names.append(quantize_input_name) ++ min_names.append(min_input_name) ++ max_names.append(max_input_name) ++ all_input_names = input_names ++ all_input_names.append(shape_input_name) ++ all_input_names.extend(min_names) ++ all_input_names.extend(max_names) ++ quantized_concat_node = create_node("QuantizedConcatV2", ++ quantized_concat_name, all_input_names) ++ set_attr_int(quantized_concat_node, "N", len(original_inputs)) ++ set_attr_dtype(quantized_concat_node, "T", dtypes.quint8) ++ self.add_output_graph_node(quantized_concat_node) ++ self.add_dequantize_result_node(quantized_concat_name, original_node.name) ++ ++ def eightbitize_placeholder_node(self, current_node): ++ """Replaces a placeholder node with a quint8 placeholder node+dequantize.""" ++ name = current_node.name ++ ++ # Convert the placeholder into a quantized type. ++ output_node = node_def_pb2.NodeDef() ++ output_node.CopyFrom(current_node) ++ set_attr_dtype(output_node, "dtype", dtypes.quint8) ++ output_node.name += "_original_input" ++ self.add_output_graph_node(output_node) ++ ++ # Add a dequantize to convert back to float. 
++ dequantize_node = create_node("Dequantize", name, [ ++ output_node.name, "quantized_input_min_value", ++ "quantized_input_max_value" ++ ]) ++ set_attr_dtype(dequantize_node, "T", dtypes.quint8) ++ set_attr_string(dequantize_node, "mode", b"MIN_FIRST") ++ self.add_output_graph_node(dequantize_node) ++ ++ # For the descent over the graph to work, the dequantize node must be named ++ # current_node.name. However, for the feeding of the graph to work, the ++ # placeholder must have the name current_node.name; so record a final set ++ # of renames to apply after all processing has been done. ++ self.final_node_renames[output_node.name] = name ++ self.final_node_renames[dequantize_node.name] = name + "_dequantize" ++ ++ def eightbitize_reshape_node(self, original_node): ++ """Replaces a Reshape node with the eight bit equivalent sub-graph. ++ Args: ++ original_node: Float node to be converted. ++ Returns: ++ Subgraph representing the quantized version of the original node. ++ """ ++ namespace_prefix = original_node.name + "_eightbit" ++ quantized_reshape_name = namespace_prefix + "_quantized_reshape" ++ reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes( ++ namespace_prefix, node_name_from_input(original_node.input[0])) ++ shape_input_name = original_node.input[1] ++ quantize_input_name, min_input_name, max_input_name = ( ++ self.eightbitize_input_to_node(namespace_prefix, original_node.input[0], ++ reshape_dims_name, reduction_dims_name)) ++ quantized_reshape_node = create_node( ++ "QuantizedReshape", quantized_reshape_name, ++ [quantize_input_name, shape_input_name, min_input_name, max_input_name]) ++ set_attr_dtype(quantized_reshape_node, "T", dtypes.quint8) ++ self.add_output_graph_node(quantized_reshape_node) ++ self.add_dequantize_result_node(quantized_reshape_name, original_node.name) ++ ++ def eightbitize_batch_norm_node(self, original_node): ++ """Replaces a MatMul node with the eight bit equivalent sub-graph.""" ++ namespace_prefix = original_node.name + "_eightbit" ++ original_input_name = original_node.input[0] ++ original_mean_name = original_node.input[1] ++ original_variance_name = original_node.input[2] ++ original_beta_name = original_node.input[3] ++ original_gamma_name = original_node.input[4] ++ quantized_batch_norm_name = namespace_prefix + "_quantized_batch_norm" ++ ++ reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes( ++ namespace_prefix, node_name_from_input(original_input_name)) ++ quantize_input_name, min_input_name, max_input_name = ( ++ self.eightbitize_input_to_node(namespace_prefix, original_input_name, ++ reshape_dims_name, reduction_dims_name)) ++ quantize_mean_name, min_mean_name, max_mean_name = ( ++ self.eightbitize_input_to_node(namespace_prefix, original_mean_name, ++ reshape_dims_name, reduction_dims_name)) ++ quantize_variance_name, min_variance_name, max_variance_name = ( ++ self.eightbitize_input_to_node(namespace_prefix, original_variance_name, ++ reshape_dims_name, reduction_dims_name)) ++ quantize_beta_name, min_beta_name, max_beta_name = ( ++ self.eightbitize_input_to_node(namespace_prefix, original_beta_name, ++ reshape_dims_name, reduction_dims_name)) ++ quantize_gamma_name, min_gamma_name, max_gamma_name = ( ++ self.eightbitize_input_to_node(namespace_prefix, original_gamma_name, ++ reshape_dims_name, reduction_dims_name)) ++ quantized_batch_norm_node = create_node( ++ "QuantizedBatchNormWithGlobalNormalization", quantized_batch_norm_name, ++ [ ++ quantize_input_name, min_input_name, 
max_input_name, ++ quantize_mean_name, min_mean_name, max_mean_name, ++ quantize_variance_name, min_variance_name, max_variance_name, ++ quantize_beta_name, min_beta_name, max_beta_name, ++ quantize_gamma_name, min_gamma_name, max_gamma_name ++ ]) ++ set_attr_dtype(quantized_batch_norm_node, "Tinput", dtypes.quint8) ++ set_attr_dtype(quantized_batch_norm_node, "out_type", dtypes.qint32) ++ copy_attr(quantized_batch_norm_node, "scale_after_normalization", ++ original_node.attr["scale_after_normalization"]) ++ copy_attr(quantized_batch_norm_node, "variance_epsilon", ++ original_node.attr["variance_epsilon"]) ++ self.add_output_graph_node(quantized_batch_norm_node) ++ quantize_down_name = self.add_quantize_down_nodes(original_node, ++ quantized_batch_norm_name) ++ self.add_dequantize_result_node(quantize_down_name, original_node.name) ++ ++ def add_output_graph_node(self, output_node): ++ """Inserts one node into the new graph.""" ++ self.output_graph.node.extend([output_node]) ++ ++ def remove_redundant_quantization(self, old_graph): ++ """Removes unneeded pairs of quantize/dequantize ops from the graph. ++ This is a bit of a tricky function, because it's attempting to spot the ++ pattern of dequantizing from eight-bit up to float, and then immediately ++ quantizing back down to eight bits again, that's introduced by previous ++ passes that do 'key-hole' conversions of individual nodes but have to ++ convert back to float to match the previous output interface, since they ++ don't know that the next op can handle quantized tensors. ++ It works by: ++ - Looking for Quantize nodes. ++ - Checking to see if their first input is a Dequantize node. ++ - Seeing if their min/max inputs come from Min/Max nodes. ++ - Making sure those Min/Max nodes are being fed from the same Dequantize. ++ - Or that the Min is indirectly being fed from the same Dequantize as Max. ++ - Making sure the Dequantize is going through a Reshape (which we add ++ during the previous pass when we create the quantize sub-graph). ++ - Looking for the dims Const op for the Min/Max dims. ++ If all of these conditions are met, then it's a sub-graph pattern that ++ we know how to optimize out (and is likely the common one we've introduced). ++ We then rewire the graph to skip it entirely, and then rely on the dead node ++ removal pass to get rid of any nodes that are no longer needed. ++ Args: ++ old_graph: The model we'll be stripping redundant nodes from. ++ Returns: ++ A graph with the unnecessary nodes removed. ++ Raises: ++ ValueError: Two nodes with the same name were found in the graph. ++ """ ++ old_nodes_map = self.create_nodes_map(old_graph) ++ self.output_graph = graph_pb2.GraphDef() ++ inputs_to_rename = {} ++ # We go through all the nodes, looking for any that match the patterns we ++ # know how to optimize away. ++ for node in old_graph.node: ++ # We always start with a Quantize node, and examine its inputs to see if ++ # they are in a form that can be removed. ++ if node.op not in ["Quantize", "QuantizeV2"]: ++ continue ++ dequantize_node_name = node_name_from_input(node.input[0]) ++ if dequantize_node_name not in old_nodes_map: ++ raise ValueError("Input node name '" + dequantize_node_name + ++ "' not found in node '" + node.name + "'") ++ dequantize_node = old_nodes_map[dequantize_node_name] ++ # Do we have a Dequantize feeding in, with the same type as the Quantize? 
++ if dequantize_node.op != "Dequantize": ++ continue ++ if node.attr["T"] != dequantize_node.attr["T"]: ++ continue ++ # Now look at the other inputs, and ensure they're Min/Max nodes. ++ min_node_name = node_name_from_input(node.input[1]) ++ max_node_name = node_name_from_input(node.input[2]) ++ min_node = old_nodes_map[min_node_name] ++ max_node = old_nodes_map[max_node_name] ++ is_min_right_type = (min_node.op in ["Min", "Dequantize"]) ++ is_max_right_type = (max_node.op in ["Max", "Dequantize"]) ++ if not is_min_right_type or not is_max_right_type: ++ print("Didn't find expected types on inputs : %s, %s." % (min_node.op, ++ max_node.op)) ++ continue ++ min_node_input_name = node_name_from_input(min_node.input[0]) ++ max_node_input_name = node_name_from_input(max_node.input[0]) ++ # There are two different patterns for Min nodes we can recognize, one ++ # where the input comes directly from the same one as the Max, and ++ # another where we run it through another Min first, so check for both. ++ is_same_input = False ++ if min_node_input_name == max_node_input_name: ++ is_same_input = True ++ else: ++ first_min_node_input = old_nodes_map[min_node_input_name] ++ if first_min_node_input.op == "Concat": ++ second_min_node_name = node_name_from_input( ++ first_min_node_input.input[1]) ++ second_min_node = old_nodes_map[second_min_node_name] ++ if second_min_node.op == "Min": ++ second_min_node_input_name = node_name_from_input( ++ second_min_node.input[0]) ++ is_same_input = (second_min_node_input_name == max_node_input_name) ++ if not is_same_input: ++ print("Different min/max inputs: " + min_node_input_name) ++ continue ++ # We recognize this pattern, so mark the graph edges to be rewired to ++ # route around it entirely, since we know it's a no-op. ++ dequantize_source_name = node_name_from_input(dequantize_node.input[0]) ++ node_tensor_name = ensure_tensor_name_has_port(node.name) ++ min_tensor_name = node.name + ":1" ++ max_tensor_name = node.name + ":2" ++ inputs_to_rename[node_tensor_name] = dequantize_source_name ++ inputs_to_rename[min_tensor_name] = dequantize_node.input[1] ++ inputs_to_rename[max_tensor_name] = dequantize_node.input[2] ++ # Finally we apply all the rewiring we've marked to the graph. 
++ for node in old_graph.node: ++ for index, input_full_name in enumerate(node.input): ++ input_name = ensure_tensor_name_has_port(input_full_name) ++ if input_name in inputs_to_rename: ++ node.input[index] = inputs_to_rename[input_name] ++ self.add_output_graph_node(node) ++ return self.output_graph ++ ++ def apply_final_node_renames(self): ++ """Applies node renames in self.final_node_renames to self.output_graph.""" ++ old_graph = self.output_graph ++ self.output_graph = graph_pb2.GraphDef() ++ for node in old_graph.node: ++ node.name = self.final_node_renames.get(node.name, node.name) ++ for index, input_name in enumerate(node.input): ++ node_name = node_name_from_input(input_name) ++ input_full_name = ensure_tensor_name_has_port(input_name) ++ if node_name in self.final_node_renames: ++ node.input[index] = "%s%s" % (self.final_node_renames[node_name], ++ input_full_name[len(node_name):]) ++ self.add_output_graph_node(node) ++ return self.output_graph ++ ++ def remove_dead_nodes(self, output_names): ++ """Removes nodes that are no longer needed for inference from the graph.""" ++ old_output_graph = self.output_graph ++ self.output_graph = graph_util.extract_sub_graph(old_output_graph, ++ output_names) ++ ++ def quantize_weights(self, input_graph, quantization_mode): ++ """Quantize float Const ops. ++ There are two modes of operations, both replace float Const ops with ++ quantized values. ++ 1. If quantization_mode is "weights_rounded", this function replaces float ++ Const ops with quantized float Const ops - same as the original op, but ++ float values being mapped to the center of one of 1< max_games: + games = games[-max_games:] + +- with mp.Pool(threads) as pool: ++ with mp.pool.ThreadPool(threads) as pool: + res = tqdm(pool.imap(self.func, games), total=len(games)) + self.examples.extend(itertools.chain.from_iterable(res)) + print("Got", len(self.examples), "examples") ++ return len(self.examples) + + def update(self, new_games): + """ new_games is a list of .tfrecord.zz new game records. 
""" +@@ -126,6 +131,32 @@ class ExampleBuffer(): + self.examples.clear() + self.examples = deque(maxlen=self.max_size) + ++ def flush_new(self, path, example_num, num_out = 1, threads = 8): ++ # random.shuffle on deque is O(n^2) convert to list for O(n) ++ self.examples = list(self.examples) ++ example_list = [ex[1] for ex in self.examples] ++ length = example_num // num_out ++ example_list = example_list[:length*num_out] ++ ++ i_list = [] ++ for i in range(num_out): ++ i_list.append((i, example_list[i*length:(i+1)*length])) ++ ++ with timer("Writing examples to " + path): ++ with mp.pool.ThreadPool(threads) as pool: ++ pool.starmap(parallel_shuffle, i_list) ++ ++ i_list = [] ++ for i in range(num_out): ++ i_list.append((path+'_'+str(i), example_list[i*length:(i+1)*length], False)) ++ ++ with timer("Writing examples to " + path): ++ with mp.pool.ThreadPool(num_out) as pool: ++ pool.starmap(preprocessing.write_tf_examples, i_list) ++ ++ self.examples.clear() ++ self.examples = deque(maxlen=self.max_size) ++ + @property + def count(self): + return len(self.examples) +diff --git a/rl_loop/fsdb.py b/rl_loop/fsdb.py +index ab9d107..442692c 100644 +--- a/rl_loop/fsdb.py ++++ b/rl_loop/fsdb.py +@@ -62,6 +62,7 @@ models_dir = _with_base('models') + selfplay_dir = _with_base('data', 'selfplay') + holdout_dir = _with_base('data', 'holdout') + sgf_dir = _with_base('sgf') ++mpi_log_dir = _with_base('mpi') + eval_dir = _with_base('sgf', 'eval') + golden_chunk_dir = _with_base('data', 'golden_chunks') + flags_path = _with_base('flags.txt') +diff --git a/run.sh b/run.sh +new file mode 100755 +index 0000000..7cc74e7 +--- /dev/null ++++ b/run.sh +@@ -0,0 +1,24 @@ ++#!/bin/bash ++NUMA_COUNT=`cat /proc/cpuinfo |grep physical\ id|sort -u |wc -l` ++VIRT_CORES=`cat /proc/cpuinfo |grep physical\ id|wc -l` ++NUMA_CORES=`cat /proc/cpuinfo |grep cpu\ cores|head -n 1|awk '//{print $4}'` ++PHY_CORES=$(expr $NUMA_CORES \* $NUMA_COUNT) ++ ++echo Physical cores = $PHY_CORES ++echo Virtual cores = $VIRT_CORES ++echo NUMA cores = $NUMA_CORES ++ ++export KMP_HW_SUBSET=2T ++echo KMP_HW_SUBSET = $KMP_HW_SUBSET ++ ++output_dir=${SCRATCH:-$(pwd)} ++echo Output to ${output_dir} ++ ++export KMP_BLOCKTIME=1 ++export KMP_AFFINITY=compact,granularity=fine ++export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/cc/tensorflow ++ulimit -u 760000 ++ ++export PYTHONPATH=$(pwd)/ml_perf/tools/tensorflow_quantization/quantization:$PYTHONPATH ++ ++./run_minigo.sh ${output_dir}/results/$(hostname) ml_perf/flags/9 $1 +diff --git a/run_minigo.sh b/run_minigo.sh +new file mode 100755 +index 0000000..d319d2e +--- /dev/null ++++ b/run_minigo.sh +@@ -0,0 +1,34 @@ ++#!/bin/bash ++BASE_DIR=$1 ++FLAG_DIR=$2 ++ ++NUMA_COUNT=`cat /proc/cpuinfo |grep physical\ id|sort -u |wc -l` ++VIRT_CORES=`cat /proc/cpuinfo |grep physical\ id|wc -l` ++NUMA_CORES=`cat /proc/cpuinfo |grep cpu\ cores|head -n 1|awk '//{print $4}'` ++PHY_CORES=$(expr $NUMA_CORES \* $NUMA_COUNT) ++ ++BOARD_SIZE=9 python3 ml_perf/reference_implementation.py \ ++ --base_dir=$BASE_DIR \ ++ --flagfile=$FLAG_DIR/rl_loop.flags \ ++ --physical_cores=$PHY_CORES \ ++ --virtual_cores=$VIRT_CORES \ ++ --numa_cores=$NUMA_CORES \ ++ --quantization=$3 \ ++ --train_node=localhost \ ++ --setup_train_workers=True &> train_workers.log & ++ ++# Run training loop ++BOARD_SIZE=9 python3 ml_perf/reference_implementation.py \ ++ --base_dir=$BASE_DIR \ ++ --flagfile=$FLAG_DIR/rl_loop.flags \ ++ --physical_cores=$PHY_CORES \ ++ --virtual_cores=$VIRT_CORES \ ++ --numa_cores=$NUMA_CORES \ ++ --quantization=$3 \ ++ 
--train_node=localhost ++ ++# Once the training loop has finished, run model evaluation to find the ++# first trained model that's better than the target ++BOARD_SIZE=9 python3 ml_perf/eval_models.py \ ++ --base_dir=$BASE_DIR \ ++ --flags_dir=$FLAG_DIR +diff --git a/run_minigo_mn.sh b/run_minigo_mn.sh +new file mode 100755 +index 0000000..06e0633 +--- /dev/null ++++ b/run_minigo_mn.sh +@@ -0,0 +1,51 @@ ++#!/bin/bash ++BASE_DIR=$1 ++FLAG_DIR=$2 ++ ++NUMA_COUNT=`cat /proc/cpuinfo |grep physical\ id|sort -u |wc -l` ++VIRT_CORES=`cat /proc/cpuinfo |grep physical\ id|wc -l` ++NUMA_CORES=`cat /proc/cpuinfo |grep cpu\ cores|head -n 1|awk '//{print $4}'` ++PHY_CORES=$(expr $NUMA_CORES \* $NUMA_COUNT) ++ ++NUM_NODES=`ml_perf/hostlist.sh|wc -l` ++TRAIN_NODES=$3 ++EVAL_NODES=$4 ++PLAY_NODES=$(expr $NUM_NODES - $TRAIN_NODES - $EVAL_NODES - 1) ++#EVAL_NODES=$PLAY_NODES ++TRAIN_PLUS_EVAL_NODES=$(expr $TRAIN_NODES + $EVAL_NODES) ++PLAY_NODES_PLUS_ONE=$(expr $PLAY_NODES + 1) ++echo train nodes $TRAIN_NODES ++echo eval nodes $EVAL_NODES ++echo play nodes $PLAY_NODES ++ ++echo "BOARD_SIZE=9 python3 ml_perf/reference_implementation.py --setup_train_workers=True &> train_workers.log &" ++BOARD_SIZE=9 python3 ml_perf/reference_implementation.py \ ++ --base_dir=$BASE_DIR \ ++ --flagfile=$FLAG_DIR/rl_loop.flags \ ++ --physical_cores=$PHY_CORES \ ++ --virtual_cores=$VIRT_CORES \ ++ --numa_cores=$NUMA_CORES \ ++ --quantization=$5 \ ++ `ml_perf/hostlist.sh |head -n $PLAY_NODES_PLUS_ONE|tail -n $PLAY_NODES|awk '/./{print "--selfplay_node="$0}'` \ ++ `ml_perf/hostlist.sh |tail -n $TRAIN_NODES|awk '/./{print "--train_node="$0}'` \ ++ `ml_perf/hostlist.sh |tail -n $TRAIN_PLUS_EVAL_NODES|head -n $EVAL_NODES |awk '/./{print "--eval_node="$0}'` \ ++ --setup_train_workers=True &> train_workers.log & ++ ++echo "BOARD_SIZE=9 python3 ml_perf/reference_implementation.py" ++# Run training loop ++BOARD_SIZE=9 python3 ml_perf/reference_implementation.py \ ++ --base_dir=$BASE_DIR \ ++ --flagfile=$FLAG_DIR/rl_loop.flags \ ++ --physical_cores=$PHY_CORES \ ++ --virtual_cores=$VIRT_CORES \ ++ --numa_cores=$NUMA_CORES \ ++ --quantization=$5 \ ++ `ml_perf/hostlist.sh |head -n $PLAY_NODES_PLUS_ONE|tail -n $PLAY_NODES|awk '/./{print "--selfplay_node="$0}'` \ ++ `ml_perf/hostlist.sh |tail -n $TRAIN_NODES|awk '/./{print "--train_node="$0}'` \ ++ `ml_perf/hostlist.sh |tail -n $TRAIN_PLUS_EVAL_NODES|head -n $EVAL_NODES |awk '/./{print "--eval_node="$0}'` ++ ++# Once the training loop has finished, run model evaluation to find the ++# first trained model that's better than the target ++BOARD_SIZE=9 python3 ml_perf/eval_models.py \ ++ --base_dir=$BASE_DIR \ ++ --flags_dir=$FLAG_DIR +diff --git a/run_mn.sh b/run_mn.sh +new file mode 100755 +index 0000000..2628118 +--- /dev/null ++++ b/run_mn.sh +@@ -0,0 +1,22 @@ ++NUMA_COUNT=`cat /proc/cpuinfo |grep physical\ id|sort -u |wc -l` ++VIRT_CORES=`cat /proc/cpuinfo |grep physical\ id|wc -l` ++NUMA_CORES=`cat /proc/cpuinfo |grep cpu\ cores|head -n 1|awk '//{print $4}'` ++PHY_CORES=$(expr $NUMA_CORES \* $NUMA_COUNT) ++ ++echo Physical cores = $PHY_CORES ++echo Virtual cores = $VIRT_CORES ++echo NUMA cores = $NUMA_CORES ++ ++export KMP_HW_SUBSET=2T ++echo KMP_HW_SUBSET = $KMP_HW_SUBSET ++ ++output_dir=${SCRATCH:-$(pwd)} ++echo Output to ${output_dir} ++ ++export KMP_BLOCKTIME=1 ++export KMP_AFFINITY=compact,granularity=fine ++export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/cc/tensorflow ++export PYTHONPATH=$(pwd):$(pwd)/ml_perf/tools/tensorflow_quantization/quantization:$PYTHONPATH ++ulimit -u 760000 ++ 
++./run_minigo_mn.sh ${output_dir}/results/$(hostname) ml_perf/flags/9.mn $1 $2 $3 +diff --git a/set_avx2_build b/set_avx2_build +new file mode 100644 +index 0000000..b60a540 +--- /dev/null ++++ b/set_avx2_build +@@ -0,0 +1,61 @@ ++#This file exports the bazel build opts for AVX2 platforms (broadwell and haswell). By setting -march=haswell and -mtune=broadwell, the binary will run on systems haswell and newer, but will be tuned for broadwell. ++ ++MIN_GCC_MAJOR_VERSION=5 ++MIN_GCC_MINOR_VERSION=3 ++MIN_GCC_REVISION=0 ++GCC_VERSION_STR=$(gcc -dumpversion) ++echo "GCC Version: ${GCC_VERSION_STR}" ++IFS='.' read -r -a GCC_VERSION <<< ${GCC_VERSION_STR} ++ ++if [ "${GCC_VERSION[0]}" -lt "${MIN_GCC_MAJOR_VERSION}" ] ; ++then ++ echo "Your MAJOR version of GCC is too old: ${GCC_VERSION_STR}; it must be at least ${MIN_GCC_MAJOR_VERSION}.${MIN_GCC_MINOR_VERSION}.${MIN_GCC_REVISION}" ++ return 1 ++ ++elif [ "${GCC_VERSION[0]}" -eq "${MIN_GCC_MAJOR_VERSION}" ] ; ++then ++ if [ "${GCC_VERSION[1]}" -lt "${MIN_GCC_MINOR_VERSION}" ] ; ++ then ++ echo "Your MINOR version of GCC is too old: ${GCC_VERSION_STR}; it must be at least ${MIN_GCC_MAJOR_VERSION}.${MIN_GCC_MINOR_VERSION}." ++ return 1 ++ fi ++fi ++ ++echo "GCC ${GCC_VERSION_STR}: OK" ++ ++#Don't use the C++11 ABI; use the old one ++#These two options should be equivalent to all the options commented out below ++BAZEL_BUILD_OPTS_BASIC="--cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0 \ ++ --copt=-march=haswell \ ++ --copt=-mtune=broadwell \ ++ --copt=-O3" ++BAZEL_SECURE_BUILD_OPTS="--copt=-Wformat \ ++ --copt=-Wformat-security \ ++ --copt=-fstack-protector \ ++ --copt=-fPIC \ ++ --copt=-fpic \ ++ --linkopt=-znoexecstack \ ++ --linkopt=-zrelro \ ++ --linkopt=-znow \ ++ --linkopt=-fstack-protector \ ++ --linkopt=-pie" ++ ++#basic build flags ++echo "exporting BAZEL_BUILD_OPTS_BASIC=${BAZEL_BUILD_OPTS_BASIC}" ++export BAZEL_BUILD_OPTS_BASIC="${BAZEL_BUILD_OPTS_BASIC}" ++ ++#secure build flags ++BAZEL_BUILD_OPTS="${BAZEL_BUILD_OPTS_BASIC} ${BAZEL_SECURE_BUILD_OPTS}" ++echo "exporting BAZEL_BUILD_OPTS=${BAZEL_BUILD_OPTS}" ++export BAZEL_BUILD_OPTS="${BAZEL_BUILD_OPTS}" ++ ++#basic mkl flags ++BAZEL_MKL_BUILD_OPTS_BASIC="--config=mkl ${BAZEL_BUILD_OPTS_BASIC}" ++echo "exporting BAZEL_MKL_BUILD_OPTS_BASIC=${BAZEL_MKL_BUILD_OPTS_BASIC}" ++export BAZEL_MKL_BUILD_OPTS_BASIC="${BAZEL_MKL_BUILD_OPTS_BASIC}" ++ ++#secure mkl flags ++BAZEL_SECURE_MKL_BUILD_OPTS="--config=mkl ${BAZEL_BUILD_OPTS}" ++echo "exporting BAZEL_SECURE_MKL_BUILD_OPTS=${BAZEL_SECURE_MKL_BUILD_OPTS}" ++export BAZEL_SECURE_MKL_BUILD_OPTS="${BAZEL_SECURE_MKL_BUILD_OPTS}" ++ +diff --git a/testing/bootstrap_v2.sh b/testing/bootstrap_v2.sh +old mode 100644 +new mode 100755 +diff --git a/train.py b/train.py +index d6b24bc..3784d14 100644 +--- a/train.py ++++ b/train.py +@@ -19,6 +19,7 @@ Usage: + """ + + import logging ++import os + + from absl import app, flags + import numpy as np +@@ -28,6 +29,13 @@ import bigtable_input + import dual_net + import preprocessing + import utils ++import time ++ ++import ml_perf.mlp_log as mll ++import horovod.tensorflow as hvd ++import os ++from mpi4py import MPI ++import socket + + # See www.moderndescartes.com/essays/shuffle_viz for discussion on sizing + flags.DEFINE_integer('shuffle_buffer_size', 2000, +@@ -47,6 +55,9 @@ flags.DEFINE_float('filter_amount', 1.0, + flags.DEFINE_string('export_path', None, + 'Where to export the model after training.') + ++flags.DEFINE_string('data_path', None, ++ 'Where to get the data for training.') ++ + flags.DEFINE_bool('use_bt', 
False, + 'Whether to use Bigtable as input. ' + '(Only supported with --use_tpu, currently.)') +@@ -54,6 +65,16 @@ flags.DEFINE_bool('use_bt', False, + flags.DEFINE_bool('freeze', False, + 'Whether to freeze the graph at the end of training.') + ++flags.DEFINE_string('host_addr', None, ++ 'host address.') ++ ++flags.DEFINE_bool('quantization', True, 'Using Int8 if true.') ++ ++flags.DEFINE_bool('eval_min_max_every_epoch', True, 'Genereting min max log every epoch if true.') ++ ++flags.DEFINE_boolean('random_rotation', True, 'Do random rotation when running for min&max log.') ++flags.DEFINE_integer('quantize_test_steps', 5, 'The steps to run for min&max log.') ++flags.DEFINE_integer('quantize_test_batch_size', 16, 'The batch size for running inference for min&max log.') + + flags.register_multi_flags_validator( + ['use_bt', 'use_tpu'], +@@ -77,6 +98,8 @@ flags.declare_key_flag('work_dir') + flags.declare_key_flag('train_batch_size') + flags.declare_key_flag('num_tpu_cores') + flags.declare_key_flag('use_tpu') ++flags.declare_key_flag('dist_train') ++flags.declare_key_flag('training_seed') + + FLAGS = flags.FLAGS + +@@ -145,6 +168,8 @@ def train(*tf_records: "Records to train on"): + estimator = dual_net.get_estimator() + + effective_batch_size = FLAGS.train_batch_size ++ if FLAGS.dist_train: ++ effective_batch_size = int(FLAGS.train_batch_size/hvd.size()) + if FLAGS.use_tpu: + effective_batch_size *= FLAGS.num_tpu_cores + +@@ -172,14 +197,17 @@ def train(*tf_records: "Records to train on"): + else: + def _input_fn(): + return preprocessing.get_input_tensors( +- FLAGS.train_batch_size, ++ effective_batch_size, + tf_records, + filter_amount=FLAGS.filter_amount, + shuffle_buffer_size=FLAGS.shuffle_buffer_size, +- random_rotation=True) ++ random_rotation=True, seed=FLAGS.training_seed, ++ dist_train=FLAGS.dist_train) + + hooks = [UpdateRatioSessionHook(FLAGS.work_dir), + EchoStepCounterHook(output_dir=FLAGS.work_dir)] ++ if FLAGS.dist_train: ++ hooks.append(hvd.BroadcastGlobalVariablesHook(0)) + + steps = FLAGS.steps_to_train + logging.info("Training, steps = %s, batch = %s -> %s examples", +@@ -206,22 +234,96 @@ def train(*tf_records: "Records to train on"): + games.require_fresh_games(0) + raise + ++def get_golden_chunk_records(base): ++ pattern = os.path.join(base, '*.zz*') ++ window_size = FLAGS.window_size ++ return sorted(tf.gfile.Glob(pattern), reverse=True)[:window_size] ++ ++def init_socket(): ++ address = (FLAGS.host_addr, 52175) ++ server = socket.socket(socket.AF_INET,socket.SOCK_STREAM) ++ server.bind(address) ++ server.listen(1) ++ return server ++ + + def main(argv): + """Train on examples and export the updated model weights.""" +- tf_records = argv[1:] +- logging.info("Training on %s records: %s to %s", +- len(tf_records), tf_records[0], tf_records[-1]) +- with utils.logged_timer("Training"): +- train(*tf_records) +- if FLAGS.export_path: +- dual_net.export_model(FLAGS.export_path) +- if FLAGS.freeze: +- if FLAGS.use_tpu: +- dual_net.freeze_graph_tpu(FLAGS.export_path) +- else: +- dual_net.freeze_graph(FLAGS.export_path) +- ++ socket.setdefaulttimeout(99999999) ++ if FLAGS.dist_train: ++ comm_all = MPI.COMM_WORLD ++ mpi_rank = comm_all.Get_rank() ++ mpi_size = comm_all.Get_size() ++ hvd.init(comm_all) ++ if(mpi_rank==0): ++ server = init_socket() ++ comm_all.barrier() ++ ++ print(FLAGS.host_addr) ++ ++ mll.global_batch_size(FLAGS.train_batch_size) ++ mll.lr_rates(FLAGS.lr_rates) ++ mll.lr_boundaries(FLAGS.lr_boundaries) ++ effective_batch_size = FLAGS.train_batch_size ++ if 
FLAGS.dist_train: ++ effective_batch_size = int(FLAGS.train_batch_size/hvd.size()) ++ ++ tf_records_ph = tf.placeholder(tf.string) ++ data_iter = preprocessing.get_input_tensors_new( ++ effective_batch_size, ++ tf_records_ph, ++ filter_amount=FLAGS.filter_amount, ++ shuffle_buffer_size=FLAGS.shuffle_buffer_size, ++ random_rotation=True, seed=FLAGS.training_seed, ++ dist_train=FLAGS.dist_train) ++ features, labels = data_iter.get_next() ++ train_op = dual_net.model_fn_new(features, labels, tf.estimator.ModeKeys.TRAIN, FLAGS.flag_values_dict()) ++ session_config = tf.ConfigProto( ++ intra_op_parallelism_threads=FLAGS.num_intra_threads, ++ inter_op_parallelism_threads=FLAGS.num_inter_threads) ++ session_config.gpu_options.allow_growth = True ++ sess = tf.Session(config=session_config) ++ tf.train.Saver().restore(sess, 'ml_perf/checkpoint/9/work_dir/model.ckpt-9383') ++ ++ i = -1 ++ while True: ++ # i start from 0 in the loop ++ i += 1 ++ if(mpi_rank==0): ++ print('waiting for client...') ++ reception,addr = server.accept() ++ export_path = reception.recv(1024).decode(); ++ if export_path == 'stop training': ++ break ++ comm_all.barrier() ++ tf_records = get_golden_chunk_records(FLAGS.data_path) ++ print("Training on {} records:".format(len(tf_records))) ++ for record in tf_records: ++ print(" {}".format(record)) ++ start = time.time() ++ sess.run(data_iter.initializer, {tf_records_ph: tf_records}) ++ step = 0 ++ while True: ++ try: ++ step_start = time.time() ++ sess.run(train_op) ++ step = step+1 ++ print ('step {} -- step/sec {}'.format(step, 1/(time.time()-step_start))) ++ except tf.errors.OutOfRangeError: ++ break ++ comm_all.barrier() ++ if hvd.rank() == 0: ++ tf.train.Saver().save(sess, export_path) ++ dual_net.optimize_graph(export_path + '.pb', export_path, FLAGS.quantization, FLAGS.data_path+'/*.zz*', FLAGS.eval_min_max_every_epoch, True) ++ finish = time.time() ++ if(mpi_rank==0): ++ reception.send('finish'.encode()); ++ reception.close() ++ print ('run ', i, ': {:.3f}'.format(finish-start)) ++ ++ if(mpi_rank==0): ++ reception.send('finish'.encode()); ++ server.close() + + if __name__ == "__main__": + app.run(main) diff --git a/models/reinforcement/tensorflow/minigo/training/fp32/mlperf_split.patch b/models/reinforcement/tensorflow/minigo/training/fp32/mlperf_split.patch new file mode 100644 index 000000000..e5b70584a --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/fp32/mlperf_split.patch @@ -0,0 +1,510 @@ +diff --git a/ml_perf/divide_golden_chunk.py b/ml_perf/divide_golden_chunk.py +new file mode 100644 +index 0000000..364da8d +--- /dev/null ++++ b/ml_perf/divide_golden_chunk.py +@@ -0,0 +1,63 @@ ++# Here need some words ++ ++import os ++import shutil ++import random ++import functools ++ ++import numpy as np ++import tensorflow as tf ++import threading ++ ++from mpi4py import MPI ++from absl import app, flags ++from rl_loop import example_buffer ++ ++flags.DEFINE_string('read_path', '/tmp/minigo', ++ 'Path to the read origin data.') ++ ++flags.DEFINE_string('write_path', '/tmp/minigo/output', ++ 'Path to the read origin data.') ++ ++flags.DEFINE_integer('out_files_number', 2, ++ 'Num of files to produce.') ++ ++flags.DEFINE_integer('physical_cores', 56, ++ 'Num of cores.') ++ ++flags.DEFINE_integer('seed', 0, ++ 'Random seed.') ++ ++FLAGS = flags.FLAGS ++ ++ ++def main(unused_argv): ++ mpi_comm = MPI.COMM_WORLD ++ mpi_rank = mpi_comm.Get_rank() ++ mpi_size = mpi_comm.Get_size() ++ # avoid seed out of range ++ random.seed(FLAGS.seed % 1048576) ++ 
tf.set_random_seed(FLAGS.seed % 1048576) ++ np.random.seed(FLAGS.seed % 1048576) ++ ++ pattern = os.path.join(FLAGS.read_path, '*.zz') ++ files = tf.gfile.Glob(pattern) ++ ++ buffer = example_buffer.ExampleBuffer(sampling_frac=1.0) ++ example_num = buffer.parallel_fill(files, threads=FLAGS.physical_cores) ++ ++ # make sure all nodes generate same number of examples ++ example_num = int(mpi_comm.allreduce(example_num, op=MPI.MIN)) ++ ++ buffer.flush_new(FLAGS.write_path+'_{}'.format(mpi_rank), example_num, FLAGS.out_files_number, threads=1) ++ ++ shutil.rmtree('/tmp/minigo/home', ignore_errors=True) ++ ++if __name__ == '__main__': ++ app.run(main) ++ ++ ++ ++ ++ ++ +diff --git a/ml_perf/reference_implementation.py b/ml_perf/reference_implementation.py +index e04d873..1649a0a 100644 +--- a/ml_perf/reference_implementation.py ++++ b/ml_perf/reference_implementation.py +@@ -34,6 +34,8 @@ import multiprocessing as mp + from ml_perf.utils import * + import ml_perf.mlp_log as mll + ++from fractions import gcd ++ + from absl import app, flags + from rl_loop import example_buffer, fsdb + import dual_net +@@ -64,7 +66,7 @@ flags.DEFINE_string('flags_dir', None, + + flags.DEFINE_integer('window_size', 10, + 'Maximum number of recent selfplay rounds to train on.') +-flags.DEFINE_integer('golden_chunk_split', 16, ++flags.DEFINE_integer('golden_chunk_split', 2, + 'Golden chunk of each selfplay is splited to accelerate write golden chunk') + + flags.DEFINE_integer('parallel_post_train', 0, +@@ -164,7 +166,7 @@ class WinStats: + self.white_wins = ColorWinStats(*raw_stats[4:]) + self.total_wins = self.black_wins.total + self.white_wins.total + +-def initialize_from_checkpoint(state): ++def initialize_from_checkpoint(state, out_files_number): + """Initialize the reinforcement learning loop from a checkpoint.""" + # The checkpoint's work_dir should contain the most recently trained model. + model_paths = glob.glob(os.path.join(FLAGS.checkpoint_dir, +@@ -174,18 +176,20 @@ def initialize_from_checkpoint(state): + 'got [{}]'.format(', '.join(model_paths))) + start_model_path = model_paths[0] + +- # Copy the training chunks. + golden_chunks_dir = os.path.join(FLAGS.checkpoint_dir, 'golden_chunks') + for basename in os.listdir(golden_chunks_dir): + path = os.path.join(golden_chunks_dir, basename) +- shutil.copy(path, fsdb.golden_chunk_dir()) ++ out_path = os.path.join(fsdb.golden_chunk_dir(), basename) ++ buffer = example_buffer.ExampleBuffer(sampling_frac=1.0) ++ example_num = buffer.parallel_fill(tf.gfile.Glob(path),FLAGS.physical_cores) ++ buffer.flush_new(out_path, example_num, out_files_number, 1)# FLAGS.physical_cores) + + # Copy the latest trained model into the models directory and use it on the + # first round of selfplay. + state.best_model_name = 'checkpoint' + best_model_path = os.path.join(fsdb.models_dir(), state.best_model_name) + +- dual_net.optimize_graph(start_model_path, best_model_path, FLAGS.quantization, fsdb.golden_chunk_dir()+'/*.zz', FLAGS.eval_min_max_every_epoch) ++ dual_net.optimize_graph(start_model_path, best_model_path, FLAGS.quantization, fsdb.golden_chunk_dir()+'/*.zz*', FLAGS.eval_min_max_every_epoch) + + # Copy the training files. 
+ work_dir = os.path.join(FLAGS.checkpoint_dir, 'work_dir') +@@ -194,7 +198,6 @@ def initialize_from_checkpoint(state): + shutil.copy(path, fsdb.working_dir()) + + +- + def parse_win_stats_table(stats_str, num_lines): + result = [] + lines = stats_str.split('\n') +@@ -322,9 +325,7 @@ def get_golden_chunk_records(window_size): + A list of golden chunks up to num_records in length, sorted by path. + """ + +- pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz') +- if window_size > FLAGS.golden_chunk_split * FLAGS.window_size: +- window_size = FLAGS.golden_chunk_split * FLAGS.window_size ++ pattern = os.path.join(fsdb.golden_chunk_dir(), '*.zz*') + return sorted(tf.gfile.Glob(pattern), reverse=True)[:window_size] + + +@@ -343,9 +344,9 @@ async def selfplay(state, flagfile='selfplay'): + flagfile: the name of the flagfile to use for selfplay, either 'selfplay' + (the default) or 'boostrap'. + """ +- + output_dir = os.path.join(fsdb.selfplay_dir(), state.output_model_name) + holdout_dir = os.path.join(fsdb.holdout_dir(), state.output_model_name) ++ output_dir = '/tmp/minigo' + output_dir + + multi_instance, num_instance, flag_list = extract_multi_instance( + ['--flagfile={}_mi.flags'.format(os.path.join(FLAGS.flags_dir, flagfile))]) +@@ -391,43 +392,34 @@ async def selfplay(state, flagfile='selfplay'): + + with logged_timer('generate golden chunk'): + # Write examples to a single record. +- pattern = os.path.join(output_dir, '*', '*.zz') +- files = tf.gfile.Glob(pattern) +- +- random.seed(state.seed) +- tf.set_random_seed(state.seed) +- np.random.seed(state.seed) +- +- # TODO(tommadams): This method of generating one golden chunk per generation +- # is sub-optimal because each chunk gets reused multiple times for training, +- # introducing bias. Instead, a fresh dataset should be uniformly sampled out +- # of *all* games in the training window before the start of each training run. +- +- # TODO(tommadams): parallel_fill is currently non-deterministic. Make it not +- # so. 
+- logging.info('Writing golden chunk from "{}"'.format(pattern)) +- threads = FLAGS.golden_chunk_split +- file_list = [] +- files_number = len(files) +- chunk_size = files_number // threads +- +- # split files into N seperate parts +- for i in range(threads): +- if i == threads - 1: +- file_list += [[i, files[chunk_size * i :]]] +- else: +- file_list += [[i, files[chunk_size * i : chunk_size * (i + 1)]]] +- pool = mp.Pool(threads) +- pool.map(functools.partial(gen_golden_chunk, state=state), file_list) ++ hosts = FLAGS.selfplay_node ++ if hosts == []: ++ hosts = ['localhost'] ++ num_instance = len(hosts) ++ numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores ++ train_instance_num = FLAGS.train_instance_per_numa * len(FLAGS.train_node) * numa_per_node ++ selfplay_node_num = len(hosts) ++ selfplay_num = selfplay_node_num ++ #out_files_number = (train_instance_num*selfplay_num/gcd(train_instance_num, selfplay_num))/selfplay_num ++ out_files_number = int(train_instance_num/gcd(train_instance_num, selfplay_num)) ++ ++ cmd = ['python3', 'ml_perf/divide_golden_chunk.py', ++ '--read_path={}'.format(output_dir + "/*"), ++ '--write_path={}'.format(os.path.join(fsdb.golden_chunk_dir(), state.output_model_name + '.tfrecord.zz')), ++ '--out_files_number={}'.format(out_files_number), ++ '--physical_cores={}'.format(FLAGS.physical_cores), ++ '--base_dir={}'.format(FLAGS.base_dir)] ++ lines = await run_distributed([], 1, hosts, None, None, state.seed, *cmd) ++ ++ print(lines) + + return bias + +-async def train(state, tf_records): ++async def train(state, window_size): + """Run training and write a new model to the fsdb models_dir. + + Args: + state: the RL loop State instance. +- tf_records: a list of paths to TensorFlow records to train on. + """ + train_node = FLAGS.train_node + num_node = len(train_node) +@@ -451,10 +443,12 @@ async def train(state, tf_records): + intra_threads = FLAGS.physical_cores + + model_path = os.path.join(fsdb.models_dir(), state.train_model_name) +- cmd = ['python3', 'train.py', *tf_records, ++ cmd = ['python3', 'train.py', + '--flagfile={}'.format(os.path.join(FLAGS.flags_dir, 'train.flags')), + '--work_dir={}'.format(fsdb.working_dir()), + '--export_path={}'.format(model_path), ++ '--window_size={}'.format(window_size), ++ '--data_path={}'.format(fsdb.golden_chunk_dir()), + '--training_seed={}'.format(state.seed), + '--freeze=True', + '--num_inter_threads=1', +@@ -486,7 +480,7 @@ async def train(state, tf_records): + + def post_train(state): + model_path = os.path.join(fsdb.models_dir(), state.train_model_name) +- dual_net.optimize_graph(model_path + '.pb', model_path, FLAGS.quantization, fsdb.golden_chunk_dir()+'/*.zz', FLAGS.eval_min_max_every_epoch) ++ dual_net.optimize_graph(model_path + '.pb', model_path, FLAGS.quantization, fsdb.golden_chunk_dir()+'/*.zz*', FLAGS.eval_min_max_every_epoch) + mll.save_model(state.iter_num-1) + + # Append the time elapsed from when the RL was started to when this model +@@ -603,27 +597,38 @@ def rl_loop(): + # chunk left. 
Until it reach FLAGS.window_size * FLAGS.golden_chunk_split + + window_size = 0 +- big_chunk_remaining = 0 + + state = State() ++ numa_per_node = FLAGS.physical_cores // FLAGS.numa_cores ++ train_instance_num = FLAGS.train_instance_per_numa * len(FLAGS.train_node) * numa_per_node ++ selfplay_node_num = max(len(FLAGS.selfplay_node), 1) ++ selfplay_num = selfplay_node_num ++ out_files_number = int(train_instance_num/gcd(train_instance_num, selfplay_num)*selfplay_node_num) ++ FLAGS.golden_chunk_split = out_files_number ++ ++ window_size = out_files_number * FLAGS.window_size + + if FLAGS.checkpoint_dir != None: + # Start from a partially trained model. +- initialize_from_checkpoint(state) +- window_size = len(get_golden_chunk_records(FLAGS.window_size)) +- big_chunk_remaining = window_size ++ initialize_from_checkpoint(state, out_files_number) ++ window_size = len(get_golden_chunk_records(window_size)) ++ mll.init_stop() ++ mll.run_start() ++ state.start_time = time.time() + else: + # Play the first round of selfplay games with a fake model that returns + # random noise. We do this instead of playing multiple games using a single + # model bootstrapped with random noise to avoid any initial bias. ++ mll.init_stop() ++ mll.run_start() ++ state.start_time = time.time() + mll.epoch_start(state.iter_num) + wait(selfplay(state, 'bootstrap')) + window_size += FLAGS.golden_chunk_split + + # Train a real model from the random selfplay games. +- tf_records = get_golden_chunk_records(window_size) + state.iter_num += 1 +- wait(train(state, tf_records)) ++ wait(train(state, window_size)) + post_train(state) + + # Select the newly trained model as the best. +@@ -647,12 +652,9 @@ def rl_loop(): + holdout_glob = os.path.join(fsdb.holdout_dir(), '%06d-*' % state.iter_num, + '*') + +- # Train on shuffled game data from recent selfplay rounds. +- tf_records = get_golden_chunk_records(window_size) +- + if FLAGS.parallel_post_train == 0: + state.iter_num += 1 +- wait(train(state, tf_records)) ++ wait(train(state, window_size)) + post_train(state) + # Run eval, validation & selfplay sequentially. + wait(selfplay(state)) +@@ -667,7 +669,7 @@ def rl_loop(): + + if FLAGS.parallel_post_train == 1: + state.iter_num += 1 +- wait([train(state, tf_records), ++ wait([train(state, window_size), + selfplay(state)]) + post_train(state) + # Run eval, validation & selfplay in parallel. 
+@@ -694,7 +696,7 @@ def rl_loop(): + # | start selfplay[iter] + # | wait selfplay + # wait train +- train_handle = asyncio.gather(train(state, tf_records), return_exceptions=True) ++ train_handle = asyncio.gather(train(state, window_size), return_exceptions=True) + if not first_iter: + post_train(state_copy) + model_win_rate = wait(evaluate_trained_model(state_copy)) +@@ -730,12 +732,6 @@ def rl_loop(): + train_model_name_after.join(model.rsplit(train_model_name_before, 1)))) + shutil.copy(model, train_model_name_after.join(model.rsplit(train_model_name_before, 1))) + +- if big_chunk_remaining > 0: +- window_size += FLAGS.golden_chunk_split - 1 +- big_chunk_remaining -= 1 +- else: +- window_size += FLAGS.golden_chunk_split +- + # after the main loop, if parallel_post_train = 2 + # needs to print epoch_stop for last epoch + if FLAGS.parallel_post_train == 2: +@@ -775,8 +771,6 @@ def main(unused_argv): + + with logged_timer('Total time'): + try: +- mll.init_stop() +- mll.run_start() + rl_loop() + finally: + asyncio.get_event_loop().close() +diff --git a/preprocessing.py b/preprocessing.py +index d5a99a6..af35b00 100644 +--- a/preprocessing.py ++++ b/preprocessing.py +@@ -122,26 +122,26 @@ def read_tf_records(batch_size, tf_records, num_repeats=1, + + random.seed(seed) + +- if shuffle_records: +- random.shuffle(tf_records) ++ #if shuffle_records: ++ # random.shuffle(tf_records) ++ + record_list = tf.data.Dataset.from_tensor_slices(tf_records) + ++ if dist_train: ++ record_list = record_list.shard(hvd.size(), hvd.rank()) ++ + # compression_type here must agree with write_tf_examples + map_func = functools.partial( + tf.data.TFRecordDataset, + buffer_size=8 * 1024 * 1024, + compression_type='ZLIB') + +- if dist_train: +- # no need to interleave in data parallelism +- interleave = False +- + if interleave: + # cycle_length = how many tfrecord files are read in parallel + # The idea is to shuffle both the order of the files being read, + # and the examples being read from the files. 
+ dataset = record_list.apply(tf.data.experimental.parallel_interleave( +- map_func, cycle_length=64, sloppy=True)) ++ map_func, cycle_length=1000, sloppy=True)) + else: + dataset = record_list.flat_map(map_func) + +@@ -150,15 +150,15 @@ def read_tf_records(batch_size, tf_records, num_repeats=1, + lambda _: tf.random.uniform([], seed=seed) < filter_amount) + dataset = dataset.apply(optimization.optimize(["filter_with_random_uniform_fusion"])) + +- if dist_train: +- dataset = dataset.shard(hvd.size(), hvd.rank()) ++ #if dist_train: ++ # dataset = dataset.shard(hvd.size(), hvd.rank()) + + dataset = dataset.repeat(num_repeats) + + if shuffle_examples: + dataset = dataset.shuffle(buffer_size=shuffle_buffer_size) + +- dataset = dataset.batch(batch_size) ++ dataset = dataset.batch(batch_size, drop_remainder=True) + return dataset + + +diff --git a/rl_loop/example_buffer.py b/rl_loop/example_buffer.py +index 28c77fd..40b467f 100644 +--- a/rl_loop/example_buffer.py ++++ b/rl_loop/example_buffer.py +@@ -75,6 +75,10 @@ def file_timestamp(filename): + def _ts_to_str(timestamp): + return dt.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S") + ++def parallel_shuffle(i, example_list): ++ # random.shuffle on deque is O(n^2) convert to list for O(n) ++ random.shuffle(example_list) ++ + + class ExampleBuffer(): + def __init__(self, max_size=2**21, sampling_frac=0.02): +@@ -92,16 +96,11 @@ class ExampleBuffer(): + if len(games) > max_games: + games = games[-max_games:] + +- if threads > 1: +- with mp.Pool(threads) as pool: +- res = tqdm(pool.imap(self.func, games), total=len(games)) +- self.examples.extend(itertools.chain.from_iterable(res)) +- else: +- res = [] +- for game in games: +- res += [self.func(game)] +- self.examples.extend(itertools.chain.from_iterable(res)) ++ with mp.pool.ThreadPool(threads) as pool: ++ res = tqdm(pool.imap(self.func, games), total=len(games)) ++ self.examples.extend(itertools.chain.from_iterable(res)) + print("Got", len(self.examples), "examples") ++ return len(self.examples) + + def update(self, new_games): + """ new_games is a list of .tfrecord.zz new game records. 
""" +@@ -132,6 +131,32 @@ class ExampleBuffer(): + self.examples.clear() + self.examples = deque(maxlen=self.max_size) + ++ def flush_new(self, path, example_num, num_out = 1, threads = 8): ++ # random.shuffle on deque is O(n^2) convert to list for O(n) ++ self.examples = list(self.examples) ++ example_list = [ex[1] for ex in self.examples] ++ length = example_num // num_out ++ example_list = example_list[:length*num_out] ++ ++ i_list = [] ++ for i in range(num_out): ++ i_list.append((i, example_list[i*length:(i+1)*length])) ++ ++ with timer("Writing examples to " + path): ++ with mp.pool.ThreadPool(threads) as pool: ++ pool.starmap(parallel_shuffle, i_list) ++ ++ i_list = [] ++ for i in range(num_out): ++ i_list.append((path+'_'+str(i), example_list[i*length:(i+1)*length], False)) ++ ++ with timer("Writing examples to " + path): ++ with mp.pool.ThreadPool(num_out) as pool: ++ pool.starmap(preprocessing.write_tf_examples, i_list) ++ ++ self.examples.clear() ++ self.examples = deque(maxlen=self.max_size) ++ + @property + def count(self): + return len(self.examples) +diff --git a/train.py b/train.py +index 2554826..cf2c8c9 100644 +--- a/train.py ++++ b/train.py +@@ -19,6 +19,7 @@ Usage: + """ + + import logging ++import os + + from absl import app, flags + import numpy as np +@@ -50,6 +51,9 @@ flags.DEFINE_float('filter_amount', 1.0, + flags.DEFINE_string('export_path', None, + 'Where to export the model after training.') + ++flags.DEFINE_string('data_path', None, ++ 'Where to get the data for training.') ++ + flags.DEFINE_bool('use_bt', False, + 'Whether to use Bigtable as input. ' + '(Only supported with --use_tpu, currently.)') +@@ -216,6 +220,11 @@ def train(*tf_records: "Records to train on"): + games.require_fresh_games(0) + raise + ++def get_golden_chunk_records(base): ++ pattern = os.path.join(base, '*.zz*') ++ window_size = FLAGS.window_size ++ return sorted(tf.gfile.Glob(pattern), reverse=True)[:window_size] ++ + + def main(argv): + """Train on examples and export the updated model weights.""" +@@ -224,7 +233,7 @@ def main(argv): + mll.global_batch_size(FLAGS.train_batch_size) + mll.lr_rates(FLAGS.lr_rates) + mll.lr_boundaries(FLAGS.lr_boundaries) +- tf_records = argv[1:] ++ tf_records = get_golden_chunk_records(FLAGS.data_path) + logging.info("Training on %s records: %s to %s", + len(tf_records), tf_records[0], tf_records[-1]) + with utils.logged_timer("Training"): diff --git a/models/reinforcement/tensorflow/minigo/training/requirements.txt b/models/reinforcement/tensorflow/minigo/training/requirements.txt new file mode 100644 index 000000000..ac55193ad --- /dev/null +++ b/models/reinforcement/tensorflow/minigo/training/requirements.txt @@ -0,0 +1,18 @@ +absl-py +autopep8>=1.3 +fire +google.cloud.logging +google.cloud.bigtable +grpcio-tools +keras +numpy>=1.14.0 +protobuf +sgf==0.5 +six +tqdm>=4.17 +pyasn1>=0.4.1 +setuptools>=34.0.0 + +oauth2client==4.1 + +horovod==0.15.1