From 54fae0423e396e26fe4e3924e98f6952241bda4a Mon Sep 17 00:00:00 2001
From: srihari-humbarwadi
Date: Thu, 13 Jan 2022 00:24:18 +0530
Subject: [PATCH 01/26] Added `PanopticDeepLabFusion` layer

---
 .../vision/beta/modeling/layers/nn_layers.py | 130 ++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/official/vision/beta/modeling/layers/nn_layers.py b/official/vision/beta/modeling/layers/nn_layers.py
index ed1893c268c..a690235503e 100644
--- a/official/vision/beta/modeling/layers/nn_layers.py
+++ b/official/vision/beta/modeling/layers/nn_layers.py
@@ -320,6 +320,136 @@ def pyramid_feature_fusion(inputs, target_level):
   return tf.math.add_n(resampled_feats)
 
 
+class PanopticDeepLabFusion(tf.keras.layers.Layer):
+  """Creates a Panoptic DeepLab feature fusion layer.
+
+  This implements the feature fusion introduced in the paper:
+  Cheng et al. Panoptic-DeepLab
+  (https://arxiv.org/pdf/1911.10194.pdf)
+  """
+
+  def __init__(
+      self,
+      level: int,
+      low_level: List[int] = [3, 2],
+      num_projection_filters: List[int] = [64, 32],
+      num_output_filters: int = 256,
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      interpolation: str = 'bilinear',
+      **kwargs):
+    """Initializes a Panoptic DeepLab feature fusion layer.
+
+    Args:
+      level: An `int` level at which the decoder was applied.
+      low_level: A list of `int` backbone levels to fuse with the decoder
+        output, ordered from higher to lower level.
+      num_projection_filters: A list of `int` numbers of filters in the 1x1
+        projection convolutions, one per entry in `low_level`.
+      num_output_filters: An `int` number of filters in the 5x5 fusion
+        conv2d layers.
+      activation: A `str` name of the activation function.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+      norm_momentum: A `float` of normalization momentum for the moving
+        average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default is None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D.
+      interpolation: A `str` interpolation method for upsampling. Defaults to
+        `bilinear`.
+      **kwargs: Additional keyword arguments to be passed.
+
+    Calling the layer returns a `float` `tf.Tensor` of shape
+    [batch_size, feature_height, feature_width, num_output_filters].
+ """ + super(PanopticDeepLabFusion, self).__init__(**kwargs) + + self._config_dict = { + 'level': level, + 'low_level': low_level, + 'num_projection_filters': num_projection_filters, + 'num_output_filters': num_output_filters, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + 'interpolation': interpolation + } + if tf.keras.backend.image_data_format() == 'channels_last': + self._channel_axis = -1 + else: + self._channel_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape: List[tf.TensorShape]): + conv_op = tf.keras.layers.Conv2D + conv_kwargs = { + 'padding': 'same', + 'use_bias': False, + 'kernel_initializer': tf.initializers.VarianceScaling(), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + } + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._channel_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + self._projection_convs = [] + self._projection_norms = [] + self._fusion_convs = [] + self._fusion_norms = [] + for i in range(len(self._config_dict['low_level'])): + self._projection_convs.append( + conv_op( + filters=self._config_dict['num_projection_filters'][i], + kernel_size=1, + **conv_kwargs)) + self._fusion_convs.append( + conv_op( + filters=self._config_dict['num_output_filters'], + kernel_size=5, + **conv_kwargs)) + self._projection_norms.append(bn_op(**bn_kwargs)) + self._fusion_norms.append(bn_op(**bn_kwargs)) + + def call(self, inputs, training=None): + if training is None: + training = tf.keras.backend.learning_phase() + + backbone_output = inputs[0] + decoder_output = inputs[1][str(self._config_dict['level'])] + + x = decoder_output + for i in range(len(self._config_dict['low_level'])): + feature = backbone_output[str(self._config_dict['low_level'][i])] + feature = self._projection_convs[i](feature) + feature = self._projection_norms[i](feature, training=training) + feature = self._activation(feature) + + shape = tf.shape(feature) + x = tf.image.resize( + x, size=[shape[1], shape[2]], + method=self._config_dict['interpolation']) + x = tf.concat([x, feature], axis=self._channel_axis) + + x = self._fusion_convs[i](x) + x = self._fusion_norms[i](x, training=training) + x = self._activation(x) + return x + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) class PanopticFPNFusion(tf.keras.Model): """Creates a Panoptic FPN feature Fusion layer. 
From 78949f92e6529d27a665193c0cc152ccfe0df163 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:28:35 +0530 Subject: [PATCH 02/26] added new feature_fusion: panoptic_deeplab_fusion --- .../beta/modeling/heads/segmentation_heads.py | 74 ++++++++++++------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/official/vision/beta/modeling/heads/segmentation_heads.py b/official/vision/beta/modeling/heads/segmentation_heads.py index b87e98cdd44..ecd798271a3 100644 --- a/official/vision/beta/modeling/heads/segmentation_heads.py +++ b/official/vision/beta/modeling/heads/segmentation_heads.py @@ -32,13 +32,14 @@ def __init__( num_convs: int = 2, num_filters: int = 256, use_depthwise_convolution: bool = False, + kernel_size: int = 3, prediction_kernel_size: int = 1, upsample_factor: int = 1, feature_fusion: Optional[str] = None, decoder_min_level: Optional[int] = None, decoder_max_level: Optional[int] = None, - low_level: int = 2, - low_level_num_filters: int = 48, + low_level: Union[int, List[int]] = 2, + low_level_num_filters: Union[int, List[int]] = 48, num_decoder_filters: int = 256, activation: str = 'relu', use_sync_bn: bool = False, @@ -59,6 +60,8 @@ def __init__( Default is 256. use_depthwise_convolution: A bool to specify if use depthwise separable convolutions. + kernel_size: An `int` number to specify the kernel size of the + stacked convolutions before the last prediction layer. prediction_kernel_size: An `int` number to specify the kernel size of the prediction layer. upsample_factor: An `int` number to specify the upsampling factor to @@ -100,6 +103,7 @@ def __init__( 'num_convs': num_convs, 'num_filters': num_filters, 'use_depthwise_convolution': use_depthwise_convolution, + 'kernel_size': kernel_size, 'prediction_kernel_size': prediction_kernel_size, 'upsample_factor': upsample_factor, 'feature_fusion': feature_fusion, @@ -123,11 +127,12 @@ def __init__( def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): """Creates the variables of the segmentation head.""" + kernel_size = self._config_dict['kernel_size'] use_depthwise_convolution = self._config_dict['use_depthwise_convolution'] random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01) conv_op = tf.keras.layers.Conv2D conv_kwargs = { - 'kernel_size': 3 if not use_depthwise_convolution else 1, + 'kernel_size': kernel_size if not use_depthwise_convolution else 1, 'padding': 'same', 'use_bias': False, 'kernel_initializer': random_initializer, @@ -167,6 +172,19 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): kernel_regularizer=self._config_dict['kernel_regularizer'], bias_regularizer=self._config_dict['bias_regularizer']) + if self._config_dict['feature_fusion'] == 'panoptic_deeplab_fusion': + self._panoptic_deeplab_fusion = nn_layers.PanopticDeepLabFusion( + level=self._config_dict['level'], + low_level=self._config_dict['low_level'], + num_projection_filters=self._config_dict['low_level_num_filters'], + num_output_filters=self._config_dict['num_filters'], + activation=self._config_dict['activation'], + use_sync_bn=self._config_dict['use_sync_bn'], + norm_momentum=self._config_dict['norm_momentum'], + norm_epsilon=self._config_dict['norm_epsilon'], + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + # Segmentation head layers. 
self._convs = [] self._norms = [] @@ -192,7 +210,7 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): norm_name = 'segmentation_head_norm_{}'.format(i) self._norms.append(bn_op(name=norm_name, **bn_kwargs)) - self._classifier = conv_op( + self._prediction_conv = conv_op( name='segmentation_output', filters=self._config_dict['num_classes'], kernel_size=self._config_dict['prediction_kernel_size'], @@ -204,26 +222,7 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): super(SegmentationHead, self).build(input_shape) - def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], - Union[tf.Tensor, Mapping[str, tf.Tensor]]]): - """Forward pass of the segmentation head. - - It supports both a tuple of 2 tensors or 2 dictionaries. The first is - backbone endpoints, and the second is decoder endpoints. When inputs are - tensors, they are from a single level of feature maps. When inputs are - dictionaries, they contain multiple levels of feature maps, where the key - is the index of feature map. - - Args: - inputs: A tuple of 2 feature map tensors of shape - [batch, height_l, width_l, channels] or 2 dictionaries of tensors: - - key: A `str` of the level of the multilevel features. - - values: A `tf.Tensor` of the feature map tensors, whose shape is - [batch, height_l, width_l, channels]. - Returns: - segmentation prediction mask: A `tf.Tensor` of the segmentation mask - scores predicted from input features. - """ + def _fuse_features(self, inputs): backbone_output = inputs[0] decoder_output = inputs[1] if self._config_dict['feature_fusion'] == 'deeplabv3plus': @@ -246,9 +245,34 @@ def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], self._config_dict['level']) elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion': x = self._panoptic_fpn_fusion(decoder_output) + elif self._config_dict['feature_fusion'] == 'panoptic_deeplab_fusion': + x = self._panoptic_deeplab_fusion(inputs) else: x = decoder_output[str(self._config_dict['level'])] if isinstance( decoder_output, dict) else decoder_output + return x + + def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], + Union[tf.Tensor, Mapping[str, tf.Tensor]]]): + """Forward pass of the segmentation head. + + It supports both a tuple of 2 tensors or 2 dictionaries. The first is + backbone endpoints, and the second is decoder endpoints. When inputs are + tensors, they are from a single level of feature maps. When inputs are + dictionaries, they contain multiple levels of feature maps, where the key + is the index of feature map. + + Args: + inputs: A tuple of 2 feature map tensors of shape + [batch, height_l, width_l, channels] or 2 dictionaries of tensors: + - key: A `str` of the level of the multilevel features. + - values: A `tf.Tensor` of the feature map tensors, whose shape is + [batch, height_l, width_l, channels]. + Returns: + segmentation prediction mask: A `tf.Tensor` of the segmentation mask + scores predicted from input features. 
+ """ + x = self._fuse_features(inputs) for conv, norm in zip(self._convs, self._norms): x = conv(x) @@ -258,7 +282,7 @@ def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], x = spatial_transform_ops.nearest_upsampling( x, scale=self._config_dict['upsample_factor']) - return self._classifier(x) + return self._prediction_conv(x) def get_config(self): return self._config_dict From c8e0233b00ced4798e9707714efc82f9b4dc4623 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:28:56 +0530 Subject: [PATCH 03/26] added tests for panoptic_deeplab_fusion --- .../modeling/heads/segmentation_heads_test.py | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/official/vision/beta/modeling/heads/segmentation_heads_test.py b/official/vision/beta/modeling/heads/segmentation_heads_test.py index 7d620252189..df98790ad23 100644 --- a/official/vision/beta/modeling/heads/segmentation_heads_test.py +++ b/official/vision/beta/modeling/heads/segmentation_heads_test.py @@ -26,14 +26,17 @@ class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase): @parameterized.parameters( - (2, 'pyramid_fusion', None, None), - (3, 'pyramid_fusion', None, None), - (2, 'panoptic_fpn_fusion', 2, 5), - (2, 'panoptic_fpn_fusion', 2, 6), - (3, 'panoptic_fpn_fusion', 3, 5), - (3, 'panoptic_fpn_fusion', 3, 6)) + (2, 'pyramid_fusion', None, None, 2, 48), + (3, 'pyramid_fusion', None, None, 2, 48), + (2, 'panoptic_fpn_fusion', 2, 5, 2, 48), + (2, 'panoptic_fpn_fusion', 2, 6, 2, 48), + (3, 'panoptic_fpn_fusion', 3, 5, 2, 48), + (3, 'panoptic_fpn_fusion', 3, 6, 2, 48), + (4, 'panoptic_deeplab_fusion', None, None, (4, 3), (64, 32)), + (4, 'panoptic_deeplab_fusion', None, None, (3, 2), (64, 32))) def test_forward(self, level, feature_fusion, - decoder_min_level, decoder_max_level): + decoder_min_level, decoder_max_level, + low_level, low_level_num_filters): backbone_features = { '3': np.random.rand(2, 128, 128, 16), '4': np.random.rand(2, 64, 64, 16), @@ -45,14 +48,16 @@ def test_forward(self, level, feature_fusion, '5': np.random.rand(2, 32, 32, 64), '6': np.random.rand(2, 16, 16, 64), } - - if feature_fusion == 'panoptic_fpn_fusion': + num_classes = 10 + if 'panoptic' in feature_fusion: backbone_features['2'] = np.random.rand(2, 256, 256, 16) decoder_features['2'] = np.random.rand(2, 256, 256, 64) head = segmentation_heads.SegmentationHead( - num_classes=10, + num_classes=num_classes, level=level, + low_level=low_level, + low_level_num_filters=low_level_num_filters, feature_fusion=feature_fusion, decoder_min_level=decoder_min_level, decoder_max_level=decoder_max_level, @@ -60,14 +65,18 @@ def test_forward(self, level, feature_fusion, logits = head((backbone_features, decoder_features)) - if level in decoder_features: - self.assertAllEqual(logits.numpy().shape, [ - 2, decoder_features[str(level)].shape[1], - decoder_features[str(level)].shape[2], 10 - ]) + if str(level) in decoder_features: + if feature_fusion == 'panoptic_deeplab_fusion': + h, w = decoder_features[str(low_level[-1])].shape[1:3] + else: + h, w = decoder_features[str(level)].shape[1:3] + self.assertAllEqual( + logits.numpy().shape, + [2, h, w, num_classes]) def test_serialize_deserialize(self): - head = segmentation_heads.SegmentationHead(num_classes=10, level=3) + head = segmentation_heads.SegmentationHead( + num_classes=10, level=3) config = head.get_config() new_head = segmentation_heads.SegmentationHead.from_config(config) self.assertAllEqual(head.get_config(), new_head.get_config()) 
From e257b292f15ab7a9adfcae7081333e957799bf37 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:30:21 +0530 Subject: [PATCH 04/26] added `kernel_size` param for `SegmentationHead` --- official/vision/beta/configs/semantic_segmentation.py | 1 + official/vision/beta/modeling/factory.py | 1 + 2 files changed, 2 insertions(+) diff --git a/official/vision/beta/configs/semantic_segmentation.py b/official/vision/beta/configs/semantic_segmentation.py index 3aefcefec70..a952e3416bd 100644 --- a/official/vision/beta/configs/semantic_segmentation.py +++ b/official/vision/beta/configs/semantic_segmentation.py @@ -63,6 +63,7 @@ class SegmentationHead(hyperparams.Config): num_convs: int = 2 num_filters: int = 256 use_depthwise_convolution: bool = False + kernel_size: int = 3 prediction_kernel_size: int = 1 upsample_factor: int = 1 feature_fusion: Optional[ diff --git a/official/vision/beta/modeling/factory.py b/official/vision/beta/modeling/factory.py index b03d0ea9d55..b75c347a44e 100644 --- a/official/vision/beta/modeling/factory.py +++ b/official/vision/beta/modeling/factory.py @@ -356,6 +356,7 @@ def build_segmentation_model( num_classes=model_config.num_classes, level=head_config.level, num_convs=head_config.num_convs, + kernel_size=head_config.kernel_size, prediction_kernel_size=head_config.prediction_kernel_size, num_filters=head_config.num_filters, use_depthwise_convolution=head_config.use_depthwise_convolution, From 6742d61a045c5a5a029b6ec2d9f429edc8f161ad Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:30:50 +0530 Subject: [PATCH 05/26] added `InstanceCenterHead` --- .../modeling/heads/instance_center_head.py | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py new file mode 100644 index 00000000000..f16bbfbbb24 --- /dev/null +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py @@ -0,0 +1,170 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Contains definition of instance center heads."""
+from typing import List, Union, Optional, Mapping, Tuple
+import tensorflow as tf
+
+from official.vision.beta.modeling.heads import segmentation_heads
+from official.vision.beta.ops import spatial_transform_ops
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class InstanceCenterHead(segmentation_heads.SegmentationHead):
+  """Creates an instance center head."""
+
+  def __init__(
+      self,
+      level: Union[int, str],
+      num_convs: int = 2,
+      num_filters: int = 256,
+      use_depthwise_convolution: bool = False,
+      kernel_size: int = 3,
+      prediction_kernel_size: int = 1,
+      upsample_factor: int = 1,
+      feature_fusion: Optional[str] = None,
+      decoder_min_level: Optional[int] = None,
+      decoder_max_level: Optional[int] = None,
+      low_level: Union[int, List[int]] = 2,
+      low_level_num_filters: Union[int, List[int]] = 48,
+      num_decoder_filters: int = 256,
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes an instance center head.
+
+    Args:
+      level: An `int` or `str`, level to use to build the instance center
+        head.
+      num_convs: An `int` number of stacked convolutions before the last
+        prediction layer.
+      num_filters: An `int` number to specify the number of filters used.
+        Default is 256.
+      use_depthwise_convolution: A bool to specify if use depthwise separable
+        convolutions.
+      kernel_size: An `int` number to specify the kernel size of the stacked
+        convolutions before the last prediction layer.
+      prediction_kernel_size: An `int` number to specify the kernel size of
+        the prediction layer.
+      upsample_factor: An `int` number to specify the upsampling factor to
+        generate finer mask. Default 1 means no upsampling is applied.
+      feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`,
+        `panoptic_fpn_fusion`, `panoptic_deeplab_fusion`, or None. If
+        `deeplabv3plus`, features from decoder_features[level] will be fused
+        with low level feature maps from backbone. If `pyramid_fusion`,
+        multiscale features will be resized and fused at the target level.
+      decoder_min_level: An `int` of minimum level from decoder to use in
+        feature fusion. It is only used when feature_fusion is set to
+        `panoptic_fpn_fusion`.
+      decoder_max_level: An `int` of maximum level from decoder to use in
+        feature fusion. It is only used when feature_fusion is set to
+        `panoptic_fpn_fusion`.
+      low_level: An `int` or list of `int` of backbone levels to be used for
+        feature fusion. It is used when feature_fusion is set to
+        `deeplabv3plus` or `panoptic_deeplab_fusion`.
+      low_level_num_filters: An `int` or list of `int` of reduced numbers of
+        filters for the low level features before fusing them with higher
+        level features. It is only used when feature_fusion is set to
+        `deeplabv3plus` or `panoptic_deeplab_fusion`.
+      num_decoder_filters: An `int` of number of filters in the decoder
+        outputs. It is only used when feature_fusion is set to
+        `panoptic_fpn_fusion`.
+      activation: A `str` that indicates which activation is used, e.g.
+        'relu', 'swish', etc.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+      norm_momentum: A `float` of normalization momentum for the moving
+        average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default is None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D.
+      **kwargs: Additional keyword arguments to be passed.
+ """ + super(InstanceCenterHead, self).__init__( + num_classes=2, + level=level, + num_convs=num_convs, + num_filters=num_filters, + use_depthwise_convolution=use_depthwise_convolution, + kernel_size=kernel_size, + prediction_kernel_size=prediction_kernel_size, + upsample_factor=upsample_factor, + feature_fusion=feature_fusion, + decoder_min_level=decoder_min_level, + decoder_max_level=decoder_max_level, + low_level=low_level, + low_level_num_filters=low_level_num_filters, + num_decoder_filters=num_decoder_filters, + activation=activation, + use_sync_bn=use_sync_bn, + norm_momentum=norm_momentum, + norm_epsilon=norm_epsilon, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + **kwargs) + + + def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): + self._instance_center_prediction_conv = tf.keras.layers.Conv2D( + name='instance_center_prediction', + filters=1, + kernel_size=self._config_dict['prediction_kernel_size'], + padding='same', + bias_initializer=tf.zeros_initializer(), + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + super(InstanceCenterHead, self).build(input_shape) + + + def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], + Union[tf.Tensor, Mapping[str, tf.Tensor]]]): + """Forward pass of the segmentation head. + + It supports both a tuple of 2 tensors or 2 dictionaries. The first is + backbone endpoints, and the second is decoder endpoints. When inputs are + tensors, they are from a single level of feature maps. When inputs are + dictionaries, they contain multiple levels of feature maps, where the key + is the index of feature map. + + Args: + inputs: A tuple of 2 feature map tensors of shape + [batch, height_l, width_l, channels] or 2 dictionaries of tensors: + - key: A `str` of the level of the multilevel features. + - values: A `tf.Tensor` of the feature map tensors, whose shape is + [batch, height_l, width_l, channels]. + Returns: + segmentation prediction mask: A `tf.Tensor` of the segmentation mask + scores predicted from input features. 
+ """ + x = self._fuse_features(inputs) + + for conv, norm in zip(self._convs, self._norms): + x = conv(x) + x = norm(x) + x = self._activation(x) + if self._config_dict['upsample_factor'] > 1: + x = spatial_transform_ops.nearest_upsampling( + x, scale=self._config_dict['upsample_factor']) + + instance_center_prediction = self._instance_center_prediction_conv(x) + instance_center_regression = self._prediction_conv(x) + outputs = { + 'instance_center_prediction': instance_center_prediction, + 'instance_center_regression': instance_center_regression + } + return outputs + + def get_config(self): + config_dict = super(InstanceCenterHead, self).get_config().copy() + config_dict.pop('num_classes') + return config_dict From a6a14de72b50e4cef438f73fbe1b0cbdd237ccae Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:31:17 +0530 Subject: [PATCH 06/26] added tests for `InstanceCenterHead` --- .../heads/instance_center_head_test.py | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py new file mode 100644 index 00000000000..ab4fe281a22 --- /dev/null +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py @@ -0,0 +1,87 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Tests for segmentation_heads.py.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head + + +class InstanceCenterHeadTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (2, 'pyramid_fusion', None, None, 2, 48), + (3, 'pyramid_fusion', None, None, 2, 48), + (2, 'panoptic_fpn_fusion', 2, 5, 2, 48), + (2, 'panoptic_fpn_fusion', 2, 6, 2, 48), + (3, 'panoptic_fpn_fusion', 3, 5, 2, 48), + (3, 'panoptic_fpn_fusion', 3, 6, 2, 48), + (4, 'panoptic_deeplab_fusion', None, None, (4, 3), (64, 32)), + (4, 'panoptic_deeplab_fusion', None, None, (3, 2), (64, 32))) + def test_forward(self, level, feature_fusion, + decoder_min_level, decoder_max_level, + low_level, low_level_num_filters): + backbone_features = { + '3': np.random.rand(2, 128, 128, 16), + '4': np.random.rand(2, 64, 64, 16), + '5': np.random.rand(2, 32, 32, 16), + } + decoder_features = { + '3': np.random.rand(2, 128, 128, 64), + '4': np.random.rand(2, 64, 64, 64), + '5': np.random.rand(2, 32, 32, 64), + '6': np.random.rand(2, 16, 16, 64), + } + + if 'panoptic' in feature_fusion: + backbone_features['2'] = np.random.rand(2, 256, 256, 16) + decoder_features['2'] = np.random.rand(2, 256, 256, 64) + + head = instance_center_head.InstanceCenterHead( + level=level, + low_level=low_level, + low_level_num_filters=low_level_num_filters, + feature_fusion=feature_fusion, + decoder_min_level=decoder_min_level, + decoder_max_level=decoder_max_level, + num_decoder_filters=64) + + outputs = head((backbone_features, decoder_features)) + + if str(level) in decoder_features: + if feature_fusion == 'panoptic_deeplab_fusion': + h, w = decoder_features[str(low_level[-1])].shape[1:3] + else: + h, w = decoder_features[str(level)].shape[1:3] + self.assertAllEqual( + outputs['instance_center_prediction'].numpy().shape, + [2, h, w, 1]) + self.assertAllEqual( + outputs['instance_center_regression'].numpy().shape, + [2, h, w, 2]) + + + def test_serialize_deserialize(self): + head = instance_center_head.InstanceCenterHead(level=3) + config = head.get_config() + new_head = instance_center_head.InstanceCenterHead.from_config(config) + self.assertAllEqual(head.get_config(), new_head.get_config()) + +if __name__ == '__main__': + tf.test.main() From 6ee54a60f61b0a639dfa855009c6abc3d51f4d92 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:31:59 +0530 Subject: [PATCH 07/26] added `PanopticDeeplabModel` --- .../configs/panoptic_deeplab.py | 61 ++++++++++ .../modeling/panoptic_deeplab_model.py | 107 ++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py new file mode 100644 index 00000000000..d509ba669a7 --- /dev/null +++ b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py @@ -0,0 +1,61 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Panoptic Deeplab configuration definition."""
+
+import dataclasses
+from typing import List, Optional, Union
+
+from official.modeling import hyperparams
+from official.vision.beta.configs import common
+from official.vision.beta.configs import backbones
+from official.vision.beta.configs import decoders
+from official.vision.beta.configs import semantic_segmentation
+
+SEGMENTATION_HEAD = semantic_segmentation.SegmentationHead
+
+_COCO_INPUT_PATH_BASE = 'coco/tfrecords'
+_COCO_TRAIN_EXAMPLES = 118287
+_COCO_VAL_EXAMPLES = 5000
+
+
+@dataclasses.dataclass
+class InstanceCenterHead(semantic_segmentation.SegmentationHead):
+  """Instance Center head config."""
+  kernel_size: int = 5
+  # One of None, deeplabv3plus, panoptic_fpn_fusion,
+  # panoptic_deeplab_fusion or pyramid_fusion.
+  feature_fusion: Optional[str] = None
+  low_level: Union[int, List[int]] = dataclasses.field(
+      default_factory=lambda: [3, 2])
+  low_level_num_filters: Union[int, List[int]] = dataclasses.field(
+      default_factory=lambda: [64, 32])
+
+
+# pytype: disable=wrong-keyword-args
+@dataclasses.dataclass
+class PanopticDeeplab(hyperparams.Config):
+  """Panoptic Deeplab model config."""
+  num_classes: int = 0
+  input_size: List[int] = dataclasses.field(default_factory=list)
+  min_level: int = 3
+  max_level: int = 6
+  norm_activation: common.NormActivation = common.NormActivation()
+  backbone: backbones.Backbone = backbones.Backbone(
+      type='resnet', resnet=backbones.ResNet())
+  decoder: decoders.Decoder = decoders.Decoder(type='aspp')
+  semantic_head: SEGMENTATION_HEAD = SEGMENTATION_HEAD()
+  instance_head: InstanceCenterHead = InstanceCenterHead(
+      low_level=[3, 2])
+  shared_decoder: bool = False
diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py
new file mode 100644
index 00000000000..2e963e909c9
--- /dev/null
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py
@@ -0,0 +1,107 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +"""Build Panoptic Deeplab model.""" +from typing import Any, Mapping, Optional, Union + +import tensorflow as tf + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class PanopticDeeplabModel(tf.keras.Model): + """Panoptic Deeplab model.""" + + def __init__( + self, + backbone: tf.keras.Model, + semantic_decoder: tf.keras.Model, + semantic_head: tf.keras.layers.Layer, + instance_head: tf.keras.layers.Layer, + instance_decoder: Optional[tf.keras.Model] = None, + **kwargs): + """ + Args: + backbone: a backbone network. + semantic_decoder: a decoder network. E.g. FPN. + semantic_head: segmentation head. + instance_head: instance center head . + instance_decoder: Optional decoder network for instance predictions. + **kwargs: keyword arguments to be passed. + """ + super(PanopticDeeplabModel, self).__init__(**kwargs) + + self._config_dict = { + 'backbone': backbone, + 'semantic_decoder': semantic_decoder, + 'instance_decoder': instance_decoder, + 'semantic_head': semantic_head, + 'instance_head': instance_head + } + self.backbone = backbone + self.semantic_decoder = semantic_decoder + self.instance_decoder = instance_decoder + self.semantic_head = semantic_head + self.instance_head = instance_head + + def call(self, inputs: tf.Tensor, training: bool = None) -> tf.Tensor: + if training is None: + training = tf.keras.backend.learning_phase() + + backbone_features = self.backbone(inputs, training=training) + + semantic_features = self.semantic_decoder( + backbone_features, training=training) + + if self.instance_decoder is None: + instance_features = semantic_features + else: + instance_features = self.instance_decoder( + backbone_features, training=training) + + segmentation_outputs = self.semantic_head( + (backbone_features, semantic_features), + training=training) + instance_outputs = self.instance_head( + (backbone_features, instance_features), + training=training) + + outputs = { + 'segmentation_outputs': segmentation_outputs, + 'instance_center_prediction': + instance_outputs['instance_center_prediction'], + 'instance_center_regression': + instance_outputs['instance_center_regression'], + } + return outputs + + @property + def checkpoint_items( + self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]: + """Returns a dictionary of items to be additionally checkpointed.""" + items = dict( + backbone=self.backbone, + semantic_decoder=self.semantic_decoder, + semantic_head=self.semantic_head, + instance_head=self.instance_head) + if self.instance_decoder is not None: + items.update(instance_decoder=self.instance_decoder) + + return items + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) From 8a8d5fabbb2f111ec7fd4d8ae5bf3d4f1cb29fbb Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:32:14 +0530 Subject: [PATCH 08/26] added tests for `PanopticDeeplabModel` --- .../modeling/panoptic_deeplab_model_test.py | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py new file mode 100644 index 00000000000..95bc2e55729 --- /dev/null +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py @@ -0,0 +1,148 
@@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for Panoptic Deeplab network.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf +from tensorflow.python.distribute import combinations + +from official.vision.beta.modeling import backbones +from official.vision.beta.modeling.decoders import aspp +from official.vision.beta.modeling.heads import segmentation_heads +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head +from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model + +class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase): + + @combinations.generate( + combinations.combine( + level=[2, 3, 4], + input_size=[256, 512], + low_level=[(4, 3), (3, 2)], + shared_decoder=[True, False], + training=[True, False])) + def test_panoptic_deeplab_network_creation( + self, input_size, level, low_level, shared_decoder, training): + """Test for creation of a panoptic deep lab network.""" + num_classes = 10 + inputs = np.random.rand(2, input_size, input_size, 3) + tf.keras.backend.set_image_data_format('channels_last') + backbone = backbones.ResNet(model_id=50) + + semantic_decoder = aspp.ASPP( + level=level, dilation_rates=[6, 12, 18]) + + if shared_decoder: + instance_decoder = semantic_decoder + else: + instance_decoder = aspp.ASPP( + level=level, dilation_rates=[6, 12, 18]) + + semantic_head = segmentation_heads.SegmentationHead( + num_classes, + level=level, + low_level=low_level, + low_level_num_filters=[64, 32], + feature_fusion='panoptic_deeplab_fusion') + + instance_head = instance_center_head.InstanceCenterHead( + level=level, + low_level=low_level, + low_level_num_filters=[64, 32], + feature_fusion='panoptic_deeplab_fusion') + + model = panoptic_deeplab_model.PanopticDeeplabModel( + backbone=backbone, + semantic_decoder=semantic_decoder, + instance_decoder=instance_decoder, + semantic_head=semantic_head, + instance_head=instance_head) + + outputs = model(inputs, training=training) + + + self.assertIn('segmentation_outputs', outputs) + self.assertIn('instance_center_prediction', outputs) + self.assertIn('instance_center_regression', outputs) + + self.assertAllEqual( + [2, input_size // (2**low_level[-1]), + input_size //(2**low_level[-1]), + num_classes], + outputs['segmentation_outputs'].numpy().shape) + self.assertAllEqual( + [2, input_size // (2**low_level[-1]), + input_size // (2**low_level[-1]), + 1], + outputs['instance_center_prediction'].numpy().shape) + self.assertAllEqual( + [2, input_size // (2**low_level[-1]), + input_size // (2**low_level[-1]), + 2], + outputs['instance_center_regression'].numpy().shape) + + @combinations.generate( + combinations.combine( + level=[2, 3, 4], + low_level=[(4, 3), (3, 2)], + shared_decoder=[True, False])) + def test_serialize_deserialize(self, level, low_level, shared_decoder): + """Validate the network can be serialized 
and deserialized.""" + num_classes = 10 + backbone = backbones.ResNet(model_id=50) + + semantic_decoder = aspp.ASPP( + level=level, dilation_rates=[6, 12, 18]) + + if shared_decoder: + instance_decoder = semantic_decoder + else: + instance_decoder = aspp.ASPP( + level=level, dilation_rates=[6, 12, 18]) + + semantic_head = segmentation_heads.SegmentationHead( + num_classes, + level=level, + low_level=low_level, + low_level_num_filters=[64, 32], + feature_fusion='panoptic_deeplab_fusion') + + instance_head = instance_center_head.InstanceCenterHead( + level=level, + low_level=low_level, + low_level_num_filters=[64, 32], + feature_fusion='panoptic_deeplab_fusion') + + model = panoptic_deeplab_model.PanopticDeeplabModel( + backbone=backbone, + semantic_decoder=semantic_decoder, + instance_decoder=instance_decoder, + semantic_head=semantic_head, + instance_head=instance_head) + + config = model.get_config() + new_model = panoptic_deeplab_model.PanopticDeeplabModel.from_config(config) + + # Validate that the config can be forced to JSON. + _ = new_model.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(model.get_config(), new_model.get_config()) + + +if __name__ == '__main__': + tf.test.main() From c3282abe652c928a8923f07b07be2456651a7c8f Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:32:57 +0530 Subject: [PATCH 09/26] added `build_panoptic_deeplab` in panoptic factory --- .../panoptic_maskrcnn/modeling/factory.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py index e02227fb3e2..87012bcc64d 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py @@ -20,7 +20,10 @@ from official.vision.beta.modeling import factory as models_factory from official.vision.beta.modeling.decoders import factory as decoder_factory from official.vision.beta.modeling.heads import segmentation_heads +from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head +from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_maskrcnn_model from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_segmentation_generator @@ -82,6 +85,7 @@ def build_panoptic_maskrcnn( num_classes=segmentation_config.num_classes, level=segmentation_head_config.level, num_convs=segmentation_head_config.num_convs, + kernel_size=segmentation_head_config.kernel_size, prediction_kernel_size=segmentation_head_config.prediction_kernel_size, num_filters=segmentation_head_config.num_filters, upsample_factor=segmentation_head_config.upsample_factor, @@ -141,3 +145,88 @@ def build_panoptic_maskrcnn( aspect_ratios=model_config.anchor.aspect_ratios, anchor_size=model_config.anchor.anchor_size) return model + + +def build_panoptic_deeplab( + input_specs: tf.keras.layers.InputSpec, + model_config: panoptic_deeplab_cfg.PanopticDeeplab, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: # pytype: 
disable=annotation-type-mismatch # typed-keras
+  """Builds Panoptic Deeplab model.
+
+  Args:
+    input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
+    model_config: Config instance for the panoptic deeplab model.
+    l2_regularizer: Optional `tf.keras.regularizers.Regularizer`, if
+      specified, the model is built with the provided regularization layer.
+
+  Returns:
+    tf.keras.Model for the panoptic segmentation model.
+  """
+  norm_activation_config = model_config.norm_activation
+  backbone = backbones.factory.build_backbone(
+      input_specs=input_specs,
+      backbone_config=model_config.backbone,
+      norm_activation_config=norm_activation_config,
+      l2_regularizer=l2_regularizer)
+
+  semantic_decoder = decoder_factory.build_decoder(
+      input_specs=backbone.output_specs,
+      model_config=model_config,
+      l2_regularizer=l2_regularizer)
+
+  if model_config.shared_decoder:
+    instance_decoder = None
+  else:
+    # TODO(srihari-humbarwadi): decouple semantic and
+    # instance decoder types
+    instance_decoder = decoder_factory.build_decoder(
+        input_specs=backbone.output_specs,
+        model_config=model_config,
+        l2_regularizer=l2_regularizer)
+
+  semantic_head_config = model_config.semantic_head
+  instance_head_config = model_config.instance_head
+
+  semantic_head = segmentation_heads.SegmentationHead(
+      num_classes=model_config.num_classes,
+      level=semantic_head_config.level,
+      num_convs=semantic_head_config.num_convs,
+      kernel_size=semantic_head_config.kernel_size,
+      prediction_kernel_size=semantic_head_config.prediction_kernel_size,
+      num_filters=semantic_head_config.num_filters,
+      use_depthwise_convolution=semantic_head_config.use_depthwise_convolution,
+      upsample_factor=semantic_head_config.upsample_factor,
+      feature_fusion=semantic_head_config.feature_fusion,
+      low_level=semantic_head_config.low_level,
+      low_level_num_filters=semantic_head_config.low_level_num_filters,
+      activation=norm_activation_config.activation,
+      use_sync_bn=norm_activation_config.use_sync_bn,
+      norm_momentum=norm_activation_config.norm_momentum,
+      norm_epsilon=norm_activation_config.norm_epsilon,
+      kernel_regularizer=l2_regularizer)
+
+  instance_head = instance_center_head.InstanceCenterHead(
+      level=instance_head_config.level,
+      num_convs=instance_head_config.num_convs,
+      kernel_size=instance_head_config.kernel_size,
+      prediction_kernel_size=instance_head_config.prediction_kernel_size,
+      num_filters=instance_head_config.num_filters,
+      use_depthwise_convolution=instance_head_config.use_depthwise_convolution,
+      upsample_factor=instance_head_config.upsample_factor,
+      feature_fusion=instance_head_config.feature_fusion,
+      low_level=instance_head_config.low_level,
+      low_level_num_filters=instance_head_config.low_level_num_filters,
+      activation=norm_activation_config.activation,
+      use_sync_bn=norm_activation_config.use_sync_bn,
+      norm_momentum=norm_activation_config.norm_momentum,
+      norm_epsilon=norm_activation_config.norm_epsilon,
+      kernel_regularizer=l2_regularizer)
+
+  model = panoptic_deeplab_model.PanopticDeeplabModel(
+      backbone=backbone,
+      semantic_decoder=semantic_decoder,
+      instance_decoder=instance_decoder,
+      semantic_head=semantic_head,
+      instance_head=instance_head)
+
+  return model

From ac6713063e8a4dd57363a8e4d41e3afdf28037ee Mon Sep 17 00:00:00 2001
From: srihari-humbarwadi
Date: Thu, 13 Jan 2022 00:33:07 +0530
Subject: [PATCH 10/26] added tests for `build_panoptic_deeplab` in panoptic
 factory

---
 .../modeling/factory_test.py | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git
a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py index ba64f8083a6..ed31fe8487c 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py @@ -17,10 +17,14 @@ from absl.testing import parameterized import numpy as np import tensorflow as tf +from tensorflow.python.distribute import combinations + from official.vision.beta.configs import backbones from official.vision.beta.configs import decoders from official.vision.beta.configs import semantic_segmentation from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg +from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg + from official.vision.beta.projects.panoptic_maskrcnn.modeling import factory @@ -61,5 +65,53 @@ def test_builder(self, backbone_type, input_size, segmentation_backbone_type, model_config=model_config, l2_regularizer=l2_regularizer) +class PanopticDeeplabBuilderTest(parameterized.TestCase, tf.test.TestCase): + + @combinations.generate( + combinations.combine( + input_size=[(640, 640), (512, 512)], + backbone_type=['resnet', 'dilated_resnet'], + decoder_type=['aspp', 'fpn'], + level=[2, 3, 4], + low_level=[(4, 3), (3, 2)], + shared_decoder=[True, False], + fusion_type=[ + 'pyramid_fusion', + 'panoptic_fpn_fusion', + 'panoptic_deeplab_fusion'])) + def test_builder(self, input_size, backbone_type, level, + low_level, decoder_type, shared_decoder, fusion_type): + num_classes = 10 + input_specs = tf.keras.layers.InputSpec( + shape=[None, input_size[0], input_size[1], 3]) + + model_config = panoptic_deeplab_cfg.PanopticDeeplab( + num_classes=num_classes, + input_size=input_size, + backbone=backbones.Backbone(type=backbone_type), + decoder=decoders.Decoder(type=decoder_type), + semantic_head=semantic_segmentation.SegmentationHead( + level=level, + num_convs=1, + kernel_size=5, + prediction_kernel_size=1, + low_level=low_level, + feature_fusion=fusion_type), + instance_head=panoptic_deeplab_cfg.InstanceCenterHead( + level=level, + num_convs=1, + kernel_size=5, + prediction_kernel_size=1, + low_level=low_level, + feature_fusion=fusion_type), + shared_decoder=shared_decoder) + + l2_regularizer = tf.keras.regularizers.l2(5e-5) + _ = factory.build_panoptic_deeplab( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + if __name__ == '__main__': tf.test.main() From 4dc4f6c74bf9f79d08df27315eb01cdb0c736b5c Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 14 Jan 2022 13:29:04 +0530 Subject: [PATCH 11/26] Revert "added `kernel_size` param for `SegmentationHead`" This reverts commit e257b292f15ab7a9adfcae7081333e957799bf37. 
--- official/vision/beta/configs/semantic_segmentation.py | 1 - official/vision/beta/modeling/factory.py | 1 - 2 files changed, 2 deletions(-) diff --git a/official/vision/beta/configs/semantic_segmentation.py b/official/vision/beta/configs/semantic_segmentation.py index 701e1653e90..0543fcc13d2 100644 --- a/official/vision/beta/configs/semantic_segmentation.py +++ b/official/vision/beta/configs/semantic_segmentation.py @@ -63,7 +63,6 @@ class SegmentationHead(hyperparams.Config): num_convs: int = 2 num_filters: int = 256 use_depthwise_convolution: bool = False - kernel_size: int = 3 prediction_kernel_size: int = 1 upsample_factor: int = 1 feature_fusion: Optional[ diff --git a/official/vision/beta/modeling/factory.py b/official/vision/beta/modeling/factory.py index 5e35fdfe5af..c91a1abceed 100644 --- a/official/vision/beta/modeling/factory.py +++ b/official/vision/beta/modeling/factory.py @@ -356,7 +356,6 @@ def build_segmentation_model( num_classes=model_config.num_classes, level=head_config.level, num_convs=head_config.num_convs, - kernel_size=head_config.kernel_size, prediction_kernel_size=head_config.prediction_kernel_size, num_filters=head_config.num_filters, use_depthwise_convolution=head_config.use_depthwise_convolution, From 29ab89cca4c7416930b747497caeda501bec3fe6 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 14 Jan 2022 13:29:49 +0530 Subject: [PATCH 12/26] Revert "added new feature_fusion: panoptic_deeplab_fusion" This reverts commit 78949f92e6529d27a665193c0cc152ccfe0df163. --- .../beta/modeling/heads/segmentation_heads.py | 74 +++++++------------ 1 file changed, 25 insertions(+), 49 deletions(-) diff --git a/official/vision/beta/modeling/heads/segmentation_heads.py b/official/vision/beta/modeling/heads/segmentation_heads.py index 66967392aa6..f65b234b3cf 100644 --- a/official/vision/beta/modeling/heads/segmentation_heads.py +++ b/official/vision/beta/modeling/heads/segmentation_heads.py @@ -202,14 +202,13 @@ def __init__( num_convs: int = 2, num_filters: int = 256, use_depthwise_convolution: bool = False, - kernel_size: int = 3, prediction_kernel_size: int = 1, upsample_factor: int = 1, feature_fusion: Optional[str] = None, decoder_min_level: Optional[int] = None, decoder_max_level: Optional[int] = None, - low_level: Union[int, List[int]] = 2, - low_level_num_filters: Union[int, List[int]] = 48, + low_level: int = 2, + low_level_num_filters: int = 48, num_decoder_filters: int = 256, activation: str = 'relu', use_sync_bn: bool = False, @@ -230,8 +229,6 @@ def __init__( Default is 256. use_depthwise_convolution: A bool to specify if use depthwise separable convolutions. - kernel_size: An `int` number to specify the kernel size of the - stacked convolutions before the last prediction layer. prediction_kernel_size: An `int` number to specify the kernel size of the prediction layer. 
upsample_factor: An `int` number to specify the upsampling factor to @@ -273,7 +270,6 @@ def __init__( 'num_convs': num_convs, 'num_filters': num_filters, 'use_depthwise_convolution': use_depthwise_convolution, - 'kernel_size': kernel_size, 'prediction_kernel_size': prediction_kernel_size, 'upsample_factor': upsample_factor, 'feature_fusion': feature_fusion, @@ -297,12 +293,11 @@ def __init__( def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): """Creates the variables of the segmentation head.""" - kernel_size = self._config_dict['kernel_size'] use_depthwise_convolution = self._config_dict['use_depthwise_convolution'] random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01) conv_op = tf.keras.layers.Conv2D conv_kwargs = { - 'kernel_size': kernel_size if not use_depthwise_convolution else 1, + 'kernel_size': 3 if not use_depthwise_convolution else 1, 'padding': 'same', 'use_bias': False, 'kernel_initializer': random_initializer, @@ -342,19 +337,6 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): kernel_regularizer=self._config_dict['kernel_regularizer'], bias_regularizer=self._config_dict['bias_regularizer']) - if self._config_dict['feature_fusion'] == 'panoptic_deeplab_fusion': - self._panoptic_deeplab_fusion = nn_layers.PanopticDeepLabFusion( - level=self._config_dict['level'], - low_level=self._config_dict['low_level'], - num_projection_filters=self._config_dict['low_level_num_filters'], - num_output_filters=self._config_dict['num_filters'], - activation=self._config_dict['activation'], - use_sync_bn=self._config_dict['use_sync_bn'], - norm_momentum=self._config_dict['norm_momentum'], - norm_epsilon=self._config_dict['norm_epsilon'], - kernel_regularizer=self._config_dict['kernel_regularizer'], - bias_regularizer=self._config_dict['bias_regularizer']) - # Segmentation head layers. self._convs = [] self._norms = [] @@ -380,7 +362,7 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): norm_name = 'segmentation_head_norm_{}'.format(i) self._norms.append(bn_op(name=norm_name, **bn_kwargs)) - self._prediction_conv = conv_op( + self._classifier = conv_op( name='segmentation_output', filters=self._config_dict['num_classes'], kernel_size=self._config_dict['prediction_kernel_size'], @@ -392,7 +374,26 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): super().build(input_shape) - def _fuse_features(self, inputs): + def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], + Union[tf.Tensor, Mapping[str, tf.Tensor]]]): + """Forward pass of the segmentation head. + + It supports both a tuple of 2 tensors or 2 dictionaries. The first is + backbone endpoints, and the second is decoder endpoints. When inputs are + tensors, they are from a single level of feature maps. When inputs are + dictionaries, they contain multiple levels of feature maps, where the key + is the index of feature map. + + Args: + inputs: A tuple of 2 feature map tensors of shape + [batch, height_l, width_l, channels] or 2 dictionaries of tensors: + - key: A `str` of the level of the multilevel features. + - values: A `tf.Tensor` of the feature map tensors, whose shape is + [batch, height_l, width_l, channels]. + Returns: + segmentation prediction mask: A `tf.Tensor` of the segmentation mask + scores predicted from input features. 
+ """ backbone_output = inputs[0] decoder_output = inputs[1] if self._config_dict['feature_fusion'] == 'deeplabv3plus': @@ -415,34 +416,9 @@ def _fuse_features(self, inputs): self._config_dict['level']) elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion': x = self._panoptic_fpn_fusion(decoder_output) - elif self._config_dict['feature_fusion'] == 'panoptic_deeplab_fusion': - x = self._panoptic_deeplab_fusion(inputs) else: x = decoder_output[str(self._config_dict['level'])] if isinstance( decoder_output, dict) else decoder_output - return x - - def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], - Union[tf.Tensor, Mapping[str, tf.Tensor]]]): - """Forward pass of the segmentation head. - - It supports both a tuple of 2 tensors or 2 dictionaries. The first is - backbone endpoints, and the second is decoder endpoints. When inputs are - tensors, they are from a single level of feature maps. When inputs are - dictionaries, they contain multiple levels of feature maps, where the key - is the index of feature map. - - Args: - inputs: A tuple of 2 feature map tensors of shape - [batch, height_l, width_l, channels] or 2 dictionaries of tensors: - - key: A `str` of the level of the multilevel features. - - values: A `tf.Tensor` of the feature map tensors, whose shape is - [batch, height_l, width_l, channels]. - Returns: - segmentation prediction mask: A `tf.Tensor` of the segmentation mask - scores predicted from input features. - """ - x = self._fuse_features(inputs) for conv, norm in zip(self._convs, self._norms): x = conv(x) @@ -452,7 +428,7 @@ def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], x = spatial_transform_ops.nearest_upsampling( x, scale=self._config_dict['upsample_factor']) - return self._prediction_conv(x) + return self._classifier(x) def get_config(self): base_config = super().get_config() From cbe473920d0b018326917aaf75650c02b2fa117b Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 14 Jan 2022 13:30:05 +0530 Subject: [PATCH 13/26] Revert "added tests for panoptic_deeplab_fusion" This reverts commit c8e0233b00ced4798e9707714efc82f9b4dc4623. 
--- .../modeling/heads/segmentation_heads_test.py | 41 ++++++++----------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/official/vision/beta/modeling/heads/segmentation_heads_test.py b/official/vision/beta/modeling/heads/segmentation_heads_test.py index d882fbd94db..2ec7ded68c1 100644 --- a/official/vision/beta/modeling/heads/segmentation_heads_test.py +++ b/official/vision/beta/modeling/heads/segmentation_heads_test.py @@ -26,17 +26,14 @@ class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase): @parameterized.parameters( - (2, 'pyramid_fusion', None, None, 2, 48), - (3, 'pyramid_fusion', None, None, 2, 48), - (2, 'panoptic_fpn_fusion', 2, 5, 2, 48), - (2, 'panoptic_fpn_fusion', 2, 6, 2, 48), - (3, 'panoptic_fpn_fusion', 3, 5, 2, 48), - (3, 'panoptic_fpn_fusion', 3, 6, 2, 48), - (4, 'panoptic_deeplab_fusion', None, None, (4, 3), (64, 32)), - (4, 'panoptic_deeplab_fusion', None, None, (3, 2), (64, 32))) + (2, 'pyramid_fusion', None, None), + (3, 'pyramid_fusion', None, None), + (2, 'panoptic_fpn_fusion', 2, 5), + (2, 'panoptic_fpn_fusion', 2, 6), + (3, 'panoptic_fpn_fusion', 3, 5), + (3, 'panoptic_fpn_fusion', 3, 6)) def test_forward(self, level, feature_fusion, - decoder_min_level, decoder_max_level, - low_level, low_level_num_filters): + decoder_min_level, decoder_max_level): backbone_features = { '3': np.random.rand(2, 128, 128, 16), '4': np.random.rand(2, 64, 64, 16), @@ -48,16 +45,14 @@ def test_forward(self, level, feature_fusion, '5': np.random.rand(2, 32, 32, 64), '6': np.random.rand(2, 16, 16, 64), } - num_classes = 10 - if 'panoptic' in feature_fusion: + + if feature_fusion == 'panoptic_fpn_fusion': backbone_features['2'] = np.random.rand(2, 256, 256, 16) decoder_features['2'] = np.random.rand(2, 256, 256, 64) head = segmentation_heads.SegmentationHead( - num_classes=num_classes, + num_classes=10, level=level, - low_level=low_level, - low_level_num_filters=low_level_num_filters, feature_fusion=feature_fusion, decoder_min_level=decoder_min_level, decoder_max_level=decoder_max_level, @@ -65,18 +60,14 @@ def test_forward(self, level, feature_fusion, logits = head((backbone_features, decoder_features)) - if str(level) in decoder_features: - if feature_fusion == 'panoptic_deeplab_fusion': - h, w = decoder_features[str(low_level[-1])].shape[1:3] - else: - h, w = decoder_features[str(level)].shape[1:3] - self.assertAllEqual( - logits.numpy().shape, - [2, h, w, num_classes]) + if level in decoder_features: + self.assertAllEqual(logits.numpy().shape, [ + 2, decoder_features[str(level)].shape[1], + decoder_features[str(level)].shape[2], 10 + ]) def test_serialize_deserialize(self): - head = segmentation_heads.SegmentationHead( - num_classes=10, level=3) + head = segmentation_heads.SegmentationHead(num_classes=10, level=3) config = head.get_config() new_head = segmentation_heads.SegmentationHead.from_config(config) self.assertAllEqual(head.get_config(), new_head.get_config()) From 7e6c5502a08cc87991738b70a6f11044c05430c2 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 17:53:37 +0530 Subject: [PATCH 14/26] Revert "added `InstanceCenterHead`" This reverts commit 6742d61a045c5a5a029b6ec2d9f429edc8f161ad. 
--- .../modeling/heads/instance_center_head.py | 170 ------------------ 1 file changed, 170 deletions(-) delete mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py deleted file mode 100644 index f16bbfbbb24..00000000000 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Contains definition of instance center heads.""" -from typing import List, Union, Optional, Mapping, Tuple -import tensorflow as tf - -from official.vision.beta.modeling.heads import segmentation_heads -from official.vision.beta.ops import spatial_transform_ops - - -@tf.keras.utils.register_keras_serializable(package='Vision') -class InstanceCenterHead(segmentation_heads.SegmentationHead): - """Creates a segmentation head.""" - - def __init__( - self, - level: Union[int, str], - num_convs: int = 2, - num_filters: int = 256, - use_depthwise_convolution: bool = False, - kernel_size: int = 3, - prediction_kernel_size: int = 1, - upsample_factor: int = 1, - feature_fusion: Optional[str] = None, - decoder_min_level: Optional[int] = None, - decoder_max_level: Optional[int] = None, - low_level: Union[int, List[int]] = 2, - low_level_num_filters: Union[int, List[int]] = 48, - num_decoder_filters: int = 256, - activation: str = 'relu', - use_sync_bn: bool = False, - norm_momentum: float = 0.99, - norm_epsilon: float = 0.001, - kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - **kwargs): - """Initializes a instance center head. - - Args: - level: An `int` or `str`, level to use to build segmentation head. - num_convs: An `int` number of stacked convolution before the last - prediction layer. - num_filters: An `int` number to specify the number of filters used. - Default is 256. - use_depthwise_convolution: A bool to specify if use depthwise separable - convolutions. - prediction_kernel_size: An `int` number to specify the kernel size of the - prediction layer. - upsample_factor: An `int` number to specify the upsampling factor to - generate finer mask. Default 1 means no upsampling is applied. - feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`, - `panoptic_fpn_fusion`, or None. If `deeplabv3plus`, features from - decoder_features[level] will be fused with low level feature maps from - backbone. If `pyramid_fusion`, multiscale features will be resized and - fused at the target level. - decoder_min_level: An `int` of minimum level from decoder to use in - feature fusion. It is only used when feature_fusion is set to - `panoptic_fpn_fusion`. 
- decoder_max_level: An `int` of maximum level from decoder to use in - feature fusion. It is only used when feature_fusion is set to - `panoptic_fpn_fusion`. - low_level: An `int` of backbone level to be used for feature fusion. It is - used when feature_fusion is set to `deeplabv3plus`. - low_level_num_filters: An `int` of reduced number of filters for the low - level features before fusing it with higher level features. It is only - used when feature_fusion is set to `deeplabv3plus`. - num_decoder_filters: An `int` of number of filters in the decoder outputs. - It is only used when feature_fusion is set to `panoptic_fpn_fusion`. - activation: A `str` that indicates which activation is used, e.g. 'relu', - 'swish', etc. - use_sync_bn: A `bool` that indicates whether to use synchronized batch - normalization across different replicas. - norm_momentum: A `float` of normalization momentum for the moving average. - norm_epsilon: A `float` added to variance to avoid dividing by zero. - kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for - Conv2D. Default is None. - bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. - **kwargs: Additional keyword arguments to be passed. - """ - super(InstanceCenterHead, self).__init__( - num_classes=2, - level=level, - num_convs=num_convs, - num_filters=num_filters, - use_depthwise_convolution=use_depthwise_convolution, - kernel_size=kernel_size, - prediction_kernel_size=prediction_kernel_size, - upsample_factor=upsample_factor, - feature_fusion=feature_fusion, - decoder_min_level=decoder_min_level, - decoder_max_level=decoder_max_level, - low_level=low_level, - low_level_num_filters=low_level_num_filters, - num_decoder_filters=num_decoder_filters, - activation=activation, - use_sync_bn=use_sync_bn, - norm_momentum=norm_momentum, - norm_epsilon=norm_epsilon, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - **kwargs) - - - def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): - self._instance_center_prediction_conv = tf.keras.layers.Conv2D( - name='instance_center_prediction', - filters=1, - kernel_size=self._config_dict['prediction_kernel_size'], - padding='same', - bias_initializer=tf.zeros_initializer(), - kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), - kernel_regularizer=self._config_dict['kernel_regularizer'], - bias_regularizer=self._config_dict['bias_regularizer']) - super(InstanceCenterHead, self).build(input_shape) - - - def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], - Union[tf.Tensor, Mapping[str, tf.Tensor]]]): - """Forward pass of the segmentation head. - - It supports both a tuple of 2 tensors or 2 dictionaries. The first is - backbone endpoints, and the second is decoder endpoints. When inputs are - tensors, they are from a single level of feature maps. When inputs are - dictionaries, they contain multiple levels of feature maps, where the key - is the index of feature map. - - Args: - inputs: A tuple of 2 feature map tensors of shape - [batch, height_l, width_l, channels] or 2 dictionaries of tensors: - - key: A `str` of the level of the multilevel features. - - values: A `tf.Tensor` of the feature map tensors, whose shape is - [batch, height_l, width_l, channels]. - Returns: - segmentation prediction mask: A `tf.Tensor` of the segmentation mask - scores predicted from input features. 
- """ - x = self._fuse_features(inputs) - - for conv, norm in zip(self._convs, self._norms): - x = conv(x) - x = norm(x) - x = self._activation(x) - if self._config_dict['upsample_factor'] > 1: - x = spatial_transform_ops.nearest_upsampling( - x, scale=self._config_dict['upsample_factor']) - - instance_center_prediction = self._instance_center_prediction_conv(x) - instance_center_regression = self._prediction_conv(x) - outputs = { - 'instance_center_prediction': instance_center_prediction, - 'instance_center_regression': instance_center_regression - } - return outputs - - def get_config(self): - config_dict = super(InstanceCenterHead, self).get_config().copy() - config_dict.pop('num_classes') - return config_dict From 01685eed34e06e72aa84652c30f387a6c79594df Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 17:53:56 +0530 Subject: [PATCH 15/26] Revert "added tests for `InstanceCenterHead`" This reverts commit a6a14de72b50e4cef438f73fbe1b0cbdd237ccae. --- .../heads/instance_center_head_test.py | 87 ------------------- 1 file changed, 87 deletions(-) delete mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py deleted file mode 100644 index ab4fe281a22..00000000000 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-# Lint as: python3
-"""Tests for segmentation_heads.py."""
-
-# Import libraries
-from absl.testing import parameterized
-import numpy as np
-import tensorflow as tf
-
-from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head
-
-
-class InstanceCenterHeadTest(parameterized.TestCase, tf.test.TestCase):
-
-  @parameterized.parameters(
-      (2, 'pyramid_fusion', None, None, 2, 48),
-      (3, 'pyramid_fusion', None, None, 2, 48),
-      (2, 'panoptic_fpn_fusion', 2, 5, 2, 48),
-      (2, 'panoptic_fpn_fusion', 2, 6, 2, 48),
-      (3, 'panoptic_fpn_fusion', 3, 5, 2, 48),
-      (3, 'panoptic_fpn_fusion', 3, 6, 2, 48),
-      (4, 'panoptic_deeplab_fusion', None, None, (4, 3), (64, 32)),
-      (4, 'panoptic_deeplab_fusion', None, None, (3, 2), (64, 32)))
-  def test_forward(self, level, feature_fusion,
-                   decoder_min_level, decoder_max_level,
-                   low_level, low_level_num_filters):
-    backbone_features = {
-        '3': np.random.rand(2, 128, 128, 16),
-        '4': np.random.rand(2, 64, 64, 16),
-        '5': np.random.rand(2, 32, 32, 16),
-    }
-    decoder_features = {
-        '3': np.random.rand(2, 128, 128, 64),
-        '4': np.random.rand(2, 64, 64, 64),
-        '5': np.random.rand(2, 32, 32, 64),
-        '6': np.random.rand(2, 16, 16, 64),
-    }
-
-    if 'panoptic' in feature_fusion:
-      backbone_features['2'] = np.random.rand(2, 256, 256, 16)
-      decoder_features['2'] = np.random.rand(2, 256, 256, 64)
-
-    head = instance_center_head.InstanceCenterHead(
-        level=level,
-        low_level=low_level,
-        low_level_num_filters=low_level_num_filters,
-        feature_fusion=feature_fusion,
-        decoder_min_level=decoder_min_level,
-        decoder_max_level=decoder_max_level,
-        num_decoder_filters=64)
-
-    outputs = head((backbone_features, decoder_features))
-
-    if str(level) in decoder_features:
-      if feature_fusion == 'panoptic_deeplab_fusion':
-        h, w = decoder_features[str(low_level[-1])].shape[1:3]
-      else:
-        h, w = decoder_features[str(level)].shape[1:3]
-      self.assertAllEqual(
-          outputs['instance_center_prediction'].numpy().shape,
-          [2, h, w, 1])
-      self.assertAllEqual(
-          outputs['instance_center_regression'].numpy().shape,
-          [2, h, w, 2])
-
-
-  def test_serialize_deserialize(self):
-    head = instance_center_head.InstanceCenterHead(level=3)
-    config = head.get_config()
-    new_head = instance_center_head.InstanceCenterHead.from_config(config)
-    self.assertAllEqual(head.get_config(), new_head.get_config())
-
-if __name__ == '__main__':
-  tf.test.main()

From ecbc5cba4e5c6e0f49ae5d457524372456c8d146 Mon Sep 17 00:00:00 2001
From: srihari-humbarwadi
Date: Sat, 22 Jan 2022 18:26:26 +0530
Subject: [PATCH 16/26] implemented `PanopticDeeplabHead`

---
 .../modeling/heads/panoptic_deeplab_heads.py | 418 ++++++++++++++++++
 1 file changed, 418 insertions(+)
 create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py

diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py
new file mode 100644
index 00000000000..8c8cd9e6647
--- /dev/null
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py
@@ -0,0 +1,418 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Contains definitions for Panoptic Deeplab heads."""
+
+from typing import List, Union, Optional, Mapping, Tuple
+import tensorflow as tf
+
+from official.modeling import tf_utils
+from official.vision.beta.modeling.layers import nn_layers
+from official.vision.beta.ops import spatial_transform_ops
+
+
+class PanopticDeeplabHead(tf.keras.layers.Layer):
+  """Creates a panoptic deeplab head."""
+
+  def __init__(
+      self,
+      level: Union[int, str],
+      num_convs: int = 2,
+      num_filters: int = 256,
+      kernel_size: int = 3,
+      use_depthwise_convolution: bool = False,
+      upsample_factor: int = 1,
+      low_level: Union[List[int], Tuple[int]] = (3, 2),
+      low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32),
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes a panoptic deeplab head.
+
+    Args:
+      level: An `int` or `str`, level to use to build head.
+      num_convs: An `int` number of stacked convolutions before the last
+        prediction layer.
+      num_filters: An `int` number to specify the number of filters used.
+        Default is 256.
+      kernel_size: An `int` number to specify the kernel size of the
+        stacked convolutions before the last prediction layer.
+      use_depthwise_convolution: A `bool` to specify whether to use depthwise
+        separable convolutions.
+      upsample_factor: An `int` number to specify the upsampling factor to
+        generate finer mask. Default 1 means no upsampling is applied.
+      low_level: A list of `int` of backbone levels to be used for feature
+        fusion.
+      low_level_num_filters: A list of `int` of reduced numbers of filters for
+        the low level features before fusing them with higher level features.
+      activation: A `str` that indicates which activation is used, e.g. 'relu',
+        'swish', etc.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+      norm_momentum: A `float` of normalization momentum for the moving average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default is None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
+      **kwargs: Additional keyword arguments to be passed.
+ """ + super(PanopticDeeplabHead, self).__init__(**kwargs) + + self._config_dict = { + 'level': level, + 'num_convs': num_convs, + 'num_filters': num_filters, + 'kernel_size': kernel_size, + 'use_depthwise_convolution': use_depthwise_convolution, + 'upsample_factor': upsample_factor, + 'low_level': low_level, + 'low_level_num_filters': low_level_num_filters, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer + } + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): + """Creates the variables of the head.""" + kernel_size = self._config_dict['kernel_size'] + use_depthwise_convolution = self._config_dict['use_depthwise_convolution'] + random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01) + conv_op = tf.keras.layers.Conv2D + conv_kwargs = { + 'kernel_size': kernel_size if not use_depthwise_convolution else 1, + 'padding': 'same', + 'use_bias': False, + 'kernel_initializer': random_initializer, + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + } + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._bn_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + self._panoptic_deeplab_fusion = nn_layers.PanopticDeepLabFusion( + level=self._config_dict['level'], + low_level=self._config_dict['low_level'], + num_projection_filters=self._config_dict['low_level_num_filters'], + num_output_filters=self._config_dict['num_filters'], + activation=self._config_dict['activation'], + use_sync_bn=self._config_dict['use_sync_bn'], + norm_momentum=self._config_dict['norm_momentum'], + norm_epsilon=self._config_dict['norm_epsilon'], + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + + # Stacked convolutions layers. + self._convs = [] + self._norms = [] + for i in range(self._config_dict['num_convs']): + if use_depthwise_convolution: + self._convs.append( + tf.keras.layers.DepthwiseConv2D( + name='panoptic_deeplab_head_depthwise_conv_{}'.format(i), + kernel_size=3, + padding='same', + use_bias=False, + depthwise_initializer=random_initializer, + depthwise_regularizer=self._config_dict['kernel_regularizer'], + depth_multiplier=1)) + norm_name = 'panoptic_deeplab_head_depthwise_norm_{}'.format(i) + self._norms.append(bn_op(name=norm_name, **bn_kwargs)) + conv_name = 'panoptic_deeplab_head_conv_{}'.format(i) + self._convs.append( + conv_op( + name=conv_name, + filters=self._config_dict['num_filters'], + **conv_kwargs)) + norm_name = 'panoptic_deeplab_head_norm_{}'.format(i) + self._norms.append(bn_op(name=norm_name, **bn_kwargs)) + + super().build(input_shape) + + def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], + Union[tf.Tensor, Mapping[str, tf.Tensor]]], + training=None): + """Forward pass of the head. + + It supports both a tuple of 2 tensors or 2 dictionaries. The first is + backbone endpoints, and the second is decoder endpoints. When inputs are + tensors, they are from a single level of feature maps. 
When inputs are
+    dictionaries, they contain multiple levels of feature maps, where the key
+    is the index of feature map.
+
+    Args:
+      inputs: A tuple of 2 feature map tensors of shape
+        [batch, height_l, width_l, channels] or 2 dictionaries of tensors:
+        - key: A `str` of the level of the multilevel features.
+        - values: A `tf.Tensor` of the feature map tensors, whose shape is
+          [batch, height_l, width_l, channels].
+    Returns:
+      A `tf.Tensor` of the fused backbone and decoder features.
+    """
+    if training is None:
+      training = tf.keras.backend.learning_phase()
+
+    x = self._panoptic_deeplab_fusion(inputs, training=training)
+
+    for conv, norm in zip(self._convs, self._norms):
+      x = conv(x)
+      x = norm(x, training=training)
+      x = self._activation(x)
+
+    if self._config_dict['upsample_factor'] > 1:
+      x = spatial_transform_ops.nearest_upsampling(
+          x, scale=self._config_dict['upsample_factor'])
+
+    return x
+
+  def get_config(self):
+    base_config = super().get_config()
+    return dict(list(base_config.items()) + list(self._config_dict.items()))
+
+  @classmethod
+  def from_config(cls, config):
+    return cls(**config)
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class SemanticHead(PanopticDeeplabHead):
+  """Creates a semantic head."""
+
+  def __init__(
+      self,
+      num_classes: int,
+      level: Union[int, str],
+      num_convs: int = 2,
+      num_filters: int = 256,
+      kernel_size: int = 3,
+      prediction_kernel_size: int = 3,
+      use_depthwise_convolution: bool = False,
+      upsample_factor: int = 1,
+      low_level: Union[List[int], Tuple[int]] = (3, 2),
+      low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32),
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes a semantic head.
+
+    Args:
+      num_classes: An `int` number of mask classification categories. The number
+        of classes does not include background class.
+      level: An `int` or `str`, level to use to build head.
+      num_convs: An `int` number of stacked convolutions before the last
+        prediction layer.
+      num_filters: An `int` number to specify the number of filters used.
+        Default is 256.
+      kernel_size: An `int` number to specify the kernel size of the
+        stacked convolutions before the last prediction layer.
+      prediction_kernel_size: An `int` number to specify the kernel size of the
+        prediction layer.
+      use_depthwise_convolution: A `bool` to specify whether to use depthwise
+        separable convolutions.
+      upsample_factor: An `int` number to specify the upsampling factor to
+        generate finer mask. Default 1 means no upsampling is applied.
+      low_level: A list of `int` of backbone levels to be used for feature
+        fusion.
+      low_level_num_filters: A list of `int` of reduced numbers of filters for
+        the low level features before fusing them with higher level features.
+      activation: A `str` that indicates which activation is used, e.g. 'relu',
+        'swish', etc.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+      norm_momentum: A `float` of normalization momentum for the moving average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default is None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
+      **kwargs: Additional keyword arguments to be passed.
+    """
+    super(SemanticHead, self).__init__(
+        level=level,
+        num_convs=num_convs,
+        num_filters=num_filters,
+        use_depthwise_convolution=use_depthwise_convolution,
+        kernel_size=kernel_size,
+        upsample_factor=upsample_factor,
+        low_level=low_level,
+        low_level_num_filters=low_level_num_filters,
+        activation=activation,
+        use_sync_bn=use_sync_bn,
+        norm_momentum=norm_momentum,
+        norm_epsilon=norm_epsilon,
+        kernel_regularizer=kernel_regularizer,
+        bias_regularizer=bias_regularizer,
+        **kwargs)
+    self._config_dict.update({
+        'num_classes': num_classes,
+        'prediction_kernel_size': prediction_kernel_size})
+
+  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
+    """Creates the variables of the semantic head."""
+    super(SemanticHead, self).build(input_shape)
+    self._classifier = tf.keras.layers.Conv2D(
+        name='semantic_output',
+        filters=self._config_dict['num_classes'],
+        kernel_size=self._config_dict['prediction_kernel_size'],
+        padding='same',
+        bias_initializer=tf.zeros_initializer(),
+        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
+        kernel_regularizer=self._config_dict['kernel_regularizer'],
+        bias_regularizer=self._config_dict['bias_regularizer'])
+
+  def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
+                               Union[tf.Tensor, Mapping[str, tf.Tensor]]],
+           training=None):
+    """Forward pass of the head."""
+
+    if training is None:
+      training = tf.keras.backend.learning_phase()
+    x = super(SemanticHead, self).call(inputs, training=training)
+    outputs = self._classifier(x)
+    return outputs
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class InstanceHead(PanopticDeeplabHead):
+  """Creates an instance head."""
+
+  def __init__(
+      self,
+      level: Union[int, str],
+      num_convs: int = 2,
+      num_filters: int = 256,
+      kernel_size: int = 3,
+      prediction_kernel_size: int = 3,
+      use_depthwise_convolution: bool = False,
+      upsample_factor: int = 1,
+      low_level: Union[List[int], Tuple[int]] = (3, 2),
+      low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32),
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes an instance head.
+
+    Args:
+      level: An `int` or `str`, level to use to build head.
+      num_convs: An `int` number of stacked convolutions before the last
+        prediction layer.
+      num_filters: An `int` number to specify the number of filters used.
+        Default is 256.
+      kernel_size: An `int` number to specify the kernel size of the
+        stacked convolutions before the last prediction layer.
+      prediction_kernel_size: An `int` number to specify the kernel size of the
+        prediction layer.
+      use_depthwise_convolution: A `bool` to specify whether to use depthwise
+        separable convolutions.
+      upsample_factor: An `int` number to specify the upsampling factor to
+        generate finer mask. Default 1 means no upsampling is applied.
+      low_level: A list of `int` of backbone levels to be used for feature
+        fusion.
+      low_level_num_filters: A list of `int` of reduced numbers of filters for
+        the low level features before fusing them with higher level features.
+ activation: A `str` that indicates which activation is used, e.g. 'relu', + 'swish', etc. + use_sync_bn: A `bool` that indicates whether to use synchronized batch + normalization across different replicas. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + **kwargs: Additional keyword arguments to be passed. + """ + super(InstanceHead, self).__init__( + level=level, + num_convs=num_convs, + num_filters=num_filters, + use_depthwise_convolution=use_depthwise_convolution, + kernel_size=kernel_size, + upsample_factor=upsample_factor, + low_level=low_level, + low_level_num_filters=low_level_num_filters, + activation=activation, + use_sync_bn=use_sync_bn, + norm_momentum=norm_momentum, + norm_epsilon=norm_epsilon, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + **kwargs) + self._config_dict.update({ + 'prediction_kernel_size': prediction_kernel_size}) + + + def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): + """Creates the variables of the instance head.""" + super(InstanceHead, self).build(input_shape) + self._instance_center_prediction_conv = tf.keras.layers.Conv2D( + name='instance_center_prediction', + filters=1, + kernel_size=self._config_dict['prediction_kernel_size'], + padding='same', + bias_initializer=tf.zeros_initializer(), + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + + self._instance_center_regression_conv = tf.keras.layers.Conv2D( + name='instance_center_regression', + filters=2, + kernel_size=self._config_dict['prediction_kernel_size'], + padding='same', + bias_initializer=tf.zeros_initializer(), + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + + def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], + Union[tf.Tensor, Mapping[str, tf.Tensor]]], + training=None): + """Forward pass of the head.""" + + if training is None: + training = tf.keras.backend.learning_phase() + + x = super(InstanceHead, self).call(inputs, training=training) + instance_center_prediction = self._instance_center_prediction_conv(x) + instance_center_regression = self._instance_center_regression_conv(x) + outputs = { + 'instance_center_prediction': instance_center_prediction, + 'instance_center_regression': instance_center_regression + } + return outputs From abee356d6af6c55b88f883d438cc26ca9377b0fe Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 18:26:52 +0530 Subject: [PATCH 17/26] added tests for `PanopticDeeplabHead` --- .../heads/panoptic_deeplab_heads_test.py | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads_test.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads_test.py new file mode 100644 index 00000000000..dc38bd2ce83 --- /dev/null +++ 
b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads_test.py @@ -0,0 +1,100 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for panoptic_deeplab_heads.py.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads + + +class PanopticDeeplabHeadsTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (2, (2,), (48,)), + (3, (2,), (48,)), + (2, (2,), (48,)), + (2, (2,), (48,)), + (3, (2,), (48,)), + (3, (2,), (48,)), + (4, (4, 3), (64, 32)), + (4, (3, 2), (64, 32))) + def test_forward(self, level, low_level, low_level_num_filters): + backbone_features = { + '3': np.random.rand(2, 128, 128, 16), + '4': np.random.rand(2, 64, 64, 16), + '5': np.random.rand(2, 32, 32, 16), + } + decoder_features = { + '3': np.random.rand(2, 128, 128, 64), + '4': np.random.rand(2, 64, 64, 64), + '5': np.random.rand(2, 32, 32, 64), + '6': np.random.rand(2, 16, 16, 64), + } + + backbone_features['2'] = np.random.rand(2, 256, 256, 16) + decoder_features['2'] = np.random.rand(2, 256, 256, 64) + + num_classes = 10 + semantic_head = panoptic_deeplab_heads.SemanticHead( + num_classes=num_classes, + level=level, + low_level=low_level, + low_level_num_filters=low_level_num_filters) + + instance_head = panoptic_deeplab_heads.InstanceHead( + level=level, + low_level=low_level, + low_level_num_filters=low_level_num_filters) + + semantic_outputs = semantic_head((backbone_features, decoder_features)) + instance_outputs = instance_head((backbone_features, decoder_features)) + + if str(level) in decoder_features: + h, w = decoder_features[str(low_level[-1])].shape[1:3] + self.assertAllEqual( + semantic_outputs.numpy().shape, + [2, h, w, num_classes]) + self.assertAllEqual( + instance_outputs['instance_center_prediction'].numpy().shape, + [2, h, w, 1]) + self.assertAllEqual( + instance_outputs['instance_center_regression'].numpy().shape, + [2, h, w, 2]) + + + def test_serialize_deserialize(self): + semantic_head = panoptic_deeplab_heads.SemanticHead(num_classes=2, level=3) + instance_head = panoptic_deeplab_heads.InstanceHead(level=3) + + semantic_head_config = semantic_head.get_config() + instance_head_config = instance_head.get_config() + + new_semantic_head = panoptic_deeplab_heads.SemanticHead.from_config( + semantic_head_config) + new_instance_head = panoptic_deeplab_heads.InstanceHead.from_config( + instance_head_config) + + self.assertAllEqual(semantic_head.get_config(), + new_semantic_head.get_config()) + self.assertAllEqual(instance_head.get_config(), + new_instance_head.get_config()) + + +if __name__ == '__main__': + tf.test.main() From 31a8e4664ba8ffca8d14b051f8c3a7ec3b5b91d1 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 18:29:25 +0530 Subject: [PATCH 18/26] use `SemanticHead` and 
`InstanceHead` from panoptic_deeplab_heads --- .../panoptic_maskrcnn/modeling/factory.py | 29 +++++++++---------- .../modeling/factory_test.py | 19 ++++-------- .../modeling/panoptic_deeplab_model_test.py | 23 ++++++--------- 3 files changed, 28 insertions(+), 43 deletions(-) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py index 87012bcc64d..4c2a30f0678 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py @@ -22,8 +22,8 @@ from official.vision.beta.modeling.heads import segmentation_heads from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg -from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_maskrcnn_model from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_segmentation_generator @@ -85,7 +85,6 @@ def build_panoptic_maskrcnn( num_classes=segmentation_config.num_classes, level=segmentation_head_config.level, num_convs=segmentation_head_config.num_convs, - kernel_size=segmentation_head_config.kernel_size, prediction_kernel_size=segmentation_head_config.prediction_kernel_size, num_filters=segmentation_head_config.num_filters, upsample_factor=segmentation_head_config.upsample_factor, @@ -185,9 +184,9 @@ def build_panoptic_deeplab( l2_regularizer=l2_regularizer) semantic_head_config = model_config.semantic_head - instnace_head_config = model_config.instance_head + instance_head_config = model_config.instance_head - semantic_head = segmentation_heads.SegmentationHead( + semantic_head = panoptic_deeplab_heads.SemanticHead( num_classes=model_config.num_classes, level=semantic_head_config.level, num_convs=semantic_head_config.num_convs, @@ -196,7 +195,6 @@ def build_panoptic_deeplab( num_filters=semantic_head_config.num_filters, use_depthwise_convolution=semantic_head_config.use_depthwise_convolution, upsample_factor=semantic_head_config.upsample_factor, - feature_fusion=semantic_head_config.feature_fusion, low_level=semantic_head_config.low_level, low_level_num_filters=semantic_head_config.low_level_num_filters, activation=norm_activation_config.activation, @@ -205,17 +203,16 @@ def build_panoptic_deeplab( norm_epsilon=norm_activation_config.norm_epsilon, kernel_regularizer=l2_regularizer) - instance_head = instance_center_head.InstanceCenterHead( - level=instnace_head_config.level, - num_convs=instnace_head_config.num_convs, - kernel_size=instnace_head_config.kernel_size, - prediction_kernel_size=instnace_head_config.prediction_kernel_size, - num_filters=instnace_head_config.num_filters, - use_depthwise_convolution=instnace_head_config.use_depthwise_convolution, - upsample_factor=instnace_head_config.upsample_factor, - feature_fusion=instnace_head_config.feature_fusion, - low_level=instnace_head_config.low_level, - low_level_num_filters=instnace_head_config.low_level_num_filters, + instance_head = panoptic_deeplab_heads.InstanceHead( + level=instance_head_config.level, + 
num_convs=instance_head_config.num_convs, + kernel_size=instance_head_config.kernel_size, + prediction_kernel_size=instance_head_config.prediction_kernel_size, + num_filters=instance_head_config.num_filters, + use_depthwise_convolution=instance_head_config.use_depthwise_convolution, + upsample_factor=instance_head_config.upsample_factor, + low_level=instance_head_config.low_level, + low_level_num_filters=instance_head_config.low_level_num_filters, activation=norm_activation_config.activation, use_sync_bn=norm_activation_config.use_sync_bn, norm_momentum=norm_activation_config.norm_momentum, diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py index ed31fe8487c..e51659797d3 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py @@ -15,7 +15,6 @@ """Tests for factory.py.""" from absl.testing import parameterized -import numpy as np import tensorflow as tf from tensorflow.python.distribute import combinations @@ -74,13 +73,9 @@ class PanopticDeeplabBuilderTest(parameterized.TestCase, tf.test.TestCase): decoder_type=['aspp', 'fpn'], level=[2, 3, 4], low_level=[(4, 3), (3, 2)], - shared_decoder=[True, False], - fusion_type=[ - 'pyramid_fusion', - 'panoptic_fpn_fusion', - 'panoptic_deeplab_fusion'])) + shared_decoder=[True, False])) def test_builder(self, input_size, backbone_type, level, - low_level, decoder_type, shared_decoder, fusion_type): + low_level, decoder_type, shared_decoder): num_classes = 10 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size[0], input_size[1], 3]) @@ -90,20 +85,18 @@ def test_builder(self, input_size, backbone_type, level, input_size=input_size, backbone=backbones.Backbone(type=backbone_type), decoder=decoders.Decoder(type=decoder_type), - semantic_head=semantic_segmentation.SegmentationHead( + semantic_head=panoptic_deeplab_cfg.SemanticHead( level=level, num_convs=1, kernel_size=5, prediction_kernel_size=1, - low_level=low_level, - feature_fusion=fusion_type), - instance_head=panoptic_deeplab_cfg.InstanceCenterHead( + low_level=low_level), + instance_head=panoptic_deeplab_cfg.InstanceHead( level=level, num_convs=1, kernel_size=5, prediction_kernel_size=1, - low_level=low_level, - feature_fusion=fusion_type), + low_level=low_level), shared_decoder=shared_decoder) l2_regularizer = tf.keras.regularizers.l2(5e-5) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py index 95bc2e55729..ea16e8dc34e 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py @@ -22,8 +22,7 @@ from official.vision.beta.modeling import backbones from official.vision.beta.modeling.decoders import aspp -from official.vision.beta.modeling.heads import segmentation_heads -from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase): @@ -52,18 +51,16 @@ def test_panoptic_deeplab_network_creation( 
instance_decoder = aspp.ASPP( level=level, dilation_rates=[6, 12, 18]) - semantic_head = segmentation_heads.SegmentationHead( + semantic_head = panoptic_deeplab_heads.SemanticHead( num_classes, level=level, low_level=low_level, - low_level_num_filters=[64, 32], - feature_fusion='panoptic_deeplab_fusion') + low_level_num_filters=(64, 32)) - instance_head = instance_center_head.InstanceCenterHead( + instance_head = panoptic_deeplab_heads.InstanceHead( level=level, low_level=low_level, - low_level_num_filters=[64, 32], - feature_fusion='panoptic_deeplab_fusion') + low_level_num_filters=(64, 32)) model = panoptic_deeplab_model.PanopticDeeplabModel( backbone=backbone, @@ -114,18 +111,16 @@ def test_serialize_deserialize(self, level, low_level, shared_decoder): instance_decoder = aspp.ASPP( level=level, dilation_rates=[6, 12, 18]) - semantic_head = segmentation_heads.SegmentationHead( + semantic_head = panoptic_deeplab_heads.SemanticHead( num_classes, level=level, low_level=low_level, - low_level_num_filters=[64, 32], - feature_fusion='panoptic_deeplab_fusion') + low_level_num_filters=(64, 32)) - instance_head = instance_center_head.InstanceCenterHead( + instance_head = panoptic_deeplab_heads.InstanceHead( level=level, low_level=low_level, - low_level_num_filters=[64, 32], - feature_fusion='panoptic_deeplab_fusion') + low_level_num_filters=(64, 32)) model = panoptic_deeplab_model.PanopticDeeplabModel( backbone=backbone, From 2ad1ec15b20a70949e9c65996ec742f5bf6ce1e4 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 18:29:45 +0530 Subject: [PATCH 19/26] added configs for `SemanticHead` and `InstanceHead` --- .../configs/panoptic_deeplab.py | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py index d509ba669a7..06001b940f1 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Panoptic Mask R-CNN configuration definition.""" +"""Panoptic Deeplab configuration definition.""" import dataclasses -from typing import List, Optional, Union +from typing import List, Tuple, Union from official.modeling import hyperparams from official.vision.beta.configs import common from official.vision.beta.configs import backbones from official.vision.beta.configs import decoders -from official.vision.beta.configs import semantic_segmentation - -SEGMENTATION_HEAD = semantic_segmentation.SegmentationHead _COCO_INPUT_PATH_BASE = 'coco/tfrecords' _COCO_TRAIN_EXAMPLES = 118287 @@ -31,17 +28,27 @@ @dataclasses.dataclass -class InstanceCenterHead(semantic_segmentation.SegmentationHead): - """Instance Center head config.""" - # None, deeplabv3plus, panoptic_fpn_fusion, - # panoptic_deeplab_fusion or pyramid_fusion +class PanopticDeeplabHead(hyperparams.Config): + """Panoptic Deeplab head config.""" + level: int = 3 + num_convs: int = 2 + num_filters: int = 256 kernel_size: int = 5 - feature_fusion: Optional[str] = None - low_level: Union[int, List[int]] = dataclasses.field( - default_factory=lambda: [3, 2]) - low_level_num_filters: Union[int, List[int]] = dataclasses.field( - default_factory=lambda: [64, 32]) + use_depthwise_convolution: bool = False + upsample_factor: int = 1 + low_level: Union[List[int], Tuple[int]] = (3, 2) + low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32) + +@dataclasses.dataclass +class SemanticHead(PanopticDeeplabHead): + """Semantic head config.""" + prediction_kernel_size: int = 1 + +@dataclasses.dataclass +class InstanceHead(PanopticDeeplabHead): + """Instance head config.""" + prediction_kernel_size: int = 1 # pytype: disable=wrong-keyword-args @dataclasses.dataclass @@ -55,7 +62,6 @@ class PanopticDeeplab(hyperparams.Config): backbone: backbones.Backbone = backbones.Backbone( type='resnet', resnet=backbones.ResNet()) decoder: decoders.Decoder = decoders.Decoder(type='aspp') - semantic_head: SEGMENTATION_HEAD = SEGMENTATION_HEAD() - instance_head: InstanceCenterHead = InstanceCenterHead( - low_level=[3, 2]) + semantic_head: SemanticHead = SemanticHead() + instance_head: InstanceHead = InstanceHead() shared_decoder: bool = False From df60a195a0913ee126b8e0a6e731d976f33a5046 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 18:35:40 +0530 Subject: [PATCH 20/26] revert misc changes --- official/vision/beta/modeling/heads/segmentation_heads.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/official/vision/beta/modeling/heads/segmentation_heads.py b/official/vision/beta/modeling/heads/segmentation_heads.py index e1d182f3850..c6bffac18fa 100644 --- a/official/vision/beta/modeling/heads/segmentation_heads.py +++ b/official/vision/beta/modeling/heads/segmentation_heads.py @@ -390,10 +390,12 @@ def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], - key: A `str` of the level of the multilevel features. - values: A `tf.Tensor` of the feature map tensors, whose shape is [batch, height_l, width_l, channels]. + The first is backbone endpoints, and the second is decoder endpoints. Returns: segmentation prediction mask: A `tf.Tensor` of the segmentation mask scores predicted from input features. 
""" + backbone_output = inputs[0] decoder_output = inputs[1] if self._config_dict['feature_fusion'] == 'deeplabv3plus': From e0a91f29eb18d2d36db3ce81a1b9fb67d71fb43a Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 28 Jan 2022 19:30:32 +0530 Subject: [PATCH 21/26] fixed import error --- .../beta/projects/panoptic_maskrcnn/modeling/factory_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py index aa16ae174c5..840407aa654 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py @@ -15,6 +15,7 @@ """Tests for factory.py.""" from absl.testing import parameterized +import numpy as np import tensorflow as tf from tensorflow.python.distribute import combinations From 75f304ddd90e7b6adaecad20ad16817edbd32cc9 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 28 Jan 2022 19:31:24 +0530 Subject: [PATCH 22/26] move `PanopticDeepLabFusion` into project dir --- .../modeling/heads/panoptic_deeplab_heads.py | 4 +- .../modeling/layers/fusion_layers.py | 157 ++++++++++++++++++ 2 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/fusion_layers.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py index 8c8cd9e6647..fac33126c64 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py @@ -18,7 +18,7 @@ import tensorflow as tf from official.modeling import tf_utils -from official.vision.beta.modeling.layers import nn_layers +from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import fusion_layers from official.vision.beta.ops import spatial_transform_ops @@ -118,7 +118,7 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): 'epsilon': self._config_dict['norm_epsilon'], } - self._panoptic_deeplab_fusion = nn_layers.PanopticDeepLabFusion( + self._panoptic_deeplab_fusion = fusion_layers.PanopticDeepLabFusion( level=self._config_dict['level'], low_level=self._config_dict['low_level'], num_projection_filters=self._config_dict['low_level_num_filters'], diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/fusion_layers.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/fusion_layers.py new file mode 100644 index 00000000000..a534d2a4d7d --- /dev/null +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/fusion_layers.py @@ -0,0 +1,157 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Contains fusion layers for Panoptic DeepLab heads."""
+from typing import Any, List, Mapping, Optional
+
+import tensorflow as tf
+
+from official.modeling import tf_utils
+
+
+class PanopticDeepLabFusion(tf.keras.layers.Layer):
+  """Creates a Panoptic DeepLab feature fusion layer.
+
+  This implements the feature fusion introduced in the paper:
+  Cheng et al. Panoptic-DeepLab
+  (https://arxiv.org/pdf/1911.10194.pdf)
+
+  Calling this layer fuses low level backbone features into the decoder
+  output and returns a `float` `tf.Tensor` of shape [batch_size,
+  feature_height, feature_width, num_output_filters].
+  """
+
+  def __init__(
+      self,
+      level: int,
+      low_level: List[int] = [3, 2],
+      num_projection_filters: List[int] = [64, 32],
+      num_output_filters: int = 256,
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      interpolation: str = 'bilinear',
+      **kwargs):
+    """Initializes a Panoptic DeepLab feature fusion layer.
+
+    Args:
+      level: An `int` level at which the decoder was applied.
+      low_level: A list of `int` backbone levels to use in feature fusion,
+        ordered from higher to lower level.
+      num_projection_filters: A list of `int` numbers of filters in the 1x1
+        projection convolutions, one per low level.
+      num_output_filters: An `int` number of filters in the fusion
+        convolutions.
+      activation: A `str` name of the activation function.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+      norm_momentum: A `float` of normalization momentum for the moving average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default is None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
+      interpolation: A `str` interpolation method for upsampling. Defaults to
+        `bilinear`.
+      **kwargs: Additional keyword arguments to be passed.
+ """ + super(PanopticDeepLabFusion, self).__init__(**kwargs) + + self._config_dict = { + 'level': level, + 'low_level': low_level, + 'num_projection_filters': num_projection_filters, + 'num_output_filters': num_output_filters, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + 'interpolation': interpolation + } + if tf.keras.backend.image_data_format() == 'channels_last': + self._channel_axis = -1 + else: + self._channel_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape: List[tf.TensorShape]): + conv_op = tf.keras.layers.Conv2D + conv_kwargs = { + 'padding': 'same', + 'use_bias': False, + 'kernel_initializer': tf.initializers.VarianceScaling(), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + } + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._channel_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + self._projection_convs = [] + self._projection_norms = [] + self._fusion_convs = [] + self._fusion_norms = [] + for i in range(len(self._config_dict['low_level'])): + self._projection_convs.append( + conv_op( + filters=self._config_dict['num_projection_filters'][i], + kernel_size=1, + **conv_kwargs)) + self._fusion_convs.append( + conv_op( + filters=self._config_dict['num_output_filters'], + kernel_size=5, + **conv_kwargs)) + self._projection_norms.append(bn_op(**bn_kwargs)) + self._fusion_norms.append(bn_op(**bn_kwargs)) + + def call(self, inputs, training=None): + if training is None: + training = tf.keras.backend.learning_phase() + + backbone_output = inputs[0] + decoder_output = inputs[1][str(self._config_dict['level'])] + + x = decoder_output + for i in range(len(self._config_dict['low_level'])): + feature = backbone_output[str(self._config_dict['low_level'][i])] + feature = self._projection_convs[i](feature) + feature = self._projection_norms[i](feature, training=training) + feature = self._activation(feature) + + shape = tf.shape(feature) + x = tf.image.resize( + x, size=[shape[1], shape[2]], + method=self._config_dict['interpolation']) + x = tf.concat([x, feature], axis=self._channel_axis) + + x = self._fusion_convs[i](x) + x = self._fusion_norms[i](x, training=training) + x = self._activation(x) + return x + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) From 78657911f3ec138fe4b11042b35af24f56ec0143 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 28 Jan 2022 19:32:01 +0530 Subject: [PATCH 23/26] Revert "Added `PanopticDeepLabFusion` layer" This reverts commit 54fae0423e396e26fe4e3924e98f6952241bda4a. 
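After the move, callers import the fusion layer from the project directory. A usage sketch of the relocated layer (shapes are assumptions chosen to mirror the tests in this series; not part of the patch):

# Sketch only: inputs are a [backbone_features, decoder_features] pair,
# and the decoder output at `level` is progressively upsampled to and
# fused with each low level, from higher to lower.
import numpy as np

from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import fusion_layers

backbone_features = {
    '2': np.random.rand(2, 256, 256, 16),
    '3': np.random.rand(2, 128, 128, 16),
}
decoder_features = {'4': np.random.rand(2, 64, 64, 64)}

fusion = fusion_layers.PanopticDeepLabFusion(
    level=4, low_level=[3, 2], num_projection_filters=[64, 32],
    num_output_filters=256)
fused = fusion([backbone_features, decoder_features])
# The fused map lands at the spatial size of the last low level ('2')
# with num_output_filters channels: (2, 256, 256, 256).
assert fused.shape == (2, 256, 256, 256)
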
--- .../vision/beta/modeling/layers/nn_layers.py | 130 ------------------ 1 file changed, 130 deletions(-) diff --git a/official/vision/beta/modeling/layers/nn_layers.py b/official/vision/beta/modeling/layers/nn_layers.py index dd27c2878f4..51f1db6918e 100644 --- a/official/vision/beta/modeling/layers/nn_layers.py +++ b/official/vision/beta/modeling/layers/nn_layers.py @@ -298,136 +298,6 @@ def pyramid_feature_fusion(inputs, target_level): return tf.math.add_n(resampled_feats) -class PanopticDeepLabFusion(tf.keras.layers.Layer): - """Creates a Panoptic DeepLab feature Fusion layer. - - This implements the feature fusion introduced in the paper: - Cheng et al. Panoptic-DeepLab - (https://arxiv.org/pdf/1911.10194.pdf) - """ - def __init__( - self, - level: int, - low_level: List[int] = [3, 2], - num_projection_filters: List[int] = [64, 32], - num_output_filters: int = 256, - activation: str = 'relu', - use_sync_bn: bool = False, - norm_momentum: float = 0.99, - norm_epsilon: float = 0.001, - kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - interpolation: str = 'bilinear', - **kwargs): - - """Initializes panoptic FPN feature fusion layer. - - Args: - level: An `int` level at which the decoder was appled at. - low_level: A list of `int` of minimum level to use in feature fusion. - num_filters: An `int` number of filters in conv2d layers. - activation: A `str` name of the activation function. - use_sync_bn: A `bool` that indicates whether to use synchronized batch - normalization across different replicas. - norm_momentum: A `float` of normalization momentum for the moving average. - norm_epsilon: A `float` added to variance to avoid dividing by zero. - kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for - Conv2D. Default is None. - bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. - interpolation: A `str` interpolation method for upsampling. Defaults to - `bilinear`. - **kwargs: Additional keyword arguments to be passed. - Returns: - A `float` `tf.Tensor` of shape [batch_size, feature_height, feature_width, - feature_channel]. 
- """ - super(PanopticDeepLabFusion, self).__init__(**kwargs) - - self._config_dict = { - 'level': level, - 'low_level': low_level, - 'num_projection_filters': num_projection_filters, - 'num_output_filters': num_output_filters, - 'activation': activation, - 'use_sync_bn': use_sync_bn, - 'norm_momentum': norm_momentum, - 'norm_epsilon': norm_epsilon, - 'kernel_regularizer': kernel_regularizer, - 'bias_regularizer': bias_regularizer, - 'interpolation': interpolation - } - if tf.keras.backend.image_data_format() == 'channels_last': - self._channel_axis = -1 - else: - self._channel_axis = 1 - self._activation = tf_utils.get_activation(activation) - - def build(self, input_shape: List[tf.TensorShape]): - conv_op = tf.keras.layers.Conv2D - conv_kwargs = { - 'padding': 'same', - 'use_bias': False, - 'kernel_initializer': tf.initializers.VarianceScaling(), - 'kernel_regularizer': self._config_dict['kernel_regularizer'], - } - bn_op = (tf.keras.layers.experimental.SyncBatchNormalization - if self._config_dict['use_sync_bn'] - else tf.keras.layers.BatchNormalization) - bn_kwargs = { - 'axis': self._channel_axis, - 'momentum': self._config_dict['norm_momentum'], - 'epsilon': self._config_dict['norm_epsilon'], - } - - self._projection_convs = [] - self._projection_norms = [] - self._fusion_convs = [] - self._fusion_norms = [] - for i in range(len(self._config_dict['low_level'])): - self._projection_convs.append( - conv_op( - filters=self._config_dict['num_projection_filters'][i], - kernel_size=1, - **conv_kwargs)) - self._fusion_convs.append( - conv_op( - filters=self._config_dict['num_output_filters'], - kernel_size=5, - **conv_kwargs)) - self._projection_norms.append(bn_op(**bn_kwargs)) - self._fusion_norms.append(bn_op(**bn_kwargs)) - - def call(self, inputs, training=None): - if training is None: - training = tf.keras.backend.learning_phase() - - backbone_output = inputs[0] - decoder_output = inputs[1][str(self._config_dict['level'])] - - x = decoder_output - for i in range(len(self._config_dict['low_level'])): - feature = backbone_output[str(self._config_dict['low_level'][i])] - feature = self._projection_convs[i](feature) - feature = self._projection_norms[i](feature, training=training) - feature = self._activation(feature) - - shape = tf.shape(feature) - x = tf.image.resize( - x, size=[shape[1], shape[2]], - method=self._config_dict['interpolation']) - x = tf.concat([x, feature], axis=self._channel_axis) - - x = self._fusion_convs[i](x) - x = self._fusion_norms[i](x, training=training) - x = self._activation(x) - return x - - def get_config(self) -> Mapping[str, Any]: - return self._config_dict - - @classmethod - def from_config(cls, config, custom_objects=None): - return cls(**config) class PanopticFPNFusion(tf.keras.Model): """Creates a Panoptic FPN feature Fusion layer. 
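A note on the label encoding used downstream: the post-processor imported in
the next patch merges semantic and instance predictions into a single panoptic
ID map, where panoptic_id = semantic_id * label_divisor + instance_id and
instance_id == 0 is reserved for `stuff` and crowd regions. A minimal sketch
of that convention; the encode/decode helpers below are illustrative only and
are not part of these patches:

import tensorflow as tf

label_divisor = 256

def encode_panoptic(semantic_id, instance_id):
  # Packs (semantic class, instance index) into a single panoptic ID.
  return semantic_id * label_divisor + instance_id

def decode_panoptic(panoptic_id):
  # Recovers the category mask and the instance mask from panoptic IDs.
  return panoptic_id // label_divisor, panoptic_id % label_divisor

panoptic = tf.constant([[encode_panoptic(2, 1), encode_panoptic(2, 2)],
                        [encode_panoptic(0, 0), encode_panoptic(3, 1)]])
category_mask, instance_mask = decode_panoptic(panoptic)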
From 2d739bb8ed89baff88a42bd5e42420b2968fdab7 Mon Sep 17 00:00:00 2001
From: srihari-humbarwadi
Date: Tue, 15 Feb 2022 21:22:45 +0530
Subject: [PATCH 24/26] import PostProcessor code from deeplab2

---
 .../modeling/layers/panoptic_deeplab_merge.py | 468 ++++++++++++++++++
 .../layers/panoptic_deeplab_merge_test.py     | 142 ++++++
 2 files changed, 610 insertions(+)
 create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge.py
 create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge_test.py

diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge.py
new file mode 100644
index 00000000000..217353fd88b
--- /dev/null
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge.py
@@ -0,0 +1,468 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""This file contains functions to post-process Panoptic-DeepLab results.
+
+Note that the postprocessing class and the supporting functions are branched
+from https://github.com/google-research/deeplab2/blob/main/model/post_processor/panoptic_deeplab.py
+"""
+
+import functools
+from typing import List, Tuple, Dict, Text
+
+import tensorflow as tf
+
+
+def _add_zero_padding(input_tensor: tf.Tensor, kernel_size: int,
+                      rank: int) -> tf.Tensor:
+  """Adds zero-padding to the input_tensor."""
+  pad_total = kernel_size - 1
+  pad_begin = pad_total // 2
+  pad_end = pad_total - pad_begin
+  if rank == 3:
+    return tf.pad(
+        input_tensor,
+        paddings=[[pad_begin, pad_end], [pad_begin, pad_end], [0, 0]])
+  else:
+    return tf.pad(
+        input_tensor,
+        paddings=[[0, 0], [pad_begin, pad_end], [pad_begin, pad_end], [0, 0]])
+
+
+def _get_semantic_predictions(semantic_logits: tf.Tensor) -> tf.Tensor:
+  """Computes the semantic classes from the predictions.
+
+  Args:
+    semantic_logits: A tf.Tensor of shape [batch, height, width, classes].
+
+  Returns:
+    A tf.Tensor containing the semantic class prediction of shape
+    [batch, height, width].
+  """
+  return tf.argmax(semantic_logits, axis=-1, output_type=tf.int32)
+
+
+def _get_instance_centers_from_heatmap(
+    center_heatmap: tf.Tensor,
+    center_threshold: float,
+    nms_kernel_size: int,
+    keep_k_centers: int) -> Tuple[tf.Tensor, tf.Tensor]:
+  """Computes a list of instance centers.
+
+  Args:
+    center_heatmap: A tf.Tensor of shape [height, width, 1].
+    center_threshold: A float setting the threshold for the center heatmap.
+    nms_kernel_size: An integer specifying the nms kernel size.
+    keep_k_centers: An integer specifying the number of centers to keep (K).
+      Non-positive values will keep all centers.
+
+  Returns:
+    A tuple of:
+    - tf.Tensor of shape [N, 2] containing N center coordinates (after
+      non-maximum suppression) in (y, x) order.
+    - tf.Tensor of shape [height, width] containing the center heatmap after
+      non-maximum suppression.
+  """
+  # Threshold center map.
+  center_heatmap = tf.where(
+      tf.greater(center_heatmap, center_threshold), center_heatmap, 0.0)
+
+  # Non-maximum suppression.
+  padded_map = _add_zero_padding(center_heatmap, nms_kernel_size, rank=3)
+  pooled_center_heatmap = tf.keras.backend.pool2d(
+      tf.expand_dims(padded_map, 0),
+      pool_size=(nms_kernel_size, nms_kernel_size),
+      strides=(1, 1),
+      padding='valid',
+      pool_mode='max')
+  center_heatmap = tf.where(
+      tf.equal(pooled_center_heatmap, center_heatmap), center_heatmap, 0.0)
+  center_heatmap = tf.squeeze(center_heatmap, axis=[0, 3])
+
+  # `centers` is of shape (N, 2) with (y, x) order of the second dimension.
+  centers = tf.where(tf.greater(center_heatmap, 0.0))
+
+  if keep_k_centers > 0 and tf.shape(centers)[0] > keep_k_centers:
+    topk_scores, _ = tf.math.top_k(
+        tf.reshape(center_heatmap, [-1]), keep_k_centers, sorted=False)
+    centers = tf.where(tf.greater(center_heatmap, topk_scores[-1]))
+
+  return centers, center_heatmap
+
+
+def _find_closest_center_per_pixel(centers: tf.Tensor,
+                                   center_offsets: tf.Tensor) -> tf.Tensor:
+  """Assigns all pixels to their closest center.
+
+  Args:
+    centers: A tf.Tensor of shape [N, 2] containing N centers with coordinate
+      order (y, x).
+    center_offsets: A tf.Tensor of shape [height, width, 2].
+
+  Returns:
+    A tf.Tensor of shape [height, width] containing the index of the closest
+    center, per pixel.
+  """
+  height = tf.shape(center_offsets)[0]
+  width = tf.shape(center_offsets)[1]
+
+  x_coord, y_coord = tf.meshgrid(tf.range(width), tf.range(height))
+  coord = tf.stack([y_coord, x_coord], axis=-1)
+
+  center_per_pixel = tf.cast(coord, tf.float32) + center_offsets
+
+  # centers: [N, 2] -> [N, 1, 2].
+  # center_per_pixel: [H, W, 2] -> [1, H*W, 2].
+  centers = tf.cast(tf.expand_dims(centers, 1), tf.float32)
+  center_per_pixel = tf.reshape(center_per_pixel, [height*width, 2])
+  center_per_pixel = tf.expand_dims(center_per_pixel, 0)
+
+  # distances: [N, H*W].
+  distances = tf.norm(centers - center_per_pixel, axis=-1)
+
+  return tf.reshape(tf.argmin(distances, axis=0), [height, width])
+
+
+def _get_instances_from_heatmap_and_offset(
+    semantic_segmentation: tf.Tensor, center_heatmap: tf.Tensor,
+    center_offsets: tf.Tensor, center_threshold: float,
+    thing_class_ids: tf.Tensor, nms_kernel_size: int,
+    keep_k_centers: int) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+  """Computes the instance assignment per pixel.
+
+  Args:
+    semantic_segmentation: A tf.Tensor containing the semantic labels of shape
+      [height, width].
+    center_heatmap: A tf.Tensor of shape [height, width, 1].
+    center_offsets: A tf.Tensor of shape [height, width, 2].
+    center_threshold: A float setting the threshold for the center heatmap.
+    thing_class_ids: A tf.Tensor of shape [N] containing N thing indices.
+    nms_kernel_size: An integer specifying the nms kernel size.
+    keep_k_centers: An integer specifying the number of centers to keep.
+      Negative values will keep all centers.
+
+  Returns:
+    A tuple of:
+    - tf.Tensor containing the instance segmentation (filtered with the `thing`
+      segmentation from the semantic segmentation output) with shape
+      [height, width].
+    - tf.Tensor containing the processed centermap with shape [height, width].
+    - tf.Tensor containing instance scores (where a higher "score" is a
+      reasonable signal of a more confident detection).
Will be of shape [height, width] + with the score for a pixel being the score of the instance it belongs to. + The scores will be zero for pixels in background/"stuff" regions. + """ + thing_segmentation = tf.zeros_like(semantic_segmentation) + for thing_id in thing_class_ids: + thing_segmentation = tf.where(tf.equal(semantic_segmentation, thing_id), + 1, + thing_segmentation) + + centers, processed_center_heatmap = _get_instance_centers_from_heatmap( + center_heatmap, center_threshold, nms_kernel_size, keep_k_centers) + if tf.shape(centers)[0] == 0: + return (tf.zeros_like(semantic_segmentation), processed_center_heatmap, + tf.zeros_like(processed_center_heatmap)) + + instance_center_index = _find_closest_center_per_pixel( + centers, center_offsets) + # Instance IDs should start with 1. So we use the index into the centers, but + # shifted by 1. + instance_segmentation = tf.cast(instance_center_index, tf.int32) + 1 + + # The value of the heatmap at an instance's center is used as the score + # for that instance. + instance_scores = tf.gather_nd(processed_center_heatmap, centers) + tf.debugging.assert_shapes([ + (centers, ('N', 2)), + (instance_scores, ('N',)), + ]) + # This will map the instance scores back to the image space: where each pixel + # has a value equal to the score of its instance. + flat_center_index = tf.reshape(instance_center_index, [-1]) + instance_score_map = tf.gather(instance_scores, flat_center_index) + instance_score_map = tf.reshape(instance_score_map, + tf.shape(instance_segmentation)) + instance_score_map *= tf.cast(thing_segmentation, tf.float32) + + return (thing_segmentation * instance_segmentation, processed_center_heatmap, + instance_score_map) + + +@tf.function +def _get_panoptic_predictions( + semantic_logits: tf.Tensor, center_heatmap: tf.Tensor, + center_offsets: tf.Tensor, center_threshold: float, + thing_class_ids: tf.Tensor, label_divisor: int, stuff_area_limit: int, + void_label: int, nms_kernel_size: int, keep_k_centers: int +) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]: + """Computes the semantic class and instance ID per pixel. + + Args: + semantic_logits: A tf.Tensor of shape [batch, height, width, classes]. + center_heatmap: A tf.Tensor of shape [batch, height, width, 1]. + center_offsets: A tf.Tensor of shape [batch, height, width, 2]. + center_threshold: A float setting the threshold for the center heatmap. + thing_class_ids: A tf.Tensor of shape [N] containing N thing indices. + label_divisor: An integer specifying the label divisor of the dataset. + stuff_area_limit: An integer specifying the number of pixels that stuff + regions need to have at least. The stuff region will be included in the + panoptic prediction, only if its area is larger than the limit; otherwise, + it will be re-assigned as void_label. + void_label: An integer specifying the void label. + nms_kernel_size: An integer specifying the nms kernel size. + keep_k_centers: An integer specifying the number of centers to keep. + Negative values will keep all centers. + + Returns: + A tuple of: + - the panoptic prediction as tf.Tensor with shape [batch, height, width]. + - the semantic prediction as tf.Tensor with shape [batch, height, width]. + - the instance prediction as tf.Tensor with shape [batch, height, width]. + - the centermap prediction as tf.Tensor with shape [batch, height, width]. + - the instance score maps as tf.Tensor with shape [batch, height, width]. 
+  """
+  semantic_prediction = _get_semantic_predictions(semantic_logits)
+  batch_size = tf.shape(semantic_logits)[0]
+
+  instance_map_lists = tf.TensorArray(
+      tf.int32, size=batch_size, dynamic_size=False)
+  center_map_lists = tf.TensorArray(
+      tf.float32, size=batch_size, dynamic_size=False)
+  instance_score_map_lists = tf.TensorArray(
+      tf.float32, size=batch_size, dynamic_size=False)
+
+  for i in tf.range(batch_size):
+    (instance_map, center_map,
+     instance_score_map) = _get_instances_from_heatmap_and_offset(
+         semantic_prediction[i, ...], center_heatmap[i, ...],
+         center_offsets[i, ...], center_threshold, thing_class_ids,
+         nms_kernel_size, keep_k_centers)
+    instance_map_lists = instance_map_lists.write(i, instance_map)
+    center_map_lists = center_map_lists.write(i, center_map)
+    instance_score_map_lists = instance_score_map_lists.write(
+        i, instance_score_map)
+
+  # This does not work with unknown shapes.
+  instance_maps = instance_map_lists.stack()
+  center_maps = center_map_lists.stack()
+  instance_score_maps = instance_score_map_lists.stack()
+
+  panoptic_prediction = _merge_semantic_and_instance_maps(
+      semantic_prediction, instance_maps, thing_class_ids, label_divisor,
+      stuff_area_limit, void_label)
+  return (panoptic_prediction, semantic_prediction, instance_maps, center_maps,
+          instance_score_maps)
+
+
+@tf.function
+def _merge_semantic_and_instance_maps(
+    semantic_prediction: tf.Tensor,
+    instance_maps: tf.Tensor,
+    thing_class_ids: tf.Tensor,
+    label_divisor: int,
+    stuff_area_limit: int,
+    void_label: int) -> tf.Tensor:
+  """Merges semantic and instance maps to obtain panoptic segmentation.
+
+  This function merges the semantic segmentation and class-agnostic
+  instance segmentation to form the panoptic segmentation. In particular,
+  the class label of each instance mask is inferred from the majority
+  votes from the corresponding pixels in the semantic segmentation. This
+  operation was first proposed in the DeeperLab paper and adopted by
+  Panoptic-DeepLab.
+
+  - DeeperLab: Single-Shot Image Parser, T-J Yang, et al. arXiv:1902.05093.
+  - Panoptic-DeepLab, B. Cheng, et al. In CVPR, 2020.
+
+  Note that this function only supports batch = 1 for simplicity. Additionally,
+  this function has a slightly different implementation from the provided
+  TensorFlow implementation `merge_ops` but with a similar performance. This
+  function is mainly used as a backup solution when you could not successfully
+  compile the provided TensorFlow implementation. To reproduce our results,
+  please use the provided TensorFlow implementation (i.e., not use this
+  function, but the `merge_ops.merge_semantic_and_instance_maps`).
+
+  Args:
+    semantic_prediction: A tf.Tensor of shape [batch, height, width].
+    instance_maps: A tf.Tensor of shape [batch, height, width].
+    thing_class_ids: A tf.Tensor of shape [N] containing N thing indices.
+    label_divisor: An integer specifying the label divisor of the dataset.
+    stuff_area_limit: An integer specifying the number of pixels that stuff
+      regions need to have at least. The stuff region will be included in the
+      panoptic prediction, only if its area is larger than the limit; otherwise,
+      it will be re-assigned as void_label.
+    void_label: An integer specifying the void label.
+
+  Returns:
+    panoptic_prediction: A tf.Tensor with shape [batch, height, width].
+  """
+  prediction_shape = semantic_prediction.get_shape().as_list()
+  # This implementation only supports batch size of 1. Since model construction
+  # might lose batch size information (and leave it to None), override it here.
+  prediction_shape[0] = 1
+  semantic_prediction = tf.ensure_shape(semantic_prediction, prediction_shape)
+  instance_maps = tf.ensure_shape(instance_maps, prediction_shape)
+
+  # Default panoptic_prediction to have semantic label = void_label.
+  panoptic_prediction = tf.ones_like(
+      semantic_prediction) * void_label * label_divisor
+
+  # Start to paste predicted `thing` regions to panoptic_prediction.
+  # Infer `thing` segmentation regions from semantic prediction.
+  semantic_thing_segmentation = tf.zeros_like(semantic_prediction,
+                                              dtype=tf.bool)
+  for thing_class in thing_class_ids:
+    semantic_thing_segmentation = tf.math.logical_or(
+        semantic_thing_segmentation,
+        semantic_prediction == thing_class)
+  # Keep track of how many instances for each semantic label.
+  num_instance_per_semantic_label = tf.TensorArray(
+      tf.int32, size=0, dynamic_size=True, clear_after_read=False)
+  instance_ids, _ = tf.unique(tf.reshape(instance_maps, [-1]))
+  for instance_id in instance_ids:
+    # Instance ID 0 is reserved for crowd region.
+    if instance_id == 0:
+      continue
+    thing_mask = tf.math.logical_and(instance_maps == instance_id,
+                                     semantic_thing_segmentation)
+    if tf.reduce_sum(tf.cast(thing_mask, tf.int32)) == 0:
+      continue
+    semantic_bin_counts = tf.math.bincount(
+        tf.boolean_mask(semantic_prediction, thing_mask))
+    semantic_majority = tf.cast(
+        tf.math.argmax(semantic_bin_counts), tf.int32)
+
+    while num_instance_per_semantic_label.size() <= semantic_majority:
+      num_instance_per_semantic_label = num_instance_per_semantic_label.write(
+          num_instance_per_semantic_label.size(), 0)
+
+    new_instance_id = (
+        num_instance_per_semantic_label.read(semantic_majority) + 1)
+    num_instance_per_semantic_label = num_instance_per_semantic_label.write(
+        semantic_majority, new_instance_id)
+    panoptic_prediction = tf.where(
+        thing_mask,
+        tf.ones_like(panoptic_prediction) * semantic_majority * label_divisor
+        + new_instance_id,
+        panoptic_prediction)
+
+  # Done with `num_instance_per_semantic_label` tensor array.
+  num_instance_per_semantic_label.close()
+
+  # Start to paste predicted `stuff` regions to panoptic prediction.
+  instance_stuff_regions = instance_maps == 0
+  semantic_ids, _ = tf.unique(tf.reshape(semantic_prediction, [-1]))
+  for semantic_id in semantic_ids:
+    if tf.reduce_sum(tf.cast(thing_class_ids == semantic_id, tf.int32)) > 0:
+      continue
+    # Check stuff area.
+    stuff_mask = tf.math.logical_and(semantic_prediction == semantic_id,
+                                     instance_stuff_regions)
+    stuff_area = tf.reduce_sum(tf.cast(stuff_mask, tf.int32))
+    if stuff_area >= stuff_area_limit:
+      panoptic_prediction = tf.where(
+          stuff_mask,
+          tf.ones_like(panoptic_prediction) * semantic_id * label_divisor,
+          panoptic_prediction)
+
+  return panoptic_prediction
+
+
+class PostProcessor(tf.keras.layers.Layer):
+  """This class contains code of a Panoptic-Deeplab post-processor."""
+
+  def __init__(
+      self,
+      center_score_threshold: float,
+      thing_class_ids: List[int],
+      label_divisor: int,
+      stuff_area_limit: int,
+      ignore_label: int,
+      nms_kernel: int,
+      keep_k_centers: int,
+      **kwargs):
+    """Initializes a Panoptic-Deeplab post-processor.
+
+    Args:
+      center_score_threshold: A float setting the threshold for the center
+        heatmap.
+      thing_class_ids: An integer list shape [N] containing N thing indices.
+      label_divisor: An integer specifying the label divisor of the dataset.
+      stuff_area_limit: An integer specifying the number of pixels that stuff
+        regions need to have at least. The stuff region will be included in
+        the panoptic prediction, only if its area is larger than the limit;
+        otherwise, it will be re-assigned as ignore_label.
+      ignore_label: An integer specifying the label of ignored/void regions.
+      nms_kernel: An integer specifying the nms kernel size.
+      keep_k_centers: An integer specifying the number of centers to keep.
+        Negative values will keep all centers.
+      **kwargs: Additional keyword arguments to be passed.
+    """
+    super(PostProcessor, self).__init__(**kwargs)
+
+    self._config_dict = {
+        'center_score_threshold': center_score_threshold,
+        'thing_class_ids': thing_class_ids,
+        'label_divisor': label_divisor,
+        'stuff_area_limit': stuff_area_limit,
+        'ignore_label': ignore_label,
+        'nms_kernel': nms_kernel,
+        'keep_k_centers': keep_k_centers
+    }
+    self._post_processor = functools.partial(
+        _get_panoptic_predictions,
+        center_threshold=center_score_threshold,
+        thing_class_ids=tf.convert_to_tensor(thing_class_ids),
+        label_divisor=label_divisor,
+        stuff_area_limit=stuff_area_limit,
+        void_label=ignore_label,
+        nms_kernel_size=nms_kernel,
+        keep_k_centers=keep_k_centers)
+
+  def call(self, result_dict: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
+    """Performs the post-processing given model predicted results.
+
+    Args:
+      result_dict: A dictionary of tf.Tensor containing model results. The dict
+        has to contain
+        - segmentation_outputs
+        - instance_center_prediction
+        - instance_center_regression
+
+    Returns:
+      The post-processed dict of tf.Tensor, containing the following keys:
+      - panoptic_outputs
+      - category_mask
+      - instance_mask
+      - instance_centers
+      - instance_scores
+    """
+    processed_dict = {}
+    (processed_dict['panoptic_outputs'],
+     processed_dict['category_mask'],
+     processed_dict['instance_mask'],
+     processed_dict['instance_centers'],
+     processed_dict['instance_scores']
+    ) = self._post_processor(
+        tf.nn.softmax(result_dict['segmentation_outputs'], axis=-1),
+        result_dict['instance_center_prediction'],
+        result_dict['instance_center_regression'])
+    return processed_dict
+
+  def get_config(self):
+    return self._config_dict
+
+  @classmethod
+  def from_config(cls, config):
+    return cls(**config)
diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge_test.py
new file mode 100644
index 00000000000..699155f044a
--- /dev/null
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge_test.py
@@ -0,0 +1,142 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for panoptic_deeplab_merge.py.
+ +Note that the tests are branched from +https://raw.githubusercontent.com/google-research/deeplab2/main/model/post_processor/panoptic_deeplab_test.py +""" +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge + + +class PostProcessingTest(tf.test.TestCase): + + def test_py_func_merge_semantic_and_instance_maps_can_run(self): + batch = 1 + height = 5 + width = 5 + semantic_prediction = tf.random.uniform((batch, height, width), + minval=0, + maxval=20, + dtype=tf.int32) + instance_maps = tf.random.uniform((batch, height, width), + minval=0, + maxval=3, + dtype=tf.int32) + thing_class_ids = tf.convert_to_tensor([1, 2, 3]) + label_divisor = 256 + stuff_area_limit = 3 + void_label = 255 + panoptic_prediction = panoptic_deeplab_merge._merge_semantic_and_instance_maps( + semantic_prediction, instance_maps, thing_class_ids, label_divisor, + stuff_area_limit, void_label) + self.assertListEqual(semantic_prediction.get_shape().as_list(), + panoptic_prediction.get_shape().as_list()) + + def test_merge_semantic_and_instance_maps_with_a_simple_example(self): + semantic_prediction = tf.convert_to_tensor( + [[[0, 0, 0, 0], + [0, 1, 1, 0], + [0, 2, 2, 0], + [2, 2, 3, 3]]], dtype=tf.int32) + instance_maps = tf.convert_to_tensor( + [[[0, 0, 0, 0], + [0, 0, 0, 0], + [0, 1, 1, 0], + [2, 2, 3, 3]]], dtype=tf.int32) + thing_class_ids = tf.convert_to_tensor([2, 3]) + label_divisor = 256 + stuff_area_limit = 3 + void_label = 255 + # The expected_panoptic_prediction is computed as follows. + # For `thing` segmentation, instance 1, 2, and 3 are kept, but instance 3 + # will have a new instance ID 1, since it is the first instance in its + # own semantic label. + # For `stuff` segmentation, class-0 region is kept, while class-1 region + # is re-labeled as `void_label * label_divisor` since its area is smaller + # than stuff_area_limit. + expected_panoptic_prediction = tf.convert_to_tensor( + [[[0, 0, 0, 0], + [0, void_label * label_divisor, void_label * label_divisor, 0], + [0, 2 * label_divisor + 1, 2 * label_divisor + 1, 0], + [2 * label_divisor + 2, 2 * label_divisor + 2, 3 * label_divisor + 1, + 3 * label_divisor + 1]]], dtype=tf.int32) + panoptic_prediction = panoptic_deeplab_merge._merge_semantic_and_instance_maps( + semantic_prediction, instance_maps, thing_class_ids, label_divisor, + stuff_area_limit, void_label) + np.testing.assert_equal(expected_panoptic_prediction.numpy(), + panoptic_prediction.numpy()) + + def test_gets_panoptic_predictions_with_score(self): + batch = 1 + height = 5 + width = 5 + classes = 3 + + semantic_logits = tf.random.uniform((batch, 1, 1, classes)) + semantic_logits = tf.tile(semantic_logits, (1, height, width, 1)) + + center_heatmap = tf.convert_to_tensor([ + [1.0, 0.0, 0.0, 0.0, 0.0], + [0.8, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.1, 0.7], + [0.0, 0.0, 0.0, 0.0, 0.2], + ], dtype=tf.float32) + center_heatmap = tf.expand_dims(center_heatmap, 0) + center_heatmap = tf.expand_dims(center_heatmap, 3) + + center_offsets = tf.zeros((batch, height, width, 2)) + center_threshold = 0.0 + thing_class_ids = tf.range(classes) # No "stuff" classes. 
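+    # With center_threshold = 0.0, every positive heatmap value survives
+    # thresholding; the 3x3 max-pool NMS below then suppresses 0.8, 0.1 and
+    # 0.2 (each adjacent to a stronger peak), leaving two centers with scores
+    # 1.0 and 0.7, both of which are retained by keep_k_centers = 2.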
+ label_divisor = 256 + stuff_area_limit = 16 + void_label = classes + nms_kernel_size = 3 + keep_k_centers = 2 + + result = panoptic_deeplab_merge._get_panoptic_predictions( + semantic_logits, center_heatmap, center_offsets, center_threshold, + thing_class_ids, label_divisor, stuff_area_limit, void_label, + nms_kernel_size, keep_k_centers) + instance_maps = result[2].numpy() + instance_scores = result[4].numpy() + + self.assertSequenceEqual(instance_maps.shape, (batch, height, width)) + expected_instances = [[ + [1, 1, 1, 1, 2], + [1, 1, 1, 2, 2], + [1, 1, 2, 2, 2], + [1, 2, 2, 2, 2], + [1, 2, 2, 2, 2], + ]] + np.testing.assert_array_equal(instance_maps, expected_instances) + + self.assertSequenceEqual(instance_scores.shape, (batch, height, width)) + expected_instance_scores = [[ + [1.0, 1.0, 1.0, 1.0, 0.7], + [1.0, 1.0, 1.0, 0.7, 0.7], + [1.0, 1.0, 0.7, 0.7, 0.7], + [1.0, 0.7, 0.7, 0.7, 0.7], + [1.0, 0.7, 0.7, 0.7, 0.7], + ]] + np.testing.assert_array_almost_equal(instance_scores, + expected_instance_scores) + + +if __name__ == '__main__': + tf.test.main() From 4ace44be9beb9783fb8d1c716b8d8c13a3a9fd16 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Tue, 15 Feb 2022 21:23:57 +0530 Subject: [PATCH 25/26] added post processing layer --- .../panoptic_maskrcnn/modeling/factory.py | 14 +++- .../modeling/panoptic_deeplab_model.py | 11 ++- .../modeling/panoptic_deeplab_model_test.py | 75 +++++++++++++------ 3 files changed, 74 insertions(+), 26 deletions(-) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py index 64e1760e2f5..7ac1c9d5b0a 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py @@ -26,6 +26,7 @@ from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_maskrcnn_model from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_segmentation_generator +from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge def build_panoptic_maskrcnn( @@ -220,11 +221,22 @@ def build_panoptic_deeplab( norm_epsilon=norm_activation_config.norm_epsilon, kernel_regularizer=l2_regularizer) + post_processing_config = model_config.post_processor + post_processor = panoptic_deeplab_merge.PostProcessor( + center_score_threshold=post_processing_config.center_score_threshold, + thing_class_ids=post_processing_config.thing_class_ids, + label_divisor=post_processing_config.label_divisor, + stuff_area_limit=post_processing_config.stuff_area_limit, + ignore_label=post_processing_config.ignore_label, + nms_kernel=post_processing_config.nms_kernel, + keep_k_centers=post_processing_config.keep_k_centers) + model = panoptic_deeplab_model.PanopticDeeplabModel( backbone=backbone, semantic_decoder=semantic_decoder, instance_decoder=instance_decoder, semantic_head=semantic_head, - instance_head=instance_head) + instance_head=instance_head, + post_processor=post_processor) return model diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py index 2e963e909c9..d7f3dae752d 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py +++ 
b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py
@@ -16,7 +16,7 @@
 from typing import Any, Mapping, Optional, Union
 
 import tensorflow as tf
-
+from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
 
 @tf.keras.utils.register_keras_serializable(package='Vision')
 class PanopticDeeplabModel(tf.keras.Model):
@@ -29,6 +29,7 @@ def __init__(
       semantic_head: tf.keras.layers.Layer,
       instance_head: tf.keras.layers.Layer,
       instance_decoder: Optional[tf.keras.Model] = None,
+      post_processor: Optional[panoptic_deeplab_merge.PostProcessor] = None,
       **kwargs):
     """
     Args:
@@ -46,13 +47,15 @@ def __init__(
         'semantic_decoder': semantic_decoder,
         'instance_decoder': instance_decoder,
         'semantic_head': semantic_head,
-        'instance_head': instance_head
+        'instance_head': instance_head,
+        'post_processor': post_processor
     }
     self.backbone = backbone
     self.semantic_decoder = semantic_decoder
     self.instance_decoder = instance_decoder
     self.semantic_head = semantic_head
     self.instance_head = instance_head
+    self.post_processor = post_processor
 
   def call(self, inputs: tf.Tensor, training: bool = None) -> tf.Tensor:
     if training is None:
@@ -83,6 +86,10 @@ def call(self, inputs: tf.Tensor, training: bool = None) -> tf.Tensor:
         'instance_center_regression':
             instance_outputs['instance_center_regression'],
     }
+    if training:
+      return outputs
+
+    outputs = self.post_processor(outputs)
     return outputs
 
   @property
diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py
index ea16e8dc34e..ce4544a7c8b 100644
--- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py
@@ -24,6 +24,7 @@
 from official.vision.beta.modeling.decoders import aspp
 from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
 from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model
+from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
 
 
 class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase):
@@ -37,8 +38,9 @@ class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase):
   def test_panoptic_deeplab_network_creation(
       self, input_size, level, low_level, shared_decoder, training):
     """Test for creation of a panoptic deep lab network."""
+    batch_size = 2 if training else 1
     num_classes = 10
-    inputs = np.random.rand(2, input_size, input_size, 3)
+    inputs = np.random.rand(batch_size, input_size, input_size, 3)
 
     tf.keras.backend.set_image_data_format('channels_last')
     backbone = backbones.ResNet(model_id=50)
@@ -62,35 +64,52 @@ def test_panoptic_deeplab_network_creation(
         low_level=low_level,
         low_level_num_filters=(64, 32))
 
+    post_processor = panoptic_deeplab_merge.PostProcessor(
+        center_score_threshold=0.1,
+        thing_class_ids=[1, 2, 3, 4],
+        label_divisor=256,
+        stuff_area_limit=4096,
+        ignore_label=0,
+        nms_kernel=41,
+        keep_k_centers=41)
+
     model = panoptic_deeplab_model.PanopticDeeplabModel(
         backbone=backbone,
         semantic_decoder=semantic_decoder,
         instance_decoder=instance_decoder,
        semantic_head=semantic_head,
-        instance_head=instance_head)
+        instance_head=instance_head,
+        post_processor=post_processor)
 
     outputs = model(inputs, training=training)
-
-    self.assertIn('segmentation_outputs', outputs)
-    self.assertIn('instance_center_prediction', outputs)
-    self.assertIn('instance_center_regression', outputs)
-
-    self.assertAllEqual(
-        [2, input_size // (2**low_level[-1]),
-         input_size //(2**low_level[-1]),
-         num_classes],
-        outputs['segmentation_outputs'].numpy().shape)
-    self.assertAllEqual(
-        [2, input_size // (2**low_level[-1]),
-         input_size // (2**low_level[-1]),
-         1],
-        outputs['instance_center_prediction'].numpy().shape)
-    self.assertAllEqual(
-        [2, input_size // (2**low_level[-1]),
-         input_size // (2**low_level[-1]),
-         2],
-        outputs['instance_center_regression'].numpy().shape)
+    if training:
+      self.assertIn('segmentation_outputs', outputs)
+      self.assertIn('instance_center_prediction', outputs)
+      self.assertIn('instance_center_regression', outputs)
+
+      self.assertAllEqual(
+          [2, input_size // (2**low_level[-1]),
+           input_size //(2**low_level[-1]),
+           num_classes],
+          outputs['segmentation_outputs'].numpy().shape)
+      self.assertAllEqual(
+          [2, input_size // (2**low_level[-1]),
+           input_size // (2**low_level[-1]),
+           1],
+          outputs['instance_center_prediction'].numpy().shape)
+      self.assertAllEqual(
+          [2, input_size // (2**low_level[-1]),
+           input_size // (2**low_level[-1]),
+           2],
+          outputs['instance_center_regression'].numpy().shape)
+
+    else:
+      self.assertIn('panoptic_outputs', outputs)
+      self.assertIn('category_mask', outputs)
+      self.assertIn('instance_mask', outputs)
+      self.assertIn('instance_centers', outputs)
+      self.assertIn('instance_scores', outputs)
 
   @combinations.generate(
       combinations.combine(
@@ -122,12 +141,22 @@ def test_serialize_deserialize(self, level, low_level, shared_decoder):
         low_level=low_level,
         low_level_num_filters=(64, 32))
 
+    post_processor = panoptic_deeplab_merge.PostProcessor(
+        center_score_threshold=0.1,
+        thing_class_ids=[1, 2, 3, 4],
+        label_divisor=256,
+        stuff_area_limit=4096,
+        ignore_label=0,
+        nms_kernel=41,
+        keep_k_centers=41)
+
     model = panoptic_deeplab_model.PanopticDeeplabModel(
         backbone=backbone,
         semantic_decoder=semantic_decoder,
        instance_decoder=instance_decoder,
        semantic_head=semantic_head,
-        instance_head=instance_head)
+        instance_head=instance_head,
+        post_processor=post_processor)
 
     config = model.get_config()
     new_model = panoptic_deeplab_model.PanopticDeeplabModel.from_config(config)

From 8b60a5a8c7d4fd0e56d8a1098d9432f49ce0caad Mon Sep 17 00:00:00 2001
From: srihari-humbarwadi
Date: Tue, 15 Feb 2022 21:24:18 +0530
Subject: [PATCH 26/26] added config for post processing layer

---
 .../panoptic_maskrcnn/configs/panoptic_deeplab.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py
index 06001b940f1..11d5290494f 100644
--- a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py
@@ -50,10 +50,20 @@ class InstanceHead(PanopticDeeplabHead):
   """Instance head config."""
   prediction_kernel_size: int = 1
 
-# pytype: disable=wrong-keyword-args
+@dataclasses.dataclass
+class PanopticDeeplabPostProcessor(hyperparams.Config):
+  """Panoptic Deeplab PostProcessing config."""
+  center_score_threshold: float = 0.1
+  thing_class_ids: List[int] = dataclasses.field(default_factory=list)
+  label_divisor: int = 256 * 256 * 256
+  stuff_area_limit: int = 4096
+  ignore_label: int = 0
+  nms_kernel: int = 41
+  keep_k_centers: int = 400
+
 @dataclasses.dataclass
 class PanopticDeeplab(hyperparams.Config):
-  """Panoptic Mask R-CNN model config."""
+  """Panoptic Deeplab model config."""
   num_classes: int = 0
   input_size: List[int] = dataclasses.field(default_factory=list)
   min_level: int = 3
@@ -65,3 +75,4 @@ class PanopticDeeplab(hyperparams.Config):
   semantic_head: SemanticHead = SemanticHead()
   instance_head: InstanceHead = InstanceHead()
   shared_decoder: bool = False
+  post_processor: PanopticDeeplabPostProcessor = PanopticDeeplabPostProcessor()
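
A minimal end-to-end sketch of how the pieces added in patches 24-26 fit
together; the config values below are arbitrary examples, and the random
tensors stand in for real model outputs:

import tensorflow as tf

from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge

# Build the post-processor from the new config block (patch 26); other
# fields keep their defaults.
pp_config = panoptic_deeplab_cfg.PanopticDeeplabPostProcessor(
    thing_class_ids=[1, 2, 3, 4], label_divisor=256)
post_processor = panoptic_deeplab_merge.PostProcessor(
    center_score_threshold=pp_config.center_score_threshold,
    thing_class_ids=pp_config.thing_class_ids,
    label_divisor=pp_config.label_divisor,
    stuff_area_limit=pp_config.stuff_area_limit,
    ignore_label=pp_config.ignore_label,
    nms_kernel=pp_config.nms_kernel,
    keep_k_centers=pp_config.keep_k_centers)

# At inference time PanopticDeeplabModel feeds its raw head outputs through
# the post-processor; `segmentation_outputs` holds logits, which
# PostProcessor.call normalizes with a softmax internally.
batch, height, width, num_classes = 1, 65, 65, 5
outputs = {
    'segmentation_outputs': tf.random.uniform(
        (batch, height, width, num_classes)),
    'instance_center_prediction': tf.random.uniform(
        (batch, height, width, 1)),
    'instance_center_regression': tf.zeros((batch, height, width, 2)),
}
results = post_processor(outputs)
# `results` contains: panoptic_outputs, category_mask, instance_mask,
# instance_centers and instance_scores.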