From 54fae0423e396e26fe4e3924e98f6952241bda4a Mon Sep 17 00:00:00 2001
From: srihari-humbarwadi
Date: Thu, 13 Jan 2022 00:24:18 +0530
Subject: [PATCH 01/26] Added `PanopticDeepLabFusion` layer

---
 .../vision/beta/modeling/layers/nn_layers.py | 130 ++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/official/vision/beta/modeling/layers/nn_layers.py b/official/vision/beta/modeling/layers/nn_layers.py
index ed1893c268c..a690235503e 100644
--- a/official/vision/beta/modeling/layers/nn_layers.py
+++ b/official/vision/beta/modeling/layers/nn_layers.py
@@ -320,6 +320,136 @@ def pyramid_feature_fusion(inputs, target_level):
   return tf.math.add_n(resampled_feats)
 
 
+class PanopticDeepLabFusion(tf.keras.layers.Layer):
+  """Creates a Panoptic DeepLab feature fusion layer.
+
+  This implements the feature fusion introduced in the paper:
+  Cheng et al. Panoptic-DeepLab
+  (https://arxiv.org/pdf/1911.10194.pdf)
+  """
+
+  def __init__(
+      self,
+      level: int,
+      low_level: List[int] = [3, 2],
+      num_projection_filters: List[int] = [64, 32],
+      num_output_filters: int = 256,
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      interpolation: str = 'bilinear',
+      **kwargs):
+    """Initializes a Panoptic DeepLab feature fusion layer.
+
+    Args:
+      level: An `int` level at which the decoder was applied.
+      low_level: A list of `int` backbone levels to fuse with the decoder
+        output, ordered from higher to lower level.
+      num_projection_filters: A list of `int` numbers of filters in the 1x1
+        projection convolutions, one per entry in `low_level`.
+      num_output_filters: An `int` number of filters in the 5x5 fusion
+        conv2d layers.
+      activation: A `str` name of the activation function.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+      norm_momentum: A `float` of normalization momentum for the moving
+        average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default is None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D.
+      interpolation: A `str` interpolation method for upsampling. Defaults to
+        `bilinear`.
+      **kwargs: Additional keyword arguments to be passed.
+
+    Calling the layer returns a `float` `tf.Tensor` of shape
+    [batch_size, feature_height, feature_width, num_output_filters].
+ """ + super(PanopticDeepLabFusion, self).__init__(**kwargs) + + self._config_dict = { + 'level': level, + 'low_level': low_level, + 'num_projection_filters': num_projection_filters, + 'num_output_filters': num_output_filters, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + 'interpolation': interpolation + } + if tf.keras.backend.image_data_format() == 'channels_last': + self._channel_axis = -1 + else: + self._channel_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape: List[tf.TensorShape]): + conv_op = tf.keras.layers.Conv2D + conv_kwargs = { + 'padding': 'same', + 'use_bias': False, + 'kernel_initializer': tf.initializers.VarianceScaling(), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + } + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._channel_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + self._projection_convs = [] + self._projection_norms = [] + self._fusion_convs = [] + self._fusion_norms = [] + for i in range(len(self._config_dict['low_level'])): + self._projection_convs.append( + conv_op( + filters=self._config_dict['num_projection_filters'][i], + kernel_size=1, + **conv_kwargs)) + self._fusion_convs.append( + conv_op( + filters=self._config_dict['num_output_filters'], + kernel_size=5, + **conv_kwargs)) + self._projection_norms.append(bn_op(**bn_kwargs)) + self._fusion_norms.append(bn_op(**bn_kwargs)) + + def call(self, inputs, training=None): + if training is None: + training = tf.keras.backend.learning_phase() + + backbone_output = inputs[0] + decoder_output = inputs[1][str(self._config_dict['level'])] + + x = decoder_output + for i in range(len(self._config_dict['low_level'])): + feature = backbone_output[str(self._config_dict['low_level'][i])] + feature = self._projection_convs[i](feature) + feature = self._projection_norms[i](feature, training=training) + feature = self._activation(feature) + + shape = tf.shape(feature) + x = tf.image.resize( + x, size=[shape[1], shape[2]], + method=self._config_dict['interpolation']) + x = tf.concat([x, feature], axis=self._channel_axis) + + x = self._fusion_convs[i](x) + x = self._fusion_norms[i](x, training=training) + x = self._activation(x) + return x + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) class PanopticFPNFusion(tf.keras.Model): """Creates a Panoptic FPN feature Fusion layer. 
From 78949f92e6529d27a665193c0cc152ccfe0df163 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:28:35 +0530 Subject: [PATCH 02/26] added new feature_fusion: panoptic_deeplab_fusion --- .../beta/modeling/heads/segmentation_heads.py | 74 ++++++++++++------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/official/vision/beta/modeling/heads/segmentation_heads.py b/official/vision/beta/modeling/heads/segmentation_heads.py index b87e98cdd44..ecd798271a3 100644 --- a/official/vision/beta/modeling/heads/segmentation_heads.py +++ b/official/vision/beta/modeling/heads/segmentation_heads.py @@ -32,13 +32,14 @@ def __init__( num_convs: int = 2, num_filters: int = 256, use_depthwise_convolution: bool = False, + kernel_size: int = 3, prediction_kernel_size: int = 1, upsample_factor: int = 1, feature_fusion: Optional[str] = None, decoder_min_level: Optional[int] = None, decoder_max_level: Optional[int] = None, - low_level: int = 2, - low_level_num_filters: int = 48, + low_level: Union[int, List[int]] = 2, + low_level_num_filters: Union[int, List[int]] = 48, num_decoder_filters: int = 256, activation: str = 'relu', use_sync_bn: bool = False, @@ -59,6 +60,8 @@ def __init__( Default is 256. use_depthwise_convolution: A bool to specify if use depthwise separable convolutions. + kernel_size: An `int` number to specify the kernel size of the + stacked convolutions before the last prediction layer. prediction_kernel_size: An `int` number to specify the kernel size of the prediction layer. upsample_factor: An `int` number to specify the upsampling factor to @@ -100,6 +103,7 @@ def __init__( 'num_convs': num_convs, 'num_filters': num_filters, 'use_depthwise_convolution': use_depthwise_convolution, + 'kernel_size': kernel_size, 'prediction_kernel_size': prediction_kernel_size, 'upsample_factor': upsample_factor, 'feature_fusion': feature_fusion, @@ -123,11 +127,12 @@ def __init__( def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): """Creates the variables of the segmentation head.""" + kernel_size = self._config_dict['kernel_size'] use_depthwise_convolution = self._config_dict['use_depthwise_convolution'] random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01) conv_op = tf.keras.layers.Conv2D conv_kwargs = { - 'kernel_size': 3 if not use_depthwise_convolution else 1, + 'kernel_size': kernel_size if not use_depthwise_convolution else 1, 'padding': 'same', 'use_bias': False, 'kernel_initializer': random_initializer, @@ -167,6 +172,19 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): kernel_regularizer=self._config_dict['kernel_regularizer'], bias_regularizer=self._config_dict['bias_regularizer']) + if self._config_dict['feature_fusion'] == 'panoptic_deeplab_fusion': + self._panoptic_deeplab_fusion = nn_layers.PanopticDeepLabFusion( + level=self._config_dict['level'], + low_level=self._config_dict['low_level'], + num_projection_filters=self._config_dict['low_level_num_filters'], + num_output_filters=self._config_dict['num_filters'], + activation=self._config_dict['activation'], + use_sync_bn=self._config_dict['use_sync_bn'], + norm_momentum=self._config_dict['norm_momentum'], + norm_epsilon=self._config_dict['norm_epsilon'], + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + # Segmentation head layers. 
self._convs = [] self._norms = [] @@ -192,7 +210,7 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): norm_name = 'segmentation_head_norm_{}'.format(i) self._norms.append(bn_op(name=norm_name, **bn_kwargs)) - self._classifier = conv_op( + self._prediction_conv = conv_op( name='segmentation_output', filters=self._config_dict['num_classes'], kernel_size=self._config_dict['prediction_kernel_size'], @@ -204,26 +222,7 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): super(SegmentationHead, self).build(input_shape) - def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], - Union[tf.Tensor, Mapping[str, tf.Tensor]]]): - """Forward pass of the segmentation head. - - It supports both a tuple of 2 tensors or 2 dictionaries. The first is - backbone endpoints, and the second is decoder endpoints. When inputs are - tensors, they are from a single level of feature maps. When inputs are - dictionaries, they contain multiple levels of feature maps, where the key - is the index of feature map. - - Args: - inputs: A tuple of 2 feature map tensors of shape - [batch, height_l, width_l, channels] or 2 dictionaries of tensors: - - key: A `str` of the level of the multilevel features. - - values: A `tf.Tensor` of the feature map tensors, whose shape is - [batch, height_l, width_l, channels]. - Returns: - segmentation prediction mask: A `tf.Tensor` of the segmentation mask - scores predicted from input features. - """ + def _fuse_features(self, inputs): backbone_output = inputs[0] decoder_output = inputs[1] if self._config_dict['feature_fusion'] == 'deeplabv3plus': @@ -246,9 +245,34 @@ def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], self._config_dict['level']) elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion': x = self._panoptic_fpn_fusion(decoder_output) + elif self._config_dict['feature_fusion'] == 'panoptic_deeplab_fusion': + x = self._panoptic_deeplab_fusion(inputs) else: x = decoder_output[str(self._config_dict['level'])] if isinstance( decoder_output, dict) else decoder_output + return x + + def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], + Union[tf.Tensor, Mapping[str, tf.Tensor]]]): + """Forward pass of the segmentation head. + + It supports both a tuple of 2 tensors or 2 dictionaries. The first is + backbone endpoints, and the second is decoder endpoints. When inputs are + tensors, they are from a single level of feature maps. When inputs are + dictionaries, they contain multiple levels of feature maps, where the key + is the index of feature map. + + Args: + inputs: A tuple of 2 feature map tensors of shape + [batch, height_l, width_l, channels] or 2 dictionaries of tensors: + - key: A `str` of the level of the multilevel features. + - values: A `tf.Tensor` of the feature map tensors, whose shape is + [batch, height_l, width_l, channels]. + Returns: + segmentation prediction mask: A `tf.Tensor` of the segmentation mask + scores predicted from input features. 
+ """ + x = self._fuse_features(inputs) for conv, norm in zip(self._convs, self._norms): x = conv(x) @@ -258,7 +282,7 @@ def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], x = spatial_transform_ops.nearest_upsampling( x, scale=self._config_dict['upsample_factor']) - return self._classifier(x) + return self._prediction_conv(x) def get_config(self): return self._config_dict From c8e0233b00ced4798e9707714efc82f9b4dc4623 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:28:56 +0530 Subject: [PATCH 03/26] added tests for panoptic_deeplab_fusion --- .../modeling/heads/segmentation_heads_test.py | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/official/vision/beta/modeling/heads/segmentation_heads_test.py b/official/vision/beta/modeling/heads/segmentation_heads_test.py index 7d620252189..df98790ad23 100644 --- a/official/vision/beta/modeling/heads/segmentation_heads_test.py +++ b/official/vision/beta/modeling/heads/segmentation_heads_test.py @@ -26,14 +26,17 @@ class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase): @parameterized.parameters( - (2, 'pyramid_fusion', None, None), - (3, 'pyramid_fusion', None, None), - (2, 'panoptic_fpn_fusion', 2, 5), - (2, 'panoptic_fpn_fusion', 2, 6), - (3, 'panoptic_fpn_fusion', 3, 5), - (3, 'panoptic_fpn_fusion', 3, 6)) + (2, 'pyramid_fusion', None, None, 2, 48), + (3, 'pyramid_fusion', None, None, 2, 48), + (2, 'panoptic_fpn_fusion', 2, 5, 2, 48), + (2, 'panoptic_fpn_fusion', 2, 6, 2, 48), + (3, 'panoptic_fpn_fusion', 3, 5, 2, 48), + (3, 'panoptic_fpn_fusion', 3, 6, 2, 48), + (4, 'panoptic_deeplab_fusion', None, None, (4, 3), (64, 32)), + (4, 'panoptic_deeplab_fusion', None, None, (3, 2), (64, 32))) def test_forward(self, level, feature_fusion, - decoder_min_level, decoder_max_level): + decoder_min_level, decoder_max_level, + low_level, low_level_num_filters): backbone_features = { '3': np.random.rand(2, 128, 128, 16), '4': np.random.rand(2, 64, 64, 16), @@ -45,14 +48,16 @@ def test_forward(self, level, feature_fusion, '5': np.random.rand(2, 32, 32, 64), '6': np.random.rand(2, 16, 16, 64), } - - if feature_fusion == 'panoptic_fpn_fusion': + num_classes = 10 + if 'panoptic' in feature_fusion: backbone_features['2'] = np.random.rand(2, 256, 256, 16) decoder_features['2'] = np.random.rand(2, 256, 256, 64) head = segmentation_heads.SegmentationHead( - num_classes=10, + num_classes=num_classes, level=level, + low_level=low_level, + low_level_num_filters=low_level_num_filters, feature_fusion=feature_fusion, decoder_min_level=decoder_min_level, decoder_max_level=decoder_max_level, @@ -60,14 +65,18 @@ def test_forward(self, level, feature_fusion, logits = head((backbone_features, decoder_features)) - if level in decoder_features: - self.assertAllEqual(logits.numpy().shape, [ - 2, decoder_features[str(level)].shape[1], - decoder_features[str(level)].shape[2], 10 - ]) + if str(level) in decoder_features: + if feature_fusion == 'panoptic_deeplab_fusion': + h, w = decoder_features[str(low_level[-1])].shape[1:3] + else: + h, w = decoder_features[str(level)].shape[1:3] + self.assertAllEqual( + logits.numpy().shape, + [2, h, w, num_classes]) def test_serialize_deserialize(self): - head = segmentation_heads.SegmentationHead(num_classes=10, level=3) + head = segmentation_heads.SegmentationHead( + num_classes=10, level=3) config = head.get_config() new_head = segmentation_heads.SegmentationHead.from_config(config) self.assertAllEqual(head.get_config(), new_head.get_config()) 
From e257b292f15ab7a9adfcae7081333e957799bf37 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:30:21 +0530 Subject: [PATCH 04/26] added `kernel_size` param for `SegmentationHead` --- official/vision/beta/configs/semantic_segmentation.py | 1 + official/vision/beta/modeling/factory.py | 1 + 2 files changed, 2 insertions(+) diff --git a/official/vision/beta/configs/semantic_segmentation.py b/official/vision/beta/configs/semantic_segmentation.py index 3aefcefec70..a952e3416bd 100644 --- a/official/vision/beta/configs/semantic_segmentation.py +++ b/official/vision/beta/configs/semantic_segmentation.py @@ -63,6 +63,7 @@ class SegmentationHead(hyperparams.Config): num_convs: int = 2 num_filters: int = 256 use_depthwise_convolution: bool = False + kernel_size: int = 3 prediction_kernel_size: int = 1 upsample_factor: int = 1 feature_fusion: Optional[ diff --git a/official/vision/beta/modeling/factory.py b/official/vision/beta/modeling/factory.py index b03d0ea9d55..b75c347a44e 100644 --- a/official/vision/beta/modeling/factory.py +++ b/official/vision/beta/modeling/factory.py @@ -356,6 +356,7 @@ def build_segmentation_model( num_classes=model_config.num_classes, level=head_config.level, num_convs=head_config.num_convs, + kernel_size=head_config.kernel_size, prediction_kernel_size=head_config.prediction_kernel_size, num_filters=head_config.num_filters, use_depthwise_convolution=head_config.use_depthwise_convolution, From 6742d61a045c5a5a029b6ec2d9f429edc8f161ad Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:30:50 +0530 Subject: [PATCH 05/26] added `InstanceCenterHead` --- .../modeling/heads/instance_center_head.py | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py new file mode 100644 index 00000000000..f16bbfbbb24 --- /dev/null +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py @@ -0,0 +1,170 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Contains definition of instance center heads."""
+from typing import List, Union, Optional, Mapping, Tuple
+import tensorflow as tf
+
+from official.vision.beta.modeling.heads import segmentation_heads
+from official.vision.beta.ops import spatial_transform_ops
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class InstanceCenterHead(segmentation_heads.SegmentationHead):
+  """Creates an instance center head."""
+
+  def __init__(
+      self,
+      level: Union[int, str],
+      num_convs: int = 2,
+      num_filters: int = 256,
+      use_depthwise_convolution: bool = False,
+      kernel_size: int = 3,
+      prediction_kernel_size: int = 1,
+      upsample_factor: int = 1,
+      feature_fusion: Optional[str] = None,
+      decoder_min_level: Optional[int] = None,
+      decoder_max_level: Optional[int] = None,
+      low_level: Union[int, List[int]] = 2,
+      low_level_num_filters: Union[int, List[int]] = 48,
+      num_decoder_filters: int = 256,
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes an instance center head.
+
+    Args:
+      level: An `int` or `str`, level to use to build the instance center
+        head.
+      num_convs: An `int` number of stacked convolutions before the last
+        prediction layer.
+      num_filters: An `int` number to specify the number of filters used.
+        Default is 256.
+      use_depthwise_convolution: A bool to specify if use depthwise separable
+        convolutions.
+      kernel_size: An `int` number to specify the kernel size of the stacked
+        convolutions before the last prediction layer.
+      prediction_kernel_size: An `int` number to specify the kernel size of
+        the prediction layer.
+      upsample_factor: An `int` number to specify the upsampling factor to
+        generate finer mask. Default 1 means no upsampling is applied.
+      feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`,
+        `panoptic_fpn_fusion`, `panoptic_deeplab_fusion`, or None. If
+        `deeplabv3plus`, features from decoder_features[level] will be fused
+        with low level feature maps from backbone. If `pyramid_fusion`,
+        multiscale features will be resized and fused at the target level.
+      decoder_min_level: An `int` of minimum level from decoder to use in
+        feature fusion. It is only used when feature_fusion is set to
+        `panoptic_fpn_fusion`.
+      decoder_max_level: An `int` of maximum level from decoder to use in
+        feature fusion. It is only used when feature_fusion is set to
+        `panoptic_fpn_fusion`.
+      low_level: An `int` or list of `int` of backbone levels to be used for
+        feature fusion. It is used when feature_fusion is set to
+        `deeplabv3plus` or `panoptic_deeplab_fusion`.
+      low_level_num_filters: An `int` or list of `int` of reduced numbers of
+        filters for the low level features before fusing them with higher
+        level features. It is only used when feature_fusion is set to
+        `deeplabv3plus` or `panoptic_deeplab_fusion`.
+      num_decoder_filters: An `int` of number of filters in the decoder
+        outputs. It is only used when feature_fusion is set to
+        `panoptic_fpn_fusion`.
+      activation: A `str` that indicates which activation is used, e.g.
+        'relu', 'swish', etc.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+      norm_momentum: A `float` of normalization momentum for the moving
+        average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default is None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D.
+      **kwargs: Additional keyword arguments to be passed.
+ """ + super(InstanceCenterHead, self).__init__( + num_classes=2, + level=level, + num_convs=num_convs, + num_filters=num_filters, + use_depthwise_convolution=use_depthwise_convolution, + kernel_size=kernel_size, + prediction_kernel_size=prediction_kernel_size, + upsample_factor=upsample_factor, + feature_fusion=feature_fusion, + decoder_min_level=decoder_min_level, + decoder_max_level=decoder_max_level, + low_level=low_level, + low_level_num_filters=low_level_num_filters, + num_decoder_filters=num_decoder_filters, + activation=activation, + use_sync_bn=use_sync_bn, + norm_momentum=norm_momentum, + norm_epsilon=norm_epsilon, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + **kwargs) + + + def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): + self._instance_center_prediction_conv = tf.keras.layers.Conv2D( + name='instance_center_prediction', + filters=1, + kernel_size=self._config_dict['prediction_kernel_size'], + padding='same', + bias_initializer=tf.zeros_initializer(), + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + super(InstanceCenterHead, self).build(input_shape) + + + def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], + Union[tf.Tensor, Mapping[str, tf.Tensor]]]): + """Forward pass of the segmentation head. + + It supports both a tuple of 2 tensors or 2 dictionaries. The first is + backbone endpoints, and the second is decoder endpoints. When inputs are + tensors, they are from a single level of feature maps. When inputs are + dictionaries, they contain multiple levels of feature maps, where the key + is the index of feature map. + + Args: + inputs: A tuple of 2 feature map tensors of shape + [batch, height_l, width_l, channels] or 2 dictionaries of tensors: + - key: A `str` of the level of the multilevel features. + - values: A `tf.Tensor` of the feature map tensors, whose shape is + [batch, height_l, width_l, channels]. + Returns: + segmentation prediction mask: A `tf.Tensor` of the segmentation mask + scores predicted from input features. 
+ """ + x = self._fuse_features(inputs) + + for conv, norm in zip(self._convs, self._norms): + x = conv(x) + x = norm(x) + x = self._activation(x) + if self._config_dict['upsample_factor'] > 1: + x = spatial_transform_ops.nearest_upsampling( + x, scale=self._config_dict['upsample_factor']) + + instance_center_prediction = self._instance_center_prediction_conv(x) + instance_center_regression = self._prediction_conv(x) + outputs = { + 'instance_center_prediction': instance_center_prediction, + 'instance_center_regression': instance_center_regression + } + return outputs + + def get_config(self): + config_dict = super(InstanceCenterHead, self).get_config().copy() + config_dict.pop('num_classes') + return config_dict From a6a14de72b50e4cef438f73fbe1b0cbdd237ccae Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:31:17 +0530 Subject: [PATCH 06/26] added tests for `InstanceCenterHead` --- .../heads/instance_center_head_test.py | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py new file mode 100644 index 00000000000..ab4fe281a22 --- /dev/null +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py @@ -0,0 +1,87 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Tests for segmentation_heads.py.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head + + +class InstanceCenterHeadTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (2, 'pyramid_fusion', None, None, 2, 48), + (3, 'pyramid_fusion', None, None, 2, 48), + (2, 'panoptic_fpn_fusion', 2, 5, 2, 48), + (2, 'panoptic_fpn_fusion', 2, 6, 2, 48), + (3, 'panoptic_fpn_fusion', 3, 5, 2, 48), + (3, 'panoptic_fpn_fusion', 3, 6, 2, 48), + (4, 'panoptic_deeplab_fusion', None, None, (4, 3), (64, 32)), + (4, 'panoptic_deeplab_fusion', None, None, (3, 2), (64, 32))) + def test_forward(self, level, feature_fusion, + decoder_min_level, decoder_max_level, + low_level, low_level_num_filters): + backbone_features = { + '3': np.random.rand(2, 128, 128, 16), + '4': np.random.rand(2, 64, 64, 16), + '5': np.random.rand(2, 32, 32, 16), + } + decoder_features = { + '3': np.random.rand(2, 128, 128, 64), + '4': np.random.rand(2, 64, 64, 64), + '5': np.random.rand(2, 32, 32, 64), + '6': np.random.rand(2, 16, 16, 64), + } + + if 'panoptic' in feature_fusion: + backbone_features['2'] = np.random.rand(2, 256, 256, 16) + decoder_features['2'] = np.random.rand(2, 256, 256, 64) + + head = instance_center_head.InstanceCenterHead( + level=level, + low_level=low_level, + low_level_num_filters=low_level_num_filters, + feature_fusion=feature_fusion, + decoder_min_level=decoder_min_level, + decoder_max_level=decoder_max_level, + num_decoder_filters=64) + + outputs = head((backbone_features, decoder_features)) + + if str(level) in decoder_features: + if feature_fusion == 'panoptic_deeplab_fusion': + h, w = decoder_features[str(low_level[-1])].shape[1:3] + else: + h, w = decoder_features[str(level)].shape[1:3] + self.assertAllEqual( + outputs['instance_center_prediction'].numpy().shape, + [2, h, w, 1]) + self.assertAllEqual( + outputs['instance_center_regression'].numpy().shape, + [2, h, w, 2]) + + + def test_serialize_deserialize(self): + head = instance_center_head.InstanceCenterHead(level=3) + config = head.get_config() + new_head = instance_center_head.InstanceCenterHead.from_config(config) + self.assertAllEqual(head.get_config(), new_head.get_config()) + +if __name__ == '__main__': + tf.test.main() From 6ee54a60f61b0a639dfa855009c6abc3d51f4d92 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:31:59 +0530 Subject: [PATCH 07/26] added `PanopticDeeplabModel` --- .../configs/panoptic_deeplab.py | 61 ++++++++++ .../modeling/panoptic_deeplab_model.py | 107 ++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py new file mode 100644 index 00000000000..d509ba669a7 --- /dev/null +++ b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py @@ -0,0 +1,61 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Panoptic Deeplab configuration definition."""
+
+import dataclasses
+from typing import List, Optional, Union
+
+from official.modeling import hyperparams
+from official.vision.beta.configs import common
+from official.vision.beta.configs import backbones
+from official.vision.beta.configs import decoders
+from official.vision.beta.configs import semantic_segmentation
+
+SEGMENTATION_HEAD = semantic_segmentation.SegmentationHead
+
+_COCO_INPUT_PATH_BASE = 'coco/tfrecords'
+_COCO_TRAIN_EXAMPLES = 118287
+_COCO_VAL_EXAMPLES = 5000
+
+
+@dataclasses.dataclass
+class InstanceCenterHead(semantic_segmentation.SegmentationHead):
+  """Instance Center head config."""
+  kernel_size: int = 5
+  # One of None, deeplabv3plus, panoptic_fpn_fusion,
+  # panoptic_deeplab_fusion or pyramid_fusion.
+  feature_fusion: Optional[str] = None
+  low_level: Union[int, List[int]] = dataclasses.field(
+      default_factory=lambda: [3, 2])
+  low_level_num_filters: Union[int, List[int]] = dataclasses.field(
+      default_factory=lambda: [64, 32])
+
+
+# pytype: disable=wrong-keyword-args
+@dataclasses.dataclass
+class PanopticDeeplab(hyperparams.Config):
+  """Panoptic Deeplab model config."""
+  num_classes: int = 0
+  input_size: List[int] = dataclasses.field(default_factory=list)
+  min_level: int = 3
+  max_level: int = 6
+  norm_activation: common.NormActivation = common.NormActivation()
+  backbone: backbones.Backbone = backbones.Backbone(
+      type='resnet', resnet=backbones.ResNet())
+  decoder: decoders.Decoder = decoders.Decoder(type='aspp')
+  semantic_head: SEGMENTATION_HEAD = SEGMENTATION_HEAD()
+  instance_head: InstanceCenterHead = InstanceCenterHead(
+      low_level=[3, 2])
+  shared_decoder: bool = False
diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py
new file mode 100644
index 00000000000..2e963e909c9
--- /dev/null
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py
@@ -0,0 +1,107 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +"""Build Panoptic Deeplab model.""" +from typing import Any, Mapping, Optional, Union + +import tensorflow as tf + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class PanopticDeeplabModel(tf.keras.Model): + """Panoptic Deeplab model.""" + + def __init__( + self, + backbone: tf.keras.Model, + semantic_decoder: tf.keras.Model, + semantic_head: tf.keras.layers.Layer, + instance_head: tf.keras.layers.Layer, + instance_decoder: Optional[tf.keras.Model] = None, + **kwargs): + """ + Args: + backbone: a backbone network. + semantic_decoder: a decoder network. E.g. FPN. + semantic_head: segmentation head. + instance_head: instance center head . + instance_decoder: Optional decoder network for instance predictions. + **kwargs: keyword arguments to be passed. + """ + super(PanopticDeeplabModel, self).__init__(**kwargs) + + self._config_dict = { + 'backbone': backbone, + 'semantic_decoder': semantic_decoder, + 'instance_decoder': instance_decoder, + 'semantic_head': semantic_head, + 'instance_head': instance_head + } + self.backbone = backbone + self.semantic_decoder = semantic_decoder + self.instance_decoder = instance_decoder + self.semantic_head = semantic_head + self.instance_head = instance_head + + def call(self, inputs: tf.Tensor, training: bool = None) -> tf.Tensor: + if training is None: + training = tf.keras.backend.learning_phase() + + backbone_features = self.backbone(inputs, training=training) + + semantic_features = self.semantic_decoder( + backbone_features, training=training) + + if self.instance_decoder is None: + instance_features = semantic_features + else: + instance_features = self.instance_decoder( + backbone_features, training=training) + + segmentation_outputs = self.semantic_head( + (backbone_features, semantic_features), + training=training) + instance_outputs = self.instance_head( + (backbone_features, instance_features), + training=training) + + outputs = { + 'segmentation_outputs': segmentation_outputs, + 'instance_center_prediction': + instance_outputs['instance_center_prediction'], + 'instance_center_regression': + instance_outputs['instance_center_regression'], + } + return outputs + + @property + def checkpoint_items( + self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]: + """Returns a dictionary of items to be additionally checkpointed.""" + items = dict( + backbone=self.backbone, + semantic_decoder=self.semantic_decoder, + semantic_head=self.semantic_head, + instance_head=self.instance_head) + if self.instance_decoder is not None: + items.update(instance_decoder=self.instance_decoder) + + return items + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) From 8a8d5fabbb2f111ec7fd4d8ae5bf3d4f1cb29fbb Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:32:14 +0530 Subject: [PATCH 08/26] added tests for `PanopticDeeplabModel` --- .../modeling/panoptic_deeplab_model_test.py | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py new file mode 100644 index 00000000000..95bc2e55729 --- /dev/null +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py @@ -0,0 +1,148 
@@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for Panoptic Deeplab network.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf +from tensorflow.python.distribute import combinations + +from official.vision.beta.modeling import backbones +from official.vision.beta.modeling.decoders import aspp +from official.vision.beta.modeling.heads import segmentation_heads +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head +from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model + +class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase): + + @combinations.generate( + combinations.combine( + level=[2, 3, 4], + input_size=[256, 512], + low_level=[(4, 3), (3, 2)], + shared_decoder=[True, False], + training=[True, False])) + def test_panoptic_deeplab_network_creation( + self, input_size, level, low_level, shared_decoder, training): + """Test for creation of a panoptic deep lab network.""" + num_classes = 10 + inputs = np.random.rand(2, input_size, input_size, 3) + tf.keras.backend.set_image_data_format('channels_last') + backbone = backbones.ResNet(model_id=50) + + semantic_decoder = aspp.ASPP( + level=level, dilation_rates=[6, 12, 18]) + + if shared_decoder: + instance_decoder = semantic_decoder + else: + instance_decoder = aspp.ASPP( + level=level, dilation_rates=[6, 12, 18]) + + semantic_head = segmentation_heads.SegmentationHead( + num_classes, + level=level, + low_level=low_level, + low_level_num_filters=[64, 32], + feature_fusion='panoptic_deeplab_fusion') + + instance_head = instance_center_head.InstanceCenterHead( + level=level, + low_level=low_level, + low_level_num_filters=[64, 32], + feature_fusion='panoptic_deeplab_fusion') + + model = panoptic_deeplab_model.PanopticDeeplabModel( + backbone=backbone, + semantic_decoder=semantic_decoder, + instance_decoder=instance_decoder, + semantic_head=semantic_head, + instance_head=instance_head) + + outputs = model(inputs, training=training) + + + self.assertIn('segmentation_outputs', outputs) + self.assertIn('instance_center_prediction', outputs) + self.assertIn('instance_center_regression', outputs) + + self.assertAllEqual( + [2, input_size // (2**low_level[-1]), + input_size //(2**low_level[-1]), + num_classes], + outputs['segmentation_outputs'].numpy().shape) + self.assertAllEqual( + [2, input_size // (2**low_level[-1]), + input_size // (2**low_level[-1]), + 1], + outputs['instance_center_prediction'].numpy().shape) + self.assertAllEqual( + [2, input_size // (2**low_level[-1]), + input_size // (2**low_level[-1]), + 2], + outputs['instance_center_regression'].numpy().shape) + + @combinations.generate( + combinations.combine( + level=[2, 3, 4], + low_level=[(4, 3), (3, 2)], + shared_decoder=[True, False])) + def test_serialize_deserialize(self, level, low_level, shared_decoder): + """Validate the network can be serialized 
and deserialized.""" + num_classes = 10 + backbone = backbones.ResNet(model_id=50) + + semantic_decoder = aspp.ASPP( + level=level, dilation_rates=[6, 12, 18]) + + if shared_decoder: + instance_decoder = semantic_decoder + else: + instance_decoder = aspp.ASPP( + level=level, dilation_rates=[6, 12, 18]) + + semantic_head = segmentation_heads.SegmentationHead( + num_classes, + level=level, + low_level=low_level, + low_level_num_filters=[64, 32], + feature_fusion='panoptic_deeplab_fusion') + + instance_head = instance_center_head.InstanceCenterHead( + level=level, + low_level=low_level, + low_level_num_filters=[64, 32], + feature_fusion='panoptic_deeplab_fusion') + + model = panoptic_deeplab_model.PanopticDeeplabModel( + backbone=backbone, + semantic_decoder=semantic_decoder, + instance_decoder=instance_decoder, + semantic_head=semantic_head, + instance_head=instance_head) + + config = model.get_config() + new_model = panoptic_deeplab_model.PanopticDeeplabModel.from_config(config) + + # Validate that the config can be forced to JSON. + _ = new_model.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(model.get_config(), new_model.get_config()) + + +if __name__ == '__main__': + tf.test.main() From c3282abe652c928a8923f07b07be2456651a7c8f Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Thu, 13 Jan 2022 00:32:57 +0530 Subject: [PATCH 09/26] added `build_panoptic_deeplab` in panoptic factory --- .../panoptic_maskrcnn/modeling/factory.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py index e02227fb3e2..87012bcc64d 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py @@ -20,7 +20,10 @@ from official.vision.beta.modeling import factory as models_factory from official.vision.beta.modeling.decoders import factory as decoder_factory from official.vision.beta.modeling.heads import segmentation_heads +from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head +from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_maskrcnn_model from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_segmentation_generator @@ -82,6 +85,7 @@ def build_panoptic_maskrcnn( num_classes=segmentation_config.num_classes, level=segmentation_head_config.level, num_convs=segmentation_head_config.num_convs, + kernel_size=segmentation_head_config.kernel_size, prediction_kernel_size=segmentation_head_config.prediction_kernel_size, num_filters=segmentation_head_config.num_filters, upsample_factor=segmentation_head_config.upsample_factor, @@ -141,3 +145,88 @@ def build_panoptic_maskrcnn( aspect_ratios=model_config.anchor.aspect_ratios, anchor_size=model_config.anchor.anchor_size) return model + + +def build_panoptic_deeplab( + input_specs: tf.keras.layers.InputSpec, + model_config: panoptic_deeplab_cfg.PanopticDeeplab, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: # pytype: 
disable=annotation-type-mismatch # typed-keras
+  """Builds Panoptic Deeplab model.
+
+  Args:
+    input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
+    model_config: Config instance for the panoptic deeplab model.
+    l2_regularizer: Optional `tf.keras.regularizers.Regularizer`, if
+      specified, the model is built with the provided regularization layer.
+
+  Returns:
+    tf.keras.Model for the panoptic segmentation model.
+  """
+  norm_activation_config = model_config.norm_activation
+  backbone = backbones.factory.build_backbone(
+      input_specs=input_specs,
+      backbone_config=model_config.backbone,
+      norm_activation_config=norm_activation_config,
+      l2_regularizer=l2_regularizer)
+
+  semantic_decoder = decoder_factory.build_decoder(
+      input_specs=backbone.output_specs,
+      model_config=model_config,
+      l2_regularizer=l2_regularizer)
+
+  if model_config.shared_decoder:
+    instance_decoder = None
+  else:
+    # TODO(srihari-humbarwadi): decouple semantic and
+    # instance decoder types
+    instance_decoder = decoder_factory.build_decoder(
+        input_specs=backbone.output_specs,
+        model_config=model_config,
+        l2_regularizer=l2_regularizer)
+
+  semantic_head_config = model_config.semantic_head
+  instance_head_config = model_config.instance_head
+
+  semantic_head = segmentation_heads.SegmentationHead(
+      num_classes=model_config.num_classes,
+      level=semantic_head_config.level,
+      num_convs=semantic_head_config.num_convs,
+      kernel_size=semantic_head_config.kernel_size,
+      prediction_kernel_size=semantic_head_config.prediction_kernel_size,
+      num_filters=semantic_head_config.num_filters,
+      use_depthwise_convolution=semantic_head_config.use_depthwise_convolution,
+      upsample_factor=semantic_head_config.upsample_factor,
+      feature_fusion=semantic_head_config.feature_fusion,
+      low_level=semantic_head_config.low_level,
+      low_level_num_filters=semantic_head_config.low_level_num_filters,
+      activation=norm_activation_config.activation,
+      use_sync_bn=norm_activation_config.use_sync_bn,
+      norm_momentum=norm_activation_config.norm_momentum,
+      norm_epsilon=norm_activation_config.norm_epsilon,
+      kernel_regularizer=l2_regularizer)
+
+  instance_head = instance_center_head.InstanceCenterHead(
+      level=instance_head_config.level,
+      num_convs=instance_head_config.num_convs,
+      kernel_size=instance_head_config.kernel_size,
+      prediction_kernel_size=instance_head_config.prediction_kernel_size,
+      num_filters=instance_head_config.num_filters,
+      use_depthwise_convolution=instance_head_config.use_depthwise_convolution,
+      upsample_factor=instance_head_config.upsample_factor,
+      feature_fusion=instance_head_config.feature_fusion,
+      low_level=instance_head_config.low_level,
+      low_level_num_filters=instance_head_config.low_level_num_filters,
+      activation=norm_activation_config.activation,
+      use_sync_bn=norm_activation_config.use_sync_bn,
+      norm_momentum=norm_activation_config.norm_momentum,
+      norm_epsilon=norm_activation_config.norm_epsilon,
+      kernel_regularizer=l2_regularizer)
+
+  model = panoptic_deeplab_model.PanopticDeeplabModel(
+      backbone=backbone,
+      semantic_decoder=semantic_decoder,
+      instance_decoder=instance_decoder,
+      semantic_head=semantic_head,
+      instance_head=instance_head)
+
+  return model

From ac6713063e8a4dd57363a8e4d41e3afdf28037ee Mon Sep 17 00:00:00 2001
From: srihari-humbarwadi
Date: Thu, 13 Jan 2022 00:33:07 +0530
Subject: [PATCH 10/26] added tests for `build_panoptic_deeplab` in panoptic
 factory

---
 .../modeling/factory_test.py | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git
a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py index ba64f8083a6..ed31fe8487c 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py @@ -17,10 +17,14 @@ from absl.testing import parameterized import numpy as np import tensorflow as tf +from tensorflow.python.distribute import combinations + from official.vision.beta.configs import backbones from official.vision.beta.configs import decoders from official.vision.beta.configs import semantic_segmentation from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg +from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg + from official.vision.beta.projects.panoptic_maskrcnn.modeling import factory @@ -61,5 +65,53 @@ def test_builder(self, backbone_type, input_size, segmentation_backbone_type, model_config=model_config, l2_regularizer=l2_regularizer) +class PanopticDeeplabBuilderTest(parameterized.TestCase, tf.test.TestCase): + + @combinations.generate( + combinations.combine( + input_size=[(640, 640), (512, 512)], + backbone_type=['resnet', 'dilated_resnet'], + decoder_type=['aspp', 'fpn'], + level=[2, 3, 4], + low_level=[(4, 3), (3, 2)], + shared_decoder=[True, False], + fusion_type=[ + 'pyramid_fusion', + 'panoptic_fpn_fusion', + 'panoptic_deeplab_fusion'])) + def test_builder(self, input_size, backbone_type, level, + low_level, decoder_type, shared_decoder, fusion_type): + num_classes = 10 + input_specs = tf.keras.layers.InputSpec( + shape=[None, input_size[0], input_size[1], 3]) + + model_config = panoptic_deeplab_cfg.PanopticDeeplab( + num_classes=num_classes, + input_size=input_size, + backbone=backbones.Backbone(type=backbone_type), + decoder=decoders.Decoder(type=decoder_type), + semantic_head=semantic_segmentation.SegmentationHead( + level=level, + num_convs=1, + kernel_size=5, + prediction_kernel_size=1, + low_level=low_level, + feature_fusion=fusion_type), + instance_head=panoptic_deeplab_cfg.InstanceCenterHead( + level=level, + num_convs=1, + kernel_size=5, + prediction_kernel_size=1, + low_level=low_level, + feature_fusion=fusion_type), + shared_decoder=shared_decoder) + + l2_regularizer = tf.keras.regularizers.l2(5e-5) + _ = factory.build_panoptic_deeplab( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + if __name__ == '__main__': tf.test.main() From 4dc4f6c74bf9f79d08df27315eb01cdb0c736b5c Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 14 Jan 2022 13:29:04 +0530 Subject: [PATCH 11/26] Revert "added `kernel_size` param for `SegmentationHead`" This reverts commit e257b292f15ab7a9adfcae7081333e957799bf37. 
--- official/vision/beta/configs/semantic_segmentation.py | 1 - official/vision/beta/modeling/factory.py | 1 - 2 files changed, 2 deletions(-) diff --git a/official/vision/beta/configs/semantic_segmentation.py b/official/vision/beta/configs/semantic_segmentation.py index 701e1653e90..0543fcc13d2 100644 --- a/official/vision/beta/configs/semantic_segmentation.py +++ b/official/vision/beta/configs/semantic_segmentation.py @@ -63,7 +63,6 @@ class SegmentationHead(hyperparams.Config): num_convs: int = 2 num_filters: int = 256 use_depthwise_convolution: bool = False - kernel_size: int = 3 prediction_kernel_size: int = 1 upsample_factor: int = 1 feature_fusion: Optional[ diff --git a/official/vision/beta/modeling/factory.py b/official/vision/beta/modeling/factory.py index 5e35fdfe5af..c91a1abceed 100644 --- a/official/vision/beta/modeling/factory.py +++ b/official/vision/beta/modeling/factory.py @@ -356,7 +356,6 @@ def build_segmentation_model( num_classes=model_config.num_classes, level=head_config.level, num_convs=head_config.num_convs, - kernel_size=head_config.kernel_size, prediction_kernel_size=head_config.prediction_kernel_size, num_filters=head_config.num_filters, use_depthwise_convolution=head_config.use_depthwise_convolution, From 29ab89cca4c7416930b747497caeda501bec3fe6 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 14 Jan 2022 13:29:49 +0530 Subject: [PATCH 12/26] Revert "added new feature_fusion: panoptic_deeplab_fusion" This reverts commit 78949f92e6529d27a665193c0cc152ccfe0df163. --- .../beta/modeling/heads/segmentation_heads.py | 74 +++++++------------ 1 file changed, 25 insertions(+), 49 deletions(-) diff --git a/official/vision/beta/modeling/heads/segmentation_heads.py b/official/vision/beta/modeling/heads/segmentation_heads.py index 66967392aa6..f65b234b3cf 100644 --- a/official/vision/beta/modeling/heads/segmentation_heads.py +++ b/official/vision/beta/modeling/heads/segmentation_heads.py @@ -202,14 +202,13 @@ def __init__( num_convs: int = 2, num_filters: int = 256, use_depthwise_convolution: bool = False, - kernel_size: int = 3, prediction_kernel_size: int = 1, upsample_factor: int = 1, feature_fusion: Optional[str] = None, decoder_min_level: Optional[int] = None, decoder_max_level: Optional[int] = None, - low_level: Union[int, List[int]] = 2, - low_level_num_filters: Union[int, List[int]] = 48, + low_level: int = 2, + low_level_num_filters: int = 48, num_decoder_filters: int = 256, activation: str = 'relu', use_sync_bn: bool = False, @@ -230,8 +229,6 @@ def __init__( Default is 256. use_depthwise_convolution: A bool to specify if use depthwise separable convolutions. - kernel_size: An `int` number to specify the kernel size of the - stacked convolutions before the last prediction layer. prediction_kernel_size: An `int` number to specify the kernel size of the prediction layer. 
upsample_factor: An `int` number to specify the upsampling factor to @@ -273,7 +270,6 @@ def __init__( 'num_convs': num_convs, 'num_filters': num_filters, 'use_depthwise_convolution': use_depthwise_convolution, - 'kernel_size': kernel_size, 'prediction_kernel_size': prediction_kernel_size, 'upsample_factor': upsample_factor, 'feature_fusion': feature_fusion, @@ -297,12 +293,11 @@ def __init__( def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): """Creates the variables of the segmentation head.""" - kernel_size = self._config_dict['kernel_size'] use_depthwise_convolution = self._config_dict['use_depthwise_convolution'] random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01) conv_op = tf.keras.layers.Conv2D conv_kwargs = { - 'kernel_size': kernel_size if not use_depthwise_convolution else 1, + 'kernel_size': 3 if not use_depthwise_convolution else 1, 'padding': 'same', 'use_bias': False, 'kernel_initializer': random_initializer, @@ -342,19 +337,6 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): kernel_regularizer=self._config_dict['kernel_regularizer'], bias_regularizer=self._config_dict['bias_regularizer']) - if self._config_dict['feature_fusion'] == 'panoptic_deeplab_fusion': - self._panoptic_deeplab_fusion = nn_layers.PanopticDeepLabFusion( - level=self._config_dict['level'], - low_level=self._config_dict['low_level'], - num_projection_filters=self._config_dict['low_level_num_filters'], - num_output_filters=self._config_dict['num_filters'], - activation=self._config_dict['activation'], - use_sync_bn=self._config_dict['use_sync_bn'], - norm_momentum=self._config_dict['norm_momentum'], - norm_epsilon=self._config_dict['norm_epsilon'], - kernel_regularizer=self._config_dict['kernel_regularizer'], - bias_regularizer=self._config_dict['bias_regularizer']) - # Segmentation head layers. self._convs = [] self._norms = [] @@ -380,7 +362,7 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): norm_name = 'segmentation_head_norm_{}'.format(i) self._norms.append(bn_op(name=norm_name, **bn_kwargs)) - self._prediction_conv = conv_op( + self._classifier = conv_op( name='segmentation_output', filters=self._config_dict['num_classes'], kernel_size=self._config_dict['prediction_kernel_size'], @@ -392,7 +374,26 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): super().build(input_shape) - def _fuse_features(self, inputs): + def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], + Union[tf.Tensor, Mapping[str, tf.Tensor]]]): + """Forward pass of the segmentation head. + + It supports both a tuple of 2 tensors or 2 dictionaries. The first is + backbone endpoints, and the second is decoder endpoints. When inputs are + tensors, they are from a single level of feature maps. When inputs are + dictionaries, they contain multiple levels of feature maps, where the key + is the index of feature map. + + Args: + inputs: A tuple of 2 feature map tensors of shape + [batch, height_l, width_l, channels] or 2 dictionaries of tensors: + - key: A `str` of the level of the multilevel features. + - values: A `tf.Tensor` of the feature map tensors, whose shape is + [batch, height_l, width_l, channels]. + Returns: + segmentation prediction mask: A `tf.Tensor` of the segmentation mask + scores predicted from input features. 
+ """ backbone_output = inputs[0] decoder_output = inputs[1] if self._config_dict['feature_fusion'] == 'deeplabv3plus': @@ -415,34 +416,9 @@ def _fuse_features(self, inputs): self._config_dict['level']) elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion': x = self._panoptic_fpn_fusion(decoder_output) - elif self._config_dict['feature_fusion'] == 'panoptic_deeplab_fusion': - x = self._panoptic_deeplab_fusion(inputs) else: x = decoder_output[str(self._config_dict['level'])] if isinstance( decoder_output, dict) else decoder_output - return x - - def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], - Union[tf.Tensor, Mapping[str, tf.Tensor]]]): - """Forward pass of the segmentation head. - - It supports both a tuple of 2 tensors or 2 dictionaries. The first is - backbone endpoints, and the second is decoder endpoints. When inputs are - tensors, they are from a single level of feature maps. When inputs are - dictionaries, they contain multiple levels of feature maps, where the key - is the index of feature map. - - Args: - inputs: A tuple of 2 feature map tensors of shape - [batch, height_l, width_l, channels] or 2 dictionaries of tensors: - - key: A `str` of the level of the multilevel features. - - values: A `tf.Tensor` of the feature map tensors, whose shape is - [batch, height_l, width_l, channels]. - Returns: - segmentation prediction mask: A `tf.Tensor` of the segmentation mask - scores predicted from input features. - """ - x = self._fuse_features(inputs) for conv, norm in zip(self._convs, self._norms): x = conv(x) @@ -452,7 +428,7 @@ def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], x = spatial_transform_ops.nearest_upsampling( x, scale=self._config_dict['upsample_factor']) - return self._prediction_conv(x) + return self._classifier(x) def get_config(self): base_config = super().get_config() From cbe473920d0b018326917aaf75650c02b2fa117b Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 14 Jan 2022 13:30:05 +0530 Subject: [PATCH 13/26] Revert "added tests for panoptic_deeplab_fusion" This reverts commit c8e0233b00ced4798e9707714efc82f9b4dc4623. 
--- .../modeling/heads/segmentation_heads_test.py | 41 ++++++++----------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/official/vision/beta/modeling/heads/segmentation_heads_test.py b/official/vision/beta/modeling/heads/segmentation_heads_test.py index d882fbd94db..2ec7ded68c1 100644 --- a/official/vision/beta/modeling/heads/segmentation_heads_test.py +++ b/official/vision/beta/modeling/heads/segmentation_heads_test.py @@ -26,17 +26,14 @@ class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase): @parameterized.parameters( - (2, 'pyramid_fusion', None, None, 2, 48), - (3, 'pyramid_fusion', None, None, 2, 48), - (2, 'panoptic_fpn_fusion', 2, 5, 2, 48), - (2, 'panoptic_fpn_fusion', 2, 6, 2, 48), - (3, 'panoptic_fpn_fusion', 3, 5, 2, 48), - (3, 'panoptic_fpn_fusion', 3, 6, 2, 48), - (4, 'panoptic_deeplab_fusion', None, None, (4, 3), (64, 32)), - (4, 'panoptic_deeplab_fusion', None, None, (3, 2), (64, 32))) + (2, 'pyramid_fusion', None, None), + (3, 'pyramid_fusion', None, None), + (2, 'panoptic_fpn_fusion', 2, 5), + (2, 'panoptic_fpn_fusion', 2, 6), + (3, 'panoptic_fpn_fusion', 3, 5), + (3, 'panoptic_fpn_fusion', 3, 6)) def test_forward(self, level, feature_fusion, - decoder_min_level, decoder_max_level, - low_level, low_level_num_filters): + decoder_min_level, decoder_max_level): backbone_features = { '3': np.random.rand(2, 128, 128, 16), '4': np.random.rand(2, 64, 64, 16), @@ -48,16 +45,14 @@ def test_forward(self, level, feature_fusion, '5': np.random.rand(2, 32, 32, 64), '6': np.random.rand(2, 16, 16, 64), } - num_classes = 10 - if 'panoptic' in feature_fusion: + + if feature_fusion == 'panoptic_fpn_fusion': backbone_features['2'] = np.random.rand(2, 256, 256, 16) decoder_features['2'] = np.random.rand(2, 256, 256, 64) head = segmentation_heads.SegmentationHead( - num_classes=num_classes, + num_classes=10, level=level, - low_level=low_level, - low_level_num_filters=low_level_num_filters, feature_fusion=feature_fusion, decoder_min_level=decoder_min_level, decoder_max_level=decoder_max_level, @@ -65,18 +60,14 @@ def test_forward(self, level, feature_fusion, logits = head((backbone_features, decoder_features)) - if str(level) in decoder_features: - if feature_fusion == 'panoptic_deeplab_fusion': - h, w = decoder_features[str(low_level[-1])].shape[1:3] - else: - h, w = decoder_features[str(level)].shape[1:3] - self.assertAllEqual( - logits.numpy().shape, - [2, h, w, num_classes]) + if level in decoder_features: + self.assertAllEqual(logits.numpy().shape, [ + 2, decoder_features[str(level)].shape[1], + decoder_features[str(level)].shape[2], 10 + ]) def test_serialize_deserialize(self): - head = segmentation_heads.SegmentationHead( - num_classes=10, level=3) + head = segmentation_heads.SegmentationHead(num_classes=10, level=3) config = head.get_config() new_head = segmentation_heads.SegmentationHead.from_config(config) self.assertAllEqual(head.get_config(), new_head.get_config()) From 7e6c5502a08cc87991738b70a6f11044c05430c2 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 17:53:37 +0530 Subject: [PATCH 14/26] Revert "added `InstanceCenterHead`" This reverts commit 6742d61a045c5a5a029b6ec2d9f429edc8f161ad. 
--- .../modeling/heads/instance_center_head.py | 170 ------------------ 1 file changed, 170 deletions(-) delete mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py deleted file mode 100644 index f16bbfbbb24..00000000000 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Contains definition of instance center heads.""" -from typing import List, Union, Optional, Mapping, Tuple -import tensorflow as tf - -from official.vision.beta.modeling.heads import segmentation_heads -from official.vision.beta.ops import spatial_transform_ops - - -@tf.keras.utils.register_keras_serializable(package='Vision') -class InstanceCenterHead(segmentation_heads.SegmentationHead): - """Creates a segmentation head.""" - - def __init__( - self, - level: Union[int, str], - num_convs: int = 2, - num_filters: int = 256, - use_depthwise_convolution: bool = False, - kernel_size: int = 3, - prediction_kernel_size: int = 1, - upsample_factor: int = 1, - feature_fusion: Optional[str] = None, - decoder_min_level: Optional[int] = None, - decoder_max_level: Optional[int] = None, - low_level: Union[int, List[int]] = 2, - low_level_num_filters: Union[int, List[int]] = 48, - num_decoder_filters: int = 256, - activation: str = 'relu', - use_sync_bn: bool = False, - norm_momentum: float = 0.99, - norm_epsilon: float = 0.001, - kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - **kwargs): - """Initializes a instance center head. - - Args: - level: An `int` or `str`, level to use to build segmentation head. - num_convs: An `int` number of stacked convolution before the last - prediction layer. - num_filters: An `int` number to specify the number of filters used. - Default is 256. - use_depthwise_convolution: A bool to specify if use depthwise separable - convolutions. - prediction_kernel_size: An `int` number to specify the kernel size of the - prediction layer. - upsample_factor: An `int` number to specify the upsampling factor to - generate finer mask. Default 1 means no upsampling is applied. - feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`, - `panoptic_fpn_fusion`, or None. If `deeplabv3plus`, features from - decoder_features[level] will be fused with low level feature maps from - backbone. If `pyramid_fusion`, multiscale features will be resized and - fused at the target level. - decoder_min_level: An `int` of minimum level from decoder to use in - feature fusion. It is only used when feature_fusion is set to - `panoptic_fpn_fusion`. 
- decoder_max_level: An `int` of maximum level from decoder to use in - feature fusion. It is only used when feature_fusion is set to - `panoptic_fpn_fusion`. - low_level: An `int` of backbone level to be used for feature fusion. It is - used when feature_fusion is set to `deeplabv3plus`. - low_level_num_filters: An `int` of reduced number of filters for the low - level features before fusing it with higher level features. It is only - used when feature_fusion is set to `deeplabv3plus`. - num_decoder_filters: An `int` of number of filters in the decoder outputs. - It is only used when feature_fusion is set to `panoptic_fpn_fusion`. - activation: A `str` that indicates which activation is used, e.g. 'relu', - 'swish', etc. - use_sync_bn: A `bool` that indicates whether to use synchronized batch - normalization across different replicas. - norm_momentum: A `float` of normalization momentum for the moving average. - norm_epsilon: A `float` added to variance to avoid dividing by zero. - kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for - Conv2D. Default is None. - bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. - **kwargs: Additional keyword arguments to be passed. - """ - super(InstanceCenterHead, self).__init__( - num_classes=2, - level=level, - num_convs=num_convs, - num_filters=num_filters, - use_depthwise_convolution=use_depthwise_convolution, - kernel_size=kernel_size, - prediction_kernel_size=prediction_kernel_size, - upsample_factor=upsample_factor, - feature_fusion=feature_fusion, - decoder_min_level=decoder_min_level, - decoder_max_level=decoder_max_level, - low_level=low_level, - low_level_num_filters=low_level_num_filters, - num_decoder_filters=num_decoder_filters, - activation=activation, - use_sync_bn=use_sync_bn, - norm_momentum=norm_momentum, - norm_epsilon=norm_epsilon, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - **kwargs) - - - def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): - self._instance_center_prediction_conv = tf.keras.layers.Conv2D( - name='instance_center_prediction', - filters=1, - kernel_size=self._config_dict['prediction_kernel_size'], - padding='same', - bias_initializer=tf.zeros_initializer(), - kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), - kernel_regularizer=self._config_dict['kernel_regularizer'], - bias_regularizer=self._config_dict['bias_regularizer']) - super(InstanceCenterHead, self).build(input_shape) - - - def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], - Union[tf.Tensor, Mapping[str, tf.Tensor]]]): - """Forward pass of the segmentation head. - - It supports both a tuple of 2 tensors or 2 dictionaries. The first is - backbone endpoints, and the second is decoder endpoints. When inputs are - tensors, they are from a single level of feature maps. When inputs are - dictionaries, they contain multiple levels of feature maps, where the key - is the index of feature map. - - Args: - inputs: A tuple of 2 feature map tensors of shape - [batch, height_l, width_l, channels] or 2 dictionaries of tensors: - - key: A `str` of the level of the multilevel features. - - values: A `tf.Tensor` of the feature map tensors, whose shape is - [batch, height_l, width_l, channels]. - Returns: - segmentation prediction mask: A `tf.Tensor` of the segmentation mask - scores predicted from input features. 
- """ - x = self._fuse_features(inputs) - - for conv, norm in zip(self._convs, self._norms): - x = conv(x) - x = norm(x) - x = self._activation(x) - if self._config_dict['upsample_factor'] > 1: - x = spatial_transform_ops.nearest_upsampling( - x, scale=self._config_dict['upsample_factor']) - - instance_center_prediction = self._instance_center_prediction_conv(x) - instance_center_regression = self._prediction_conv(x) - outputs = { - 'instance_center_prediction': instance_center_prediction, - 'instance_center_regression': instance_center_regression - } - return outputs - - def get_config(self): - config_dict = super(InstanceCenterHead, self).get_config().copy() - config_dict.pop('num_classes') - return config_dict From 01685eed34e06e72aa84652c30f387a6c79594df Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 17:53:56 +0530 Subject: [PATCH 15/26] Revert "added tests for `InstanceCenterHead`" This reverts commit a6a14de72b50e4cef438f73fbe1b0cbdd237ccae. --- .../heads/instance_center_head_test.py | 87 ------------------- 1 file changed, 87 deletions(-) delete mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py deleted file mode 100644 index ab4fe281a22..00000000000 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/instance_center_head_test.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-# Lint as: python3
-"""Tests for segmentation_heads.py."""
-
-# Import libraries
-from absl.testing import parameterized
-import numpy as np
-import tensorflow as tf
-
-from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head
-
-
-class InstanceCenterHeadTest(parameterized.TestCase, tf.test.TestCase):
-
-  @parameterized.parameters(
-      (2, 'pyramid_fusion', None, None, 2, 48),
-      (3, 'pyramid_fusion', None, None, 2, 48),
-      (2, 'panoptic_fpn_fusion', 2, 5, 2, 48),
-      (2, 'panoptic_fpn_fusion', 2, 6, 2, 48),
-      (3, 'panoptic_fpn_fusion', 3, 5, 2, 48),
-      (3, 'panoptic_fpn_fusion', 3, 6, 2, 48),
-      (4, 'panoptic_deeplab_fusion', None, None, (4, 3), (64, 32)),
-      (4, 'panoptic_deeplab_fusion', None, None, (3, 2), (64, 32)))
-  def test_forward(self, level, feature_fusion,
-                   decoder_min_level, decoder_max_level,
-                   low_level, low_level_num_filters):
-    backbone_features = {
-        '3': np.random.rand(2, 128, 128, 16),
-        '4': np.random.rand(2, 64, 64, 16),
-        '5': np.random.rand(2, 32, 32, 16),
-    }
-    decoder_features = {
-        '3': np.random.rand(2, 128, 128, 64),
-        '4': np.random.rand(2, 64, 64, 64),
-        '5': np.random.rand(2, 32, 32, 64),
-        '6': np.random.rand(2, 16, 16, 64),
-    }
-
-    if 'panoptic' in feature_fusion:
-      backbone_features['2'] = np.random.rand(2, 256, 256, 16)
-      decoder_features['2'] = np.random.rand(2, 256, 256, 64)
-
-    head = instance_center_head.InstanceCenterHead(
-        level=level,
-        low_level=low_level,
-        low_level_num_filters=low_level_num_filters,
-        feature_fusion=feature_fusion,
-        decoder_min_level=decoder_min_level,
-        decoder_max_level=decoder_max_level,
-        num_decoder_filters=64)
-
-    outputs = head((backbone_features, decoder_features))
-
-    if str(level) in decoder_features:
-      if feature_fusion == 'panoptic_deeplab_fusion':
-        h, w = decoder_features[str(low_level[-1])].shape[1:3]
-      else:
-        h, w = decoder_features[str(level)].shape[1:3]
-      self.assertAllEqual(
-          outputs['instance_center_prediction'].numpy().shape,
-          [2, h, w, 1])
-      self.assertAllEqual(
-          outputs['instance_center_regression'].numpy().shape,
-          [2, h, w, 2])
-
-
-  def test_serialize_deserialize(self):
-    head = instance_center_head.InstanceCenterHead(level=3)
-    config = head.get_config()
-    new_head = instance_center_head.InstanceCenterHead.from_config(config)
-    self.assertAllEqual(head.get_config(), new_head.get_config())
-
-if __name__ == '__main__':
-  tf.test.main()

From ecbc5cba4e5c6e0f49ae5d457524372456c8d146 Mon Sep 17 00:00:00 2001
From: srihari-humbarwadi
Date: Sat, 22 Jan 2022 18:26:26 +0530
Subject: [PATCH 16/26] implemented `PanopticDeeplabHead`

---
 .../modeling/heads/panoptic_deeplab_heads.py | 418 ++++++++++++++++++
 1 file changed, 418 insertions(+)
 create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py

diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py
new file mode 100644
index 00000000000..8c8cd9e6647
--- /dev/null
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py
@@ -0,0 +1,418 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Contains definitions for Panoptic Deeplab heads."""
+
+from typing import List, Union, Optional, Mapping, Tuple
+import tensorflow as tf
+
+from official.modeling import tf_utils
+from official.vision.beta.modeling.layers import nn_layers
+from official.vision.beta.ops import spatial_transform_ops
+
+
+class PanopticDeeplabHead(tf.keras.layers.Layer):
+  """Creates a panoptic deeplab head."""
+
+  def __init__(
+      self,
+      level: Union[int, str],
+      num_convs: int = 2,
+      num_filters: int = 256,
+      kernel_size: int = 3,
+      use_depthwise_convolution: bool = False,
+      upsample_factor: int = 1,
+      low_level: Union[List[int], Tuple[int]] = (3, 2),
+      low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32),
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes a panoptic deeplab head.
+
+    Args:
+      level: An `int` or `str`, level to use to build head.
+      num_convs: An `int` number of stacked convolutions before the last
+        prediction layer.
+      num_filters: An `int` number to specify the number of filters used.
+        Default is 256.
+      kernel_size: An `int` number to specify the kernel size of the
+        stacked convolutions before the last prediction layer.
+      use_depthwise_convolution: A `bool` to specify whether to use depthwise
+        separable convolutions.
+      upsample_factor: An `int` number to specify the upsampling factor to
+        generate finer mask. Default 1 means no upsampling is applied.
+      low_level: A list of `int` of backbone levels to be used for feature
+        fusion.
+      low_level_num_filters: A list of `int` of reduced numbers of filters for
+        the low level features before fusing them with higher level features.
+      activation: A `str` that indicates which activation is used, e.g. 'relu',
+        'swish', etc.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+      norm_momentum: A `float` of normalization momentum for the moving average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default is None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
+      **kwargs: Additional keyword arguments to be passed.
+ """ + super(PanopticDeeplabHead, self).__init__(**kwargs) + + self._config_dict = { + 'level': level, + 'num_convs': num_convs, + 'num_filters': num_filters, + 'kernel_size': kernel_size, + 'use_depthwise_convolution': use_depthwise_convolution, + 'upsample_factor': upsample_factor, + 'low_level': low_level, + 'low_level_num_filters': low_level_num_filters, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer + } + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): + """Creates the variables of the head.""" + kernel_size = self._config_dict['kernel_size'] + use_depthwise_convolution = self._config_dict['use_depthwise_convolution'] + random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01) + conv_op = tf.keras.layers.Conv2D + conv_kwargs = { + 'kernel_size': kernel_size if not use_depthwise_convolution else 1, + 'padding': 'same', + 'use_bias': False, + 'kernel_initializer': random_initializer, + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + } + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._bn_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + self._panoptic_deeplab_fusion = nn_layers.PanopticDeepLabFusion( + level=self._config_dict['level'], + low_level=self._config_dict['low_level'], + num_projection_filters=self._config_dict['low_level_num_filters'], + num_output_filters=self._config_dict['num_filters'], + activation=self._config_dict['activation'], + use_sync_bn=self._config_dict['use_sync_bn'], + norm_momentum=self._config_dict['norm_momentum'], + norm_epsilon=self._config_dict['norm_epsilon'], + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + + # Stacked convolutions layers. + self._convs = [] + self._norms = [] + for i in range(self._config_dict['num_convs']): + if use_depthwise_convolution: + self._convs.append( + tf.keras.layers.DepthwiseConv2D( + name='panoptic_deeplab_head_depthwise_conv_{}'.format(i), + kernel_size=3, + padding='same', + use_bias=False, + depthwise_initializer=random_initializer, + depthwise_regularizer=self._config_dict['kernel_regularizer'], + depth_multiplier=1)) + norm_name = 'panoptic_deeplab_head_depthwise_norm_{}'.format(i) + self._norms.append(bn_op(name=norm_name, **bn_kwargs)) + conv_name = 'panoptic_deeplab_head_conv_{}'.format(i) + self._convs.append( + conv_op( + name=conv_name, + filters=self._config_dict['num_filters'], + **conv_kwargs)) + norm_name = 'panoptic_deeplab_head_norm_{}'.format(i) + self._norms.append(bn_op(name=norm_name, **bn_kwargs)) + + super().build(input_shape) + + def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], + Union[tf.Tensor, Mapping[str, tf.Tensor]]], + training=None): + """Forward pass of the head. + + It supports both a tuple of 2 tensors or 2 dictionaries. The first is + backbone endpoints, and the second is decoder endpoints. When inputs are + tensors, they are from a single level of feature maps. 
When inputs are
+    dictionaries, they contain multiple levels of feature maps, where the key
+    is the index of feature map.
+
+    Args:
+      inputs: A tuple of 2 feature map tensors of shape
+        [batch, height_l, width_l, channels] or 2 dictionaries of tensors:
+        - key: A `str` of the level of the multilevel features.
+        - values: A `tf.Tensor` of the feature map tensors, whose shape is
+          [batch, height_l, width_l, channels].
+    Returns:
+      A `tf.Tensor` of the fused backbone and decoder features.
+    """
+    if training is None:
+      training = tf.keras.backend.learning_phase()
+
+    x = self._panoptic_deeplab_fusion(inputs, training=training)
+
+    for conv, norm in zip(self._convs, self._norms):
+      x = conv(x)
+      x = norm(x, training=training)
+      x = self._activation(x)
+
+    if self._config_dict['upsample_factor'] > 1:
+      x = spatial_transform_ops.nearest_upsampling(
+          x, scale=self._config_dict['upsample_factor'])
+
+    return x
+
+  def get_config(self):
+    base_config = super().get_config()
+    return dict(list(base_config.items()) + list(self._config_dict.items()))
+
+  @classmethod
+  def from_config(cls, config):
+    return cls(**config)
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class SemanticHead(PanopticDeeplabHead):
+  """Creates a semantic head."""
+
+  def __init__(
+      self,
+      num_classes: int,
+      level: Union[int, str],
+      num_convs: int = 2,
+      num_filters: int = 256,
+      kernel_size: int = 3,
+      prediction_kernel_size: int = 3,
+      use_depthwise_convolution: bool = False,
+      upsample_factor: int = 1,
+      low_level: Union[List[int], Tuple[int]] = (3, 2),
+      low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32),
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes a semantic head.
+
+    Args:
+      num_classes: An `int` number of mask classification categories. The number
+        of classes does not include background class.
+      level: An `int` or `str`, level to use to build head.
+      num_convs: An `int` number of stacked convolutions before the last
+        prediction layer.
+      num_filters: An `int` number to specify the number of filters used.
+        Default is 256.
+      kernel_size: An `int` number to specify the kernel size of the
+        stacked convolutions before the last prediction layer.
+      prediction_kernel_size: An `int` number to specify the kernel size of the
+        prediction layer.
+      use_depthwise_convolution: A `bool` to specify whether to use depthwise
+        separable convolutions.
+      upsample_factor: An `int` number to specify the upsampling factor to
+        generate finer mask. Default 1 means no upsampling is applied.
+      low_level: A list of `int` of backbone levels to be used for feature
+        fusion.
+      low_level_num_filters: A list of `int` of reduced numbers of filters for
+        the low level features before fusing them with higher level features.
+      activation: A `str` that indicates which activation is used, e.g. 'relu',
+        'swish', etc.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+      norm_momentum: A `float` of normalization momentum for the moving average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default is None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
+      **kwargs: Additional keyword arguments to be passed.
+    """
+    super(SemanticHead, self).__init__(
+        level=level,
+        num_convs=num_convs,
+        num_filters=num_filters,
+        use_depthwise_convolution=use_depthwise_convolution,
+        kernel_size=kernel_size,
+        upsample_factor=upsample_factor,
+        low_level=low_level,
+        low_level_num_filters=low_level_num_filters,
+        activation=activation,
+        use_sync_bn=use_sync_bn,
+        norm_momentum=norm_momentum,
+        norm_epsilon=norm_epsilon,
+        kernel_regularizer=kernel_regularizer,
+        bias_regularizer=bias_regularizer,
+        **kwargs)
+    self._config_dict.update({
+        'num_classes': num_classes,
+        'prediction_kernel_size': prediction_kernel_size})
+
+  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
+    """Creates the variables of the semantic head."""
+    super(SemanticHead, self).build(input_shape)
+    self._classifier = tf.keras.layers.Conv2D(
+        name='semantic_output',
+        filters=self._config_dict['num_classes'],
+        kernel_size=self._config_dict['prediction_kernel_size'],
+        padding='same',
+        bias_initializer=tf.zeros_initializer(),
+        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
+        kernel_regularizer=self._config_dict['kernel_regularizer'],
+        bias_regularizer=self._config_dict['bias_regularizer'])
+
+  def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
+                               Union[tf.Tensor, Mapping[str, tf.Tensor]]],
+           training=None):
+    """Forward pass of the head."""
+
+    if training is None:
+      training = tf.keras.backend.learning_phase()
+    x = super(SemanticHead, self).call(inputs, training=training)
+    outputs = self._classifier(x)
+    return outputs
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class InstanceHead(PanopticDeeplabHead):
+  """Creates an instance head."""
+
+  def __init__(
+      self,
+      level: Union[int, str],
+      num_convs: int = 2,
+      num_filters: int = 256,
+      kernel_size: int = 3,
+      prediction_kernel_size: int = 3,
+      use_depthwise_convolution: bool = False,
+      upsample_factor: int = 1,
+      low_level: Union[List[int], Tuple[int]] = (3, 2),
+      low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32),
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes an instance head.
+
+    Args:
+      level: An `int` or `str`, level to use to build head.
+      num_convs: An `int` number of stacked convolutions before the last
+        prediction layer.
+      num_filters: An `int` number to specify the number of filters used.
+        Default is 256.
+      kernel_size: An `int` number to specify the kernel size of the
+        stacked convolutions before the last prediction layer.
+      prediction_kernel_size: An `int` number to specify the kernel size of the
+        prediction layer.
+      use_depthwise_convolution: A `bool` to specify whether to use depthwise
+        separable convolutions.
+      upsample_factor: An `int` number to specify the upsampling factor to
+        generate finer mask. Default 1 means no upsampling is applied.
+      low_level: A list of `int` of backbone levels to be used for feature
+        fusion.
+      low_level_num_filters: A list of `int` of reduced numbers of filters for
+        the low level features before fusing them with higher level features.
+ activation: A `str` that indicates which activation is used, e.g. 'relu', + 'swish', etc. + use_sync_bn: A `bool` that indicates whether to use synchronized batch + normalization across different replicas. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + **kwargs: Additional keyword arguments to be passed. + """ + super(InstanceHead, self).__init__( + level=level, + num_convs=num_convs, + num_filters=num_filters, + use_depthwise_convolution=use_depthwise_convolution, + kernel_size=kernel_size, + upsample_factor=upsample_factor, + low_level=low_level, + low_level_num_filters=low_level_num_filters, + activation=activation, + use_sync_bn=use_sync_bn, + norm_momentum=norm_momentum, + norm_epsilon=norm_epsilon, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + **kwargs) + self._config_dict.update({ + 'prediction_kernel_size': prediction_kernel_size}) + + + def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): + """Creates the variables of the instance head.""" + super(InstanceHead, self).build(input_shape) + self._instance_center_prediction_conv = tf.keras.layers.Conv2D( + name='instance_center_prediction', + filters=1, + kernel_size=self._config_dict['prediction_kernel_size'], + padding='same', + bias_initializer=tf.zeros_initializer(), + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + + self._instance_center_regression_conv = tf.keras.layers.Conv2D( + name='instance_center_regression', + filters=2, + kernel_size=self._config_dict['prediction_kernel_size'], + padding='same', + bias_initializer=tf.zeros_initializer(), + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + + def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], + Union[tf.Tensor, Mapping[str, tf.Tensor]]], + training=None): + """Forward pass of the head.""" + + if training is None: + training = tf.keras.backend.learning_phase() + + x = super(InstanceHead, self).call(inputs, training=training) + instance_center_prediction = self._instance_center_prediction_conv(x) + instance_center_regression = self._instance_center_regression_conv(x) + outputs = { + 'instance_center_prediction': instance_center_prediction, + 'instance_center_regression': instance_center_regression + } + return outputs From abee356d6af6c55b88f883d438cc26ca9377b0fe Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 18:26:52 +0530 Subject: [PATCH 17/26] added tests for `PanopticDeeplabHead` --- .../heads/panoptic_deeplab_heads_test.py | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads_test.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads_test.py new file mode 100644 index 00000000000..dc38bd2ce83 --- /dev/null +++ 
b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads_test.py @@ -0,0 +1,100 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for panoptic_deeplab_heads.py.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads + + +class PanopticDeeplabHeadsTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (2, (2,), (48,)), + (3, (2,), (48,)), + (2, (2,), (48,)), + (2, (2,), (48,)), + (3, (2,), (48,)), + (3, (2,), (48,)), + (4, (4, 3), (64, 32)), + (4, (3, 2), (64, 32))) + def test_forward(self, level, low_level, low_level_num_filters): + backbone_features = { + '3': np.random.rand(2, 128, 128, 16), + '4': np.random.rand(2, 64, 64, 16), + '5': np.random.rand(2, 32, 32, 16), + } + decoder_features = { + '3': np.random.rand(2, 128, 128, 64), + '4': np.random.rand(2, 64, 64, 64), + '5': np.random.rand(2, 32, 32, 64), + '6': np.random.rand(2, 16, 16, 64), + } + + backbone_features['2'] = np.random.rand(2, 256, 256, 16) + decoder_features['2'] = np.random.rand(2, 256, 256, 64) + + num_classes = 10 + semantic_head = panoptic_deeplab_heads.SemanticHead( + num_classes=num_classes, + level=level, + low_level=low_level, + low_level_num_filters=low_level_num_filters) + + instance_head = panoptic_deeplab_heads.InstanceHead( + level=level, + low_level=low_level, + low_level_num_filters=low_level_num_filters) + + semantic_outputs = semantic_head((backbone_features, decoder_features)) + instance_outputs = instance_head((backbone_features, decoder_features)) + + if str(level) in decoder_features: + h, w = decoder_features[str(low_level[-1])].shape[1:3] + self.assertAllEqual( + semantic_outputs.numpy().shape, + [2, h, w, num_classes]) + self.assertAllEqual( + instance_outputs['instance_center_prediction'].numpy().shape, + [2, h, w, 1]) + self.assertAllEqual( + instance_outputs['instance_center_regression'].numpy().shape, + [2, h, w, 2]) + + + def test_serialize_deserialize(self): + semantic_head = panoptic_deeplab_heads.SemanticHead(num_classes=2, level=3) + instance_head = panoptic_deeplab_heads.InstanceHead(level=3) + + semantic_head_config = semantic_head.get_config() + instance_head_config = instance_head.get_config() + + new_semantic_head = panoptic_deeplab_heads.SemanticHead.from_config( + semantic_head_config) + new_instance_head = panoptic_deeplab_heads.InstanceHead.from_config( + instance_head_config) + + self.assertAllEqual(semantic_head.get_config(), + new_semantic_head.get_config()) + self.assertAllEqual(instance_head.get_config(), + new_instance_head.get_config()) + + +if __name__ == '__main__': + tf.test.main() From 31a8e4664ba8ffca8d14b051f8c3a7ec3b5b91d1 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 18:29:25 +0530 Subject: [PATCH 18/26] use `SemanticHead` and 
`InstanceHead` from panoptic_deeplab_heads --- .../panoptic_maskrcnn/modeling/factory.py | 29 +++++++++---------- .../modeling/factory_test.py | 19 ++++-------- .../modeling/panoptic_deeplab_model_test.py | 23 ++++++--------- 3 files changed, 28 insertions(+), 43 deletions(-) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py index 87012bcc64d..4c2a30f0678 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py @@ -22,8 +22,8 @@ from official.vision.beta.modeling.heads import segmentation_heads from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg -from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_maskrcnn_model from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_segmentation_generator @@ -85,7 +85,6 @@ def build_panoptic_maskrcnn( num_classes=segmentation_config.num_classes, level=segmentation_head_config.level, num_convs=segmentation_head_config.num_convs, - kernel_size=segmentation_head_config.kernel_size, prediction_kernel_size=segmentation_head_config.prediction_kernel_size, num_filters=segmentation_head_config.num_filters, upsample_factor=segmentation_head_config.upsample_factor, @@ -185,9 +184,9 @@ def build_panoptic_deeplab( l2_regularizer=l2_regularizer) semantic_head_config = model_config.semantic_head - instnace_head_config = model_config.instance_head + instance_head_config = model_config.instance_head - semantic_head = segmentation_heads.SegmentationHead( + semantic_head = panoptic_deeplab_heads.SemanticHead( num_classes=model_config.num_classes, level=semantic_head_config.level, num_convs=semantic_head_config.num_convs, @@ -196,7 +195,6 @@ def build_panoptic_deeplab( num_filters=semantic_head_config.num_filters, use_depthwise_convolution=semantic_head_config.use_depthwise_convolution, upsample_factor=semantic_head_config.upsample_factor, - feature_fusion=semantic_head_config.feature_fusion, low_level=semantic_head_config.low_level, low_level_num_filters=semantic_head_config.low_level_num_filters, activation=norm_activation_config.activation, @@ -205,17 +203,16 @@ def build_panoptic_deeplab( norm_epsilon=norm_activation_config.norm_epsilon, kernel_regularizer=l2_regularizer) - instance_head = instance_center_head.InstanceCenterHead( - level=instnace_head_config.level, - num_convs=instnace_head_config.num_convs, - kernel_size=instnace_head_config.kernel_size, - prediction_kernel_size=instnace_head_config.prediction_kernel_size, - num_filters=instnace_head_config.num_filters, - use_depthwise_convolution=instnace_head_config.use_depthwise_convolution, - upsample_factor=instnace_head_config.upsample_factor, - feature_fusion=instnace_head_config.feature_fusion, - low_level=instnace_head_config.low_level, - low_level_num_filters=instnace_head_config.low_level_num_filters, + instance_head = panoptic_deeplab_heads.InstanceHead( + level=instance_head_config.level, + 
num_convs=instance_head_config.num_convs, + kernel_size=instance_head_config.kernel_size, + prediction_kernel_size=instance_head_config.prediction_kernel_size, + num_filters=instance_head_config.num_filters, + use_depthwise_convolution=instance_head_config.use_depthwise_convolution, + upsample_factor=instance_head_config.upsample_factor, + low_level=instance_head_config.low_level, + low_level_num_filters=instance_head_config.low_level_num_filters, activation=norm_activation_config.activation, use_sync_bn=norm_activation_config.use_sync_bn, norm_momentum=norm_activation_config.norm_momentum, diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py index ed31fe8487c..e51659797d3 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py @@ -15,7 +15,6 @@ """Tests for factory.py.""" from absl.testing import parameterized -import numpy as np import tensorflow as tf from tensorflow.python.distribute import combinations @@ -74,13 +73,9 @@ class PanopticDeeplabBuilderTest(parameterized.TestCase, tf.test.TestCase): decoder_type=['aspp', 'fpn'], level=[2, 3, 4], low_level=[(4, 3), (3, 2)], - shared_decoder=[True, False], - fusion_type=[ - 'pyramid_fusion', - 'panoptic_fpn_fusion', - 'panoptic_deeplab_fusion'])) + shared_decoder=[True, False])) def test_builder(self, input_size, backbone_type, level, - low_level, decoder_type, shared_decoder, fusion_type): + low_level, decoder_type, shared_decoder): num_classes = 10 input_specs = tf.keras.layers.InputSpec( shape=[None, input_size[0], input_size[1], 3]) @@ -90,20 +85,18 @@ def test_builder(self, input_size, backbone_type, level, input_size=input_size, backbone=backbones.Backbone(type=backbone_type), decoder=decoders.Decoder(type=decoder_type), - semantic_head=semantic_segmentation.SegmentationHead( + semantic_head=panoptic_deeplab_cfg.SemanticHead( level=level, num_convs=1, kernel_size=5, prediction_kernel_size=1, - low_level=low_level, - feature_fusion=fusion_type), - instance_head=panoptic_deeplab_cfg.InstanceCenterHead( + low_level=low_level), + instance_head=panoptic_deeplab_cfg.InstanceHead( level=level, num_convs=1, kernel_size=5, prediction_kernel_size=1, - low_level=low_level, - feature_fusion=fusion_type), + low_level=low_level), shared_decoder=shared_decoder) l2_regularizer = tf.keras.regularizers.l2(5e-5) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py index 95bc2e55729..ea16e8dc34e 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py @@ -22,8 +22,7 @@ from official.vision.beta.modeling import backbones from official.vision.beta.modeling.decoders import aspp -from official.vision.beta.modeling.heads import segmentation_heads -from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import instance_center_head +from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase): @@ -52,18 +51,16 @@ def test_panoptic_deeplab_network_creation( 
instance_decoder = aspp.ASPP( level=level, dilation_rates=[6, 12, 18]) - semantic_head = segmentation_heads.SegmentationHead( + semantic_head = panoptic_deeplab_heads.SemanticHead( num_classes, level=level, low_level=low_level, - low_level_num_filters=[64, 32], - feature_fusion='panoptic_deeplab_fusion') + low_level_num_filters=(64, 32)) - instance_head = instance_center_head.InstanceCenterHead( + instance_head = panoptic_deeplab_heads.InstanceHead( level=level, low_level=low_level, - low_level_num_filters=[64, 32], - feature_fusion='panoptic_deeplab_fusion') + low_level_num_filters=(64, 32)) model = panoptic_deeplab_model.PanopticDeeplabModel( backbone=backbone, @@ -114,18 +111,16 @@ def test_serialize_deserialize(self, level, low_level, shared_decoder): instance_decoder = aspp.ASPP( level=level, dilation_rates=[6, 12, 18]) - semantic_head = segmentation_heads.SegmentationHead( + semantic_head = panoptic_deeplab_heads.SemanticHead( num_classes, level=level, low_level=low_level, - low_level_num_filters=[64, 32], - feature_fusion='panoptic_deeplab_fusion') + low_level_num_filters=(64, 32)) - instance_head = instance_center_head.InstanceCenterHead( + instance_head = panoptic_deeplab_heads.InstanceHead( level=level, low_level=low_level, - low_level_num_filters=[64, 32], - feature_fusion='panoptic_deeplab_fusion') + low_level_num_filters=(64, 32)) model = panoptic_deeplab_model.PanopticDeeplabModel( backbone=backbone, From 2ad1ec15b20a70949e9c65996ec742f5bf6ce1e4 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 18:29:45 +0530 Subject: [PATCH 19/26] added configs for `SemanticHead` and `InstanceHead` --- .../configs/panoptic_deeplab.py | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py index d509ba669a7..06001b940f1 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Panoptic Mask R-CNN configuration definition.""" +"""Panoptic Deeplab configuration definition.""" import dataclasses -from typing import List, Optional, Union +from typing import List, Tuple, Union from official.modeling import hyperparams from official.vision.beta.configs import common from official.vision.beta.configs import backbones from official.vision.beta.configs import decoders -from official.vision.beta.configs import semantic_segmentation - -SEGMENTATION_HEAD = semantic_segmentation.SegmentationHead _COCO_INPUT_PATH_BASE = 'coco/tfrecords' _COCO_TRAIN_EXAMPLES = 118287 @@ -31,17 +28,27 @@ @dataclasses.dataclass -class InstanceCenterHead(semantic_segmentation.SegmentationHead): - """Instance Center head config.""" - # None, deeplabv3plus, panoptic_fpn_fusion, - # panoptic_deeplab_fusion or pyramid_fusion +class PanopticDeeplabHead(hyperparams.Config): + """Panoptic Deeplab head config.""" + level: int = 3 + num_convs: int = 2 + num_filters: int = 256 kernel_size: int = 5 - feature_fusion: Optional[str] = None - low_level: Union[int, List[int]] = dataclasses.field( - default_factory=lambda: [3, 2]) - low_level_num_filters: Union[int, List[int]] = dataclasses.field( - default_factory=lambda: [64, 32]) + use_depthwise_convolution: bool = False + upsample_factor: int = 1 + low_level: Union[List[int], Tuple[int]] = (3, 2) + low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32) + +@dataclasses.dataclass +class SemanticHead(PanopticDeeplabHead): + """Semantic head config.""" + prediction_kernel_size: int = 1 + +@dataclasses.dataclass +class InstanceHead(PanopticDeeplabHead): + """Instance head config.""" + prediction_kernel_size: int = 1 # pytype: disable=wrong-keyword-args @dataclasses.dataclass @@ -55,7 +62,6 @@ class PanopticDeeplab(hyperparams.Config): backbone: backbones.Backbone = backbones.Backbone( type='resnet', resnet=backbones.ResNet()) decoder: decoders.Decoder = decoders.Decoder(type='aspp') - semantic_head: SEGMENTATION_HEAD = SEGMENTATION_HEAD() - instance_head: InstanceCenterHead = InstanceCenterHead( - low_level=[3, 2]) + semantic_head: SemanticHead = SemanticHead() + instance_head: InstanceHead = InstanceHead() shared_decoder: bool = False From df60a195a0913ee126b8e0a6e731d976f33a5046 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Sat, 22 Jan 2022 18:35:40 +0530 Subject: [PATCH 20/26] revert misc changes --- official/vision/beta/modeling/heads/segmentation_heads.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/official/vision/beta/modeling/heads/segmentation_heads.py b/official/vision/beta/modeling/heads/segmentation_heads.py index e1d182f3850..c6bffac18fa 100644 --- a/official/vision/beta/modeling/heads/segmentation_heads.py +++ b/official/vision/beta/modeling/heads/segmentation_heads.py @@ -390,10 +390,12 @@ def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]], - key: A `str` of the level of the multilevel features. - values: A `tf.Tensor` of the feature map tensors, whose shape is [batch, height_l, width_l, channels]. + The first is backbone endpoints, and the second is decoder endpoints. Returns: segmentation prediction mask: A `tf.Tensor` of the segmentation mask scores predicted from input features. 
""" + backbone_output = inputs[0] decoder_output = inputs[1] if self._config_dict['feature_fusion'] == 'deeplabv3plus': From e0a91f29eb18d2d36db3ce81a1b9fb67d71fb43a Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 28 Jan 2022 19:30:32 +0530 Subject: [PATCH 21/26] fixed import error --- .../beta/projects/panoptic_maskrcnn/modeling/factory_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py index aa16ae174c5..840407aa654 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py @@ -15,6 +15,7 @@ """Tests for factory.py.""" from absl.testing import parameterized +import numpy as np import tensorflow as tf from tensorflow.python.distribute import combinations From 75f304ddd90e7b6adaecad20ad16817edbd32cc9 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 28 Jan 2022 19:31:24 +0530 Subject: [PATCH 22/26] move `PanopticDeepLabFusion` into project dir --- .../modeling/heads/panoptic_deeplab_heads.py | 4 +- .../modeling/layers/fusion_layers.py | 157 ++++++++++++++++++ 2 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/fusion_layers.py diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py index 8c8cd9e6647..fac33126c64 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py @@ -18,7 +18,7 @@ import tensorflow as tf from official.modeling import tf_utils -from official.vision.beta.modeling.layers import nn_layers +from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import fusion_layers from official.vision.beta.ops import spatial_transform_ops @@ -118,7 +118,7 @@ def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): 'epsilon': self._config_dict['norm_epsilon'], } - self._panoptic_deeplab_fusion = nn_layers.PanopticDeepLabFusion( + self._panoptic_deeplab_fusion = fusion_layers.PanopticDeepLabFusion( level=self._config_dict['level'], low_level=self._config_dict['low_level'], num_projection_filters=self._config_dict['low_level_num_filters'], diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/fusion_layers.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/fusion_layers.py new file mode 100644 index 00000000000..a534d2a4d7d --- /dev/null +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/fusion_layers.py @@ -0,0 +1,157 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Contains fusion layers for Panoptic DeepLab heads."""
+from typing import Any, List, Mapping, Optional
+
+import tensorflow as tf
+
+from official.modeling import tf_utils
+
+
+class PanopticDeepLabFusion(tf.keras.layers.Layer):
+  """Creates a Panoptic DeepLab feature fusion layer.
+
+  This implements the feature fusion introduced in the paper:
+  Cheng et al. Panoptic-DeepLab
+  (https://arxiv.org/pdf/1911.10194.pdf)
+
+  Calling this layer fuses low level backbone features into the decoder
+  output and returns a `float` `tf.Tensor` of shape [batch_size,
+  feature_height, feature_width, num_output_filters].
+  """
+
+  def __init__(
+      self,
+      level: int,
+      low_level: List[int] = [3, 2],
+      num_projection_filters: List[int] = [64, 32],
+      num_output_filters: int = 256,
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      interpolation: str = 'bilinear',
+      **kwargs):
+    """Initializes a Panoptic DeepLab feature fusion layer.
+
+    Args:
+      level: An `int` level at which the decoder was applied.
+      low_level: A list of `int` backbone levels to use in feature fusion,
+        ordered from higher to lower level.
+      num_projection_filters: A list of `int` numbers of filters in the 1x1
+        projection convolutions, one per low level.
+      num_output_filters: An `int` number of filters in the fusion
+        convolutions.
+      activation: A `str` name of the activation function.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+      norm_momentum: A `float` of normalization momentum for the moving average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default is None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
+      interpolation: A `str` interpolation method for upsampling. Defaults to
+        `bilinear`.
+      **kwargs: Additional keyword arguments to be passed.
+ """ + super(PanopticDeepLabFusion, self).__init__(**kwargs) + + self._config_dict = { + 'level': level, + 'low_level': low_level, + 'num_projection_filters': num_projection_filters, + 'num_output_filters': num_output_filters, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + 'interpolation': interpolation + } + if tf.keras.backend.image_data_format() == 'channels_last': + self._channel_axis = -1 + else: + self._channel_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape: List[tf.TensorShape]): + conv_op = tf.keras.layers.Conv2D + conv_kwargs = { + 'padding': 'same', + 'use_bias': False, + 'kernel_initializer': tf.initializers.VarianceScaling(), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + } + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._channel_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + self._projection_convs = [] + self._projection_norms = [] + self._fusion_convs = [] + self._fusion_norms = [] + for i in range(len(self._config_dict['low_level'])): + self._projection_convs.append( + conv_op( + filters=self._config_dict['num_projection_filters'][i], + kernel_size=1, + **conv_kwargs)) + self._fusion_convs.append( + conv_op( + filters=self._config_dict['num_output_filters'], + kernel_size=5, + **conv_kwargs)) + self._projection_norms.append(bn_op(**bn_kwargs)) + self._fusion_norms.append(bn_op(**bn_kwargs)) + + def call(self, inputs, training=None): + if training is None: + training = tf.keras.backend.learning_phase() + + backbone_output = inputs[0] + decoder_output = inputs[1][str(self._config_dict['level'])] + + x = decoder_output + for i in range(len(self._config_dict['low_level'])): + feature = backbone_output[str(self._config_dict['low_level'][i])] + feature = self._projection_convs[i](feature) + feature = self._projection_norms[i](feature, training=training) + feature = self._activation(feature) + + shape = tf.shape(feature) + x = tf.image.resize( + x, size=[shape[1], shape[2]], + method=self._config_dict['interpolation']) + x = tf.concat([x, feature], axis=self._channel_axis) + + x = self._fusion_convs[i](x) + x = self._fusion_norms[i](x, training=training) + x = self._activation(x) + return x + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) From 78657911f3ec138fe4b11042b35af24f56ec0143 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Fri, 28 Jan 2022 19:32:01 +0530 Subject: [PATCH 23/26] Revert "Added `PanopticDeepLabFusion` layer" This reverts commit 54fae0423e396e26fe4e3924e98f6952241bda4a. 
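After the move, callers import the fusion layer from the project directory. A usage sketch of the relocated layer (shapes are assumptions chosen to mirror the tests in this series; not part of the patch):

# Sketch only: inputs are a [backbone_features, decoder_features] pair,
# and the decoder output at `level` is progressively upsampled to and
# fused with each low level, from higher to lower.
import numpy as np

from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import fusion_layers

backbone_features = {
    '2': np.random.rand(2, 256, 256, 16),
    '3': np.random.rand(2, 128, 128, 16),
}
decoder_features = {'4': np.random.rand(2, 64, 64, 64)}

fusion = fusion_layers.PanopticDeepLabFusion(
    level=4, low_level=[3, 2], num_projection_filters=[64, 32],
    num_output_filters=256)
fused = fusion([backbone_features, decoder_features])
# The fused map lands at the spatial size of the last low level ('2')
# with num_output_filters channels: (2, 256, 256, 256).
assert fused.shape == (2, 256, 256, 256)
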
--- .../vision/beta/modeling/layers/nn_layers.py | 130 ------------------ 1 file changed, 130 deletions(-) diff --git a/official/vision/beta/modeling/layers/nn_layers.py b/official/vision/beta/modeling/layers/nn_layers.py index dd27c2878f4..51f1db6918e 100644 --- a/official/vision/beta/modeling/layers/nn_layers.py +++ b/official/vision/beta/modeling/layers/nn_layers.py @@ -298,136 +298,6 @@ def pyramid_feature_fusion(inputs, target_level): return tf.math.add_n(resampled_feats) -class PanopticDeepLabFusion(tf.keras.layers.Layer): - """Creates a Panoptic DeepLab feature Fusion layer. - - This implements the feature fusion introduced in the paper: - Cheng et al. Panoptic-DeepLab - (https://arxiv.org/pdf/1911.10194.pdf) - """ - def __init__( - self, - level: int, - low_level: List[int] = [3, 2], - num_projection_filters: List[int] = [64, 32], - num_output_filters: int = 256, - activation: str = 'relu', - use_sync_bn: bool = False, - norm_momentum: float = 0.99, - norm_epsilon: float = 0.001, - kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, - interpolation: str = 'bilinear', - **kwargs): - - """Initializes panoptic FPN feature fusion layer. - - Args: - level: An `int` level at which the decoder was appled at. - low_level: A list of `int` of minimum level to use in feature fusion. - num_filters: An `int` number of filters in conv2d layers. - activation: A `str` name of the activation function. - use_sync_bn: A `bool` that indicates whether to use synchronized batch - normalization across different replicas. - norm_momentum: A `float` of normalization momentum for the moving average. - norm_epsilon: A `float` added to variance to avoid dividing by zero. - kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for - Conv2D. Default is None. - bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. - interpolation: A `str` interpolation method for upsampling. Defaults to - `bilinear`. - **kwargs: Additional keyword arguments to be passed. - Returns: - A `float` `tf.Tensor` of shape [batch_size, feature_height, feature_width, - feature_channel]. 
- """ - super(PanopticDeepLabFusion, self).__init__(**kwargs) - - self._config_dict = { - 'level': level, - 'low_level': low_level, - 'num_projection_filters': num_projection_filters, - 'num_output_filters': num_output_filters, - 'activation': activation, - 'use_sync_bn': use_sync_bn, - 'norm_momentum': norm_momentum, - 'norm_epsilon': norm_epsilon, - 'kernel_regularizer': kernel_regularizer, - 'bias_regularizer': bias_regularizer, - 'interpolation': interpolation - } - if tf.keras.backend.image_data_format() == 'channels_last': - self._channel_axis = -1 - else: - self._channel_axis = 1 - self._activation = tf_utils.get_activation(activation) - - def build(self, input_shape: List[tf.TensorShape]): - conv_op = tf.keras.layers.Conv2D - conv_kwargs = { - 'padding': 'same', - 'use_bias': False, - 'kernel_initializer': tf.initializers.VarianceScaling(), - 'kernel_regularizer': self._config_dict['kernel_regularizer'], - } - bn_op = (tf.keras.layers.experimental.SyncBatchNormalization - if self._config_dict['use_sync_bn'] - else tf.keras.layers.BatchNormalization) - bn_kwargs = { - 'axis': self._channel_axis, - 'momentum': self._config_dict['norm_momentum'], - 'epsilon': self._config_dict['norm_epsilon'], - } - - self._projection_convs = [] - self._projection_norms = [] - self._fusion_convs = [] - self._fusion_norms = [] - for i in range(len(self._config_dict['low_level'])): - self._projection_convs.append( - conv_op( - filters=self._config_dict['num_projection_filters'][i], - kernel_size=1, - **conv_kwargs)) - self._fusion_convs.append( - conv_op( - filters=self._config_dict['num_output_filters'], - kernel_size=5, - **conv_kwargs)) - self._projection_norms.append(bn_op(**bn_kwargs)) - self._fusion_norms.append(bn_op(**bn_kwargs)) - - def call(self, inputs, training=None): - if training is None: - training = tf.keras.backend.learning_phase() - - backbone_output = inputs[0] - decoder_output = inputs[1][str(self._config_dict['level'])] - - x = decoder_output - for i in range(len(self._config_dict['low_level'])): - feature = backbone_output[str(self._config_dict['low_level'][i])] - feature = self._projection_convs[i](feature) - feature = self._projection_norms[i](feature, training=training) - feature = self._activation(feature) - - shape = tf.shape(feature) - x = tf.image.resize( - x, size=[shape[1], shape[2]], - method=self._config_dict['interpolation']) - x = tf.concat([x, feature], axis=self._channel_axis) - - x = self._fusion_convs[i](x) - x = self._fusion_norms[i](x, training=training) - x = self._activation(x) - return x - - def get_config(self) -> Mapping[str, Any]: - return self._config_dict - - @classmethod - def from_config(cls, config, custom_objects=None): - return cls(**config) class PanopticFPNFusion(tf.keras.Model): """Creates a Panoptic FPN feature Fusion layer. 
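A note on the label encoding used downstream: the post-processor imported in
the next patch merges semantic and instance predictions into a single panoptic
ID map, where panoptic_id = semantic_id * label_divisor + instance_id and
instance_id == 0 is reserved for `stuff` and crowd regions. A minimal sketch
of that convention; the encode/decode helpers below are illustrative only and
are not part of these patches:

import tensorflow as tf

label_divisor = 256

def encode_panoptic(semantic_id, instance_id):
  # Packs (semantic class, instance index) into a single panoptic ID.
  return semantic_id * label_divisor + instance_id

def decode_panoptic(panoptic_id):
  # Recovers the category mask and the instance mask from panoptic IDs.
  return panoptic_id // label_divisor, panoptic_id % label_divisor

panoptic = tf.constant([[encode_panoptic(2, 1), encode_panoptic(2, 2)],
                        [encode_panoptic(0, 0), encode_panoptic(3, 1)]])
category_mask, instance_mask = decode_panoptic(panoptic)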
From 2d739bb8ed89baff88a42bd5e42420b2968fdab7 Mon Sep 17 00:00:00 2001
From: srihari-humbarwadi
Date: Tue, 15 Feb 2022 21:22:45 +0530
Subject: [PATCH 24/26] import PostProcessor code from deeplab2

---
 .../modeling/layers/panoptic_deeplab_merge.py | 468 ++++++++++++++++++
 .../layers/panoptic_deeplab_merge_test.py     | 142 ++++++
 2 files changed, 610 insertions(+)
 create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge.py
 create mode 100644 official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge_test.py

diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge.py
new file mode 100644
index 00000000000..217353fd88b
--- /dev/null
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge.py
@@ -0,0 +1,468 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""This file contains functions to post-process Panoptic-DeepLab results.
+
+Note that the postprocessing class and the supporting functions are branched
+from https://github.com/google-research/deeplab2/blob/main/model/post_processor/panoptic_deeplab.py
+"""
+
+import functools
+from typing import List, Tuple, Dict, Text
+
+import tensorflow as tf
+
+
+def _add_zero_padding(input_tensor: tf.Tensor, kernel_size: int,
+                      rank: int) -> tf.Tensor:
+  """Adds zero-padding to the input_tensor."""
+  pad_total = kernel_size - 1
+  pad_begin = pad_total // 2
+  pad_end = pad_total - pad_begin
+  if rank == 3:
+    return tf.pad(
+        input_tensor,
+        paddings=[[pad_begin, pad_end], [pad_begin, pad_end], [0, 0]])
+  else:
+    return tf.pad(
+        input_tensor,
+        paddings=[[0, 0], [pad_begin, pad_end], [pad_begin, pad_end], [0, 0]])
+
+
+def _get_semantic_predictions(semantic_logits: tf.Tensor) -> tf.Tensor:
+  """Computes the semantic classes from the predictions.
+
+  Args:
+    semantic_logits: A tf.Tensor of shape [batch, height, width, classes].
+
+  Returns:
+    A tf.Tensor containing the semantic class prediction of shape
+    [batch, height, width].
+  """
+  return tf.argmax(semantic_logits, axis=-1, output_type=tf.int32)
+
+
+def _get_instance_centers_from_heatmap(
+    center_heatmap: tf.Tensor,
+    center_threshold: float,
+    nms_kernel_size: int,
+    keep_k_centers: int) -> Tuple[tf.Tensor, tf.Tensor]:
+  """Computes a list of instance centers.
+
+  Args:
+    center_heatmap: A tf.Tensor of shape [height, width, 1].
+    center_threshold: A float setting the threshold for the center heatmap.
+    nms_kernel_size: An integer specifying the nms kernel size.
+    keep_k_centers: An integer specifying the number of centers to keep (K).
+      Non-positive values will keep all centers.
+
+  Returns:
+    A tuple of:
+    - tf.Tensor of shape [N, 2] containing N center coordinates (after
+      non-maximum suppression) in (y, x) order.
+    - tf.Tensor of shape [height, width] containing the center heatmap after
+      non-maximum suppression.
+  """
+  # Threshold center map.
+  center_heatmap = tf.where(
+      tf.greater(center_heatmap, center_threshold), center_heatmap, 0.0)
+
+  # Non-maximum suppression.
+  padded_map = _add_zero_padding(center_heatmap, nms_kernel_size, rank=3)
+  pooled_center_heatmap = tf.keras.backend.pool2d(
+      tf.expand_dims(padded_map, 0),
+      pool_size=(nms_kernel_size, nms_kernel_size),
+      strides=(1, 1),
+      padding='valid',
+      pool_mode='max')
+  center_heatmap = tf.where(
+      tf.equal(pooled_center_heatmap, center_heatmap), center_heatmap, 0.0)
+  center_heatmap = tf.squeeze(center_heatmap, axis=[0, 3])
+
+  # `centers` is of shape (N, 2) with (y, x) order of the second dimension.
+  centers = tf.where(tf.greater(center_heatmap, 0.0))
+
+  if keep_k_centers > 0 and tf.shape(centers)[0] > keep_k_centers:
+    topk_scores, _ = tf.math.top_k(
+        tf.reshape(center_heatmap, [-1]), keep_k_centers, sorted=False)
+    centers = tf.where(tf.greater(center_heatmap, topk_scores[-1]))
+
+  return centers, center_heatmap
+
+
+def _find_closest_center_per_pixel(centers: tf.Tensor,
+                                   center_offsets: tf.Tensor) -> tf.Tensor:
+  """Assigns all pixels to their closest center.
+
+  Args:
+    centers: A tf.Tensor of shape [N, 2] containing N centers with coordinate
+      order (y, x).
+    center_offsets: A tf.Tensor of shape [height, width, 2].
+
+  Returns:
+    A tf.Tensor of shape [height, width] containing the index of the closest
+    center, per pixel.
+  """
+  height = tf.shape(center_offsets)[0]
+  width = tf.shape(center_offsets)[1]
+
+  x_coord, y_coord = tf.meshgrid(tf.range(width), tf.range(height))
+  coord = tf.stack([y_coord, x_coord], axis=-1)
+
+  center_per_pixel = tf.cast(coord, tf.float32) + center_offsets
+
+  # centers: [N, 2] -> [N, 1, 2].
+  # center_per_pixel: [H, W, 2] -> [1, H*W, 2].
+  centers = tf.cast(tf.expand_dims(centers, 1), tf.float32)
+  center_per_pixel = tf.reshape(center_per_pixel, [height*width, 2])
+  center_per_pixel = tf.expand_dims(center_per_pixel, 0)
+
+  # distances: [N, H*W].
+  distances = tf.norm(centers - center_per_pixel, axis=-1)
+
+  return tf.reshape(tf.argmin(distances, axis=0), [height, width])
+
+
+def _get_instances_from_heatmap_and_offset(
+    semantic_segmentation: tf.Tensor, center_heatmap: tf.Tensor,
+    center_offsets: tf.Tensor, center_threshold: float,
+    thing_class_ids: tf.Tensor, nms_kernel_size: int,
+    keep_k_centers: int) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+  """Computes the instance assignment per pixel.
+
+  Args:
+    semantic_segmentation: A tf.Tensor containing the semantic labels of shape
+      [height, width].
+    center_heatmap: A tf.Tensor of shape [height, width, 1].
+    center_offsets: A tf.Tensor of shape [height, width, 2].
+    center_threshold: A float setting the threshold for the center heatmap.
+    thing_class_ids: A tf.Tensor of shape [N] containing N thing indices.
+    nms_kernel_size: An integer specifying the nms kernel size.
+    keep_k_centers: An integer specifying the number of centers to keep.
+      Negative values will keep all centers.
+
+  Returns:
+    A tuple of:
+    - tf.Tensor containing the instance segmentation (filtered with the `thing`
+      segmentation from the semantic segmentation output) with shape
+      [height, width].
+    - tf.Tensor containing the processed centermap with shape [height, width].
+    - tf.Tensor containing instance scores (where a higher "score" is a
+      reasonable signal of a more confident detection).
Will be of shape [height, width] + with the score for a pixel being the score of the instance it belongs to. + The scores will be zero for pixels in background/"stuff" regions. + """ + thing_segmentation = tf.zeros_like(semantic_segmentation) + for thing_id in thing_class_ids: + thing_segmentation = tf.where(tf.equal(semantic_segmentation, thing_id), + 1, + thing_segmentation) + + centers, processed_center_heatmap = _get_instance_centers_from_heatmap( + center_heatmap, center_threshold, nms_kernel_size, keep_k_centers) + if tf.shape(centers)[0] == 0: + return (tf.zeros_like(semantic_segmentation), processed_center_heatmap, + tf.zeros_like(processed_center_heatmap)) + + instance_center_index = _find_closest_center_per_pixel( + centers, center_offsets) + # Instance IDs should start with 1. So we use the index into the centers, but + # shifted by 1. + instance_segmentation = tf.cast(instance_center_index, tf.int32) + 1 + + # The value of the heatmap at an instance's center is used as the score + # for that instance. + instance_scores = tf.gather_nd(processed_center_heatmap, centers) + tf.debugging.assert_shapes([ + (centers, ('N', 2)), + (instance_scores, ('N',)), + ]) + # This will map the instance scores back to the image space: where each pixel + # has a value equal to the score of its instance. + flat_center_index = tf.reshape(instance_center_index, [-1]) + instance_score_map = tf.gather(instance_scores, flat_center_index) + instance_score_map = tf.reshape(instance_score_map, + tf.shape(instance_segmentation)) + instance_score_map *= tf.cast(thing_segmentation, tf.float32) + + return (thing_segmentation * instance_segmentation, processed_center_heatmap, + instance_score_map) + + +@tf.function +def _get_panoptic_predictions( + semantic_logits: tf.Tensor, center_heatmap: tf.Tensor, + center_offsets: tf.Tensor, center_threshold: float, + thing_class_ids: tf.Tensor, label_divisor: int, stuff_area_limit: int, + void_label: int, nms_kernel_size: int, keep_k_centers: int +) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]: + """Computes the semantic class and instance ID per pixel. + + Args: + semantic_logits: A tf.Tensor of shape [batch, height, width, classes]. + center_heatmap: A tf.Tensor of shape [batch, height, width, 1]. + center_offsets: A tf.Tensor of shape [batch, height, width, 2]. + center_threshold: A float setting the threshold for the center heatmap. + thing_class_ids: A tf.Tensor of shape [N] containing N thing indices. + label_divisor: An integer specifying the label divisor of the dataset. + stuff_area_limit: An integer specifying the number of pixels that stuff + regions need to have at least. The stuff region will be included in the + panoptic prediction, only if its area is larger than the limit; otherwise, + it will be re-assigned as void_label. + void_label: An integer specifying the void label. + nms_kernel_size: An integer specifying the nms kernel size. + keep_k_centers: An integer specifying the number of centers to keep. + Negative values will keep all centers. + + Returns: + A tuple of: + - the panoptic prediction as tf.Tensor with shape [batch, height, width]. + - the semantic prediction as tf.Tensor with shape [batch, height, width]. + - the instance prediction as tf.Tensor with shape [batch, height, width]. + - the centermap prediction as tf.Tensor with shape [batch, height, width]. + - the instance score maps as tf.Tensor with shape [batch, height, width]. 
+  """
+  semantic_prediction = _get_semantic_predictions(semantic_logits)
+  batch_size = tf.shape(semantic_logits)[0]
+
+  instance_map_lists = tf.TensorArray(
+      tf.int32, size=batch_size, dynamic_size=False)
+  center_map_lists = tf.TensorArray(
+      tf.float32, size=batch_size, dynamic_size=False)
+  instance_score_map_lists = tf.TensorArray(
+      tf.float32, size=batch_size, dynamic_size=False)
+
+  for i in tf.range(batch_size):
+    (instance_map, center_map,
+     instance_score_map) = _get_instances_from_heatmap_and_offset(
+         semantic_prediction[i, ...], center_heatmap[i, ...],
+         center_offsets[i, ...], center_threshold, thing_class_ids,
+         nms_kernel_size, keep_k_centers)
+    instance_map_lists = instance_map_lists.write(i, instance_map)
+    center_map_lists = center_map_lists.write(i, center_map)
+    instance_score_map_lists = instance_score_map_lists.write(
+        i, instance_score_map)
+
+  # This does not work with unknown shapes.
+  instance_maps = instance_map_lists.stack()
+  center_maps = center_map_lists.stack()
+  instance_score_maps = instance_score_map_lists.stack()
+
+  panoptic_prediction = _merge_semantic_and_instance_maps(
+      semantic_prediction, instance_maps, thing_class_ids, label_divisor,
+      stuff_area_limit, void_label)
+  return (panoptic_prediction, semantic_prediction, instance_maps, center_maps,
+          instance_score_maps)
+
+
+@tf.function
+def _merge_semantic_and_instance_maps(
+    semantic_prediction: tf.Tensor,
+    instance_maps: tf.Tensor,
+    thing_class_ids: tf.Tensor,
+    label_divisor: int,
+    stuff_area_limit: int,
+    void_label: int) -> tf.Tensor:
+  """Merges semantic and instance maps to obtain panoptic segmentation.
+
+  This function merges the semantic segmentation and class-agnostic
+  instance segmentation to form the panoptic segmentation. In particular,
+  the class label of each instance mask is inferred from the majority
+  votes from the corresponding pixels in the semantic segmentation. This
+  operation was first proposed in the DeeperLab paper and adopted by
+  Panoptic-DeepLab.
+
+  - DeeperLab: Single-Shot Image Parser, T-J Yang, et al. arXiv:1902.05093.
+  - Panoptic-DeepLab, B. Cheng, et al. In CVPR, 2020.
+
+  Note that this function only supports batch = 1 for simplicity. Additionally,
+  this function has a slightly different implementation from the provided
+  TensorFlow implementation `merge_ops` but with a similar performance. This
+  function is mainly used as a backup solution when you could not successfully
+  compile the provided TensorFlow implementation. To reproduce our results,
+  please use the provided TensorFlow implementation (i.e., not use this
+  function, but the `merge_ops.merge_semantic_and_instance_maps`).
+
+  Args:
+    semantic_prediction: A tf.Tensor of shape [batch, height, width].
+    instance_maps: A tf.Tensor of shape [batch, height, width].
+    thing_class_ids: A tf.Tensor of shape [N] containing N thing indices.
+    label_divisor: An integer specifying the label divisor of the dataset.
+    stuff_area_limit: An integer specifying the number of pixels that stuff
+      regions need to have at least. The stuff region will be included in the
+      panoptic prediction, only if its area is larger than the limit; otherwise,
+      it will be re-assigned as void_label.
+    void_label: An integer specifying the void label.
+
+  Returns:
+    panoptic_prediction: A tf.Tensor with shape [batch, height, width].
+  """
+  prediction_shape = semantic_prediction.get_shape().as_list()
+  # This implementation only supports batch size of 1. Since model construction
+  # might lose batch size information (and leave it to None), override it here.
+  prediction_shape[0] = 1
+  semantic_prediction = tf.ensure_shape(semantic_prediction, prediction_shape)
+  instance_maps = tf.ensure_shape(instance_maps, prediction_shape)
+
+  # Default panoptic_prediction to have semantic label = void_label.
+  panoptic_prediction = tf.ones_like(
+      semantic_prediction) * void_label * label_divisor
+
+  # Start to paste predicted `thing` regions to panoptic_prediction.
+  # Infer `thing` segmentation regions from semantic prediction.
+  semantic_thing_segmentation = tf.zeros_like(semantic_prediction,
+                                              dtype=tf.bool)
+  for thing_class in thing_class_ids:
+    semantic_thing_segmentation = tf.math.logical_or(
+        semantic_thing_segmentation,
+        semantic_prediction == thing_class)
+  # Keep track of how many instances for each semantic label.
+  num_instance_per_semantic_label = tf.TensorArray(
+      tf.int32, size=0, dynamic_size=True, clear_after_read=False)
+  instance_ids, _ = tf.unique(tf.reshape(instance_maps, [-1]))
+  for instance_id in instance_ids:
+    # Instance ID 0 is reserved for crowd region.
+    if instance_id == 0:
+      continue
+    thing_mask = tf.math.logical_and(instance_maps == instance_id,
+                                     semantic_thing_segmentation)
+    if tf.reduce_sum(tf.cast(thing_mask, tf.int32)) == 0:
+      continue
+    semantic_bin_counts = tf.math.bincount(
+        tf.boolean_mask(semantic_prediction, thing_mask))
+    semantic_majority = tf.cast(
+        tf.math.argmax(semantic_bin_counts), tf.int32)
+
+    while num_instance_per_semantic_label.size() <= semantic_majority:
+      num_instance_per_semantic_label = num_instance_per_semantic_label.write(
+          num_instance_per_semantic_label.size(), 0)
+
+    new_instance_id = (
+        num_instance_per_semantic_label.read(semantic_majority) + 1)
+    num_instance_per_semantic_label = num_instance_per_semantic_label.write(
+        semantic_majority, new_instance_id)
+    panoptic_prediction = tf.where(
+        thing_mask,
+        tf.ones_like(panoptic_prediction) * semantic_majority * label_divisor
+        + new_instance_id,
+        panoptic_prediction)
+
+  # Done with `num_instance_per_semantic_label` tensor array.
+  num_instance_per_semantic_label.close()
+
+  # Start to paste predicted `stuff` regions to panoptic prediction.
+  instance_stuff_regions = instance_maps == 0
+  semantic_ids, _ = tf.unique(tf.reshape(semantic_prediction, [-1]))
+  for semantic_id in semantic_ids:
+    if tf.reduce_sum(tf.cast(thing_class_ids == semantic_id, tf.int32)) > 0:
+      continue
+    # Check stuff area.
+    stuff_mask = tf.math.logical_and(semantic_prediction == semantic_id,
+                                     instance_stuff_regions)
+    stuff_area = tf.reduce_sum(tf.cast(stuff_mask, tf.int32))
+    if stuff_area >= stuff_area_limit:
+      panoptic_prediction = tf.where(
+          stuff_mask,
+          tf.ones_like(panoptic_prediction) * semantic_id * label_divisor,
+          panoptic_prediction)
+
+  return panoptic_prediction
+
+
+class PostProcessor(tf.keras.layers.Layer):
+  """This class contains code of a Panoptic-Deeplab post-processor."""
+
+  def __init__(
+      self,
+      center_score_threshold: float,
+      thing_class_ids: List[int],
+      label_divisor: int,
+      stuff_area_limit: int,
+      ignore_label: int,
+      nms_kernel: int,
+      keep_k_centers: int,
+      **kwargs):
+    """Initializes a Panoptic-Deeplab post-processor.
+
+    Args:
+      center_score_threshold: A float setting the threshold for the center
+        heatmap.
+      thing_class_ids: An integer list shape [N] containing N thing indices.
+      label_divisor: An integer specifying the label divisor of the dataset.
+      stuff_area_limit: An integer specifying the number of pixels that stuff
+        regions need to have at least. The stuff region will be included in
+        the panoptic prediction, only if its area is larger than the limit;
+        otherwise, it will be re-assigned as ignore_label.
+      ignore_label: An integer specifying the label of ignored/void regions.
+      nms_kernel: An integer specifying the nms kernel size.
+      keep_k_centers: An integer specifying the number of centers to keep.
+        Negative values will keep all centers.
+      **kwargs: Additional keyword arguments to be passed.
+    """
+    super(PostProcessor, self).__init__(**kwargs)
+
+    self._config_dict = {
+        'center_score_threshold': center_score_threshold,
+        'thing_class_ids': thing_class_ids,
+        'label_divisor': label_divisor,
+        'stuff_area_limit': stuff_area_limit,
+        'ignore_label': ignore_label,
+        'nms_kernel': nms_kernel,
+        'keep_k_centers': keep_k_centers
+    }
+    self._post_processor = functools.partial(
+        _get_panoptic_predictions,
+        center_threshold=center_score_threshold,
+        thing_class_ids=tf.convert_to_tensor(thing_class_ids),
+        label_divisor=label_divisor,
+        stuff_area_limit=stuff_area_limit,
+        void_label=ignore_label,
+        nms_kernel_size=nms_kernel,
+        keep_k_centers=keep_k_centers)
+
+  def call(self, result_dict: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
+    """Performs the post-processing given model predicted results.
+
+    Args:
+      result_dict: A dictionary of tf.Tensor containing model results. The dict
+        has to contain
+        - segmentation_outputs
+        - instance_center_prediction
+        - instance_center_regression
+
+    Returns:
+      The post-processed dict of tf.Tensor, containing the following keys:
+      - panoptic_outputs
+      - category_mask
+      - instance_mask
+      - instance_centers
+      - instance_scores
+    """
+    processed_dict = {}
+    (processed_dict['panoptic_outputs'],
+     processed_dict['category_mask'],
+     processed_dict['instance_mask'],
+     processed_dict['instance_centers'],
+     processed_dict['instance_scores']
+    ) = self._post_processor(
+        tf.nn.softmax(result_dict['segmentation_outputs'], axis=-1),
+        result_dict['instance_center_prediction'],
+        result_dict['instance_center_regression'])
+    return processed_dict
+
+  def get_config(self):
+    return self._config_dict
+
+  @classmethod
+  def from_config(cls, config):
+    return cls(**config)
diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge_test.py
new file mode 100644
index 00000000000..699155f044a
--- /dev/null
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge_test.py
@@ -0,0 +1,142 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for panoptic_deeplab_merge.py.
+ +Note that the tests are branched from +https://raw.githubusercontent.com/google-research/deeplab2/main/model/post_processor/panoptic_deeplab_test.py +""" +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge + + +class PostProcessingTest(tf.test.TestCase): + + def test_py_func_merge_semantic_and_instance_maps_can_run(self): + batch = 1 + height = 5 + width = 5 + semantic_prediction = tf.random.uniform((batch, height, width), + minval=0, + maxval=20, + dtype=tf.int32) + instance_maps = tf.random.uniform((batch, height, width), + minval=0, + maxval=3, + dtype=tf.int32) + thing_class_ids = tf.convert_to_tensor([1, 2, 3]) + label_divisor = 256 + stuff_area_limit = 3 + void_label = 255 + panoptic_prediction = panoptic_deeplab_merge._merge_semantic_and_instance_maps( + semantic_prediction, instance_maps, thing_class_ids, label_divisor, + stuff_area_limit, void_label) + self.assertListEqual(semantic_prediction.get_shape().as_list(), + panoptic_prediction.get_shape().as_list()) + + def test_merge_semantic_and_instance_maps_with_a_simple_example(self): + semantic_prediction = tf.convert_to_tensor( + [[[0, 0, 0, 0], + [0, 1, 1, 0], + [0, 2, 2, 0], + [2, 2, 3, 3]]], dtype=tf.int32) + instance_maps = tf.convert_to_tensor( + [[[0, 0, 0, 0], + [0, 0, 0, 0], + [0, 1, 1, 0], + [2, 2, 3, 3]]], dtype=tf.int32) + thing_class_ids = tf.convert_to_tensor([2, 3]) + label_divisor = 256 + stuff_area_limit = 3 + void_label = 255 + # The expected_panoptic_prediction is computed as follows. + # For `thing` segmentation, instance 1, 2, and 3 are kept, but instance 3 + # will have a new instance ID 1, since it is the first instance in its + # own semantic label. + # For `stuff` segmentation, class-0 region is kept, while class-1 region + # is re-labeled as `void_label * label_divisor` since its area is smaller + # than stuff_area_limit. + expected_panoptic_prediction = tf.convert_to_tensor( + [[[0, 0, 0, 0], + [0, void_label * label_divisor, void_label * label_divisor, 0], + [0, 2 * label_divisor + 1, 2 * label_divisor + 1, 0], + [2 * label_divisor + 2, 2 * label_divisor + 2, 3 * label_divisor + 1, + 3 * label_divisor + 1]]], dtype=tf.int32) + panoptic_prediction = panoptic_deeplab_merge._merge_semantic_and_instance_maps( + semantic_prediction, instance_maps, thing_class_ids, label_divisor, + stuff_area_limit, void_label) + np.testing.assert_equal(expected_panoptic_prediction.numpy(), + panoptic_prediction.numpy()) + + def test_gets_panoptic_predictions_with_score(self): + batch = 1 + height = 5 + width = 5 + classes = 3 + + semantic_logits = tf.random.uniform((batch, 1, 1, classes)) + semantic_logits = tf.tile(semantic_logits, (1, height, width, 1)) + + center_heatmap = tf.convert_to_tensor([ + [1.0, 0.0, 0.0, 0.0, 0.0], + [0.8, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.1, 0.7], + [0.0, 0.0, 0.0, 0.0, 0.2], + ], dtype=tf.float32) + center_heatmap = tf.expand_dims(center_heatmap, 0) + center_heatmap = tf.expand_dims(center_heatmap, 3) + + center_offsets = tf.zeros((batch, height, width, 2)) + center_threshold = 0.0 + thing_class_ids = tf.range(classes) # No "stuff" classes. 
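+    # With center_threshold = 0.0, every positive heatmap value survives
+    # thresholding; the 3x3 max-pool NMS below then suppresses 0.8, 0.1 and
+    # 0.2 (each adjacent to a stronger peak), leaving two centers with scores
+    # 1.0 and 0.7, both of which are retained by keep_k_centers = 2.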
+ label_divisor = 256 + stuff_area_limit = 16 + void_label = classes + nms_kernel_size = 3 + keep_k_centers = 2 + + result = panoptic_deeplab_merge._get_panoptic_predictions( + semantic_logits, center_heatmap, center_offsets, center_threshold, + thing_class_ids, label_divisor, stuff_area_limit, void_label, + nms_kernel_size, keep_k_centers) + instance_maps = result[2].numpy() + instance_scores = result[4].numpy() + + self.assertSequenceEqual(instance_maps.shape, (batch, height, width)) + expected_instances = [[ + [1, 1, 1, 1, 2], + [1, 1, 1, 2, 2], + [1, 1, 2, 2, 2], + [1, 2, 2, 2, 2], + [1, 2, 2, 2, 2], + ]] + np.testing.assert_array_equal(instance_maps, expected_instances) + + self.assertSequenceEqual(instance_scores.shape, (batch, height, width)) + expected_instance_scores = [[ + [1.0, 1.0, 1.0, 1.0, 0.7], + [1.0, 1.0, 1.0, 0.7, 0.7], + [1.0, 1.0, 0.7, 0.7, 0.7], + [1.0, 0.7, 0.7, 0.7, 0.7], + [1.0, 0.7, 0.7, 0.7, 0.7], + ]] + np.testing.assert_array_almost_equal(instance_scores, + expected_instance_scores) + + +if __name__ == '__main__': + tf.test.main() From 4ace44be9beb9783fb8d1c716b8d8c13a3a9fd16 Mon Sep 17 00:00:00 2001 From: srihari-humbarwadi Date: Tue, 15 Feb 2022 21:23:57 +0530 Subject: [PATCH 25/26] added post processing layer --- .../panoptic_maskrcnn/modeling/factory.py | 14 +++- .../modeling/panoptic_deeplab_model.py | 11 ++- .../modeling/panoptic_deeplab_model_test.py | 75 +++++++++++++------ 3 files changed, 74 insertions(+), 26 deletions(-) diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py index 64e1760e2f5..7ac1c9d5b0a 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py +++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py @@ -26,6 +26,7 @@ from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_maskrcnn_model from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_segmentation_generator +from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge def build_panoptic_maskrcnn( @@ -220,11 +221,22 @@ def build_panoptic_deeplab( norm_epsilon=norm_activation_config.norm_epsilon, kernel_regularizer=l2_regularizer) + post_processing_config = model_config.post_processor + post_processor = panoptic_deeplab_merge.PostProcessor( + center_score_threshold=post_processing_config.center_score_threshold, + thing_class_ids=post_processing_config.thing_class_ids, + label_divisor=post_processing_config.label_divisor, + stuff_area_limit=post_processing_config.stuff_area_limit, + ignore_label=post_processing_config.ignore_label, + nms_kernel=post_processing_config.nms_kernel, + keep_k_centers=post_processing_config.keep_k_centers) + model = panoptic_deeplab_model.PanopticDeeplabModel( backbone=backbone, semantic_decoder=semantic_decoder, instance_decoder=instance_decoder, semantic_head=semantic_head, - instance_head=instance_head) + instance_head=instance_head, + post_processor=post_processor) return model diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py index 2e963e909c9..d7f3dae752d 100644 --- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py +++ 
b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py
@@ -16,7 +16,7 @@
 from typing import Any, Mapping, Optional, Union
 
 import tensorflow as tf
-
+from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
 
 @tf.keras.utils.register_keras_serializable(package='Vision')
 class PanopticDeeplabModel(tf.keras.Model):
@@ -29,6 +29,7 @@ def __init__(
       semantic_head: tf.keras.layers.Layer,
       instance_head: tf.keras.layers.Layer,
       instance_decoder: Optional[tf.keras.Model] = None,
+      post_processor: Optional[panoptic_deeplab_merge.PostProcessor] = None,
       **kwargs):
     """
     Args:
@@ -46,13 +47,15 @@ def __init__(
         'semantic_decoder': semantic_decoder,
         'instance_decoder': instance_decoder,
         'semantic_head': semantic_head,
-        'instance_head': instance_head
+        'instance_head': instance_head,
+        'post_processor': post_processor
     }
     self.backbone = backbone
     self.semantic_decoder = semantic_decoder
     self.instance_decoder = instance_decoder
     self.semantic_head = semantic_head
     self.instance_head = instance_head
+    self.post_processor = post_processor
 
   def call(self, inputs: tf.Tensor, training: bool = None) -> tf.Tensor:
     if training is None:
@@ -83,6 +86,10 @@ def call(self, inputs: tf.Tensor, training: bool = None) -> tf.Tensor:
         'instance_center_regression':
             instance_outputs['instance_center_regression'],
     }
+    if training:
+      return outputs
+
+    outputs = self.post_processor(outputs)
     return outputs
 
   @property
diff --git a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py
index ea16e8dc34e..ce4544a7c8b 100644
--- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py
@@ -24,6 +24,7 @@
 from official.vision.beta.modeling.decoders import aspp
 from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
 from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model
+from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
 
 
 class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase):
@@ -37,8 +38,9 @@ class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase):
   def test_panoptic_deeplab_network_creation(
       self, input_size, level, low_level, shared_decoder, training):
     """Test for creation of a panoptic deep lab network."""
+    batch_size = 2 if training else 1
     num_classes = 10
-    inputs = np.random.rand(2, input_size, input_size, 3)
+    inputs = np.random.rand(batch_size, input_size, input_size, 3)
 
     tf.keras.backend.set_image_data_format('channels_last')
     backbone = backbones.ResNet(model_id=50)
@@ -62,35 +64,52 @@ def test_panoptic_deeplab_network_creation(
         low_level=low_level,
         low_level_num_filters=(64, 32))
 
+    post_processor = panoptic_deeplab_merge.PostProcessor(
+        center_score_threshold=0.1,
+        thing_class_ids=[1, 2, 3, 4],
+        label_divisor=256,
+        stuff_area_limit=4096,
+        ignore_label=0,
+        nms_kernel=41,
+        keep_k_centers=41)
+
     model = panoptic_deeplab_model.PanopticDeeplabModel(
         backbone=backbone,
         semantic_decoder=semantic_decoder,
         instance_decoder=instance_decoder,
        semantic_head=semantic_head,
-        instance_head=instance_head)
+        instance_head=instance_head,
+        post_processor=post_processor)
 
     outputs = model(inputs, training=training)
-
-    self.assertIn('segmentation_outputs', outputs)
-    self.assertIn('instance_center_prediction', outputs)
-    self.assertIn('instance_center_regression', outputs)
-
-    self.assertAllEqual(
-        [2, input_size // (2**low_level[-1]),
-         input_size //(2**low_level[-1]),
-         num_classes],
-        outputs['segmentation_outputs'].numpy().shape)
-    self.assertAllEqual(
-        [2, input_size // (2**low_level[-1]),
-         input_size // (2**low_level[-1]),
-         1],
-        outputs['instance_center_prediction'].numpy().shape)
-    self.assertAllEqual(
-        [2, input_size // (2**low_level[-1]),
-         input_size // (2**low_level[-1]),
-         2],
-        outputs['instance_center_regression'].numpy().shape)
+    if training:
+      self.assertIn('segmentation_outputs', outputs)
+      self.assertIn('instance_center_prediction', outputs)
+      self.assertIn('instance_center_regression', outputs)
+
+      self.assertAllEqual(
+          [2, input_size // (2**low_level[-1]),
+           input_size //(2**low_level[-1]),
+           num_classes],
+          outputs['segmentation_outputs'].numpy().shape)
+      self.assertAllEqual(
+          [2, input_size // (2**low_level[-1]),
+           input_size // (2**low_level[-1]),
+           1],
+          outputs['instance_center_prediction'].numpy().shape)
+      self.assertAllEqual(
+          [2, input_size // (2**low_level[-1]),
+           input_size // (2**low_level[-1]),
+           2],
+          outputs['instance_center_regression'].numpy().shape)
+
+    else:
+      self.assertIn('panoptic_outputs', outputs)
+      self.assertIn('category_mask', outputs)
+      self.assertIn('instance_mask', outputs)
+      self.assertIn('instance_centers', outputs)
+      self.assertIn('instance_scores', outputs)
 
   @combinations.generate(
       combinations.combine(
@@ -122,12 +141,22 @@ def test_serialize_deserialize(self, level, low_level, shared_decoder):
         low_level=low_level,
         low_level_num_filters=(64, 32))
 
+    post_processor = panoptic_deeplab_merge.PostProcessor(
+        center_score_threshold=0.1,
+        thing_class_ids=[1, 2, 3, 4],
+        label_divisor=256,
+        stuff_area_limit=4096,
+        ignore_label=0,
+        nms_kernel=41,
+        keep_k_centers=41)
+
     model = panoptic_deeplab_model.PanopticDeeplabModel(
         backbone=backbone,
         semantic_decoder=semantic_decoder,
        instance_decoder=instance_decoder,
        semantic_head=semantic_head,
-        instance_head=instance_head)
+        instance_head=instance_head,
+        post_processor=post_processor)
 
     config = model.get_config()
     new_model = panoptic_deeplab_model.PanopticDeeplabModel.from_config(config)

From 8b60a5a8c7d4fd0e56d8a1098d9432f49ce0caad Mon Sep 17 00:00:00 2001
From: srihari-humbarwadi
Date: Tue, 15 Feb 2022 21:24:18 +0530
Subject: [PATCH 26/26] added config for post processing layer

---
 .../panoptic_maskrcnn/configs/panoptic_deeplab.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py
index 06001b940f1..11d5290494f 100644
--- a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py
@@ -50,10 +50,20 @@ class InstanceHead(PanopticDeeplabHead):
   """Instance head config."""
   prediction_kernel_size: int = 1
 
-# pytype: disable=wrong-keyword-args
+@dataclasses.dataclass
+class PanopticDeeplabPostProcessor(hyperparams.Config):
+  """Panoptic Deeplab PostProcessing config."""
+  center_score_threshold: float = 0.1
+  thing_class_ids: List[int] = dataclasses.field(default_factory=list)
+  label_divisor: int = 256 * 256 * 256
+  stuff_area_limit: int = 4096
+  ignore_label: int = 0
+  nms_kernel: int = 41
+  keep_k_centers: int = 400
+
 @dataclasses.dataclass
 class PanopticDeeplab(hyperparams.Config):
-  """Panoptic Mask R-CNN model config."""
+  """Panoptic Deeplab model config."""
   num_classes: int = 0
   input_size: List[int] = dataclasses.field(default_factory=list)
   min_level: int = 3
@@ -65,3 +75,4 @@ class PanopticDeeplab(hyperparams.Config):
   semantic_head: SemanticHead = SemanticHead()
   instance_head: InstanceHead = InstanceHead()
   shared_decoder: bool = False
+  post_processor: PanopticDeeplabPostProcessor = PanopticDeeplabPostProcessor()
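
A minimal end-to-end sketch of how the pieces added in patches 24-26 fit
together; the config values below are arbitrary examples, and the random
tensors stand in for real model outputs:

import tensorflow as tf

from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge

# Build the post-processor from the new config block (patch 26); other
# fields keep their defaults.
pp_config = panoptic_deeplab_cfg.PanopticDeeplabPostProcessor(
    thing_class_ids=[1, 2, 3, 4], label_divisor=256)
post_processor = panoptic_deeplab_merge.PostProcessor(
    center_score_threshold=pp_config.center_score_threshold,
    thing_class_ids=pp_config.thing_class_ids,
    label_divisor=pp_config.label_divisor,
    stuff_area_limit=pp_config.stuff_area_limit,
    ignore_label=pp_config.ignore_label,
    nms_kernel=pp_config.nms_kernel,
    keep_k_centers=pp_config.keep_k_centers)

# At inference time PanopticDeeplabModel feeds its raw head outputs through
# the post-processor; `segmentation_outputs` holds logits, which
# PostProcessor.call normalizes with a softmax internally.
batch, height, width, num_classes = 1, 65, 65, 5
outputs = {
    'segmentation_outputs': tf.random.uniform(
        (batch, height, width, num_classes)),
    'instance_center_prediction': tf.random.uniform(
        (batch, height, width, 1)),
    'instance_center_regression': tf.zeros((batch, height, width, 2)),
}
results = post_processor(outputs)
# `results` contains: panoptic_outputs, category_mask, instance_mask,
# instance_centers and instance_scores.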