diff --git a/HLTrigger/Configuration/scripts/hltFindDuplicates b/HLTrigger/Configuration/scripts/hltFindDuplicates
index 0dd2f41c92eee..6a771dd358fb3 100755
--- a/HLTrigger/Configuration/scripts/hltFindDuplicates
+++ b/HLTrigger/Configuration/scripts/hltFindDuplicates
@@ -1,11 +1,33 @@
-#! /usr/bin/env python3
+#!/usr/bin/env python3
+"""hltFindDuplicates: script to find duplicate modules of an HLT configuration.
+
+Input.
+  Path to a local cmsRun configuration file, or stdin.
+
+Output.
+  A directory containing
+   (1) the input cmsRun configuration, and
+   (2) text files listing the groups of duplicate modules.
+
+Examples.
+
+  # input: local configuration file
+  hltFindDuplicates tmp.py -o output_dir
+
+  # input: stdin
+  hltConfigFromDB --configName /dev/CMSSW_X_Y_0/GRun/Vn | hltFindDuplicates -o output_dir
+  hltGetConfiguration /dev/CMSSW_X_Y_0/GRun/Vn | hltFindDuplicates -o output_dir -x realData=0 globalTag=@
+"""
+import os
+import sys
+import argparse
+import re
+import itertools
+import shutil
 
-from __future__ import print_function
-import sys, imp, re, itertools
-from HLTrigger.Configuration.Tools.frozendict import frozendict
 import FWCore.ParameterSet.Config as cms
 
-debug = True
+from HLTrigger.Configuration.Tools.frozendict import frozendict
 
 whitelist_types = [
   'HLTPrescaler',
@@ -22,19 +44,21 @@ whitelist_labels = [
 def whitelist(module):
     return module.label in whitelist_labels or module.type in whitelist_types
 
+def iterate(arg):
+    return (not isinstance(arg, str) and '__iter__' in dir(arg))
 
 def freeze(arg):
     if type(arg) == dict:
-        return frozendict((k, freeze(v)) for (k, v) in arg.iteritems())
-    elif '__iter__' in dir(arg):
+        return frozendict((k, freeze(v)) for (k, v) in iter(arg.items()))
+    elif iterate(arg):
        return tuple( freeze(v) for v in arg )
     else:
        return arg
 
 def unfreeze(arg):
     if type(arg) == frozendict:
-        return dict((k, unfreeze(v)) for (k, v) in arg.iteritems())
-    elif '__iter__' in dir(arg):
+        return dict((k, unfreeze(v)) for (k, v) in iter(arg.items()))
+    elif iterate(arg):
        return list( unfreeze(v) for v in arg )
     else:
        return arg
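As an aside, a minimal sketch (not part of the patch) of why parameters are frozen before hashing: nested parameter structures become hashable values, so two modules with identical parameters collide on the same key regardless of dictionary ordering. The helper name freeze_sketch and the plain-tuple stand-in for the frozendict used by the script are illustrative only.

    # illustrative only: plain sorted tuples stand in for the frozendict used by the script
    def freeze_sketch(arg):
        if isinstance(arg, dict):
            return tuple(sorted((k, freeze_sketch(v)) for k, v in arg.items()))
        if isinstance(arg, (list, tuple)):
            return tuple(freeze_sketch(v) for v in arg)
        return arg

    params_a = {'p1': 'rawDataCollector', 'p5': [1, 2, 3]}
    params_b = {'p5': [1, 2, 3], 'p1': 'rawDataCollector'}   # same content, different order
    assert hash(freeze_sketch(params_a)) == hash(freeze_sketch(params_b))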
@@ -43,68 +67,87 @@ def pythonize(arg):
     if 'parameters_' in dir(arg):
         arg = arg.parameters_()
 
-    if 'value' in dir(arg):
+    elif 'value' in dir(arg):
         arg = arg.value()
 
     if type(arg) == dict:
-        return frozendict((k, pythonize(v)) for (k, v) in arg.iteritems())
-    elif '__iter__' in dir(arg):
+        return frozendict((k, pythonize(v)) for (k, v) in iter(arg.items()))
+    elif iterate(arg):
        return tuple( pythonize(v) for v in arg )
     else:
        return arg
 
+def mkdirp(dirpath):
+    try:
+        os.makedirs(dirpath)
+    except OSError:
+        if not os.path.isdir(dirpath):
+            raise
 
 class Module(object):
-    type = ''
-    label = ''
+    type   = ''
+    label  = ''
     params = frozendict()
-    hash = 0
+    hash   = 0
 
     def __init__(self, module):
-        self.label = module.label_()
-        self.type = module.type_()
+        self.label  = module.label_()
+        self.type   = module.type_()
         self.params = pythonize(module.parameters_())
-        self.__rehash()
+        self.__rehash(self.params)
+
+    def __str__(self):
+        return f'{self.label} (type: {self.type}): {self.params}'
 
     def key(self):
         return self.hash
 
-    def __rehash(self):
-        self.hash = (hash(self.type) << 4) + hash(self.params)
+    def __rehash(self, params):
+        self.hash = (hash(self.type) << 4) + hash(params)
 
-    def __check(self, value, group):
-        return type(value) is str and bool(group.match(value))
+    def __check(self, value, check):
+        if isinstance(value, list):
+            return any(self.__check(foo, check) for foo in value)
+        elif isinstance(value, dict):
+            return any(self.__check(value[foo], check) for foo in value)
+        else:
+            return isinstance(value, str) and bool(check.match(value))
 
     def __sub(self, value, group, label):
-        if type(value) is str:
+        if isinstance(value, list):
+            return [self.__sub(foo, group, label) for foo in value]
+        elif isinstance(value, dict):
+            return {foo:self.__sub(value[foo], group, label) for foo in value}
+        elif isinstance(value, str):
             return group.sub(r'%s\2' % label, value)
         else:
             return value
 
-    def apply_rename(self, groups):
+    def apply_rename(self, groups, verbosity_level):
         modified = False
         newparams = unfreeze(self.params)
 
-        for label, (group, check) in groups.iteritems():
-            for k, p in newparams.iteritems():
-                if '__iter__' in dir(p):
-                    if any(self.__check(v, check) for v in p):
-                        newparams[k] = tuple(self.__sub(v, check, label) for v in p)
-                        modified = True
-                else:
-                    if self.__check(p, check):
-                        newparams[k] = self.__sub(p, check, label)
-                        modified = True
-
-        if modified:
-            self.params = frozendict(newparams)
-            self.__rehash()
+        if verbosity_level > 2:
+            print('')
+            print(f'    {self.label} ({self.type})')
+            print(f'      parameters before: {newparams}')
+
+        for label, (group, check) in iter(groups.items()):
+            for k, p in iter(newparams.items()):
+                if self.__check(p, check):
+                    newparams[k] = self.__sub(p, check, label)
+                    modified = True
+
+        if verbosity_level > 2:
+            print(f'      parameters after:  {newparams}')
+            print(f'      modified = {modified}')
+
+        if modified:
+            self.__rehash(frozendict(newparams))
 
 class ModuleList(object):
     modules = []
+    hashToLabelDict = {}
 
     def append(self, module):
         m = Module(module)
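For context, a small sketch (not from the patch) of the renaming idea implemented by __check/__sub: once two modules are known duplicates, any reference to either label, optionally followed by an instance label (hence the '($|:)' group), is rewritten to a common group label, so the modules consuming them become identical in turn. The group label and parameter values below are hypothetical.

    import re

    group_label = 'P1 (d1x)'                      # hypothetical label of a duplicate group
    members = ['d1x', 'm1x']
    check = re.compile(r'^(%s)($|:)' % '|'.join(members))

    params = {'d2x': 'd1x:instance', 'm2x': 'm1x:instance'}
    params = {k: check.sub(r'%s\2' % group_label, v) for k, v in params.items()}
    print(params['d2x'] == params['m2x'])         # True: both now read 'P1 (d1x):instance'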
@@ -117,98 +160,196 @@ class ModuleList(object):
     def __init__(self, *args):
         for arg in args:
-            if '__iter__' in dir(arg):
+            if iterate(arg):
                 self.extend(arg)
             else:
                 self.append(arg)
 
+    def hash_label(self, hash_value):
+        return self.hashToLabelDict.get(hash_value, None)
+
     def sort(self):
         self.modules.sort(key = Module.key)
 
     def group(self):
         groups = dict()
         self.sort()
-        i = 0
         for v, g in itertools.groupby(self.modules, Module.key):
             group = list(g)
             if len(group) > 1:
-                i = i + 1
                 g = [ m.label for m in group ]
                 g.sort()
-                l = 'hltGroup%d' %i
+                # hash identifying the group (it is the same for every module in the group)
+                g_key = group[0].key()
+                if g_key not in self.hashToLabelDict:
+                    # label identifying this group of modules
+                    # (set only once so it cannot change from step to step)
+                    self.hashToLabelDict[g_key] = f'{group[0].type} ({g[0]})'
                 r = re.compile(r'^(%s)($|:)' % r'|'.join(g))
-                groups[l] = (g, r)
+                groups[g_key] = (g, r)
         return groups
 
-    def apply_rename(self, groups):
+    def apply_rename(self, groups, verbosity_level):
         for module in self.modules:
-            module.apply_rename(groups)
+            module.apply_rename(groups, verbosity_level)
 
-    def dump(self):
+    def dump(self, indent=0):
         for m in self.modules:
-            print("%s = (%s) {" % (m.label, m.type))
-            for k, v in m.params.iteritems():
-                print("\t%s = %s" % (k, v))
-            print('}')
-            print()
-
+            print(' '*indent + "%s = (%s) {" % (m.label, m.type))
+            for k, v in iter(m.params.items()):
+                print(' '*indent + "  %s = %s" % (k, v))
+            print(' '*indent + '}\n')
 
+def findDuplicates(process, output_dir, verbosity_level):
+    mkdirp(output_dir)
 
-def findDuplicates(process):
     modules = ModuleList(
-        process._Process__analyzers.itervalues(),
-        process._Process__producers.itervalues(),
-        process._Process__filters.itervalues()
+        iter(process.analyzers_().values()),
+        iter(process.producers_().values()),
+        iter(process.filters_().values())
     )
 
     oldups = 0
     groups = modules.group()
-    dups = sum(len(g[0]) for g in groups.itervalues()) - len(groups)
+    dups = sum(len(g[0]) for g in groups.values()) - len(groups)
     index = 1
-    while(dups != oldups):
-        if debug:
-            dump = open('step%d.sed' % index, 'w')
-            for target, (group, regexp) in groups.iteritems():
-                dump.write('s#\\<\\(%s\\)\\>#%s#g\n' % ('\\|'.join(group), target))
-            dump.close()
-            dump = open('step%d.txt' % index, 'w')
-            for target, (group, regexp) in groups.iteritems():
-                dump.write('#%s\n%s\n\n' % ( target, '\n'.join(group)))
-            dump.close()
-        print("found %3d duplicates in %3d groups" % (dups, len(groups)))
+    while dups != oldups:
+        groupLabelToHashDict = {modules.hash_label(group_hash):group_hash for group_hash in groups}
+
+        dump = open(os.path.join(output_dir, f'step{index}.sed'), 'w')
+        for group_label in sorted(groupLabelToHashDict.keys()):
+            (group, regexp) = groups[groupLabelToHashDict[group_label]]
+            dump.write('s#\\<\\(%s\\)\\>#%s#g\n' % ('\\|'.join(group), group_label))
+        dump.close()
+
+        dump = open(os.path.join(output_dir, f'step{index}.txt'), 'w')
+        first_entry = True
+        for group_label in sorted(groupLabelToHashDict.keys()):
+            (group, regexp) = groups[groupLabelToHashDict[group_label]]
+            dump.write('\n'*(not first_entry) + '# %s\n%s\n' % ( group_label, '\n'.join(group)))
+            first_entry = False
+        dump.close()
+
+        if verbosity_level > 0:
+            print(f"[step {index:>2d}] found {dups:>3d} duplicates in {len(groups):>3d} groups")
+
+        if verbosity_level > 2:
+            print(f'[step {index:>2d}] groups={groups}')
+            print(f'[step {index:>2d}] ---------------')
+            print(f'[step {index:>2d}] apply_rename ..')
+
         oldups = dups
-        modules.apply_rename(groups)
+        modules.apply_rename(groups, verbosity_level)
+
+        if verbosity_level > 2:
+            print()
+            print(f'    ------------------------')
+            print(f'    modules (after renaming)')
+            print(f'    ------------------------')
+            modules.dump(indent=14)
+
         groups = modules.group()
-        dups = sum(len(g[0]) for g in groups.itervalues()) - len(groups)
-        index = index + 1
+        dups = sum(len(g[0]) for g in groups.values()) - len(groups)
+        index += 1
 
-    dump = open('groups.sed', 'w')
-    for target, (group, regexp) in groups.iteritems():
-        dump.write('s#\\<\\(%s\\)\\>#%s#\n' % ('\\|'.join(group), target))
-    dump.close()
+    groupLabelToHashDict = {modules.hash_label(group_hash):group_hash for group_hash in groups}
 
-    dump = open('groups.txt', 'w')
-    for target, (group, regexp) in groups.iteritems():
-        dump.write('#%s\n%s\n\n' % ( target, '\n'.join(group)))
+    dump = open(os.path.join(output_dir, 'groups.sed'), 'w')
+    for group_label in sorted(groupLabelToHashDict.keys()):
+        (group, regexp) = groups[groupLabelToHashDict[group_label]]
+        dump.write('s#\\<\\(%s\\)\\>#%s#\n' % ('\\|'.join(group), group_label))
     dump.close()
 
+    dump = open(os.path.join(output_dir, 'groups.txt'), 'w')
+    first_entry = True
+    for group_label in sorted(groupLabelToHashDict.keys()):
+        (group, regexp) = groups[groupLabelToHashDict[group_label]]
+        dump.write('\n'*(not first_entry) + '# %s\n%s\n' % ( group_label, '\n'.join(group)))
+        first_entry = False
+    dump.close()
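A short sketch (not part of the patch) of the bookkeeping above: itertools.groupby only merges adjacent items, which is why the module list is sorted by hash before grouping, and the duplicate count subtracts one representative per group. The hash values below are made up.

    import itertools

    hashes = [3, 1, 2, 1, 3, 1]                      # hypothetical module hashes
    hashes.sort()                                    # groupby only merges adjacent equal keys
    groups = {}
    for key, members in itertools.groupby(hashes):
        members = list(members)
        if len(members) > 1:
            groups[key] = members                    # {1: [1, 1, 1], 3: [3, 3]}

    dups = sum(len(m) for m in groups.values()) - len(groups)
    print(dups)                                      # 3: every copy beyond the first per group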
+
+##
+## main
+##
+if __name__ == '__main__':
+
+    ### args
+    parser = argparse.ArgumentParser(
+        prog = './'+os.path.basename(__file__),
+        formatter_class = argparse.RawDescriptionHelpFormatter,
+        description = __doc__,
+        argument_default = argparse.SUPPRESS,
+    )
+
+    # menu: name of ConfDB config, or local cmsRun cfg file, or stdin
+    parser.add_argument('menu',
+                        nargs = '?',
+                        metavar = 'MENU',
+                        default = None,
+                        help = 'Path to cmsRun configuration file (if not specified, stdin is used)')
+
+    # output-dir: path to directory containing output files
+    parser.add_argument('-o', '--output-dir',
+                        metavar = 'OUTPUT_DIR',
+                        default = 'hltFindDuplicates_output',
+                        help = 'Path to directory containing output files')
+
+    # menu arguments: list of arguments to be applied to the cmsRun configuration file
+    # (via argparse, VarParsing, or similar)
+    parser.add_argument('-x', '--menu-args',
+                        nargs = '+',
+                        metavar = 'MENU_ARGS',
+                        default = [],
+                        help = 'List of arguments (each without whitespace) to be applied to the cmsRun configuration file')
+
+    # verbosity level: level of verbosity of stdout/stderr printouts
+    parser.add_argument('-v', '--verbosity-level',
+                        metavar = 'VERBOSITY_LEVEL',
+                        type = int,
+                        default = 1,
+                        help = 'Verbosity level')
+
+    # parse command line arguments and options
+    opts = parser.parse_args()
+
+    print('-'*25)
+    print('hltFindDuplicates')
+    print('-'*25)
+
+    # create new output directory
+    if os.path.exists(opts.output_dir):
+        log_msg = 'Failed to create output directory (a directory or file already exists under that path)'
+        raise RuntimeError(f'{log_msg}: {opts.output_dir}')
+
+    mkdirp(opts.output_dir)
+    output_config_filepath = os.path.join(opts.output_dir, 'config.py')
+
+    print(f'output directory: {opts.output_dir}')
+    print('-'*25)
+
+    # parse the HLT configuration from a local cfg file, or from standard input
+    hlt = {'process': None, 'fragment': None}
+
+    if opts.menu != None:
+        if not os.path.isfile(opts.menu):
+            raise RuntimeError(f'Invalid path to input file (file does not exist): {opts.menu}')
+        shutil.copyfile(opts.menu, output_config_filepath)
+    else:
+        with open(output_config_filepath, 'w') as config_file:
+            config_file.write(sys.stdin.read())
 
-def main():
-    # parse the HLT configuration from standard input or from the given file
-    hlt = imp.new_module('hlt')
-    try:
-        configname = sys.argv[1]
-    except:
-        config = sys.stdin
-    else:
-        config = open(configname)
-    exec(config, globals(), hlt.__dict__)
-    config.close()
-    findDuplicates(hlt.process)
+    sys.argv = [sys.argv[0], output_config_filepath] + opts.menu_args
+    exec(open(output_config_filepath).read(), globals(), hlt)
 
+    # find cms.Process object
+    process = None
+    if hlt['process'] != None:
+        process = hlt['process']
+    if hlt['fragment'] != None:
+        process = hlt['fragment']
 
-if __name__ == "__main__":
-    main()
+    if process == None or not isinstance(process, cms.Process):
+        raise RuntimeError('Failed to find object of type cms.Process !')
+
+    findDuplicates(process, output_dir=opts.output_dir, verbosity_level=opts.verbosity_level)
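For reference, a minimal sketch (not part of the patch) of the loading mechanism used in the main block above: the configuration text is executed with a plain dict as local namespace, and the cms.Process object is then looked up by name. The file name 'my_cfg.py' is hypothetical.

    import FWCore.ParameterSet.Config as cms

    namespace = {'process': None}
    with open('my_cfg.py') as config_file:          # hypothetical cmsRun configuration file
        exec(config_file.read(), globals(), namespace)

    if not isinstance(namespace['process'], cms.Process):
        raise RuntimeError('the configuration did not define a cms.Process named "process"')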
"${SCRAM_TEST_PATH}"/test_hltFindDuplicates_cfg.py -x="--mode=0" -v 2 \ + -o test_hltFindDuplicates_mode0_output &> test_hltFindDuplicates_mode0_log \ + || die 'Failure running hltFindDuplicates (mode: 0)' $? test_hltFindDuplicates_mode0_log + +cat <<@EOF > test_hltFindDuplicates_mode0_groups_expected +# A3 (d3x) +d3x +d3y +m3x +m3y + +# F2 (d2x) +d2x +d2y +m2x +m2y + +# P1 (d1x) +d1x +d1y +m1x +m1y +@EOF + +diff test_hltFindDuplicates_mode0_groups_expected test_hltFindDuplicates_mode0_output/groups.txt \ + || die "Unexpected differences in groups.txt output of hltFindDuplicates (mode: 0)" $? + +### +### test #2: "mode == 1" +### +rm -rf test_hltFindDuplicates_mode1_output + +hltFindDuplicates "${SCRAM_TEST_PATH}"/test_hltFindDuplicates_cfg.py -x="--mode=1" -v 2 \ + -o test_hltFindDuplicates_mode1_output &> test_hltFindDuplicates_mode1_log \ + || die 'Failure running hltFindDuplicates (mode: 1)' $? test_hltFindDuplicates_mode1_log + +cat <<@EOF > test_hltFindDuplicates_mode1_groups_expected +# A3 (d3x) +d3x +d3y +m3x + +# F2 (d2x) +d2x +d2y +m2x + +# P1 (d1x) +d1x +d1y +m1x +@EOF + +diff test_hltFindDuplicates_mode1_groups_expected test_hltFindDuplicates_mode1_output/groups.txt \ + || die "Unexpected differences in groups.txt output of hltFindDuplicates (mode: 1)" $? diff --git a/HLTrigger/Configuration/test/test_hltFindDuplicates_cfg.py b/HLTrigger/Configuration/test/test_hltFindDuplicates_cfg.py new file mode 100644 index 0000000000000..9ab81636a9e8e --- /dev/null +++ b/HLTrigger/Configuration/test/test_hltFindDuplicates_cfg.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Configuration file to be used as input in unit tests of the utility hltFindDuplicates. + +The configuration is made of modules labelled "d*" and "m*". + +Details on the configuration. + - For each group of modules (d* and m*), + - modules are ordered in 3 levels (e.g. d1*, d2*, d3*), and + - for every level, there are two versions (*x and *y) of the module (e.g. d1x, d1y). + - The *x (*y) modules depend only on *x (*y) modules, and not on *y (*x) modules. + - The *2* modules depend on *1* modules. + - The *3* modules depend on *1* and *2* modules. + - The m* modules are the counterparts of the d* modules. + - The m* modules do not depend on d* modules (and viceversa). + - A given m{1,2,3}{x,y} module may or may not be a duplicate of the corresponding d* module. + +The --mode option determines how the ED modules are configured. + + - mode == 0: + the m* modules are duplicates of the corresponding d* modules. + + - mode == 1: + one parameter in m1y is changed compared to d1y + and this makes all the m*y modules unique, + while the m*x modules should ultimately + be identified as duplicates of the d*x modules. +""" +import FWCore.ParameterSet.Config as cms + +import os +import argparse + +parser = argparse.ArgumentParser( + prog = 'python3 '+os.path.basename(__file__), + formatter_class = argparse.RawDescriptionHelpFormatter, + description = __doc__, + argument_default = argparse.SUPPRESS, +) + +parser.add_argument("--mode", + type = int, + default = 0, + choices = [0,1], + help = "Choose how to configure the modules." 
diff --git a/HLTrigger/Configuration/test/test_hltFindDuplicates_cfg.py b/HLTrigger/Configuration/test/test_hltFindDuplicates_cfg.py
new file mode 100644
index 0000000000000..9ab81636a9e8e
--- /dev/null
+++ b/HLTrigger/Configuration/test/test_hltFindDuplicates_cfg.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""
+Configuration file to be used as input in unit tests of the utility hltFindDuplicates.
+
+The configuration is made of modules labelled "d*" and "m*".
+
+Details on the configuration.
+ - For each group of modules (d* and m*),
+   - modules are ordered in 3 levels (e.g. d1*, d2*, d3*), and
+   - for every level, there are two versions (*x and *y) of the module (e.g. d1x, d1y).
+ - The *x (*y) modules depend only on *x (*y) modules, and not on *y (*x) modules.
+ - The *2* modules depend on *1* modules.
+ - The *3* modules depend on *1* and *2* modules.
+ - The m* modules are the counterparts of the d* modules.
+ - The m* modules do not depend on d* modules (and vice versa).
+ - A given m{1,2,3}{x,y} module may or may not be a duplicate of the corresponding d* module.
+
+The --mode option determines how the ED modules are configured.
+
+ - mode == 0:
+   the m* modules are duplicates of the corresponding d* modules.
+
+ - mode == 1:
+   one parameter in m1y is changed compared to d1y,
+   and this makes all the m*y modules unique,
+   while the m*x modules should ultimately
+   be identified as duplicates of the d*x modules.
+"""
+import FWCore.ParameterSet.Config as cms
+
+import os
+import argparse
+
+parser = argparse.ArgumentParser(
+    prog = 'python3 '+os.path.basename(__file__),
+    formatter_class = argparse.RawDescriptionHelpFormatter,
+    description = __doc__,
+    argument_default = argparse.SUPPRESS,
+)
+
+parser.add_argument("--mode",
+                    type = int,
+                    default = 0,
+                    choices = [0,1],
+                    help = "Choose how to configure the modules."
+)
+
+args,_ = parser.parse_known_args()
+
+process = cms.Process('TEST')
+
+### "d*" modules: the duplicates
+### - the *x (*y) modules depend only on *x (*y) modules, and not on *y (*x) modules
+### - the *2* modules depend on *1* modules
+### - the *3* modules depend on *1* and *2* modules
+process.d1x = cms.EDProducer('P1',
+    p1 = cms.InputTag('rawDataCollector'),
+    p2 = cms.bool(False),
+    p3 = cms.vbool(False, True),
+    p4 = cms.uint32(1),
+    p5 = cms.vuint32(1,2,3),
+    p6 = cms.int32(-1),
+    p7 = cms.vint32(-1,2,-3),
+    p8 = cms.double(1.1),
+    p9 = cms.vdouble(2.3, 4.5)
+)
+
+process.d1y = process.d1x.clone()
+
+process.d2x = cms.EDFilter('F2',
+    p1 = cms.vint32(1, 2, 3),
+    p2 = cms.VInputTag('d1x'),
+    p3 = cms.PSet(
+        theStrings = cms.vstring('keyword1', 'keyword2')
+    )
+)
+
+process.d2y = process.d2x.clone( p2 = ['d1y'] )
+
+process.d3x = cms.EDAnalyzer('A3',
+    p1 = cms.VPSet(
+        cms.PSet(
+            pset_a = cms.PSet(
+                tag1 = cms.InputTag('d1x')
+            ),
+            pset_b = cms.PSet(
+                tag2 = cms.InputTag('d2x')
+            ),
+        )
+    ),
+    p2 = cms.PSet(
+        p_a = cms.PSet(
+            p_b = cms.PSet(
+                p_c = cms.VInputTag('d2x', 'd1x')
+            )
+        )
+    )
+)
+
+process.d3y = process.d3x.clone()
+process.d3y.p1[0].pset_a.tag1 = 'd1y'
+process.d3y.p1[0].pset_b.tag2 = 'd2y'
+process.d3y.p2.p_a.p_b.p_c = ['d2y', 'd1y']
+
+### m* modules
+### - the m* modules are the counterparts of the d* modules
+### - m* modules do not depend on d* modules (and vice versa)
+### - if mode 1 is chosen,
+###   one parameter in m1y is changed compared to d1y,
+###   and this makes all the m*y modules unique,
+###   while the m*x modules should ultimately
+###   be flagged as duplicates of the d*x modules
+process.m1x = process.d1x.clone()
+
+if args.mode == 0:
+    process.m1y = process.d1y.clone()
+elif args.mode == 1:
+    process.m1y = process.d1y.clone( p2 = True )
+
+process.m2x = process.d2x.clone( p2 = ['m1x'] )
+process.m2y = process.d2y.clone( p2 = ['m1y'] )
+
+process.m3x = process.d3x.clone()
+process.m3x.p1[0].pset_a.tag1 = 'm1x'
+process.m3x.p1[0].pset_b.tag2 = 'm2x'
+process.m3x.p2.p_a.p_b.p_c = ['m2x', 'm1x']
+
+process.m3y = process.d3y.clone()
+process.m3y.p1[0].pset_a.tag1 = 'm1y'
+process.m3y.p1[0].pset_b.tag2 = 'm2y'
+process.m3y.p2.p_a.p_b.p_c = ['m2y', 'm1y']
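Finally, a minimal sketch (not part of the patch, assuming a CMSSW environment where FWCore.ParameterSet.Config is importable) of the mode-1 twist above: cloning with one overridden parameter yields a module that no longer matches its source, which is what keeps the m*y chain out of the d*y duplicate groups.

    import FWCore.ParameterSet.Config as cms

    d1y = cms.EDProducer('P1', p2 = cms.bool(False))
    m1y_mode0 = d1y.clone()                              # identical parameters: same duplicate group
    m1y_mode1 = d1y.clone(p2 = True)                     # one parameter differs: unique module
    print(d1y.dumpPython() == m1y_mode0.dumpPython())    # True
    print(d1y.dumpPython() == m1y_mode1.dumpPython())    # False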