diff --git a/README.md b/README.md
index 7f20db1c4..93135fe8c 100644
--- a/README.md
+++ b/README.md
@@ -18,8 +18,8 @@ The common issues we run into we try to document here [Troubleshooting Guide](Tr
 | Build Type | OS | Python | Tensorflow | Onnx opset | Status |
 | --- | --- | --- | --- | --- | --- |
-| Unit Test - Basic | Linux, MacOS\*, Windows\* | 3.6, 3.7, 3.8 | 1.12-1.15, 2.1-2.4 | 7-13 | [![Build Status](https://dev.azure.com/tensorflow-onnx/tensorflow-onnx/_apis/build/status/unit_test?branchName=master)](https://dev.azure.com/tensorflow-onnx/tensorflow-onnx/_build/latest?definitionId=16&branchName=master) |
-| Unit Test - Full | Linux, MacOS, Windows | 3.6, 3.7, 3.8 | 1.12-1.15, 2.1-2.4 | 7-13 | [![Build Status](https://dev.azure.com/tensorflow-onnx/tensorflow-onnx/_apis/build/status/unit_test-matrix?branchName=master)](https://dev.azure.com/tensorflow-onnx/tensorflow-onnx/_build/latest?definitionId=18&branchName=master) | |
+| Unit Test - Basic | Linux, MacOS\*, Windows\* | 3.6, 3.7, 3.8 | 1.12-1.15, 2.1-2.5 | 7-13 | [![Build Status](https://dev.azure.com/tensorflow-onnx/tensorflow-onnx/_apis/build/status/unit_test?branchName=master)](https://dev.azure.com/tensorflow-onnx/tensorflow-onnx/_build/latest?definitionId=16&branchName=master) |
+| Unit Test - Full | Linux, MacOS, Windows | 3.6, 3.7, 3.8 | 1.12-1.15, 2.1-2.5 | 7-13 | [![Build Status](https://dev.azure.com/tensorflow-onnx/tensorflow-onnx/_apis/build/status/unit_test-matrix?branchName=master)](https://dev.azure.com/tensorflow-onnx/tensorflow-onnx/_build/latest?definitionId=18&branchName=master) | |
 ## Supported Versions
diff --git a/setup.py b/setup.py
index b6a0f5c12..30d2b3748 100644
--- a/setup.py
+++ b/setup.py
@@ -81,5 +81,22 @@ def run(self):
     author='onnx@microsoft.com',
     author_email='onnx@microsoft.com',
     url='https://github.com/onnx/tensorflow-onnx',
-    install_requires=['numpy>=1.14.1', 'onnx>=1.4.1', 'requests', 'six', 'flatbuffers']
+    install_requires=['numpy>=1.14.1', 'onnx>=1.4.1', 'requests', 'six', 'flatbuffers'],
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        'Intended Audience :: Developers',
+        'Intended Audience :: Education',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: Apache Software License',
+        'Topic :: Scientific/Engineering',
+        'Topic :: Scientific/Engineering :: Mathematics',
+        'Topic :: Scientific/Engineering :: Artificial Intelligence',
+        'Topic :: Software Development',
+        'Topic :: Software Development :: Libraries',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9']
 )
diff --git a/tests/huggingface.py b/tests/huggingface.py
index 7675c86b4..18cfc5c90 100644
--- a/tests/huggingface.py
+++ b/tests/huggingface.py
@@ -1,6 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 
-"""Unit tests for huggingface tensorflow transformers."""
+"""
+Unit tests for huggingface tensorflow transformers.
+
+tested with tf-2.4.1, transformers-4.5.1
+
+"""
 
 # pylint: disable=missing-docstring,invalid-name,unused-argument
 # pylint: disable=bad-classmethod-argument,wrong-import-position
@@ -19,7 +24,9 @@
 import tensorflow as tf
 import tf2onnx
 
-compare_perf = False
+compare_perf = True
+time_to_run = 10
+time_step = 10
 
 
 class TestTransformers(unittest.TestCase):
@@ -47,26 +54,31 @@ def run_onnxruntime(self, model_path, input_dict, output_names):
         m = rt.InferenceSession(model_path, sess_options=opt, providers=providers)
         results = m.run(output_names, input_dict)
         if compare_perf:
-            count = 10
+            n = 0
             time_start = time.time()
-            for _ in range(count):
-                _ = m.run(output_names, input_dict.keys())
+            time_stop = time_start + time_to_run
+            while time.time() < time_stop:
+                for _ in range(time_step):
+                    _ = m.run(output_names, input_dict)
+                n += time_step
             time_end = time.time()
-            val = str((time_end - time_start) / count)
-            print(f'==== avg ort name={self.name}, time={val}')
+            val = (time_end - time_start) / n
+            print(f'= avg ort name={self.name}, time={val}, n={n}')
         return results
 
     def run_keras(self, model, inputs):
-        print(f"==== {self.name}")
         pred = model(inputs)
         if compare_perf:
-            count = 10
+            n = 0
             time_start = time.time()
-            for _ in range(count):
-                _ = model(inputs)
-            time_end = time.time()
-            val = str((time_end - time_start) / count)
-            print(f'==== avg keras name={self.name}, time={val}')
+            time_stop = time_start + time_to_run
+            while time.time() < time_stop:
+                for _ in range(time_step):
+                    _ = model(inputs)
+                n += time_step
+            time_stop = time.time()
+            val = (time_stop - time_start) / n
+            print(f'= avg keras name={self.name}, time={val}, n={n}')
         return pred
 
     def run_test(self, model, input_dict, rtol=1e-2, atol=1e-4, input_signature=None,
@@ -96,8 +108,11 @@ def run_test(self, model, input_dict, rtol=1e-2, atol=1e-4, input_signature=None
         if not large:
             model_path = model_path + ".onnx"
         print("= convert")
+        time_start = time.time()
         _, _ = tf2onnx.convert.from_keras(model, input_signature=input_signature, opset=13, large_model=large,
                                           output_path=model_path)
+        time_stop = time.time()
+        print(f"= conversion took {time_stop - time_start}")
 
         if large:
             # need to unpack the zip for run_onnxruntime()
@@ -163,18 +178,45 @@ def test_TFDisillBertModel(self):
     ## FUNNEL
 
-    def _test_TFFunnelSquad(self, size, large=False):
+    def _test_TFFunnel(self, size, large=False):
         from transformers import FunnelTokenizer, TFFunnelForQuestionAnswering
         tokenizer = FunnelTokenizer.from_pretrained(size)
         model = TFFunnelForQuestionAnswering.from_pretrained(size)
         question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
         input_dict = tokenizer(question, text, return_tensors='tf')
-        spec, input_dict = self.spec_and_pad(input_dict, max_length=model.config.max_length)
+        spec, input_dict = self.spec_and_pad(input_dict, 128)
         outputs = ["start_logits", "end_logits"]
         self.run_test(model, input_dict, input_signature=spec, outputs=outputs, rtol=1e-5)
 
-    def test_TFFunnelSquadSmall(self):
-        self._test_TFFunnelSquad("funnel-transformer/small")
+    def test_TFFunnelSmall(self):
+        self._test_TFFunnel("funnel-transformer/small")
+
+    def test_TFFunnelSmallBase(self):
+        self._test_TFFunnel("funnel-transformer/small-base")
+
+    def test_TFFunnelMedium(self):
+        self._test_TFFunnel("funnel-transformer/medium")
+
+    def test_TFFunnelMediumBase(self):
+        self._test_TFFunnel("funnel-transformer/medium-base")
+
+    def test_TFFunnelIntermediate(self):
+        self._test_TFFunnel("funnel-transformer/intermediate")
+
+    def test_TFFunnelIntermediateBase(self):
+        self._test_TFFunnel("funnel-transformer/intermediate-base")
+
+    def test_TFFunnelLarge(self):
+        self._test_TFFunnel("funnel-transformer/large")
+
+    def test_TFFunnelLargeBase(self):
+        self._test_TFFunnel("funnel-transformer/large-base")
+
+    def test_TFFunnelXLarge(self):
+        self._test_TFFunnel("funnel-transformer/xlarge")
+
+    def test_TFFunnelXLargeBase(self):
+        self._test_TFFunnel("funnel-transformer/xlarge-base")
 
     ## T5
@@ -352,13 +394,16 @@ def _test_TFBart(self, size, large=False):
         tokenizer = BartTokenizer.from_pretrained(size)
         model = TFBartModel.from_pretrained(size)
         input_dict = tokenizer("Hello, my dog is cute", return_tensors="tf")
-        spec, input_dict = self.spec_and_pad(input_dict, max_length=model.config.max_length)
+        spec, input_dict = self.spec_and_pad(input_dict, max_length=128)
         outputs = ["last_hidden_state"]
         self.run_test(model, input_dict, input_signature=spec, outputs=outputs, large=large)
 
     def test_TFBartBase(self):
         self._test_TFBart("facebook/bart-base", large=True)
 
+    def test_TFBartLarge(self):
+        self._test_TFBart("facebook/bart-large", large=True)
+
     def test_TFBartLargeCnn(self):
         self._test_TFBart("facebook/bart-large-cnn", large=True)
diff --git a/tests/run_pretrained_models.py b/tests/run_pretrained_models.py
index 777f0e6bc..caa2231ef 100644
--- a/tests/run_pretrained_models.py
+++ b/tests/run_pretrained_models.py
@@ -53,8 +53,8 @@
 logger = logging.getLogger("run_pretrained")
 
 TEMP_DIR = os.path.join(utils.get_temp_directory(), "run_pretrained")
-PERFITER = 1000
-
+PERF_STEP = 10
+PERF_TIME = 10
 
 def get_img(shape, path, dtype, should_scale=True):
     """Get image as input."""
@@ -292,10 +292,15 @@ def run_tensorflow(self, sess, inputs):
         result = sess.run(self.output_names, feed_dict=feed_dict)
         if self.perf:
             logger.info("Running TF perf")
+            n = 0
             start = time.time()
-            for _ in range(PERFITER):
-                _ = sess.run(self.output_names, feed_dict=feed_dict)
-            self.tf_runtime = time.time() - start
+            stop = start + PERF_TIME
+            while time.time() < stop:
+                for _ in range(PERF_STEP):
+                    _ = sess.run(self.output_names, feed_dict=feed_dict)
+                n += PERF_STEP
+            self.tf_runtime = 1000 * (time.time() - start) / n
+            logger.info("TF perf {:.2f}ms/inference, n={}".format(self.tf_runtime, n))
         return result
 
     def to_onnx(self, tf_graph, opset=None, extra_opset=None, shape_override=None, input_names=None,
@@ -312,18 +317,6 @@ def to_onnx(self, tf_graph, opset=None, extra_opset=None, shape_override=None, i
                                tflite_path=tflite_path, dequantize=self.dequantize,
                                tensors_to_rename=tensors_to_rename)
 
-    def run_caffe2(self, name, model_proto, inputs):
-        """Run test again caffe2 backend."""
-        import caffe2.python.onnx.backend
-        prepared_backend = caffe2.python.onnx.backend.prepare(model_proto)
-        results = prepared_backend.run(inputs)
-        if self.perf:
-            start = time.time()
-            for _ in range(PERFITER):
-                _ = prepared_backend.run(inputs)
-            self.onnx_runtime = time.time() - start
-        return results
-
     def run_onnxruntime(self, name, model_proto, inputs, outputs, external_tensor_storage=None):
         """Run test against onnxruntime backend."""
         import onnxruntime as rt
@@ -340,10 +333,15 @@ def run_onnxruntime(self, name, model_proto, inputs, outputs, external_tensor_st
         m = rt.InferenceSession(model_path)
         results = m.run(outputs, inputs)
         if self.perf:
+            n = 0
             start = time.time()
-            for _ in range(PERFITER):
-                _ = m.run(outputs, inputs)
-            self.onnx_runtime = time.time() - start
+            stop = start + PERF_TIME
+            while time.time() < stop:
+                for _ in range(PERF_STEP):
+                    _ = m.run(outputs, inputs)
+                n += PERF_STEP
+            self.onnx_runtime = 1000 * (time.time() - start) / n
+            logger.info("ORT perf {:.2f}ms/inference, n={}".format(self.onnx_runtime, n))
         return results
 
     @staticmethod
@@ -357,8 +355,7 @@ def create_onnx_file(name, model_proto, inputs, outdir, external_tensor_storage=
             utils.save_onnx_zip(model_path, model_proto, external_tensor_storage)
         logger.info("Created %s", model_path)
 
-    def run_test(self, name, backend="caffe2", onnx_file=None, opset=None, extra_opset=None,
-                 perf=None, fold_const=None):
+    def run_test(self, name, backend="onnxruntime", onnx_file=None, opset=None, extra_opset=None, perf=None):
         """Run complete test against backend."""
         self.perf = perf
@@ -422,10 +419,15 @@ def run_tflite():
             tf_results = run_tflite()
             if self.perf:
                 logger.info("Running TFLite perf")
+                n = 0
                 start = time.time()
-                for _ in range(PERFITER):
-                    _ = run_tflite()
-                self.tf_runtime = time.time() - start
+                stop = start + PERF_TIME
+                while time.time() < stop:
+                    for _ in range(PERF_STEP):
+                        _ = run_tflite()
+                    n += PERF_STEP
+                self.tf_runtime = 1000 * (time.time() - start) / n
+                logger.info("TFLite perf {:.2f}ms/inference, n={}".format(self.tf_runtime, n))
             logger.info("TFLite OK")
 
         if not self.run_tf_frozen:
@@ -444,10 +446,15 @@ def run_tflite():
                 tf_results = [tf_res.numpy() for tf_res in tf_results]
                 if self.perf:
                     logger.info("Running TF perf")
+                    n = 0
                     start = time.time()
-                    for _ in range(PERFITER):
-                        _ = concrete_func(**inputs)
-                    self.tf_runtime = time.time() - start
+                    stop = start + PERF_TIME
+                    while time.time() < stop:
+                        for _ in range(PERF_STEP):
+                            _ = concrete_func(**inputs)
+                        n += PERF_STEP
+                    self.tf_runtime = 1000 * (time.time() - start) / n
+                    logger.info("TF perf {:.2f}ms/inference, n={}".format(self.tf_runtime, n))
             logger.info("TensorFlow OK")
 
         shape_override = {}
@@ -533,9 +540,7 @@ def run_tflite():
         try:
             onnx_results = None
-            if backend == "caffe2":
-                onnx_results = self.run_caffe2(name, model_proto, inputs)
-            elif backend == "onnxruntime":
+            if backend == "onnxruntime":
                 if to_rename is None:
                     struc_outputs = self.output_names
                 else:
@@ -614,7 +619,7 @@ def get_args():
     parser.add_argument("--tests", help="tests to run")
     parser.add_argument("--target", default="", help="target platform")
     parser.add_argument("--backend", default="onnxruntime",
-                        choices=["caffe2", "onnxruntime"], help="backend to use")
+                        choices=["onnxruntime"], help="backend to use")
     parser.add_argument("--opset", type=int, default=None, help="opset to use")
     parser.add_argument("--extra_opset", default=None,
                         help="extra opset with format like domain:version, e.g. com.microsoft:1")
@@ -625,9 +630,6 @@ def get_args():
     parser.add_argument("--list", help="list tests", action="store_true")
     parser.add_argument("--onnx-file", help="create onnx file in directory")
    parser.add_argument("--perf", help="capture performance numbers")
-    parser.add_argument("--perfiter", type=int, default=PERFITER, help="number of inferences for perf testing")
-    parser.add_argument("--fold_const", help="enable tf constant_folding transformation before conversion",
-                        action="store_true")
     parser.add_argument("--include-disabled", help="include disabled tests", action="store_true")
     args = parser.parse_args()
@@ -699,7 +701,6 @@ def load_tests_from_yaml(path):
 
 def main():
-    global PERFITER
     args = get_args()
     logging.basicConfig(level=logging.get_verbosity_level(args.verbose))
     if args.debug:
@@ -718,7 +719,6 @@ def main():
     failed = 0
     count = 0
-    PERFITER = args.perfiter
 
     for test in test_keys:
         logger.info("===================================")
@@ -749,8 +749,7 @@ def main():
             try:
                 logger.info("Running %s", test)
                 ret = t.run_test(test, backend=args.backend, onnx_file=args.onnx_file,
-                                 opset=args.opset, extra_opset=args.extra_opset, perf=args.perf,
-                                 fold_const=args.fold_const)
+                                 opset=args.opset, extra_opset=args.extra_opset, perf=args.perf)
             except Exception:
                 logger.error("Failed to run %s", test, exc_info=1)
                 ret = None
@@ -770,7 +769,7 @@ def main():
             t = tests[test]
             if t.perf:
                 # Report perf in ms per inference
-                f.write("{},{},{}\n".format(test, t.tf_runtime * 1000 / PERFITER, t.onnx_runtime * 1000 / PERFITER))
+                f.write("{},{},{}\n".format(test, t.tf_runtime, t.onnx_runtime))
     return failed