diff --git a/torchbenchmark/operator_loader/__init__.py b/torchbenchmark/operator_loader/__init__.py
new file mode 100644
index 000000000..1529b9d65
--- /dev/null
+++ b/torchbenchmark/operator_loader/__init__.py
@@ -0,0 +1,219 @@
+import argparse
+import sys
+import types
+from typing import Any, Generator, List, Optional
+
+import torch
+from torch._dynamo.backends.cudagraphs import cudagraphs_inner
+from torch._inductor.compile_fx import compile_fx
+from torch._inductor.utils import gen_gm_and_inputs
+from torch._ops import OpOverload
+from torch.utils._pytree import tree_map_only
+
+from torchbenchmark.util.triton_op import (
+    BenchmarkOperator,
+    register_benchmark_mannually,
+)
+
+from .operator_inp_utils import aten, OperatorInputsLoader, to_channels_last
+
+timm_loader = None
+huggingface_loader = None
+torchbench_loader = None
+
+
+def maybe_load_operator_inputs_loader():
+    """Create the module-level input loaders that are still unset.
+
+    Each of ``timm_loader``, ``huggingface_loader`` and ``torchbench_loader`` is
+    assigned only while it is ``None``, so repeated calls reuse existing loaders.
+    """
+    global timm_loader, huggingface_loader, torchbench_loader
+    if timm_loader is None:
+        timm_loader = OperatorInputsLoader.get_timm_loader()
+    if huggingface_loader is None:
+        huggingface_loader = OperatorInputsLoader.get_huggingface_loader()
+    if torchbench_loader is None:
+        torchbench_loader = OperatorInputsLoader.get_torchbench_loader()
+
+
+def parse_args(extra_args: Optional[List[str]] = None):
+    """Parse the command line flags specific to the operator loader.
+
+    Args:
+        extra_args: Argument strings to parse. With ``None``, argparse reads
+            ``sys.argv[1:]``.
+
+    Returns:
+        The ``(namespace, unknown_args)`` tuple from ``parse_known_args``, so
+        flags this parser does not define are returned instead of rejected.
+    """
+    parser = argparse.ArgumentParser(allow_abbrev=False)
+    parser.add_argument(
+        "--channel-list",
+        action="store_true",
+        help="Apply to_channels_last to all tensor inputs before benchmarking.",
+    )
+    return parser.parse_known_args(extra_args)
+
+
+def list_operators() -> List[str]:
+    """Return the names of all operators known to the input loaders.
+
+    In the original operator benchmark design, all operators are registered in
+    the operator loader, so they are collected from the three loaders here.
+
+    Returns:
+        The unique ``str()`` forms of every loader's operators, sorted so the
+        order does not depend on set iteration order.
+    """
+    maybe_load_operator_inputs_loader()
+    loaders = (timm_loader, huggingface_loader, torchbench_loader)
+    # The set comprehension drops operators reported by more than one loader.
+    return sorted({str(op) for loader in loaders for op in loader.get_all_ops()})
+
+
+def load_opbench_by_name_from_loader(args: argparse.Namespace):
+    """Create the benchmark class for the operator named by ``args.op``.
+
+    Args:
+        args: Parsed arguments; ``args.op`` is the operator name as a string.
+
+    Returns:
+        The class built by ``dynamically_create_aten_op_class``.
+
+    Raises:
+        ValueError: If ``args.op`` is not one of ``list_operators()``.
+    """
+    if args.op not in list_operators():
+        raise ValueError(f"{args.op} is not found in the operator loader.")
+    # args.op is a string, we need to evaluate it to get the actual operator
+    # overload.
+    # NOTE(review): eval() only runs on names that passed the check above. If the
+    # loader logs can ever come from an untrusted source, replace it with an
+    # attribute lookup.
+    op_eval = eval(args.op)
+    return dynamically_create_aten_op_class(op_eval)
+
+
+def create_operator_class(op_eval: OpOverload):
+    """Create a new ``BenchmarkOperator`` subclass for the operator overload.
+
+    Args:
+        op_eval: The operator overload the class benchmarks; it is stored on
+            the class as ``op_eval``.
+
+    Returns:
+        A class named ``Operator`` providing ``eager`` and ``inductor``
+        benchmark methods and a ``get_input_iter`` input generator.
+    """
+
+    def __init__(
+        self, tb_args: argparse.Namespace, extra_args: Optional[List[str]] = None
+    ):
+        BenchmarkOperator.__init__(self, tb_args, extra_args)
+        native_args, _ = parse_args(extra_args)
+        self.channel_list = native_args.channel_list
+        self.device = tb_args.device
+        # Make sure the loaders exist even when this class was not created through
+        # dynamically_create_aten_op_class; otherwise they would still be None.
+        maybe_load_operator_inputs_loader()
+        self.huggingface_loader = huggingface_loader
+        self.torchbench_loader = torchbench_loader
+        self.timm_loader = timm_loader
+        # We enable cuda graphs by default when we get the input iter. So, we don't
+        # utilize tritonbench's cuda graphs.
+        self.use_cuda_graphs = False
+        # NOTE(review): assigned after BenchmarkOperator.__init__ has run; confirm
+        # the base class reads DEFAULT_PRECISION late enough for this to apply.
+        self.DEFAULT_PRECISION = "fp16"
+        assert self.dtype in (
+            torch.float16,
+            torch.float32,
+        ), f"AtenOpBenchmark only supports fp16 and fp32, but got {self.dtype}"
+
+    def get_input_iter(self) -> Generator:
+        """Yield the graph inputs for every recorded call of the operator.
+
+        Before each yield, ``self.eager_op`` and ``self.inductor_op`` are rebound
+        to callables built for the inputs being yielded.
+        """
+        # The registration does not depend on the inputs, so do it once instead
+        # of once per yielded input.
+        torch.jit._builtins._register_builtin(
+            torch.ops.aten.convolution_backward.default,
+            "aten::convolution_backward",
+        )
+        inps_gens = [self.huggingface_loader, self.torchbench_loader, self.timm_loader]
+        for inp_gen in inps_gens:
+            for inp in inp_gen.get_inputs_for_operator(
+                self.op_eval, self.dtype, self.device
+            ):
+                args, kwargs = inp
+                if self.channel_list:
+                    args, kwargs = tree_map_only(
+                        torch.Tensor, to_channels_last, (args, kwargs)
+                    )
+                gm, gm_args = gen_gm_and_inputs(self.op_eval, args, kwargs)
+                if self.device == "cuda":
+                    cudagraph_eager = cudagraphs_inner(
+                        gm, gm_args, copy_outputs=False, copy_inputs=False
+                    )
+                    self.eager_op = cudagraph_eager
+                    compiled_fn = compile_fx(gm, gm_args)
+                    cudagraph_compiled = cudagraphs_inner(
+                        compiled_fn, gm_args, copy_outputs=False, copy_inputs=False
+                    )
+                    self.inductor_op = cudagraph_compiled
+                else:
+                    # NOTE(review): off CUDA both variants run the same uncompiled
+                    # graph module, so "inductor" is not compiled here.
+                    self.eager_op = gm
+                    self.inductor_op = gm
+
+                yield gm_args
+
+    def eager(self, input):
+        return lambda: self.eager_op(input)
+
+    def inductor(self, input):
+        return lambda: self.inductor_op(input)
+
+    class_attrs = {
+        "eager": eager,
+        "inductor": inductor,
+        "get_input_iter": get_input_iter,
+        "__init__": __init__,
+    }
+    new_class = type("Operator", (BenchmarkOperator,), class_attrs)
+    new_class.op_eval = op_eval
+    return new_class
+
+
+def dynamically_create_aten_op_class(op_eval: OpOverload):
+    """Create and register the benchmark class for an aten operator.
+
+    To stay consistent with custom operators, every aten operator gets its own
+    dynamically created module containing an ``Operator`` class.
+
+    Args:
+        op_eval: The operator overload to build the benchmark class for.
+
+    Returns:
+        The created operator class; it is also reachable as ``Operator`` in the
+        module registered in ``sys.modules``.
+    """
+    maybe_load_operator_inputs_loader()
+    class_name = f"aten_{str(op_eval).replace('.', '_')}"
+    module_name = f"torchbenchmark.operator_loader.{class_name}"
+    # create a new module for each operator
+    op_name_module = types.ModuleType(module_name)
+    sys.modules[module_name] = op_name_module
+    op_class = create_operator_class(op_eval)
+    # need to set __module__ to make _find_op_name_from_module_path work
+    op_class.__module__ = module_name
+    op_name_module.Operator = op_class
+    # because the class is dynamically created, decorator can't get the desired module_path.
+    register_benchmark_mannually(class_name, "eager", baseline=True)
+    register_benchmark_mannually(class_name, "inductor")
+    return op_class
diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/AlbertForMaskedLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/AlbertForMaskedLM_training.txt new file mode 100644 index 000000000..b2374b7fa --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/AlbertForMaskedLM_training.txt @@ -0,0 +1,115 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([1024, 30000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([1024, 30000], f16), T([1024, 30000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([2, 64, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([2, 64, 512, 512], f16), T([2, 64, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([2, 1, 1, 512], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([2, 64, 512, 64], f16), [128, 512, 64]), {}) +cnt: 12, ((T([2, 64, 64, 512], f16), [128, 64, 512]), {}) +cnt: 12, ((T([128, 512, 512], f16), [2, 64, 512, 512]), {}) +cnt: 12, ((T([128, 512, 64], f16), [2, 64, 512, 64]), {}) +cnt: 36, ((T([2, 512, 64, 64], f16), [2, 512, 4096]), {}) +cnt: 12, ((T([2, 512, 4096], f16), [1024, 4096]), {}) +Operator: 
aten.add.Tensor +cnt: 4, ((T([2, 512, 128], f16), T([2, 512, 128], f16)), {}) +cnt: 12, ((T([2, 64, 512, 512], f16), T([2, 1, 1, 512], f16)), {}) +cnt: 72, ((T([2, 512, 4096], f16), T([2, 512, 4096], f16)), {}) +cnt: 36, ((T([2, 512, 16384], f16), T([2, 512, 16384], f16)), {}) +cnt: 12, ((T([2, 512, 16384], f16), 1.0), {}) +cnt: 1, ((T([2, 512, 128], f16), 1.0), {}) +cnt: 99, ((T([4096], f16), T([4096], f16)), {}) +cnt: 11, ((T([4096, 16384], f16), T([4096, 16384], f16)), {}) +cnt: 11, ((T([16384], f16), T([16384], f16)), {}) +cnt: 11, ((T([16384, 4096], f16), T([16384, 4096], f16)), {}) +cnt: 44, ((T([4096, 4096], f16), T([4096, 4096], f16)), {}) +cnt: 1, ((T([30000, 128], f16), T([30000, 128], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([2, 512, 128], f16), T([1, 512, 128], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([4096], f16), T([1024, 128], f16), T([128, 4096], f16, stride=(1, 128))), {}) +cnt: 48, ((T([4096], f16), T([1024, 4096], f16), T([4096, 4096], f16, stride=(1, 4096))), {}) +cnt: 12, ((T([16384], f16), T([1024, 4096], f16), T([4096, 16384], f16, stride=(1, 4096))), {}) +cnt: 12, ((T([4096], f16), T([1024, 16384], f16), T([16384, 4096], f16, stride=(1, 16384))), {}) +cnt: 1, ((T([128], f16), T([1024, 4096], f16), T([4096, 128], f16, stride=(1, 4096))), {}) +cnt: 1, ((T([30000], f16), T([1024, 128], f16), T([128, 30000], f16, stride=(1, 128))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([128, 512, 64], f16), T([128, 64, 512], f16)), {}) +cnt: 12, ((T([128, 512, 512], f16), T([128, 512, 64], f16)), {}) +cnt: 12, ((T([128, 512, 512], f16, stride=(262144, 1, 512)), T([128, 512, 64], f16)), {}) +cnt: 12, ((T([128, 512, 64], f16), T([128, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 12, ((T([128, 64, 512], f16, stride=(32768, 1, 64)), T([128, 512, 512], f16)), {}) +cnt: 12, ((T([128, 512, 512], f16), T([128, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.clone.default +cnt: 2, ((T([2, 512], i64),), {}) +Operator: 
aten.copy_.default +cnt: 2, ((T([2, 512], i64), T([2, 512], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([2, 64, 512, 512], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30000, 128], f16), T([2, 512], i64), 0), {}) +cnt: 1, ((T([2, 128], f16), T([2, 512], i64, stride=(0, 1))), {}) +cnt: 1, ((T([512, 128], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 128], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([2, 512, 128], f16), T([2, 512], i64, stride=(0, 1)), 2, -1, False), {}) +cnt: 1, ((T([2, 512, 128], f16), T([2, 512], i64), 30000, 0, False), {}) +Operator: aten.mm.default +cnt: 1, ((T([1024, 30000], f16), T([30000, 128], f16)), {}) +cnt: 1, ((T([30000, 1024], f16, stride=(1, 30000)), T([1024, 128], f16)), {}) +cnt: 1, ((T([1024, 128], f16), T([128, 4096], f16)), {}) +cnt: 1, ((T([128, 1024], f16, stride=(1, 128)), T([1024, 4096], f16)), {}) +cnt: 12, ((T([1024, 4096], f16), T([4096, 16384], f16)), {}) +cnt: 12, ((T([4096, 1024], f16, stride=(1, 4096)), T([1024, 16384], f16)), {}) +cnt: 12, ((T([1024, 16384], f16), T([16384, 4096], f16)), {}) +cnt: 12, ((T([16384, 1024], f16, stride=(1, 16384)), T([1024, 4096], f16)), {}) +cnt: 48, ((T([1024, 4096], f16), T([4096, 4096], f16)), {}) +cnt: 48, ((T([4096, 1024], f16, stride=(1, 4096)), T([1024, 4096], f16)), {}) +cnt: 1, ((T([1024, 4096], f16), T([4096, 128], f16)), {}) +cnt: 1, ((T([4096, 1024], f16, stride=(1, 4096)), T([1024, 128], f16)), {}) +Operator: aten.mul.Scalar +cnt: 1, ((T([2, 512, 128], f16), 3.0), {}) +cnt: 12, ((T([2, 512, 16384], f16), 3.0), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([2, 1, 1, 512], f16), -65504.0), {}) +cnt: 24, ((T([2, 512, 16384], f16), 0.5), {}) +cnt: 24, ((T([2, 512, 16384], f16), 0.044715), {}) +cnt: 24, ((T([2, 512, 16384], f16), 0.7978845608028654), {}) +cnt: 48, ((T([2, 512, 16384], f16), T([2, 512, 16384], f16)), {}) +cnt: 2, ((T([2, 512, 128], f16), 0.5), {}) +cnt: 2, ((T([2, 512, 
128], f16), 0.044715), {}) +cnt: 2, ((T([2, 512, 128], f16), 0.7978845608028654), {}) +cnt: 4, ((T([2, 512, 128], f16), T([2, 512, 128], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 2, ((T([2, 512, 128], f16), [128], T([128], f16), T([128], f16), 1e-12), {}) +cnt: 24, ((T([2, 512, 4096], f16), [4096], T([4096], f16), T([4096], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 2, ((T([2, 512, 128], f16), T([2, 512, 128], f16), [128], T([2, 512, 1], f32), T([2, 512, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 24, ((T([2, 512, 4096], f16), T([2, 512, 4096], f16), [4096], T([2, 512, 1], f32), T([2, 512, 1], f32), T([4096], f16), T([4096], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([1024, 30000], f16), T([1024], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([1024, 30000], f16), T([1024], i64), None, 1, -100), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 12, ((T([2, 512, 16384], f16), 3.0), {}) +cnt: 1, ((T([2, 512, 128], f16), 3.0), {}) +cnt: 1, ((T([2, 512, 128], f16), 2.0), {}) +cnt: 12, ((T([2, 512, 16384], f16), 2.0), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([2, 1, 1, 512], f16), 1.0), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([1024, 30000], f16), [0], True), {}) +cnt: 1, ((T([1024, 128], f16), [0], True), {}) +cnt: 61, ((T([1024, 4096], f16), [0], True), {}) +cnt: 12, ((T([1024, 16384], f16), [0], True), {}) +cnt: 1, ((T([2, 512, 128], f16), [0], True), {}) +Operator: aten.tanh.default +cnt: 12, ((T([2, 512, 16384], f16),), {}) +cnt: 1, ((T([2, 512, 128], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 1, ((T([2, 512, 128], f16), T([2, 512, 128], f16)), {}) +cnt: 12, ((T([2, 512, 16384], f16), T([2, 512, 16384], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/AlbertForQuestionAnswering_training.txt 
b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/AlbertForQuestionAnswering_training.txt new file mode 100644 index 000000000..8e25df927 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/AlbertForQuestionAnswering_training.txt @@ -0,0 +1,110 @@ +Operator: aten._log_softmax.default +cnt: 2, ((T([2, 512], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 2, ((T([2, 512], f16), T([2, 512], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([2, 64, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([2, 64, 512, 512], f16), T([2, 64, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([2, 1, 1, 512], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([2, 64, 512, 64], f16), [128, 512, 64]), {}) +cnt: 12, ((T([2, 64, 64, 512], f16), [128, 64, 512]), {}) +cnt: 12, ((T([128, 512, 512], f16), [2, 64, 512, 512]), {}) +cnt: 12, ((T([128, 512, 64], f16), [2, 64, 512, 64]), {}) +cnt: 36, ((T([2, 512, 64, 64], f16), [2, 512, 4096]), {}) +cnt: 12, ((T([2, 512, 4096], f16), [1024, 4096]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([2, 512, 128], f16), T([2, 512, 128], f16)), {}) +cnt: 12, ((T([2, 64, 512, 512], f16), T([2, 1, 1, 512], f16)), {}) +cnt: 72, ((T([2, 512, 4096], f16), T([2, 512, 4096], f16)), {}) +cnt: 36, ((T([2, 512, 16384], f16), T([2, 512, 16384], f16)), {}) +cnt: 12, ((T([2, 512, 16384], f16), 1.0), {}) +cnt: 1, ((T([], f16), T([], f16)), {}) +cnt: 99, ((T([4096], f16), T([4096], f16)), {}) +cnt: 11, ((T([4096, 16384], f16), T([4096, 16384], f16)), {}) +cnt: 11, ((T([16384], f16), T([16384], f16)), {}) +cnt: 11, ((T([16384, 4096], f16), T([16384, 4096], f16)), {}) +cnt: 44, ((T([4096, 4096], f16), T([4096, 4096], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([2, 512, 128], f16), T([1, 512, 128], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([4096], f16), T([1024, 128], f16), 
T([128, 4096], f16, stride=(1, 128))), {}) +cnt: 48, ((T([4096], f16), T([1024, 4096], f16), T([4096, 4096], f16, stride=(1, 4096))), {}) +cnt: 12, ((T([16384], f16), T([1024, 4096], f16), T([4096, 16384], f16, stride=(1, 4096))), {}) +cnt: 12, ((T([4096], f16), T([1024, 16384], f16), T([16384, 4096], f16, stride=(1, 16384))), {}) +cnt: 1, ((T([2], f16), T([1024, 4096], f16), T([4096, 2], f16, stride=(1, 4096))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([128, 512, 64], f16), T([128, 64, 512], f16)), {}) +cnt: 12, ((T([128, 512, 512], f16), T([128, 512, 64], f16)), {}) +cnt: 12, ((T([128, 512, 512], f16, stride=(262144, 1, 512)), T([128, 512, 64], f16)), {}) +cnt: 12, ((T([128, 512, 64], f16), T([128, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 12, ((T([128, 64, 512], f16, stride=(32768, 1, 64)), T([128, 512, 512], f16)), {}) +cnt: 12, ((T([128, 512, 512], f16), T([128, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.cat.default +cnt: 1, (([T([2, 512, 1], f16), T([2, 512, 1], f16)], 2), {}) +Operator: aten.clamp.default +cnt: 2, ((T([2], i64), 0, 512), {}) +Operator: aten.clone.default +cnt: 1, ((T([2, 512], i64),), {}) +cnt: 2, ((T([2], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([2, 512], i64), T([2, 512], i64)), {}) +cnt: 2, ((T([2], i64), T([2], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([2, 64, 512, 512], f16), 8.0), {}) +cnt: 2, ((T([], f16), 2), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30000, 128], f16), T([2, 512], i64), 0), {}) +cnt: 1, ((T([2, 128], f16), T([2, 512], i64, stride=(0, 1))), {}) +cnt: 1, ((T([512, 128], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 128], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([2, 512, 128], f16), T([2, 512], i64, stride=(0, 1)), 2, -1, False), {}) +cnt: 1, ((T([2, 512, 128], f16), T([2, 512], i64), 30000, 0, False), {}) +Operator: aten.mm.default +cnt: 1, ((T([1024, 2], f16), T([2, 4096], f16)), {}) 
+cnt: 1, ((T([2, 1024], f16, stride=(1, 2)), T([1024, 4096], f16)), {}) +cnt: 12, ((T([1024, 4096], f16), T([4096, 16384], f16)), {}) +cnt: 12, ((T([4096, 1024], f16, stride=(1, 4096)), T([1024, 16384], f16)), {}) +cnt: 12, ((T([1024, 16384], f16), T([16384, 4096], f16)), {}) +cnt: 12, ((T([16384, 1024], f16, stride=(1, 16384)), T([1024, 4096], f16)), {}) +cnt: 48, ((T([1024, 4096], f16), T([4096, 4096], f16)), {}) +cnt: 48, ((T([4096, 1024], f16, stride=(1, 4096)), T([1024, 4096], f16)), {}) +cnt: 1, ((T([1024, 4096], f16), T([4096, 128], f16)), {}) +cnt: 1, ((T([4096, 1024], f16, stride=(1, 4096)), T([1024, 128], f16)), {}) +Operator: aten.mul.Scalar +cnt: 12, ((T([2, 512, 16384], f16), 3.0), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([2, 1, 1, 512], f16), -65504.0), {}) +cnt: 24, ((T([2, 512, 16384], f16), 0.5), {}) +cnt: 24, ((T([2, 512, 16384], f16), 0.044715), {}) +cnt: 24, ((T([2, 512, 16384], f16), 0.7978845608028654), {}) +cnt: 48, ((T([2, 512, 16384], f16), T([2, 512, 16384], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 1, ((T([2, 512, 128], f16), [128], T([128], f16), T([128], f16), 1e-12), {}) +cnt: 24, ((T([2, 512, 4096], f16), [4096], T([4096], f16), T([4096], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 24, ((T([2, 512, 4096], f16), T([2, 512, 4096], f16), [4096], T([2, 512, 1], f32), T([2, 512, 1], f32), T([4096], f16), T([4096], f16), [True, True, True]), {}) +cnt: 1, ((T([2, 512, 128], f16), T([2, 512, 128], f16), [128], T([2, 512, 1], f32), T([2, 512, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 2, ((T([], f16), T([2, 512], f16), T([2], i64), None, 1, 512, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 2, ((T([2, 512], f16), T([2], i64), None, 1, 512), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 12, ((T([2, 512, 16384], f16), 3.0), {}) +cnt: 12, ((T([2, 512, 16384], f16), 2.0), {}) +Operator: aten.rsub.Scalar +cnt: 1, 
((T([2, 1, 1, 512], f16), 1.0), {}) +Operator: aten.split.Tensor +cnt: 1, ((T([2, 512, 2], f16), 1, -1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([1024, 2], f16), [0], True), {}) +cnt: 61, ((T([1024, 4096], f16), [0], True), {}) +cnt: 12, ((T([1024, 16384], f16), [0], True), {}) +cnt: 1, ((T([2, 512, 128], f16), [0], True), {}) +Operator: aten.tanh.default +cnt: 12, ((T([2, 512, 16384], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 12, ((T([2, 512, 16384], f16), T([2, 512, 16384], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/AllenaiLongformerBase_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/AllenaiLongformerBase_training.txt new file mode 100644 index 000000000..5cf276860 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/AllenaiLongformerBase_training.txt @@ -0,0 +1,186 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([1024, 50265], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([1024, 50265], f16), T([1024, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([1, 1024, 12, 513], f16, stride=(6303744, 513, 525312, 1)), -1, True), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([1, 1024, 12, 513], f32), T([1, 1024, 12, 513], f32), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([1, 1, 1, 1024], f32),), {'dtype': f16}) +cnt: 1, ((T([1, 1024], b8),), {'dtype': i32}) +cnt: 1, ((T([1, 1024], i64),), {'dtype': i32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([1, 1024], i32),), {'dtype': i64}) +cnt: 12, ((T([1, 1024, 1, 1], b8),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 12, ((T([1, 1024, 12, 513], f32),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 12, ((T([1, 1024, 12, 513], f16, stride=(6303744, 513, 525312, 1)),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 
12, ((T([12, 3, 512, 64, 1], f16), [36, 512, 64]), {}) +cnt: 12, ((T([12, 3, 64, 512, 1], f16), [36, 64, 512]), {}) +cnt: 12, ((T([12, 4, 768, 64, 1], f16), [48, 768, 64]), {}) +cnt: 24, ((T([1024, 1, 12, 64], f16), [1024, 1, 768]), {}) +cnt: 12, ((T([12, 4, 256, 1, 64], f16), [48, 256, 64]), {}) +cnt: 12, ((T([12, 4, 768, 64], i64), [2359296]), {}) +cnt: 12, ((T([12, 3, 512, 64], f16), [1179648]), {}) +cnt: 24, ((T([12, 3, 512, 64], i64), [1179648]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([1, 1024], i64), 1), {}) +cnt: 50, ((T([1, 1024, 768], f16), T([1, 1024, 768], f16)), {}) +cnt: 36, ((T([12, 3, 512, 513], f16), T([12, 3, 512, 513], f16)), {}) +cnt: 24, ((T([1024, 1, 768], f16), T([1024, 1, 768], f16)), {}) +cnt: 1, ((T([50265, 768], f16), T([50265, 768], f16)), {}) +Operator: aten.add_.Tensor +cnt: 12, ((T([1, 1024, 12, 513], f16, stride=(6303744, 513, 525312, 1)), T([1, 1024, 1, 513], f16)), {}) +Operator: aten.addmm.default +cnt: 49, ((T([768], f16), T([1024, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([1024, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([1024, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([50265], f16), T([1024, 768], f16), T([768, 50265], f16, stride=(1, 768))), {}) +Operator: aten.any.default +cnt: 1, ((T([1024], b8),), {}) +Operator: aten.bmm.default +cnt: 12, ((T([36, 512, 64], f16), T([36, 64, 512], f16)), {}) +cnt: 12, ((T([48, 256, 768], f16, stride=(197120, 769, 1)), T([48, 768, 64], f16)), {}) +cnt: 12, ((T([48, 768, 256], f16, stride=(197120, 1, 769)), T([48, 256, 64], f16)), {}) +cnt: 12, ((T([48, 256, 64], f16), T([48, 64, 768], f16, stride=(49152, 1, 64))), {}) +cnt: 12, ((T([36, 64, 512], f16, stride=(32768, 1, 64)), T([36, 512, 512], f16)), {}) +cnt: 12, ((T([36, 512, 512], f16), T([36, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.clone.default +cnt: 2, ((T([1, 1024], i64),), {}) +Operator: 
aten.constant_pad_nd.default +cnt: 12, ((T([12, 3, 512, 512], f16), [0, 0, 0, 1], 0.0), {}) +cnt: 12, ((T([1, 3, 512, 512], f16), [0, 0, 0, 1], 0.0), {}) +cnt: 12, ((T([12, 1024, 64], f16, stride=(64, 768, 1)), [0, 0, 256, 256], -1.0), {}) +cnt: 12, ((T([12, 4, 256, 513], f16, stride=(513, 1575936, 6156, 1)), [0, 257], 0.0), {}) +cnt: 12, ((T([12, 4, 256, 770], f16), [0, -257]), {}) +cnt: 12, ((T([12, 1536, 64], f16), [0, 0, -256, -256]), {}) +cnt: 12, ((T([12, 3, 513, 512], f16), [0, 0, 0, -1]), {}) +Operator: aten.copy_.default +cnt: 2, ((T([1, 1024], i64), T([1, 1024], i64)), {}) +cnt: 12, ((T([12, 3, 256, 257], f16, stride=(525312, 131328, 513, 1)), T([12, 3, 256, 257], f16, stride=(787968, 262656, 513, 1))), {}) +cnt: 12, ((T([12, 256, 257], f16, stride=(525312, 513, 1)), T([12, 256, 257], f16, stride=(787968, 513, 1))), {}) +cnt: 12, ((T([12, 3, 256, 256], f16, stride=(525312, 131328, 513, 1)), T([12, 3, 256, 256], f16, stride=(787968, 262656, 513, 1))), {}) +cnt: 12, ((T([12, 255, 255], f16, stride=(525312, 513, 1)), T([12, 255, 255], f16, stride=(787968, 513, 1))), {}) +cnt: 12, ((T([1, 3, 256, 257], f16, stride=(525312, 131328, 513, 1)), T([1, 3, 256, 257], f16, stride=(787968, 262656, 513, 1))), {}) +cnt: 12, ((T([1, 256, 257], f16, stride=(525312, 513, 1)), T([1, 256, 257], f16, stride=(787968, 513, 1))), {}) +cnt: 12, ((T([1, 3, 256, 256], f16, stride=(525312, 131328, 513, 1)), T([1, 3, 256, 256], f16, stride=(787968, 262656, 513, 1))), {}) +cnt: 12, ((T([1, 255, 255], f16, stride=(525312, 513, 1)), T([1, 255, 255], f16, stride=(787968, 513, 1))), {}) +cnt: 12, ((T([1024, 12, 513], f16, stride=(513, 525312, 1)), T([1024, 12, 513], f16)), {}) +cnt: 84, ((T([12, 4, 256, 513], f16), T([12, 4, 256, 513], f16)), {}) +cnt: 12, ((T([1, 1024, 12, 513], f16, stride=(6303744, 513, 525312, 1)), T([1, 1024, 12, 513], f16)), {}) +cnt: 24, ((T([1, 256, 12, 257], f16, stride=(6303744, 513, 525312, 1)), T([1, 256, 12, 257], f16)), {}) +cnt: 12, ((T([12, 255, 255], f16, 
stride=(525312, 513, 1)), T([12, 255, 255], f16)), {}) +cnt: 12, ((T([12, 3, 256, 256], f16, stride=(525312, 131328, 513, 1)), T([12, 3, 256, 256], f16)), {}) +cnt: 12, ((T([12, 256, 257], f16, stride=(525312, 513, 1)), T([12, 256, 257], f16)), {}) +cnt: 24, ((T([1024, 768], f16), T([1024, 768], f16)), {}) +cnt: 12, ((T([1024, 1, 768], f16), T([1024, 1, 768], f16)), {}) +Operator: aten.cumsum.default +cnt: 1, ((T([1, 1024], i32), 1), {}) +Operator: aten.div.Tensor +cnt: 12, ((T([1024, 1, 768], f16), 8.0), {}) +Operator: aten.div_.Tensor +cnt: 12, ((T([1024, 1, 768], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50265, 768], f16), T([1, 1024], i64), 1), {}) +cnt: 1, ((T([4098, 768], f16), T([1, 1024], i64), 1), {}) +cnt: 1, ((T([1, 768], f16), T([1, 1024], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024], i64), 1, -1, False), {}) +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024], i64), 4098, 1, False), {}) +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024], i64), 50265, 1, False), {}) +Operator: aten.eq.Scalar +cnt: 24, ((T([1, 256, 12, 257], f16, stride=(65792, 257, 0, 1)), 1), {}) +cnt: 24, ((T([1, 256, 1, 257], f16), 1), {}) +Operator: aten.flip.default +cnt: 24, ((T([256, 257], f16), [0]), {}) +cnt: 24, ((T([1, 256, 1, 257], f16), [1, 3]), {}) +Operator: aten.gelu.default +cnt: 12, ((T([1, 1024, 3072], f16),), {}) +cnt: 1, ((T([1, 1024, 768], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024, 768], f16)), {}) +cnt: 12, ((T([1, 1024, 3072], f16), T([1, 1024, 3072], f16)), {}) +Operator: aten.gt.Scalar +cnt: 1, ((T([1, 1024], f16), 0), {}) +Operator: aten.index_add_.default +cnt: 12, ((T([1179648], f16), 0, T([2359296], i64), T([2359296], f16)), {}) +cnt: 24, ((T([786432], f16), 0, T([1179648], i64), T([1179648], f16)), {}) +Operator: aten.lt.Scalar +cnt: 1, ((T([1, 1024], f16), 0), {}) +Operator: aten.masked_fill.Scalar +cnt: 12, ((T([1, 1024, 1, 1], 
f16), T([1, 1024, 1, 1], b8), -65504.0), {}) +cnt: 12, ((T([1, 1024, 12, 513], f32), T([1, 1024, 1, 1], b8), 0.0), {}) +cnt: 12, ((T([1, 1024, 12, 513], f32, stride=(6303744, 513, 525312, 1)), T([1, 1024, 1, 1], b8), 0), {}) +cnt: 24, ((T([1, 256, 12, 257], f16), T([1, 256, 12, 257], b8), 0), {}) +Operator: aten.masked_fill_.Scalar +cnt: 24, ((T([1, 256, 12, 257], f16, stride=(6303744, 513, 525312, 1)), T([1, 256, 12, 257], b8), -inf), {}) +cnt: 24, ((T([1, 256, 1, 257], f16, stride=(525312, 513, 525312, 1)), T([1, 256, 1, 257], b8), -inf), {}) +Operator: aten.mm.default +cnt: 1, ((T([1024, 50265], f16), T([50265, 768], f16)), {}) +cnt: 1, ((T([50265, 1024], f16, stride=(1, 50265)), T([1024, 768], f16)), {}) +cnt: 49, ((T([1024, 768], f16), T([768, 768], f16)), {}) +cnt: 49, ((T([768, 1024], f16, stride=(1, 768)), T([1024, 768], f16)), {}) +cnt: 12, ((T([1024, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 1024], f16, stride=(1, 768)), T([1024, 3072], f16)), {}) +cnt: 12, ((T([1024, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 1024], f16, stride=(1, 3072)), T([1024, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([1, 1, 1, 1024], f16), -65504.0), {}) +cnt: 1, ((T([1, 1024], i32), T([1, 1024], i32)), {}) +cnt: 12, ((T([1, 3, 512, 1], f16, stride=(1024, 256, 1, 1)), T([1, 3, 1, 512], f16, stride=(1024, 256, 1, 1))), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([1, 1024, 768], f16), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([1, 1024, 768], f16), T([1, 1024, 768], f16), [768], T([1, 1024, 1], f32), T([1, 1024, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([1, 1024], i64), 1), {}) +cnt: 12, ((T([1, 1024], f16), 0), {}) +Operator: aten.new_empty.default +cnt: 12, ((T([12, 3, 512, 513], f16), [12, 4, 256, 513]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) 
+cnt: 12, ((T([1, 3, 512, 513], f16), [1, 4, 256, 513]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_empty_strided.default +cnt: 84, ((T([12, 4, 256, 513], f16), [12, 4, 256, 513], [525312, 131328, 513, 1]), {}) +cnt: 12, ((T([1024, 768], f16), [1024, 768], [768, 1]), {}) +Operator: aten.new_ones.default +cnt: 12, ((T([1, 1024, 12, 513], f16, stride=(6303744, 513, 525312, 1)), [256, 257]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 12, ((T([1, 1024, 1, 1], f16), [1, 1024, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 12, ((T([1, 1024, 1, 513], f16), [256, 257]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_zeros.default +cnt: 12, ((T([12, 4, 768, 64], f16), [1179648]), {}) +cnt: 12, ((T([1024, 12, 513], f16), [6303744]), {}) +cnt: 12, ((T([12, 3, 512, 64], f16, stride=(98304, 32768, 1, 512)), [786432]), {}) +cnt: 12, ((T([12, 3, 512, 64], f16), [786432]), {}) +cnt: 12, ((T([1024, 768], f16), [786432]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([1024, 50265], f16), T([1024], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([1024, 50265], f16), T([1024], i64), None, 1, -100), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([1, 1, 1, 1024], f16), 1.0), {}) +Operator: aten.select_backward.default +cnt: 12, ((T([12, 512, 513], f16), [12, 3, 512, 513], 1, 0), {}) +cnt: 12, ((T([12, 512, 513], f16), [12, 3, 512, 513], 1, -1), {}) +Operator: aten.slice_backward.default +cnt: 12, ((T([12, 4, 256, 768], f16), [12, 4, 256, 769], 3, 0, -1, 1), {}) +cnt: 12, ((T([12, 4, 256, 769], f16), [12, 4, 256, 769], 2, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([12, 4, 256, 769], f16), [12, 4, 256, 769], 1, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([12, 4, 256, 769], f16), [12, 4, 256, 769], 0, 0, 
9223372036854775807, 1), {}) +cnt: 12, ((T([12, 4, 196864], f16), [12, 4, 197120], 2, 0, -256, 1), {}) +cnt: 12, ((T([12, 4, 197120], f16), [12, 4, 197120], 1, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([12, 4, 197120], f16), [12, 4, 197120], 0, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([12, 255, 255], f16), [12, 255, 513], 2, -255, 9223372036854775807, 1), {}) +cnt: 12, ((T([12, 255, 513], f16), [12, 512, 513], 1, 0, 255, 1), {}) +cnt: 48, ((T([12, 3, 512, 513], f16), [12, 3, 512, 513], 0, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([12, 3, 256, 256], f16), [12, 3, 256, 513], 3, 257, 9223372036854775807, 1), {}) +cnt: 12, ((T([12, 3, 256, 513], f16), [12, 3, 512, 513], 2, -257, -1, 1), {}) +cnt: 24, ((T([12, 3, 512, 513], f16), [12, 3, 512, 513], 1, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([12, 256, 257], f16), [12, 256, 513], 2, 0, 257, 1), {}) +cnt: 12, ((T([12, 256, 513], f16), [12, 512, 513], 1, 256, 9223372036854775807, 1), {}) +cnt: 12, ((T([12, 3, 256, 257], f16), [12, 3, 256, 513], 3, 0, 257, 1), {}) +cnt: 12, ((T([12, 3, 256, 513], f16), [12, 3, 512, 513], 2, 0, 256, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([1024, 50265], f16), [0], True), {}) +cnt: 61, ((T([1024, 768], f16), [0], True), {}) +cnt: 12, ((T([1024, 3072], f16), [0], True), {}) +Operator: aten.tril.default +cnt: 24, ((T([256, 257], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BartForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BartForCausalLM_training.txt new file mode 100644 index 000000000..25d8b0b7a --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BartForCausalLM_training.txt @@ -0,0 +1,73 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([4096, 50265], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([4096, 50265], f16), T([4096, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([64, 1024, 1024], f16), 
-1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([64, 1024, 1024], f16), T([64, 1024, 1024], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([1024, 1024], f32),), {'dtype': f16}) +cnt: 1, ((T([4, 1, 1024, 1024], f16, stride=(0, 1048576, 1024, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([4, 1024, 16, 64], f16), [4, 1024, 1024]), {}) +cnt: 1, ((T([4096, 50265], f16), [4, 1024, 50265]), {}) +cnt: 12, ((T([4, 16, 1024, 64], f16), [64, 1024, 64]), {}) +cnt: 12, ((T([4, 1024, 1024], f16), [4096, 1024]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([1024], i64), 1), {}) +cnt: 1, ((T([4, 1024], i64, stride=(0, 1)), 2), {}) +cnt: 73, ((T([4, 1024, 1024], f16), T([4, 1024, 1024], f16)), {}) +cnt: 12, ((T([4, 16, 1024, 1024], f16), T([4, 1, 1024, 1024], f16)), {}) +cnt: 1, ((T([50265, 1024], f16), T([50265, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([1024], f16), T([4096, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 12, ((T([4096], f16), T([4096, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 12, ((T([1024], f16), T([4096, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([64, 1024, 64], f16), T([64, 64, 1024], f16, stride=(65536, 1, 64))), {}) +cnt: 24, ((T([64, 1024, 1024], f16), T([64, 1024, 64], f16)), {}) +cnt: 12, ((T([64, 1024, 1024], f16, stride=(1048576, 1, 1024)), T([64, 1024, 64], f16)), {}) +cnt: 12, ((T([64, 64, 1024], f16, stride=(65536, 1, 64)), T([64, 1024, 1024], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([4, 1024], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([4, 1024], i64), T([4, 1024], i64)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50265, 1024], f16), T([4, 1024], i64), 1), {}) +cnt: 1, ((T([1026, 1024], f16), T([4, 1024], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([4, 1024, 
1024], f16), T([4, 1024], i64), 1026, -1, False), {}) +cnt: 1, ((T([4, 1024, 1024], f16), T([4, 1024], i64), 50265, 1, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([4, 1024, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([4, 1024, 4096], f16), T([4, 1024, 4096], f16)), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([1024], i64), T([1024, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([1024, 1024], f32), T([1024, 1024], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([4096, 1024], f16), T([1024, 50265], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([50265, 4096], f16, stride=(1, 50265)), T([4096, 1024], f16)), {}) +cnt: 1, ((T([4096, 50265], f16), T([50265, 1024], f16)), {}) +cnt: 12, ((T([4096, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 12, ((T([1024, 4096], f16, stride=(1, 1024)), T([4096, 4096], f16)), {}) +cnt: 12, ((T([4096, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 12, ((T([4096, 4096], f16, stride=(1, 4096)), T([4096, 1024], f16)), {}) +cnt: 48, ((T([4096, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 48, ((T([1024, 4096], f16, stride=(1, 1024)), T([4096, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([4, 1024, 1024], f16), 1.0), {}) +cnt: 24, ((T([4, 1024, 1024], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([4, 1024, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([4, 1024, 1024], f16), T([4, 1024, 1024], f16), [1024], T([4, 1024, 1], f32), T([4, 1024, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([4096, 50265], f16), T([4096], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([4096, 50265], f16), T([4096], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 60, ((T([4096, 1024], f16), [0], True), {}) +cnt: 12, ((T([4096, 4096], f16), [0], True), {}) 
diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BartForConditionalGeneration_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BartForConditionalGeneration_training.txt new file mode 100644 index 000000000..0e388c606 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BartForConditionalGeneration_training.txt @@ -0,0 +1,89 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([2048, 50265], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([2048, 50265], f16), T([2048, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 36, ((T([32, 1024, 1024], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 36, ((T([32, 1024, 1024], f16), T([32, 1024, 1024], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([1024, 1024], f32),), {'dtype': f16}) +cnt: 1, ((T([2, 1, 1024, 1024], f16, stride=(0, 1048576, 1024, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 108, ((T([2, 1024, 16, 64], f16), [2, 1024, 1024]), {}) +cnt: 1, ((T([2048, 50265], f16), [2, 1024, 50265]), {}) +cnt: 36, ((T([2, 16, 1024, 64], f16), [32, 1024, 64]), {}) +cnt: 36, ((T([2, 1024, 1024], f16), [2048, 1024]), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([2, 1024], i64, stride=(0, 1)), 2), {}) +cnt: 193, ((T([2, 1024, 1024], f16), T([2, 1024, 1024], f16)), {}) +cnt: 1, ((T([1024], i64), 1), {}) +cnt: 12, ((T([2, 16, 1024, 1024], f16), T([2, 1, 1024, 1024], f16)), {}) +cnt: 1, ((T([2, 1024, 50265], f16), T([1, 50265], f16)), {}) +cnt: 2, ((T([50265, 1024], f16), T([50265, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 144, ((T([1024], f16), T([2048, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([4096], f16), T([2048, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([1024], f16), T([2048, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), 
{}) +Operator: aten.any.default +cnt: 24, ((T([2, 1024, 1024], b8),), {}) +Operator: aten.bmm.default +cnt: 72, ((T([32, 1024, 64], f16), T([32, 64, 1024], f16, stride=(65536, 1, 64))), {}) +cnt: 72, ((T([32, 1024, 1024], f16), T([32, 1024, 64], f16)), {}) +cnt: 36, ((T([32, 1024, 1024], f16, stride=(1048576, 1, 1024)), T([32, 1024, 64], f16)), {}) +cnt: 36, ((T([32, 64, 1024], f16, stride=(65536, 1, 64)), T([32, 1024, 1024], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([2, 1024], i64),), {}) +cnt: 1, ((T([2, 1023], i64, stride=(1024, 1)),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([2, 1024], i64), T([2, 1024], i64)), {}) +cnt: 1, ((T([2, 1023], i64, stride=(1024, 1)), T([2, 1023], i64)), {}) +Operator: aten.embedding.default +cnt: 2, ((T([50265, 1024], f16), T([2, 1024], i64), 1), {}) +cnt: 2, ((T([1026, 1024], f16), T([2, 1024], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 2, ((T([2, 1024, 1024], f16), T([2, 1024], i64), 1026, -1, False), {}) +cnt: 2, ((T([2, 1024, 1024], f16), T([2, 1024], i64), 50265, 1, False), {}) +Operator: aten.eq.Scalar +cnt: 1, ((T([2, 1024], i64), -100), {}) +Operator: aten.fill_.Tensor +cnt: 1, ((T([2], i64, stride=(1024,)), T([], i64)), {}) +Operator: aten.gelu.default +cnt: 24, ((T([2, 1024, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 24, ((T([2, 1024, 4096], f16), T([2, 1024, 4096], f16)), {}) +Operator: aten.isinf.default +cnt: 12, ((T([2, 1024, 1024], f16),), {}) +Operator: aten.isnan.default +cnt: 12, ((T([2, 1024, 1024], f16),), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([1024], i64), T([1024, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([2, 1024], i64), T([2, 1024], b8), 1), {}) +cnt: 1, ((T([1024, 1024], f32), T([1024, 1024], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 1024], f16), T([1024, 50265], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([50265, 2048], f16, stride=(1, 50265)), T([2048, 1024], f16)), {}) +cnt: 1, ((T([2048, 50265], f16), 
T([50265, 1024], f16)), {}) +cnt: 24, ((T([2048, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 24, ((T([1024, 2048], f16, stride=(1, 1024)), T([2048, 4096], f16)), {}) +cnt: 24, ((T([2048, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 24, ((T([4096, 2048], f16, stride=(1, 4096)), T([2048, 1024], f16)), {}) +cnt: 144, ((T([2048, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 144, ((T([1024, 2048], f16, stride=(1, 1024)), T([2048, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([2, 1024, 1024], f16), 1.0), {}) +cnt: 72, ((T([2, 1024, 1024], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 62, ((T([2, 1024, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 62, ((T([2, 1024, 1024], f16), T([2, 1024, 1024], f16), [1024], T([2, 1024, 1], f32), T([2, 1024, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.new_zeros.default +cnt: 1, ((T([2, 1024], i64), [2, 1024]), {'dtype': i64, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([2048, 50265], f16), T([2048], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([2048, 50265], f16), T([2048], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 168, ((T([2048, 1024], f16), [0], True), {}) +cnt: 24, ((T([2048, 4096], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BertForMaskedLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BertForMaskedLM_training.txt new file mode 100644 index 000000000..5cd41366b --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BertForMaskedLM_training.txt @@ -0,0 +1,81 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([8192, 30522], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([8192, 30522], f16), T([8192, 
30522], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([64, 12, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([64, 12, 128, 128], f16), T([64, 12, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([64, 1, 1, 128], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([64, 12, 128, 64], f16), [768, 128, 64]), {}) +cnt: 12, ((T([64, 12, 64, 128], f16), [768, 64, 128]), {}) +cnt: 12, ((T([768, 128, 128], f16), [64, 12, 128, 128]), {}) +cnt: 12, ((T([768, 128, 64], f16), [64, 12, 128, 64]), {}) +cnt: 24, ((T([64, 128, 12, 64], f16), [64, 128, 768]), {}) +cnt: 12, ((T([64, 128, 768], f16), [8192, 768]), {}) +Operator: aten.add.Tensor +cnt: 73, ((T([64, 128, 768], f16), T([64, 128, 768], f16)), {}) +cnt: 12, ((T([64, 12, 128, 128], f16), T([64, 1, 1, 128], f16)), {}) +cnt: 1, ((T([30522, 768], f16), T([30522, 768], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([64, 128, 768], f16), T([1, 128, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 49, ((T([768], f16), T([8192, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([8192, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([8192, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([30522], f16), T([8192, 768], f16), T([768, 30522], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([768, 128, 64], f16), T([768, 64, 128], f16)), {}) +cnt: 12, ((T([768, 128, 128], f16), T([768, 128, 64], f16)), {}) +cnt: 12, ((T([768, 128, 128], f16, stride=(16384, 1, 128)), T([768, 128, 64], f16)), {}) +cnt: 12, ((T([768, 128, 64], f16), T([768, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 12, ((T([768, 64, 128], f16, stride=(8192, 1, 64)), T([768, 128, 128], f16)), {}) +cnt: 12, ((T([768, 128, 128], f16), T([768, 128, 64], f16, stride=(8192, 1, 128))), {}) +Operator: aten.clone.default +cnt: 2, ((T([64, 
128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([64, 128], i64), T([64, 128], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([64, 12, 128, 128], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 768], f16), T([64, 128], i64), 0), {}) +cnt: 1, ((T([2, 768], f16), T([64, 128], i64, stride=(0, 1))), {}) +cnt: 1, ((T([512, 768], f16), T([1, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 128, 768], f16), T([1, 128], i64), 512, -1, False), {}) +cnt: 1, ((T([64, 128, 768], f16), T([64, 128], i64, stride=(0, 1)), 2, -1, False), {}) +cnt: 1, ((T([64, 128, 768], f16), T([64, 128], i64), 30522, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([64, 128, 3072], f16),), {}) +cnt: 1, ((T([64, 128, 768], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([64, 128, 768], f16), T([64, 128, 768], f16)), {}) +cnt: 12, ((T([64, 128, 3072], f16), T([64, 128, 3072], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([8192, 30522], f16), T([30522, 768], f16)), {}) +cnt: 1, ((T([30522, 8192], f16, stride=(1, 30522)), T([8192, 768], f16)), {}) +cnt: 49, ((T([8192, 768], f16), T([768, 768], f16)), {}) +cnt: 49, ((T([768, 8192], f16, stride=(1, 768)), T([8192, 768], f16)), {}) +cnt: 12, ((T([8192, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 8192], f16, stride=(1, 768)), T([8192, 3072], f16)), {}) +cnt: 12, ((T([8192, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 8192], f16, stride=(1, 3072)), T([8192, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([64, 1, 1, 128], f16), -65504.0), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([64, 128, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([64, 128, 768], f16), T([64, 128, 768], f16), [768], T([64, 128, 1], f32), T([64, 128, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: 
aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([8192, 30522], f16), T([8192], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([8192, 30522], f16), T([8192], i64), None, 1, -100), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([64, 1, 1, 128], f16), 1.0), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([8192, 30522], f16), [0], True), {}) +cnt: 61, ((T([8192, 768], f16), [0], True), {}) +cnt: 12, ((T([8192, 3072], f16), [0], True), {}) +cnt: 1, ((T([64, 128, 768], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BertForQuestionAnswering_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BertForQuestionAnswering_training.txt new file mode 100644 index 000000000..463fb6ada --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BertForQuestionAnswering_training.txt @@ -0,0 +1,88 @@ +Operator: aten._log_softmax.default +cnt: 2, ((T([64, 128], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 2, ((T([64, 128], f16), T([64, 128], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([64, 12, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([64, 12, 128, 128], f16), T([64, 12, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([64, 1, 1, 128], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([64, 12, 128, 64], f16), [768, 128, 64]), {}) +cnt: 12, ((T([64, 12, 64, 128], f16), [768, 64, 128]), {}) +cnt: 12, ((T([768, 128, 128], f16), [64, 12, 128, 128]), {}) +cnt: 12, ((T([768, 128, 64], f16), [64, 12, 128, 64]), {}) +cnt: 24, ((T([64, 128, 12, 64], f16), [64, 128, 768]), {}) +cnt: 12, ((T([64, 128, 768], f16), [8192, 768]), {}) +Operator: aten.add.Tensor +cnt: 73, ((T([64, 128, 768], f16), T([64, 128, 768], f16)), {}) +cnt: 12, ((T([64, 12, 128, 128], f16), T([64, 1, 1, 128], f16)), {}) +cnt: 1, ((T([], 
f16), T([], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([64, 128, 768], f16), T([1, 128, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([768], f16), T([8192, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([8192, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([8192, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([2], f16), T([8192, 768], f16), T([768, 2], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([768, 128, 64], f16), T([768, 64, 128], f16)), {}) +cnt: 12, ((T([768, 128, 128], f16), T([768, 128, 64], f16)), {}) +cnt: 12, ((T([768, 128, 128], f16, stride=(16384, 1, 128)), T([768, 128, 64], f16)), {}) +cnt: 12, ((T([768, 128, 64], f16), T([768, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 12, ((T([768, 64, 128], f16, stride=(8192, 1, 64)), T([768, 128, 128], f16)), {}) +cnt: 12, ((T([768, 128, 128], f16), T([768, 128, 64], f16, stride=(8192, 1, 128))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 128, 1], f16), T([64, 128, 1], f16)], 2), {}) +Operator: aten.clamp.default +cnt: 2, ((T([64], i64), 0, 128), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 128], i64),), {}) +cnt: 2, ((T([64], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 128], i64), T([64, 128], i64)), {}) +cnt: 2, ((T([64], i64), T([64], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([64, 12, 128, 128], f16), 8.0), {}) +cnt: 2, ((T([], f16), 2), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 768], f16), T([64, 128], i64), 0), {}) +cnt: 1, ((T([2, 768], f16), T([64, 128], i64, stride=(0, 1))), {}) +cnt: 1, ((T([512, 768], f16), T([1, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 128, 768], f16), T([1, 128], i64), 512, -1, False), {}) +cnt: 1, ((T([64, 128, 768], f16), T([64, 128], i64, stride=(0, 1)), 2, -1, False), {}) +cnt: 1, ((T([64, 128, 768], f16), T([64, 128], i64), 30522, 
0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([64, 128, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([64, 128, 3072], f16), T([64, 128, 3072], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([8192, 2], f16), T([2, 768], f16)), {}) +cnt: 1, ((T([2, 8192], f16, stride=(1, 2)), T([8192, 768], f16)), {}) +cnt: 12, ((T([8192, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 8192], f16, stride=(1, 768)), T([8192, 3072], f16)), {}) +cnt: 12, ((T([8192, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 8192], f16, stride=(1, 3072)), T([8192, 768], f16)), {}) +cnt: 48, ((T([8192, 768], f16), T([768, 768], f16)), {}) +cnt: 48, ((T([768, 8192], f16, stride=(1, 768)), T([8192, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([64, 1, 1, 128], f16), -65504.0), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([64, 128, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([64, 128, 768], f16), T([64, 128, 768], f16), [768], T([64, 128, 1], f32), T([64, 128, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 2, ((T([], f16), T([64, 128], f16), T([64], i64), None, 1, 128, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 2, ((T([64, 128], f16), T([64], i64), None, 1, 128), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([64, 1, 1, 128], f16), 1.0), {}) +Operator: aten.split.Tensor +cnt: 1, ((T([64, 128, 2], f16), 1, -1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([8192, 2], f16), [0], True), {}) +cnt: 60, ((T([8192, 768], f16), [0], True), {}) +cnt: 12, ((T([8192, 3072], f16), [0], True), {}) +cnt: 1, ((T([64, 128, 768], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BigBird_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BigBird_training.txt new file mode 100644 index 000000000..7bc500b33 
--- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BigBird_training.txt @@ -0,0 +1,237 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([1024, 50358], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([1024, 50358], f16), T([1024, 50358], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([1, 12, 64, 1024], f16), -1, False), {}) +cnt: 24, ((T([1, 12, 64, 448], f16), -1, False), {}) +cnt: 12, ((T([1, 12, 12, 64, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([1, 12, 64, 1024], f16), T([1, 12, 64, 1024], f16), -1, f16), {}) +cnt: 24, ((T([1, 12, 64, 448], f16), T([1, 12, 64, 448], f16), -1, f16), {}) +cnt: 12, ((T([1, 12, 12, 64, 512], f16), T([1, 12, 12, 64, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 12, ((T([1, 1, 12, 64, 192], f32),), {'dtype': f16}) +cnt: 12, ((T([1, 1, 1024, 1], f32),), {'dtype': f16}) +cnt: 12, ((T([1, 1, 1, 1024], f32),), {'dtype': f16}) +cnt: 12, ((T([12, 14, 3], i32),), {'dtype': i64, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 24, ((T([1, 12, 16, 64, 64], f16), [192, 64, 64]), {}) +cnt: 24, ((T([1, 12, 12, 64, 64], f16), [144, 64, 64]), {}) +cnt: 24, ((T([1, 12, 12, 192, 64], f16), [144, 192, 64]), {}) +cnt: 24, ((T([1, 1024, 12, 64], f16), [1, 1024, 768]), {}) +Operator: aten.add.Tensor +cnt: 76, ((T([1, 1024, 768], f16), T([1, 1024, 768], f16)), {}) +cnt: 24, ((T([504], i64), T([504], i64)), {}) +cnt: 36, ((T([1, 1024, 3072], f16), T([1, 1024, 3072], f16)), {}) +cnt: 12, ((T([1, 1024, 3072], f16), 1.0), {}) +cnt: 1, ((T([1, 1024, 768], f16), 1.0), {}) +cnt: 360, ((T([1, 12, 16, 64, 64], f16), T([1, 12, 16, 64, 64], f16)), {}) +cnt: 36, ((T([1, 12, 12, 64, 512], f16), T([1, 12, 12, 64, 512], f16)), {}) +cnt: 48, ((T([1, 12, 14, 192, 64], f16), T([1, 12, 14, 192, 64], f16)), {}) +cnt: 36, ((T([1, 12, 12, 64, 64], f16), T([1, 12, 12, 64, 64], f16)), {}) +cnt: 24, ((T([1, 12, 1024, 
64], f16), T([1, 12, 1024, 64], f16)), {}) +cnt: 12, ((T([1, 12, 1024, 64], f16, stride=(786432, 65536, 1, 1024)), T([1, 12, 1024, 64], f16, stride=(786432, 65536, 1, 1024))), {}) +cnt: 12, ((T([1, 12, 1024, 64], f16, stride=(786432, 65536, 1, 1024)), T([1, 12, 1024, 64], f16)), {}) +cnt: 1, ((T([50358, 768], f16), T([50358, 768], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024, 768], f16)), {}) +cnt: 24, ((T([1, 12, 64, 1024], f16), T([1, 1, 1, 1024], f16)), {}) +cnt: 24, ((T([1, 12, 64, 448], f16), T([1, 12, 64, 448], f32)), {}) +cnt: 12, ((T([1, 12, 12, 64, 192], f16), T([1, 1, 12, 64, 192], f16)), {}) +cnt: 24, ((T([1, 12, 12, 64, 64], f16), T([1, 1, 1, 1, 64], f16)), {}) +cnt: 12, ((T([1, 12, 12, 64, 192], f16), T([1, 12, 12, 64, 192], f32)), {}) +cnt: 36, ((T([1, 12, 12, 64, 64], f16), T([1, 12, 12, 64, 64], f16)), {}) +Operator: aten.addmm.default +cnt: 49, ((T([768], f16), T([1024, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([1024, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([1024, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([768], f16), T([1, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 1, ((T([50358], f16), T([1024, 768], f16), T([768, 50358], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 48, ((T([12, 64, 64], f16, stride=(64, 768, 1)), T([12, 64, 1024], f16, stride=(64, 1, 768))), {}) +cnt: 48, ((T([12, 64, 1024], f16), T([12, 1024, 64], f16, stride=(64, 768, 1))), {}) +cnt: 48, ((T([12, 64, 64], f16, stride=(64, 768, 1)), T([12, 64, 448], f16, stride=(28672, 1, 64))), {}) +cnt: 48, ((T([12, 64, 448], f16), T([12, 448, 64], f16)), {}) +cnt: 48, ((T([144, 64, 64], f16), T([144, 64, 192], f16, stride=(12288, 1, 64))), {}) +cnt: 24, ((T([12, 768, 64], f16, stride=(64, 768, 1)), T([12, 64, 64], f16, stride=(64, 1, 768))), {}) +cnt: 24, ((T([144, 64, 192], f16, stride=(32768, 512, 1)), 
T([144, 192, 64], f16)), {}) +cnt: 24, ((T([12, 768, 64], f16, stride=(393216, 512, 1)), T([12, 64, 64], f16, stride=(64, 768, 1))), {}) +cnt: 24, ((T([12, 1024, 64], f16, stride=(65536, 1, 1024)), T([12, 64, 64], f16, stride=(64, 768, 1))), {}) +cnt: 24, ((T([12, 64, 64], f16, stride=(64, 1, 768)), T([12, 64, 1024], f16)), {}) +cnt: 24, ((T([12, 448, 64], f16, stride=(28672, 1, 448)), T([12, 64, 64], f16, stride=(64, 768, 1))), {}) +cnt: 24, ((T([12, 64, 64], f16, stride=(64, 1, 768)), T([12, 64, 448], f16)), {}) +cnt: 24, ((T([12, 64, 768], f16, stride=(393216, 1, 512)), T([12, 768, 64], f16)), {}) +cnt: 24, ((T([12, 768, 64], f16), T([12, 64, 64], f16, stride=(64, 1, 768))), {}) +cnt: 24, ((T([144, 192, 64], f16, stride=(32768, 1, 512)), T([144, 64, 64], f16)), {}) +cnt: 24, ((T([12, 64, 768], f16, stride=(64, 1, 768)), T([12, 768, 64], f16)), {}) +cnt: 24, ((T([12, 768, 64], f16), T([12, 64, 64], f16, stride=(64, 768, 1))), {}) +cnt: 24, ((T([144, 64, 64], f16, stride=(4096, 1, 64)), T([144, 64, 192], f16)), {}) +cnt: 24, ((T([144, 64, 192], f16), T([144, 192, 64], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([1, 12, 64], f32), T([1, 12, 64], f32), T([1, 12, 64], f32)], 2), {}) +cnt: 12, (([T([1, 12, 14, 3], i64)],), {}) +cnt: 48, (([T([1, 12, 64, 64], f16, stride=(768, 64, 768, 1)), T([1, 12, 64, 64], f16, stride=(768, 64, 768, 1)), T([1, 12, 64, 64], f16, stride=(768, 64, 768, 1)), T([1, 12, 64, 64], f16, stride=(768, 64, 768, 1)), T([1, 12, 192, 64], f16, stride=(2064384, 172032, 64, 1))], 2), {}) +cnt: 12, (([T([1, 1, 1, 192], f16), T([1, 1, 1, 64], f16), T([1, 1, 1, 192], f16)], 3), {}) +cnt: 24, (([T([1, 12, 64, 256], f32), T([1, 12, 64, 192], f32, stride=(2064384, 172032, 192, 1))], 3), {}) +cnt: 24, (([T([1, 12, 12, 64, 64], f16, stride=(768, 64, 49152, 768, 1)), T([1, 12, 12, 64, 64], f16, stride=(768, 64, 49152, 768, 1)), T([1, 12, 12, 64, 64], f16, stride=(768, 64, 49152, 768, 1))], 3), {}) +cnt: 12, (([T([1, 12, 12, 64, 64], f16), T([1, 12, 
12, 64, 192], f16), T([1, 12, 12, 64, 192], f16), T([1, 12, 12, 64, 64], f16)], -1), {}) +cnt: 12, (([T([1, 1, 1, 64], f16), T([1, 1, 1, 192], f16), T([1, 1, 1, 192], f16)], 3), {}) +cnt: 12, (([T([1, 12, 1, 64, 64], f16), T([1, 12, 1, 64, 64], f16), T([1, 12, 12, 64, 64], f16), T([1, 12, 1, 64, 64], f16), T([1, 12, 1, 64, 64], f16)], 2), {}) +Operator: aten.clone.default +cnt: 2, ((T([1, 1024], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([1, 1024], i64), T([1, 1024], i64)), {}) +cnt: 12, ((T([12, 12, 64, 64], f16), T([12, 12, 64, 64], f16, stride=(64, 49152, 768, 1))), {}) +cnt: 36, ((T([144, 64, 64], f16), T([144, 64, 64], f16)), {}) +cnt: 36, ((T([1, 12, 12, 64, 64], f16), T([1, 12, 12, 64, 64], f16)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50358, 768], f16), T([1, 1024], i64), 0), {}) +cnt: 1, ((T([2, 768], f16), T([1, 1024], i64)), {}) +cnt: 1, ((T([4096, 768], f16), T([1, 1024], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024], i64), 4096, -1, False), {}) +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024], i64), 2, -1, False), {}) +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024], i64), 50358, 0, False), {}) +Operator: aten.floor_divide.default +cnt: 24, ((T([504], i64), 42), {}) +Operator: aten.index.Tensor +cnt: 12, ((T([16, 64], f32), [T([504], i64)]), {}) +Operator: aten.index_add.default +cnt: 24, ((T([192, 64, 64], f16), 0, T([504], i64), T([504, 64, 64], f16)), {}) +Operator: aten.index_select.default +cnt: 24, ((T([192, 64, 64], f16), 0, T([504], i64)), {}) +Operator: aten.minimum.default +cnt: 24, ((T([1, 1, 1, 448], f16), T([1, 12, 64, 448], f32)), {}) +Operator: aten.mm.default +cnt: 1, ((T([1024, 50358], f16), T([50358, 768], f16)), {}) +cnt: 1, ((T([50358, 1024], f16, stride=(1, 50358)), T([1024, 768], f16)), {}) +cnt: 37, ((T([1024, 768], f16), T([768, 768], f16)), {}) +cnt: 37, ((T([768, 1024], f16, stride=(1, 768)), T([1024, 768], f16)), {}) +cnt: 12, ((T([1024, 768], 
f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 1024], f16, stride=(1, 768)), T([1024, 3072], f16)), {}) +cnt: 12, ((T([1024, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 1024], f16, stride=(1, 3072)), T([1024, 768], f16)), {}) +cnt: 12, ((T([1024, 768], f16, stride=(1, 1024)), T([768, 768], f16)), {}) +cnt: 12, ((T([768, 1024], f16), T([1024, 768], f16)), {}) +Operator: aten.mul.Scalar +cnt: 1, ((T([1, 1024, 768], f16), 3.0), {}) +cnt: 12, ((T([1, 1024, 3072], f16), 3.0), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([1, 12, 64, 1], f32), T([1, 12, 1, 192], f32)), {}) +cnt: 12, ((T([1, 1, 14, 64, 1], f32), T([1, 12, 14, 1, 192], f32)), {}) +cnt: 24, ((T([504], i64), 16), {}) +cnt: 48, ((T([1, 12, 64, 1024], f16), 0.125), {}) +cnt: 24, ((T([1, 1, 1, 1024], f16), -10000.0), {}) +cnt: 48, ((T([1, 12, 64, 448], f16), 0.125), {}) +cnt: 24, ((T([1, 12, 64, 448], f32), -10000.0), {}) +cnt: 24, ((T([1, 12, 12, 64, 192], f16), 0.125), {}) +cnt: 24, ((T([1, 12, 12, 64, 64], f16), 0.125), {}) +cnt: 12, ((T([1, 1, 12, 64, 192], f16), -10000.0), {}) +cnt: 24, ((T([1, 1, 1, 1, 64], f16), -10000.0), {}) +cnt: 12, ((T([1, 12, 12, 64, 192], f32), -10000.0), {}) +cnt: 12, ((T([1, 12, 1024, 64], f16), T([1, 1, 1024, 1], f16)), {}) +cnt: 24, ((T([1, 1024, 3072], f16), 0.5), {}) +cnt: 24, ((T([1, 1024, 3072], f16), 0.044715), {}) +cnt: 24, ((T([1, 1024, 3072], f16), 0.7978845608028654), {}) +cnt: 48, ((T([1, 1024, 3072], f16), T([1, 1024, 3072], f16)), {}) +cnt: 2, ((T([1, 1024, 768], f16), 0.5), {}) +cnt: 2, ((T([1, 1024, 768], f16), 0.044715), {}) +cnt: 2, ((T([1, 1024, 768], f16), 0.7978845608028654), {}) +cnt: 4, ((T([1, 1024, 768], f16), T([1, 1024, 768], f16)), {}) +cnt: 12, ((T([1, 12, 1024, 64], f16, stride=(786432, 64, 768, 1)), T([1, 1, 1024, 1], f16)), {}) +cnt: 24, ((T([1, 12, 12, 64, 64], f16, stride=(4718592, 393216, 32768, 512, 1)), 0.125), {}) +cnt: 24, ((T([1, 12, 12, 64, 192], f16, stride=(4718592, 393216, 32768, 512, 1)), 0.125), {}) +Operator: 
aten.native_layer_norm.default +cnt: 26, ((T([1, 1024, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([1, 1024, 768], f16), T([1, 1024, 768], f16), [768], T([1, 1024, 1], f32), T([1, 1024, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 36, ((T([144, 64, 64], f16), [144, 64, 64], [4096, 64, 1]), {}) +Operator: aten.new_ones.default +cnt: 24, ((T([1, 1, 1, 1024], f16), [1, 1, 1, 192]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 24, ((T([1, 12, 14, 64, 192], f32), [1, 12, 64, 256]), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_zeros.default +cnt: 12, ((T([12, 12, 64, 64], f16, stride=(64, 49152, 768, 1)), [589824]), {}) +cnt: 24, ((T([504, 64, 64], f16), [192, 64, 64]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([1024, 50358], f16), T([1024], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([1024, 50358], f16), T([1024], i64), None, 1, -100), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 12, ((T([1, 1024, 3072], f16), 3.0), {}) +cnt: 1, ((T([1, 1024, 768], f16), 3.0), {}) +cnt: 1, ((T([1, 1024, 768], f16), 2.0), {}) +cnt: 12, ((T([1, 1024, 3072], f16), 2.0), {}) +Operator: aten.rsub.Scalar +cnt: 24, ((T([1, 1, 1, 1024], f16), 1.0), {}) +cnt: 24, ((T([1, 12, 64, 448], f32), 1.0), {}) +cnt: 12, ((T([1, 1, 12, 64, 192], f16), 1.0), {}) +cnt: 24, ((T([1, 1, 1, 1, 64], f16), 1.0), {}) +cnt: 12, ((T([1, 12, 12, 64, 192], f32, stride=(2064384, 172032, 12288, 192, 1)), 1.0), {}) +Operator: aten.select_backward.default +cnt: 24, ((T([1, 12, 64, 64], f16), [1, 12, 16, 64, 64], 2, -1), {}) +cnt: 12, ((T([1, 12, 64, 64], f16), [1, 12, 16, 64, 64], 2, -2), {}) +cnt: 12, ((T([1, 12, 192, 64], f16, stride=(344064, 28672, 
64, 1)), [1, 12, 14, 192, 64], 2, -1), {}) +cnt: 24, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [1, 12, 16, 64, 64], 2, -1), {}) +cnt: 12, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [1, 12, 16, 64, 64], 2, -2), {}) +cnt: 12, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [1, 12, 16, 64, 64], 2, -3), {}) +cnt: 24, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [1, 12, 16, 64, 64], 2, 0), {}) +cnt: 12, ((T([1, 12, 192, 64], f16, stride=(344064, 28672, 1, 448)), [1, 12, 14, 192, 64], 2, -1), {}) +cnt: 24, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [1, 12, 16, 64, 64], 2, -1), {}) +cnt: 12, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [1, 12, 16, 64, 64], 2, -2), {}) +cnt: 12, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [1, 12, 16, 64, 64], 2, -3), {}) +cnt: 24, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [1, 12, 16, 64, 64], 2, 0), {}) +cnt: 24, ((T([1, 12, 64, 64], f16), [1, 12, 16, 64, 64], 2, 0), {}) +cnt: 12, ((T([1, 12, 64, 64], f16, stride=(64, 4096, 1, 64)), [1, 12, 16, 64, 64], 2, -1), {}) +cnt: 12, ((T([1, 12, 64, 64], f16, stride=(64, 4096, 1, 64)), [1, 12, 16, 64, 64], 2, 0), {}) +cnt: 12, ((T([1, 12, 64, 64], f16), [1, 12, 16, 64, 64], 2, 1), {}) +cnt: 12, ((T([1, 12, 192, 64], f16, stride=(344064, 28672, 64, 1)), [1, 12, 14, 192, 64], 2, 0), {}) +cnt: 12, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [1, 12, 16, 64, 64], 2, 2), {}) +cnt: 12, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [1, 12, 16, 64, 64], 2, 1), {}) +cnt: 12, ((T([1, 12, 192, 64], f16, stride=(344064, 28672, 1, 448)), [1, 12, 14, 192, 64], 2, 0), {}) +cnt: 12, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [1, 12, 16, 64, 64], 2, 2), {}) +cnt: 12, ((T([1, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [1, 12, 16, 64, 64], 2, 1), {}) +Operator: aten.slice_backward.default +cnt: 372, ((T([1, 12, 16, 64, 64], f16), [1, 12, 16, 64, 64], 1, 
0, 9223372036854775807, 1), {}) +cnt: 372, ((T([1, 12, 16, 64, 64], f16), [1, 12, 16, 64, 64], 0, 0, 9223372036854775807, 1), {}) +cnt: 72, ((T([1, 12, 14, 192, 64], f16), [1, 12, 14, 192, 64], 1, 0, 9223372036854775807, 1), {}) +cnt: 72, ((T([1, 12, 14, 192, 64], f16), [1, 12, 14, 192, 64], 0, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 64], f16), [1, 12, 12, 64, 512], 4, -64, 9223372036854775807, 1), {}) +cnt: 48, ((T([1, 12, 12, 64, 512], f16), [1, 12, 12, 64, 512], 3, 0, 9223372036854775807, 1), {}) +cnt: 48, ((T([1, 12, 12, 64, 512], f16), [1, 12, 12, 64, 512], 2, 0, 9223372036854775807, 1), {}) +cnt: 48, ((T([1, 12, 12, 64, 512], f16), [1, 12, 12, 64, 512], 1, 0, 9223372036854775807, 1), {}) +cnt: 48, ((T([1, 12, 12, 64, 512], f16), [1, 12, 12, 64, 512], 0, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 64], f16), [1, 12, 12, 64, 512], 4, 0, 64, 1), {}) +cnt: 12, ((T([1, 12, 12, 192, 64], f16), [1, 12, 14, 192, 64], 2, 1, -1, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 192], f16), [1, 12, 12, 64, 512], 4, 256, -64, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 192], f16), [1, 12, 12, 64, 512], 4, 64, 256, 1), {}) +cnt: 12, ((T([1, 12, 12, 192, 64], f16, stride=(1769472, 147456, 12288, 1, 192)), [1, 12, 14, 192, 64], 2, 1, -1, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 64], f16), [1, 12, 16, 64, 64], 2, 2, -2, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 64, 1)), [1, 12, 16, 64, 64], 2, 3, -1, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 64, 1)), [1, 12, 16, 64, 64], 2, 2, -2, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 64, 1)), [1, 12, 16, 64, 64], 2, 1, -3, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 1, 192)), [1, 12, 16, 64, 64], 2, 3, -1, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 1, 192)), [1, 12, 16, 64, 64], 2, 2, -2, 1), {}) +cnt: 12, ((T([1, 12, 12, 64, 64], 
f16, stride=(1769472, 147456, 12288, 1, 192)), [1, 12, 16, 64, 64], 2, 1, -3, 1), {}) +Operator: aten.stack.default +cnt: 12, (([T([504, 64], f32)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([1024, 50358], f16), [0], True), {}) +cnt: 49, ((T([1024, 768], f16), [0], True), {}) +cnt: 12, ((T([1024, 3072], f16), [0], True), {}) +cnt: 12, ((T([1024, 768], f16, stride=(1, 1024)), [0], True), {}) +Operator: aten.tanh.default +cnt: 12, ((T([1, 1024, 3072], f16),), {}) +cnt: 1, ((T([1, 768], f16),), {}) +cnt: 1, ((T([1, 1024, 768], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024, 768], f16)), {}) +cnt: 12, ((T([1, 1024, 3072], f16), T([1, 1024, 3072], f16)), {}) +Operator: aten.unbind.int +cnt: 12, ((T([1, 16, 64], f32),), {}) +cnt: 12, ((T([1, 12, 14, 3], i64),), {}) +Operator: aten.unsqueeze_.default +cnt: 1, ((T([1, 12, 64, 192], f32), 1), {}) +cnt: 12, ((T([12, 14, 3], i64), 0), {}) +cnt: 48, ((T([1, 12, 64, 64], f16), 2), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BlenderbotSmallForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BlenderbotSmallForCausalLM_training.txt new file mode 100644 index 000000000..3bb0b46b0 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BlenderbotSmallForCausalLM_training.txt @@ -0,0 +1,74 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([8192, 50265], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([8192, 50265], f16), T([8192, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 8, ((T([1024, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 8, ((T([1024, 128, 128], f16), T([1024, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([64, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 
'cuda'}) +Operator: aten._unsafe_view.default +cnt: 24, ((T([64, 128, 16, 32], f16), [64, 128, 512]), {}) +cnt: 1, ((T([8192, 50265], f16), [64, 128, 50265]), {}) +cnt: 8, ((T([64, 16, 128, 32], f16), [1024, 128, 32]), {}) +cnt: 8, ((T([64, 128, 512], f16), [8192, 512]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([128], i64), 1), {}) +cnt: 1, ((T([64, 128, 512], f16), T([128, 512], f16)), {}) +cnt: 8, ((T([64, 16, 128, 128], f16), T([64, 1, 128, 128], f16)), {}) +cnt: 48, ((T([64, 128, 512], f16), T([64, 128, 512], f16)), {}) +cnt: 1, ((T([50265, 512], f16), T([50265, 512], f16)), {}) +Operator: aten.addmm.default +cnt: 32, ((T([512], f16), T([8192, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 8, ((T([2048], f16), T([8192, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 8, ((T([512], f16), T([8192, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +Operator: aten.bmm.default +cnt: 16, ((T([1024, 128, 32], f16), T([1024, 32, 128], f16, stride=(4096, 1, 32))), {}) +cnt: 16, ((T([1024, 128, 128], f16), T([1024, 128, 32], f16)), {}) +cnt: 8, ((T([1024, 128, 128], f16, stride=(16384, 1, 128)), T([1024, 128, 32], f16)), {}) +cnt: 8, ((T([1024, 32, 128], f16, stride=(4096, 1, 32)), T([1024, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([64, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([64, 128], i64), T([64, 128], i64)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50265, 512], f16), T([64, 128], i64), 0), {}) +cnt: 1, ((T([512, 512], f16), T([128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([128, 512], f16), T([128], i64), 512, -1, False), {}) +cnt: 1, ((T([64, 128, 512], f16), T([64, 128], i64), 50265, 0, False), {}) +Operator: aten.gelu.default +cnt: 8, ((T([64, 128, 2048], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 8, ((T([64, 128, 2048], f16), T([64, 128, 2048], f16)), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) 
+Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([8192, 512], f16), T([512, 50265], f16, stride=(1, 512))), {}) +cnt: 1, ((T([50265, 8192], f16, stride=(1, 50265)), T([8192, 512], f16)), {}) +cnt: 1, ((T([8192, 50265], f16), T([50265, 512], f16)), {}) +cnt: 8, ((T([8192, 512], f16), T([512, 2048], f16)), {}) +cnt: 8, ((T([512, 8192], f16, stride=(1, 512)), T([8192, 2048], f16)), {}) +cnt: 8, ((T([8192, 2048], f16), T([2048, 512], f16)), {}) +cnt: 8, ((T([2048, 8192], f16, stride=(1, 2048)), T([8192, 512], f16)), {}) +cnt: 32, ((T([8192, 512], f16), T([512, 512], f16)), {}) +cnt: 32, ((T([512, 8192], f16, stride=(1, 512)), T([8192, 512], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([64, 128, 512], f16), 1.0), {}) +cnt: 16, ((T([64, 128, 512], f16), 0.1767766952966369), {}) +Operator: aten.native_layer_norm.default +cnt: 17, ((T([64, 128, 512], f16), [512], T([512], f16), T([512], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 17, ((T([64, 128, 512], f16), T([64, 128, 512], f16), [512], T([64, 128, 1], f32), T([64, 128, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([8192, 50265], f16), T([8192], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([8192, 50265], f16), T([8192], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 40, ((T([8192, 512], f16), [0], True), {}) +cnt: 8, ((T([8192, 2048], f16), [0], True), {}) +cnt: 1, ((T([64, 128, 512], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BlenderbotSmallForConditionalGeneration_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BlenderbotSmallForConditionalGeneration_training.txt new file mode 100644 index 000000000..866fb9026 --- /dev/null +++ 
b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/BlenderbotSmallForConditionalGeneration_training.txt @@ -0,0 +1,81 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([8192, 50265], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([8192, 50265], f16), T([8192, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([1024, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([1024, 128, 128], f16), T([1024, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([64, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 72, ((T([64, 128, 16, 32], f16), [64, 128, 512]), {}) +cnt: 1, ((T([8192, 50265], f16), [64, 128, 50265]), {}) +cnt: 24, ((T([64, 16, 128, 32], f16), [1024, 128, 32]), {}) +cnt: 24, ((T([64, 128, 512], f16), [8192, 512]), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([64, 128, 512], f16), T([128, 512], f16)), {}) +cnt: 127, ((T([64, 128, 512], f16), T([64, 128, 512], f16)), {}) +cnt: 1, ((T([128], i64), 1), {}) +cnt: 8, ((T([64, 16, 128, 128], f16), T([64, 1, 128, 128], f16)), {}) +cnt: 1, ((T([64, 128, 50265], f16), T([1, 50265], f16)), {}) +cnt: 2, ((T([50265, 512], f16), T([50265, 512], f16)), {}) +Operator: aten.addmm.default +cnt: 96, ((T([512], f16), T([8192, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 16, ((T([2048], f16), T([8192, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 16, ((T([512], f16), T([8192, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +Operator: aten.any.default +cnt: 16, ((T([64, 128, 512], b8),), {}) +Operator: aten.bmm.default +cnt: 48, ((T([1024, 128, 32], f16), T([1024, 32, 128], f16, stride=(4096, 1, 32))), {}) +cnt: 48, ((T([1024, 128, 128], f16), T([1024, 128, 32], f16)), {}) +cnt: 24, ((T([1024, 128, 128], 
f16, stride=(16384, 1, 128)), T([1024, 128, 32], f16)), {}) +cnt: 24, ((T([1024, 32, 128], f16, stride=(4096, 1, 32)), T([1024, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 3, ((T([64, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 3, ((T([64, 128], i64), T([64, 128], i64)), {}) +Operator: aten.embedding.default +cnt: 2, ((T([50265, 512], f16), T([64, 128], i64), 0), {}) +cnt: 2, ((T([512, 512], f16), T([128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 2, ((T([128, 512], f16), T([128], i64), 512, -1, False), {}) +cnt: 2, ((T([64, 128, 512], f16), T([64, 128], i64), 50265, 0, False), {}) +Operator: aten.gelu.default +cnt: 16, ((T([64, 128, 2048], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 16, ((T([64, 128, 2048], f16), T([64, 128, 2048], f16)), {}) +Operator: aten.isinf.default +cnt: 8, ((T([64, 128, 512], f16),), {}) +Operator: aten.isnan.default +cnt: 8, ((T([64, 128, 512], f16),), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([8192, 512], f16), T([512, 50265], f16, stride=(1, 512))), {}) +cnt: 1, ((T([50265, 8192], f16, stride=(1, 50265)), T([8192, 512], f16)), {}) +cnt: 1, ((T([8192, 50265], f16), T([50265, 512], f16)), {}) +cnt: 16, ((T([8192, 512], f16), T([512, 2048], f16)), {}) +cnt: 16, ((T([512, 8192], f16, stride=(1, 512)), T([8192, 2048], f16)), {}) +cnt: 16, ((T([8192, 2048], f16), T([2048, 512], f16)), {}) +cnt: 16, ((T([2048, 8192], f16, stride=(1, 2048)), T([8192, 512], f16)), {}) +cnt: 96, ((T([8192, 512], f16), T([512, 512], f16)), {}) +cnt: 96, ((T([512, 8192], f16, stride=(1, 512)), T([8192, 512], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([64, 128, 512], f16), 1.0), {}) +cnt: 48, ((T([64, 128, 512], f16), 0.1767766952966369), {}) +Operator: aten.native_layer_norm.default +cnt: 42, ((T([64, 128, 512], f16), [512], 
T([512], f16), T([512], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 42, ((T([64, 128, 512], f16), T([64, 128, 512], f16), [512], T([64, 128, 1], f32), T([64, 128, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([8192, 50265], f16), T([8192], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([8192, 50265], f16), T([8192], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 112, ((T([8192, 512], f16), [0], True), {}) +cnt: 16, ((T([8192, 2048], f16), [0], True), {}) +cnt: 2, ((T([64, 128, 512], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/CamemBert_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/CamemBert_training.txt new file mode 100644 index 000000000..2ce6229b7 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/CamemBert_training.txt @@ -0,0 +1,88 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([512, 32005], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([512, 32005], f16), T([512, 32005], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([1, 12, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([1, 12, 512, 512], f16), T([1, 12, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([1, 1, 1, 512], f32),), {'dtype': f16}) +cnt: 1, ((T([1, 512], b8),), {'dtype': i32}) +cnt: 1, ((T([1, 512], i64),), {'dtype': i32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([1, 512], i32),), {'dtype': i64}) +Operator: aten._unsafe_view.default +cnt: 12, ((T([12, 512, 512], f16), [1, 12, 512, 512]), {}) +cnt: 12, ((T([12, 512, 64], f16), [1, 12, 512, 64]), {}) +cnt: 24, ((T([1, 512, 12, 64], f16), [1, 512, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([1, 512], i32), 0), {}) +cnt: 
1, ((T([1, 512], i64), 1), {}) +cnt: 73, ((T([1, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 12, ((T([1, 12, 512, 512], f16), T([1, 1, 1, 512], f16)), {}) +cnt: 1, ((T([32005, 768], f16), T([32005, 768], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([1, 512, 768], f16), T([1, 512, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 49, ((T([768], f16), T([512, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([512, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([512, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([32005], f16), T([512, 768], f16), T([768, 32005], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([12, 512, 64], f16, stride=(64, 768, 1)), T([12, 64, 512], f16, stride=(64, 1, 768))), {}) +cnt: 24, ((T([12, 512, 512], f16), T([12, 512, 64], f16, stride=(64, 768, 1))), {}) +cnt: 12, ((T([12, 512, 512], f16, stride=(262144, 1, 512)), T([12, 512, 64], f16, stride=(64, 768, 1))), {}) +cnt: 12, ((T([12, 64, 512], f16, stride=(64, 1, 768)), T([12, 512, 512], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([1, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([1, 512], i64), T([1, 512], i64)), {}) +Operator: aten.cumsum.default +cnt: 1, ((T([1, 512], i32), 1), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([1, 12, 512, 512], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([32005, 768], f16), T([1, 512], i64), 1), {}) +cnt: 1, ((T([1, 768], f16), T([1, 512], i64)), {}) +cnt: 1, ((T([514, 768], f16), T([1, 512], i64), 1), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 514, 1, False), {}) +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 1, -1, False), {}) +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 32005, 1, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([1, 512, 3072], f16),), {}) +cnt: 1, ((T([1, 512, 768], f16),), {}) +Operator: 
aten.gelu_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 12, ((T([1, 512, 3072], f16), T([1, 512, 3072], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([512, 32005], f16), T([32005, 768], f16)), {}) +cnt: 1, ((T([32005, 512], f16, stride=(1, 32005)), T([512, 768], f16)), {}) +cnt: 37, ((T([512, 768], f16), T([768, 768], f16)), {}) +cnt: 37, ((T([768, 512], f16, stride=(1, 768)), T([512, 768], f16)), {}) +cnt: 12, ((T([512, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 512], f16, stride=(1, 768)), T([512, 3072], f16)), {}) +cnt: 12, ((T([512, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 512], f16, stride=(1, 3072)), T([512, 768], f16)), {}) +cnt: 12, ((T([512, 768], f16, stride=(1, 512)), T([768, 768], f16)), {}) +cnt: 12, ((T([768, 512], f16), T([512, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([1, 1, 1, 512], f16), -65504.0), {}) +cnt: 1, ((T([1, 512], i32), T([1, 512], i32)), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([1, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([1, 512, 768], f16), T([1, 512, 768], f16), [768], T([1, 512, 1], f32), T([1, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([1, 512], i64), 1), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([512, 32005], f16), T([512], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([512, 32005], f16), T([512], i64), None, 1, -100), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([1, 1, 1, 512], f16), 1.0), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([512, 32005], f16), [0], True), {}) +cnt: 49, ((T([512, 768], f16), [0], True), {}) +cnt: 12, ((T([512, 3072], f16), [0], True), {}) +cnt: 12, ((T([512, 768], f16, stride=(1, 512)), [0], True), {}) diff --git 
a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaForMaskedLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaForMaskedLM_training.txt new file mode 100644 index 000000000..f3146c3fd --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaForMaskedLM_training.txt @@ -0,0 +1,132 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([2048, 50265], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([2048, 50265], f16), T([2048, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([4, 12, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([4, 12, 512, 512], f16), T([4, 12, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 25, ((T([4, 512, 768], f16),), {'dtype': f32}) +cnt: 25, ((T([4, 512, 768], f32),), {'dtype': f16}) +cnt: 1, ((T([4, 512, 1], f32),), {'dtype': f16}) +cnt: 1, ((T([4, 1, 512, 512], f32),), {'dtype': torch.uint8}) +cnt: 12, ((T([], f32),), {'dtype': f16, 'device': "torch.device('cpu')"}) +cnt: 12, ((T([4, 1, 512, 512], u8),), {'dtype': torch.bool}) +cnt: 25, ((T([4, 512, 768], f16),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 25, ((T([4, 512, 768], f32),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 12, ((T([2048, 2304], f16), [4, 512, 2304]), {}) +cnt: 36, ((T([4, 12, 512, 64], f16), [48, 512, 64]), {}) +cnt: 12, ((T([4, 12, 64, 512], f16), [48, 64, 512]), {}) +cnt: 12, ((T([48, 512, 512], f16), [4, 12, 512, 512]), {}) +cnt: 12, ((T([48, 512, 64], f16), [4, 12, 512, 64]), {}) +cnt: 12, ((T([4, 512, 12, 192], f16), [4, 512, 2304]), {}) +Operator: aten.add.Tensor +cnt: 25, ((T([4, 512, 1], f32), 1e-07), {}) +cnt: 25, ((T([4, 512, 768], f16), T([768], f16)), {}) +cnt: 24, ((T([4, 12, 512, 64], f16, stride=(1179648, 192, 2304, 1)), T([1, 12, 1, 64], f16)), {}) +cnt: 48, ((T([4, 512, 
768], f16), T([4, 512, 768], f16)), {}) +cnt: 50, ((T([4, 512, 768], f32), T([4, 512, 768], f32)), {}) +cnt: 25, ((T([4, 512, 1], f32), T([4, 512, 1], f32)), {}) +cnt: 1, ((T([50265, 768], f16), T([50265, 768], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([4, 512, 768], f16), T([1, 512, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 13, ((T([768], f16), T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([2048, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([2048, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([50265], f16), T([2048, 768], f16), T([768, 50265], f16, stride=(1, 768))), {}) +Operator: aten.bitwise_not.default +cnt: 12, ((T([4, 1, 512, 512], b8),), {}) +Operator: aten.bmm.default +cnt: 12, ((T([48, 512, 64], f16), T([48, 64, 512], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16), T([48, 512, 64], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16, stride=(262144, 1, 512)), T([48, 512, 64], f16)), {}) +cnt: 12, ((T([48, 512, 64], f16), T([48, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 12, ((T([48, 64, 512], f16, stride=(32768, 1, 64)), T([48, 512, 512], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16), T([48, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.cat.default +cnt: 12, (([T([4, 12, 512, 64], f16), T([4, 12, 512, 64], f16, stride=(393216, 32768, 1, 512)), T([4, 12, 512, 64], f16)], 3), {}) +Operator: aten.clone.default +cnt: 2, ((T([4, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([4, 512], i64), T([4, 512], i64)), {}) +Operator: aten.div.Scalar +cnt: 50, ((T([4, 512, 768], f32, stride=(512, 1, 0)), 768), {}) +Operator: aten.div.Tensor +cnt: 100, ((T([4, 512, 768], f32), T([4, 512, 1], f32)), {}) +cnt: 12, ((T([4, 12, 512, 64], f16, stride=(393216, 64, 768, 1)), T([], f16)), {}) +cnt: 25, ((T([4, 512, 1], f32), T([4, 512, 1], f32)), {}) +cnt: 12, ((T([4, 12, 512, 64], f16), T([], f16)), {}) +Operator: 
aten.embedding.default +cnt: 1, ((T([50265, 768], f16), T([4, 512], i64), 0), {}) +cnt: 1, ((T([512, 768], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([4, 512, 768], f16), T([4, 512], i64), 50265, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([4, 512, 3072], f16),), {}) +cnt: 1, ((T([4, 512, 768], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([4, 512, 768], f16), T([4, 512, 768], f16)), {}) +cnt: 12, ((T([4, 512, 3072], f16), T([4, 512, 3072], f16)), {}) +Operator: aten.masked_fill.Tensor +cnt: 12, ((T([4, 12, 512, 512], f16), T([4, 1, 512, 512], b8), T([], f32)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 12, ((T([4, 12, 512, 512], f16), T([4, 1, 512, 512], b8), 0), {}) +Operator: aten.mean.dim +cnt: 50, ((T([4, 512, 768], f32), [-1], True), {}) +Operator: aten.mm.default +cnt: 12, ((T([2048, 768], f16), T([768, 2304], f16, stride=(1, 768))), {}) +cnt: 1, ((T([2048, 50265], f16), T([50265, 768], f16)), {}) +cnt: 1, ((T([50265, 2048], f16, stride=(1, 50265)), T([2048, 768], f16)), {}) +cnt: 13, ((T([2048, 768], f16), T([768, 768], f16)), {}) +cnt: 13, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2048, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 3072], f16)), {}) +cnt: 12, ((T([2048, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 2048], f16, stride=(1, 3072)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2304, 2048], f16, stride=(1, 2304)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2048, 2304], f16), T([2304, 768], f16)), {}) +Operator: aten.mul.Scalar +cnt: 25, ((T([4, 512, 1], f32), 2), {}) +cnt: 25, ((T([4, 512, 768], f32), 2.0), {}) +Operator: aten.mul.Tensor +cnt: 25, ((T([768], f16), T([4, 512, 768], f16)), {}) +cnt: 2, ((T([4, 512, 768], f16), T([4, 512, 1], f16)), {}) +cnt: 1, ((T([4, 1, 1, 512], f32), T([4, 1, 
512, 1], f32)), {}) +cnt: 12, ((T([], f32), 1), {}) +cnt: 25, ((T([4, 512, 768], f16), T([768], f16)), {}) +cnt: 25, ((T([4, 512, 768], f16), T([4, 512, 768], f16)), {}) +cnt: 50, ((T([4, 512, 768], f32), T([4, 512, 768], f32)), {}) +Operator: aten.native_layer_norm.default +cnt: 1, ((T([4, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-07), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 1, ((T([4, 512, 768], f16), T([4, 512, 768], f16), [768], T([4, 512, 1], f32), T([4, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.neg.default +cnt: 75, ((T([4, 512, 768], f32),), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([2048, 50265], f16), T([2048], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([2048, 50265], f16), T([2048], i64), None, 1, -100), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 25, ((T([4, 512, 768], f32), 2), {}) +cnt: 25, ((T([4, 512, 768], f32), 1.0), {}) +Operator: aten.slice_backward.default +cnt: 24, ((T([1, 1, 768], f16), [1, 1, 768], 2, 0, 9223372036854775807, 1), {}) +Operator: aten.split.Tensor +cnt: 12, ((T([4, 12, 512, 192], f16, stride=(1179648, 192, 2304, 1)), 64, -1), {}) +Operator: aten.sqrt.default +cnt: 25, ((T([4, 512, 1], f32),), {}) +cnt: 12, ((T([], f32),), {}) +Operator: aten.sub.Tensor +cnt: 50, ((T([4, 512, 768], f32), T([4, 512, 1], f32)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([2048, 50265], f16), [0], True), {}) +cnt: 25, ((T([2048, 768], f16), [0], True), {}) +cnt: 50, ((T([4, 512, 768], f16), [0, 1], True), {}) +cnt: 75, ((T([4, 512, 768], f32), [2], True), {}) +cnt: 12, ((T([2048, 3072], f16), [0], True), {}) +cnt: 24, ((T([4, 12, 512, 64], f16), [0, 2], True), {}) +cnt: 1, ((T([4, 512, 768], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaForQuestionAnswering_training.txt 
b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaForQuestionAnswering_training.txt new file mode 100644 index 000000000..cd06e0d09 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaForQuestionAnswering_training.txt @@ -0,0 +1,133 @@ +Operator: aten._log_softmax.default +cnt: 2, ((T([4, 512], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 2, ((T([4, 512], f16), T([4, 512], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([4, 12, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([4, 12, 512, 512], f16), T([4, 12, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 25, ((T([4, 512, 768], f16),), {'dtype': f32}) +cnt: 25, ((T([4, 512, 768], f32),), {'dtype': f16}) +cnt: 1, ((T([4, 512, 1], f32),), {'dtype': f16}) +cnt: 1, ((T([4, 1, 512, 512], f32),), {'dtype': torch.uint8}) +cnt: 12, ((T([], f32),), {'dtype': f16, 'device': "torch.device('cpu')"}) +cnt: 12, ((T([4, 1, 512, 512], u8),), {'dtype': torch.bool}) +cnt: 25, ((T([4, 512, 768], f16),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 25, ((T([4, 512, 768], f32),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 12, ((T([2048, 2304], f16), [4, 512, 2304]), {}) +cnt: 36, ((T([4, 12, 512, 64], f16), [48, 512, 64]), {}) +cnt: 12, ((T([4, 12, 64, 512], f16), [48, 64, 512]), {}) +cnt: 12, ((T([48, 512, 512], f16), [4, 12, 512, 512]), {}) +cnt: 12, ((T([48, 512, 64], f16), [4, 12, 512, 64]), {}) +cnt: 12, ((T([4, 512, 12, 192], f16), [4, 512, 2304]), {}) +Operator: aten.add.Tensor +cnt: 25, ((T([4, 512, 1], f32), 1e-07), {}) +cnt: 25, ((T([4, 512, 768], f16), T([768], f16)), {}) +cnt: 24, ((T([4, 12, 512, 64], f16, stride=(1179648, 192, 2304, 1)), T([1, 12, 1, 64], f16)), {}) +cnt: 48, ((T([4, 512, 768], f16), T([4, 512, 768], f16)), {}) +cnt: 1, ((T([], f16), T([], f16)), {}) +cnt: 50, 
((T([4, 512, 768], f32), T([4, 512, 768], f32)), {}) +cnt: 25, ((T([4, 512, 1], f32), T([4, 512, 1], f32)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([4, 512, 768], f16), T([1, 512, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 12, ((T([768], f16), T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([2048, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([2048, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([2], f16), T([2048, 768], f16), T([768, 2], f16, stride=(1, 768))), {}) +Operator: aten.bitwise_not.default +cnt: 12, ((T([4, 1, 512, 512], b8),), {}) +Operator: aten.bmm.default +cnt: 12, ((T([48, 512, 64], f16), T([48, 64, 512], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16), T([48, 512, 64], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16, stride=(262144, 1, 512)), T([48, 512, 64], f16)), {}) +cnt: 12, ((T([48, 512, 64], f16), T([48, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 12, ((T([48, 64, 512], f16, stride=(32768, 1, 64)), T([48, 512, 512], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16), T([48, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.cat.default +cnt: 1, (([T([4, 512, 1], f16), T([4, 512, 1], f16)], 2), {}) +cnt: 12, (([T([4, 12, 512, 64], f16), T([4, 12, 512, 64], f16, stride=(393216, 32768, 1, 512)), T([4, 12, 512, 64], f16)], 3), {}) +Operator: aten.clamp.default +cnt: 2, ((T([4], i64), 0, 512), {}) +Operator: aten.clone.default +cnt: 1, ((T([4, 512], i64),), {}) +cnt: 2, ((T([4], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([4, 512], i64), T([4, 512], i64)), {}) +cnt: 2, ((T([4], i64), T([4], i64)), {}) +Operator: aten.div.Scalar +cnt: 50, ((T([4, 512, 768], f32, stride=(512, 1, 0)), 768), {}) +Operator: aten.div.Tensor +cnt: 100, ((T([4, 512, 768], f32), T([4, 512, 1], f32)), {}) +cnt: 12, ((T([4, 12, 512, 64], f16, stride=(393216, 64, 768, 1)), T([], f16)), {}) +cnt: 2, ((T([], f16), 2), {}) +cnt: 25, ((T([4, 
512, 1], f32), T([4, 512, 1], f32)), {}) +cnt: 12, ((T([4, 12, 512, 64], f16), T([], f16)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50265, 768], f16), T([4, 512], i64), 0), {}) +cnt: 1, ((T([512, 768], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([4, 512, 768], f16), T([4, 512], i64), 50265, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([4, 512, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([4, 512, 3072], f16), T([4, 512, 3072], f16)), {}) +Operator: aten.masked_fill.Tensor +cnt: 12, ((T([4, 12, 512, 512], f16), T([4, 1, 512, 512], b8), T([], f32)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 12, ((T([4, 12, 512, 512], f16), T([4, 1, 512, 512], b8), 0), {}) +Operator: aten.mean.dim +cnt: 50, ((T([4, 512, 768], f32), [-1], True), {}) +Operator: aten.mm.default +cnt: 12, ((T([2048, 768], f16), T([768, 2304], f16, stride=(1, 768))), {}) +cnt: 1, ((T([2048, 2], f16), T([2, 768], f16)), {}) +cnt: 1, ((T([2, 2048], f16, stride=(1, 2)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2048, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 3072], f16)), {}) +cnt: 12, ((T([2048, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 2048], f16, stride=(1, 3072)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2048, 768], f16), T([768, 768], f16)), {}) +cnt: 12, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2304, 2048], f16, stride=(1, 2304)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2048, 2304], f16), T([2304, 768], f16)), {}) +Operator: aten.mul.Scalar +cnt: 25, ((T([4, 512, 1], f32), 2), {}) +cnt: 25, ((T([4, 512, 768], f32), 2.0), {}) +Operator: aten.mul.Tensor +cnt: 25, ((T([768], f16), T([4, 512, 768], f16)), {}) +cnt: 2, ((T([4, 512, 768], f16), T([4, 512, 1], f16)), {}) +cnt: 1, ((T([4, 1, 1, 512], f32), T([4, 1, 512, 1], 
f32)), {}) +cnt: 12, ((T([], f32), 1), {}) +cnt: 25, ((T([4, 512, 768], f16), T([768], f16)), {}) +cnt: 25, ((T([4, 512, 768], f16), T([4, 512, 768], f16)), {}) +cnt: 50, ((T([4, 512, 768], f32), T([4, 512, 768], f32)), {}) +Operator: aten.neg.default +cnt: 75, ((T([4, 512, 768], f32),), {}) +Operator: aten.nll_loss_backward.default +cnt: 2, ((T([], f16), T([4, 512], f16), T([4], i64), None, 1, 512, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 2, ((T([4, 512], f16), T([4], i64), None, 1, 512), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 25, ((T([4, 512, 768], f32), 2), {}) +cnt: 25, ((T([4, 512, 768], f32), 1.0), {}) +Operator: aten.slice_backward.default +cnt: 24, ((T([1, 1, 768], f16), [1, 1, 768], 2, 0, 9223372036854775807, 1), {}) +Operator: aten.split.Tensor +cnt: 12, ((T([4, 12, 512, 192], f16, stride=(1179648, 192, 2304, 1)), 64, -1), {}) +cnt: 1, ((T([4, 512, 2], f16), 1, -1), {}) +Operator: aten.sqrt.default +cnt: 25, ((T([4, 512, 1], f32),), {}) +cnt: 12, ((T([], f32),), {}) +Operator: aten.sub.Tensor +cnt: 50, ((T([4, 512, 768], f32), T([4, 512, 1], f32)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([2048, 2], f16), [0], True), {}) +cnt: 50, ((T([4, 512, 768], f16), [0, 1], True), {}) +cnt: 75, ((T([4, 512, 768], f32), [2], True), {}) +cnt: 24, ((T([2048, 768], f16), [0], True), {}) +cnt: 12, ((T([2048, 3072], f16), [0], True), {}) +cnt: 24, ((T([4, 12, 512, 64], f16), [0, 2], True), {}) +cnt: 1, ((T([4, 512, 768], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaV2ForMaskedLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaV2ForMaskedLM_training.txt new file mode 100644 index 000000000..157e119ee --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaV2ForMaskedLM_training.txt @@ -0,0 +1,85 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([512, 128100], f16), 1, False), {}) +Operator: 
aten._log_softmax_backward_data.default +cnt: 1, ((T([512, 128100], f16), T([512, 128100], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([1, 24, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([1, 24, 512, 512], f16), T([1, 24, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([1, 512, 1], f32),), {'dtype': f16}) +cnt: 1, ((T([1, 1, 512, 512], f32),), {'dtype': torch.uint8}) +cnt: 24, ((T([], f32),), {'dtype': f16, 'device': "torch.device('cpu')"}) +cnt: 24, ((T([1, 1, 512, 512], u8),), {'dtype': torch.bool}) +Operator: aten._unsafe_view.default +cnt: 48, ((T([1, 512, 24, 64], f16), [1, 512, 1536]), {}) +Operator: aten.add.Tensor +cnt: 144, ((T([1, 512, 1536], f16), T([1, 512, 1536], f16)), {}) +cnt: 1, ((T([128100, 1536], f16), T([128100, 1536], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([1, 512, 1536], f16), T([1, 512, 1536], f16)), {}) +Operator: aten.addmm.default +cnt: 97, ((T([1536], f16), T([512, 1536], f16), T([1536, 1536], f16, stride=(1, 1536))), {}) +cnt: 24, ((T([6144], f16), T([512, 1536], f16), T([1536, 6144], f16, stride=(1, 1536))), {}) +cnt: 24, ((T([1536], f16), T([512, 6144], f16), T([6144, 1536], f16, stride=(1, 6144))), {}) +cnt: 1, ((T([128100], f16), T([512, 1536], f16), T([1536, 128100], f16, stride=(1, 1536))), {}) +Operator: aten.bitwise_not.default +cnt: 24, ((T([1, 1, 512, 512], b8),), {}) +Operator: aten.bmm.default +cnt: 24, ((T([24, 512, 64], f16), T([24, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 48, ((T([24, 512, 512], f16), T([24, 512, 64], f16)), {}) +cnt: 24, ((T([24, 512, 512], f16, stride=(262144, 1, 512)), T([24, 512, 64], f16, stride=(64, 1536, 1))), {}) +cnt: 24, ((T([24, 512, 64], f16, stride=(64, 1536, 1)), T([24, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 24, ((T([24, 64, 512], f16, stride=(32768, 1, 64)), T([24, 512, 512], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([1, 512], i64),), {}) 
+Operator: aten.copy_.default +cnt: 2, ((T([1, 512], i64), T([1, 512], i64)), {}) +Operator: aten.div.Tensor +cnt: 48, ((T([24, 512, 512], f16), T([], f16)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([128100, 1536], f16), T([1, 512], i64), 0), {}) +cnt: 1, ((T([512, 1536], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 1536], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([1, 512, 1536], f16), T([1, 512], i64), 128100, 0, False), {}) +Operator: aten.gelu.default +cnt: 24, ((T([1, 512, 6144], f16),), {}) +cnt: 1, ((T([1, 512, 1536], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([1, 512, 1536], f16), T([1, 512, 1536], f16)), {}) +cnt: 24, ((T([1, 512, 6144], f16), T([1, 512, 6144], f16)), {}) +Operator: aten.masked_fill.Tensor +cnt: 24, ((T([1, 24, 512, 512], f16), T([1, 1, 512, 512], b8), T([], f32)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 24, ((T([1, 24, 512, 512], f16), T([1, 1, 512, 512], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([512, 128100], f16), T([128100, 1536], f16)), {}) +cnt: 1, ((T([128100, 512], f16, stride=(1, 128100)), T([512, 1536], f16)), {}) +cnt: 73, ((T([512, 1536], f16), T([1536, 1536], f16)), {}) +cnt: 73, ((T([1536, 512], f16, stride=(1, 1536)), T([512, 1536], f16)), {}) +cnt: 24, ((T([512, 1536], f16), T([1536, 6144], f16)), {}) +cnt: 24, ((T([1536, 512], f16, stride=(1, 1536)), T([512, 6144], f16)), {}) +cnt: 24, ((T([512, 6144], f16), T([6144, 1536], f16)), {}) +cnt: 24, ((T([6144, 512], f16, stride=(1, 6144)), T([512, 1536], f16)), {}) +cnt: 24, ((T([512, 1536], f16, stride=(1, 512)), T([1536, 1536], f16)), {}) +cnt: 24, ((T([1536, 512], f16), T([512, 1536], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([1, 512, 1536], f16), T([1, 512, 1], f16)), {}) +cnt: 1, ((T([1, 1, 1, 512], f32), T([1, 1, 512, 1], f32)), {}) +cnt: 24, ((T([], f32), 1), {}) +Operator: aten.native_layer_norm.default +cnt: 50, ((T([1, 512, 1536], f16), 
[1536], T([1536], f16), T([1536], f16), 1e-07), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 50, ((T([1, 512, 1536], f16), T([1, 512, 1536], f16), [1536], T([1, 512, 1], f32), T([1, 512, 1], f32), T([1536], f16), T([1536], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([512, 128100], f16), T([512], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([512, 128100], f16), T([512], i64), None, 1, -100), {}) +Operator: aten.sqrt.default +cnt: 24, ((T([], f32),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([512, 128100], f16), [0], True), {}) +cnt: 97, ((T([512, 1536], f16), [0], True), {}) +cnt: 24, ((T([512, 6144], f16), [0], True), {}) +cnt: 24, ((T([512, 1536], f16, stride=(1, 512)), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaV2ForQuestionAnswering_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaV2ForQuestionAnswering_training.txt new file mode 100644 index 000000000..94ffa5856 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DebertaV2ForQuestionAnswering_training.txt @@ -0,0 +1,92 @@ +Operator: aten._log_softmax.default +cnt: 2, ((T([1, 512], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 2, ((T([1, 512], f16), T([1, 512], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([1, 24, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([1, 24, 512, 512], f16), T([1, 24, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([1, 512, 1], f32),), {'dtype': f16}) +cnt: 1, ((T([1, 1, 512, 512], f32),), {'dtype': torch.uint8}) +cnt: 24, ((T([], f32),), {'dtype': f16, 'device': "torch.device('cpu')"}) +cnt: 24, ((T([1, 1, 512, 512], u8),), {'dtype': torch.bool}) +Operator: aten._unsafe_view.default +cnt: 48, ((T([1, 512, 24, 64], f16), [1, 512, 1536]), 
{}) +Operator: aten.add.Tensor +cnt: 144, ((T([1, 512, 1536], f16), T([1, 512, 1536], f16)), {}) +cnt: 1, ((T([], f16), T([], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([1, 512, 1536], f16), T([1, 512, 1536], f16)), {}) +Operator: aten.addmm.default +cnt: 96, ((T([1536], f16), T([512, 1536], f16), T([1536, 1536], f16, stride=(1, 1536))), {}) +cnt: 24, ((T([6144], f16), T([512, 1536], f16), T([1536, 6144], f16, stride=(1, 1536))), {}) +cnt: 24, ((T([1536], f16), T([512, 6144], f16), T([6144, 1536], f16, stride=(1, 6144))), {}) +cnt: 1, ((T([2], f16), T([512, 1536], f16), T([1536, 2], f16, stride=(1, 1536))), {}) +Operator: aten.bitwise_not.default +cnt: 24, ((T([1, 1, 512, 512], b8),), {}) +Operator: aten.bmm.default +cnt: 24, ((T([24, 512, 64], f16), T([24, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 48, ((T([24, 512, 512], f16), T([24, 512, 64], f16)), {}) +cnt: 24, ((T([24, 512, 512], f16, stride=(262144, 1, 512)), T([24, 512, 64], f16, stride=(64, 1536, 1))), {}) +cnt: 24, ((T([24, 512, 64], f16, stride=(64, 1536, 1)), T([24, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 24, ((T([24, 64, 512], f16, stride=(32768, 1, 64)), T([24, 512, 512], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([1, 512, 1], f16), T([1, 512, 1], f16)], 2), {}) +Operator: aten.clamp.default +cnt: 2, ((T([1], i64), 0, 512), {}) +Operator: aten.clone.default +cnt: 1, ((T([1, 512], i64),), {}) +cnt: 2, ((T([1], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([1, 512], i64), T([1, 512], i64)), {}) +cnt: 2, ((T([1], i64), T([1], i64)), {}) +Operator: aten.div.Tensor +cnt: 48, ((T([24, 512, 512], f16), T([], f16)), {}) +cnt: 2, ((T([], f16), 2), {}) +Operator: aten.embedding.default +cnt: 1, ((T([128100, 1536], f16), T([1, 512], i64), 0), {}) +cnt: 1, ((T([512, 1536], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 1536], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([1, 512, 1536], f16), T([1, 512], 
i64), 128100, 0, False), {}) +Operator: aten.gelu.default +cnt: 24, ((T([1, 512, 6144], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 24, ((T([1, 512, 6144], f16), T([1, 512, 6144], f16)), {}) +Operator: aten.masked_fill.Tensor +cnt: 24, ((T([1, 24, 512, 512], f16), T([1, 1, 512, 512], b8), T([], f32)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 24, ((T([1, 24, 512, 512], f16), T([1, 1, 512, 512], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([512, 2], f16), T([2, 1536], f16)), {}) +cnt: 1, ((T([2, 512], f16, stride=(1, 2)), T([512, 1536], f16)), {}) +cnt: 24, ((T([512, 1536], f16), T([1536, 6144], f16)), {}) +cnt: 24, ((T([1536, 512], f16, stride=(1, 1536)), T([512, 6144], f16)), {}) +cnt: 24, ((T([512, 6144], f16), T([6144, 1536], f16)), {}) +cnt: 24, ((T([6144, 512], f16, stride=(1, 6144)), T([512, 1536], f16)), {}) +cnt: 72, ((T([512, 1536], f16), T([1536, 1536], f16)), {}) +cnt: 72, ((T([1536, 512], f16, stride=(1, 1536)), T([512, 1536], f16)), {}) +cnt: 24, ((T([512, 1536], f16, stride=(1, 512)), T([1536, 1536], f16)), {}) +cnt: 24, ((T([1536, 512], f16), T([512, 1536], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([1, 512, 1536], f16), T([1, 512, 1], f16)), {}) +cnt: 1, ((T([1, 1, 1, 512], f32), T([1, 1, 512, 1], f32)), {}) +cnt: 24, ((T([], f32), 1), {}) +Operator: aten.native_layer_norm.default +cnt: 49, ((T([1, 512, 1536], f16), [1536], T([1536], f16), T([1536], f16), 1e-07), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 49, ((T([1, 512, 1536], f16), T([1, 512, 1536], f16), [1536], T([1, 512, 1], f32), T([1, 512, 1], f32), T([1536], f16), T([1536], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 2, ((T([], f16), T([1, 512], f16), T([1], i64), None, 1, 512, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 2, ((T([1, 512], f16), T([1], i64), None, 1, 512), {}) +Operator: aten.split.Tensor +cnt: 1, ((T([1, 512, 2], f16), 1, -1), {}) +Operator: aten.sqrt.default +cnt: 24, 
((T([], f32),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([512, 2], f16), [0], True), {}) +cnt: 96, ((T([512, 1536], f16), [0], True), {}) +cnt: 24, ((T([512, 6144], f16), [0], True), {}) +cnt: 24, ((T([512, 1536], f16, stride=(1, 512)), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DistilBertForMaskedLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DistilBertForMaskedLM_training.txt new file mode 100644 index 000000000..37d0d4707 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DistilBertForMaskedLM_training.txt @@ -0,0 +1,78 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([2048, 30522], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([2048, 30522], f16), T([2048, 30522], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 6, ((T([16, 12, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 6, ((T([16, 12, 128, 128], f16), T([16, 12, 128, 128], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 18, ((T([16, 12, 128, 64], f16), [192, 128, 64]), {}) +cnt: 6, ((T([16, 12, 64, 128], f16), [192, 64, 128]), {}) +cnt: 6, ((T([192, 128, 128], f16), [16, 12, 128, 128]), {}) +cnt: 6, ((T([192, 128, 64], f16), [16, 12, 128, 64]), {}) +cnt: 12, ((T([16, 128, 12, 64], f16), [16, 128, 768]), {}) +cnt: 6, ((T([16, 128, 768], f16), [2048, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([16, 128, 768], f16), T([1, 128, 768], f16)), {}) +cnt: 36, ((T([16, 128, 768], f16), T([16, 128, 768], f16)), {}) +cnt: 1, ((T([30522, 768], f16), T([30522, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 25, ((T([768], f16), T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 6, ((T([3072], f16), T([2048, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 6, ((T([768], f16), T([2048, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([30522], f16), 
T([2048, 768], f16), T([768, 30522], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 6, ((T([192, 128, 64], f16), T([192, 64, 128], f16)), {}) +cnt: 6, ((T([192, 128, 128], f16), T([192, 128, 64], f16)), {}) +cnt: 6, ((T([192, 128, 128], f16, stride=(16384, 1, 128)), T([192, 128, 64], f16)), {}) +cnt: 6, ((T([192, 128, 64], f16), T([192, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 6, ((T([192, 64, 128], f16, stride=(8192, 1, 64)), T([192, 128, 128], f16)), {}) +cnt: 6, ((T([192, 128, 128], f16), T([192, 128, 64], f16, stride=(8192, 1, 128))), {}) +Operator: aten.clone.default +cnt: 2, ((T([16, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([16, 128], i64), T([16, 128], i64)), {}) +Operator: aten.div.Tensor +cnt: 6, ((T([16, 12, 128, 64], f16, stride=(98304, 64, 768, 1)), 8.0), {}) +cnt: 6, ((T([16, 12, 128, 64], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 768], f16), T([16, 128], i64), 0), {}) +cnt: 1, ((T([512, 768], f16), T([1, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 128, 768], f16), T([1, 128], i64), 512, -1, False), {}) +cnt: 1, ((T([16, 128, 768], f16), T([16, 128], i64), 30522, 0, False), {}) +Operator: aten.eq.Scalar +cnt: 6, ((T([16, 128], f32), 0), {}) +Operator: aten.gelu.default +cnt: 6, ((T([16, 128, 3072], f16),), {}) +cnt: 1, ((T([16, 128, 768], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([16, 128, 768], f16), T([16, 128, 768], f16)), {}) +cnt: 6, ((T([16, 128, 3072], f16), T([16, 128, 3072], f16)), {}) +Operator: aten.masked_fill.Scalar +cnt: 6, ((T([16, 12, 128, 128], f16), T([16, 12, 128, 128], b8, stride=(128, 0, 0, 1)), 0), {}) +Operator: aten.masked_fill.Tensor +cnt: 6, ((T([16, 12, 128, 128], f16), T([16, 12, 128, 128], b8, stride=(128, 0, 0, 1)), T([], f32)), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 30522], f16), T([30522, 768], f16)), {}) +cnt: 1, ((T([30522, 2048], f16, stride=(1, 30522)), T([2048, 768], f16)), 
{}) +cnt: 25, ((T([2048, 768], f16), T([768, 768], f16)), {}) +cnt: 25, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 768], f16)), {}) +cnt: 6, ((T([2048, 768], f16), T([768, 3072], f16)), {}) +cnt: 6, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 3072], f16)), {}) +cnt: 6, ((T([2048, 3072], f16), T([3072, 768], f16)), {}) +cnt: 6, ((T([3072, 2048], f16, stride=(1, 3072)), T([2048, 768], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 14, ((T([16, 128, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 14, ((T([16, 128, 768], f16), T([16, 128, 768], f16), [768], T([16, 128, 1], f32), T([16, 128, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([2048, 30522], f16), T([2048], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([2048, 30522], f16), T([2048], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([2048, 30522], f16), [0], True), {}) +cnt: 31, ((T([2048, 768], f16), [0], True), {}) +cnt: 6, ((T([2048, 3072], f16), [0], True), {}) +cnt: 1, ((T([16, 128, 768], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DistilBertForQuestionAnswering_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DistilBertForQuestionAnswering_training.txt new file mode 100644 index 000000000..350ed8018 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DistilBertForQuestionAnswering_training.txt @@ -0,0 +1,85 @@ +Operator: aten._log_softmax.default +cnt: 2, ((T([32, 128], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 2, ((T([32, 128], f16), T([32, 128], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 6, ((T([32, 12, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 6, ((T([32, 12, 128, 128], f16), 
T([32, 12, 128, 128], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 18, ((T([32, 12, 128, 64], f16), [384, 128, 64]), {}) +cnt: 6, ((T([32, 12, 64, 128], f16), [384, 64, 128]), {}) +cnt: 6, ((T([384, 128, 128], f16), [32, 12, 128, 128]), {}) +cnt: 6, ((T([384, 128, 64], f16), [32, 12, 128, 64]), {}) +cnt: 12, ((T([32, 128, 12, 64], f16), [32, 128, 768]), {}) +cnt: 6, ((T([32, 128, 768], f16), [4096, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([32, 128, 768], f16), T([1, 128, 768], f16)), {}) +cnt: 36, ((T([32, 128, 768], f16), T([32, 128, 768], f16)), {}) +cnt: 1, ((T([], f16), T([], f16)), {}) +Operator: aten.addmm.default +cnt: 24, ((T([768], f16), T([4096, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 6, ((T([3072], f16), T([4096, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 6, ((T([768], f16), T([4096, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([2], f16), T([4096, 768], f16), T([768, 2], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 6, ((T([384, 128, 64], f16), T([384, 64, 128], f16)), {}) +cnt: 6, ((T([384, 128, 128], f16), T([384, 128, 64], f16)), {}) +cnt: 6, ((T([384, 128, 128], f16, stride=(16384, 1, 128)), T([384, 128, 64], f16)), {}) +cnt: 6, ((T([384, 128, 64], f16), T([384, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 6, ((T([384, 64, 128], f16, stride=(8192, 1, 64)), T([384, 128, 128], f16)), {}) +cnt: 6, ((T([384, 128, 128], f16), T([384, 128, 64], f16, stride=(8192, 1, 128))), {}) +Operator: aten.cat.default +cnt: 1, (([T([32, 128, 1], f16), T([32, 128, 1], f16)], 2), {}) +Operator: aten.clamp.default +cnt: 2, ((T([32], i64), 0, 128), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 128], i64),), {}) +cnt: 2, ((T([32], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 128], i64), T([32, 128], i64)), {}) +cnt: 2, ((T([32], i64), T([32], i64)), {}) +Operator: aten.div.Tensor +cnt: 6, ((T([32, 12, 128, 64], f16, stride=(98304, 64, 768, 1)), 
8.0), {}) +cnt: 2, ((T([], f16), 2), {}) +cnt: 6, ((T([32, 12, 128, 64], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 768], f16), T([32, 128], i64), 0), {}) +cnt: 1, ((T([512, 768], f16), T([1, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 128, 768], f16), T([1, 128], i64), 512, -1, False), {}) +cnt: 1, ((T([32, 128, 768], f16), T([32, 128], i64), 30522, 0, False), {}) +Operator: aten.eq.Scalar +cnt: 6, ((T([32, 128], f32), 0), {}) +Operator: aten.gelu.default +cnt: 6, ((T([32, 128, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 6, ((T([32, 128, 3072], f16), T([32, 128, 3072], f16)), {}) +Operator: aten.masked_fill.Scalar +cnt: 6, ((T([32, 12, 128, 128], f16), T([32, 12, 128, 128], b8, stride=(128, 0, 0, 1)), 0), {}) +Operator: aten.masked_fill.Tensor +cnt: 6, ((T([32, 12, 128, 128], f16), T([32, 12, 128, 128], b8, stride=(128, 0, 0, 1)), T([], f32)), {}) +Operator: aten.mm.default +cnt: 1, ((T([4096, 2], f16), T([2, 768], f16)), {}) +cnt: 1, ((T([2, 4096], f16, stride=(1, 2)), T([4096, 768], f16)), {}) +cnt: 6, ((T([4096, 768], f16), T([768, 3072], f16)), {}) +cnt: 6, ((T([768, 4096], f16, stride=(1, 768)), T([4096, 3072], f16)), {}) +cnt: 6, ((T([4096, 3072], f16), T([3072, 768], f16)), {}) +cnt: 6, ((T([3072, 4096], f16, stride=(1, 3072)), T([4096, 768], f16)), {}) +cnt: 24, ((T([4096, 768], f16), T([768, 768], f16)), {}) +cnt: 24, ((T([768, 4096], f16, stride=(1, 768)), T([4096, 768], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 13, ((T([32, 128, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 13, ((T([32, 128, 768], f16), T([32, 128, 768], f16), [768], T([32, 128, 1], f32), T([32, 128, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 2, ((T([], f16), T([32, 128], f16), T([32], i64), None, 1, 128, T([], f16)), {}) +Operator: 
aten.nll_loss_forward.default +cnt: 2, ((T([32, 128], f16), T([32], i64), None, 1, 128), {}) +Operator: aten.split.Tensor +cnt: 1, ((T([32, 128, 2], f16), 1, -1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([4096, 2], f16), [0], True), {}) +cnt: 30, ((T([4096, 768], f16), [0], True), {}) +cnt: 6, ((T([4096, 3072], f16), [0], True), {}) +cnt: 1, ((T([32, 128, 768], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DistillGPT2_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DistillGPT2_training.txt new file mode 100644 index 000000000..5654c4bbd --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/DistillGPT2_training.txt @@ -0,0 +1,91 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([511, 50257], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([511, 50257], f16), T([511, 50257], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 6, ((T([1, 12, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 6, ((T([1, 12, 512, 512], f16), T([1, 12, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 6, ((T([1, 1, 512, 512], u8, stride=(1048576, 1048576, 1024, 1)),), {'dtype': torch.bool}) +cnt: 6, ((T([], f16),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 6, ((T([12, 512, 512], f16), [1, 12, 512, 512]), {}) +cnt: 6, ((T([12, 512, 64], f16), [1, 12, 512, 64]), {}) +cnt: 1, ((T([512, 50257], f16), [1, 512, 50257]), {}) +cnt: 12, ((T([1, 512, 12, 64], f16), [1, 512, 768]), {}) +Operator: aten.add.Tensor +cnt: 25, ((T([1, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 18, ((T([1, 512, 3072], f16), T([1, 512, 3072], f16)), {}) +cnt: 6, ((T([1, 512, 3072], f16), 1.0), {}) +cnt: 1, ((T([50257, 768], f16), T([50257, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 6, ((T([2304], f16), T([512, 768], f16), T([768, 2304], f16)), {}) 
+cnt: 6, ((T([768], f16), T([512, 768], f16), T([768, 768], f16)), {}) +cnt: 6, ((T([3072], f16), T([512, 768], f16), T([768, 3072], f16)), {}) +cnt: 6, ((T([768], f16), T([512, 3072], f16), T([3072, 768], f16)), {}) +Operator: aten.bmm.default +cnt: 6, ((T([12, 512, 64], f16, stride=(64, 2304, 1)), T([12, 64, 512], f16, stride=(64, 1, 2304))), {}) +cnt: 12, ((T([12, 512, 512], f16), T([12, 512, 64], f16, stride=(64, 2304, 1))), {}) +cnt: 6, ((T([12, 512, 512], f16, stride=(262144, 1, 512)), T([12, 512, 64], f16, stride=(64, 768, 1))), {}) +cnt: 6, ((T([12, 512, 64], f16, stride=(64, 768, 1)), T([12, 64, 512], f16, stride=(64, 1, 2304))), {}) +cnt: 6, ((T([12, 64, 512], f16, stride=(64, 1, 2304)), T([12, 512, 512], f16)), {}) +Operator: aten.cat.default +cnt: 6, (([T([1, 512, 768], f16), T([1, 512, 768], f16, stride=(512, 1, 512)), T([1, 512, 768], f16)], 2), {}) +Operator: aten.clone.default +cnt: 2, ((T([1, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([1, 512], i64), T([1, 512], i64)), {}) +Operator: aten.div.Tensor +cnt: 12, ((T([1, 12, 512, 512], f16), T([], f16)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50257, 768], f16), T([1, 512], i64)), {}) +cnt: 1, ((T([1024, 768], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 1024, -1, False), {}) +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 50257, -1, False), {}) +Operator: aten.mm.default +cnt: 1, ((T([512, 768], f16), T([768, 50257], f16, stride=(1, 768))), {}) +cnt: 1, ((T([50257, 512], f16, stride=(1, 50257)), T([512, 768], f16)), {}) +cnt: 1, ((T([512, 50257], f16), T([50257, 768], f16)), {}) +cnt: 6, ((T([512, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 6, ((T([3072, 512], f16, stride=(1, 3072)), T([512, 768], f16)), {}) +cnt: 6, ((T([512, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 6, ((T([768, 512], f16, stride=(1, 768)), T([512, 3072], f16)), {}) +cnt: 6, 
((T([512, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 6, ((T([768, 512], f16, stride=(1, 768)), T([512, 768], f16)), {}) +cnt: 6, ((T([512, 2304], f16), T([2304, 768], f16, stride=(1, 2304))), {}) +cnt: 6, ((T([768, 512], f16, stride=(1, 768)), T([512, 2304], f16)), {}) +Operator: aten.mul.Scalar +cnt: 6, ((T([1, 512, 3072], f16), 3.0), {}) +Operator: aten.mul.Tensor +cnt: 12, ((T([1, 512, 3072], f16), 0.5), {}) +cnt: 12, ((T([1, 512, 3072], f16), 0.044715), {}) +cnt: 12, ((T([1, 512, 3072], f16), 0.7978845608028654), {}) +cnt: 24, ((T([1, 512, 3072], f16), T([1, 512, 3072], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 13, ((T([1, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 13, ((T([1, 512, 768], f16), T([1, 512, 768], f16), [768], T([1, 512, 1], f32), T([1, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([511, 50257], f16), T([511], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([511, 50257], f16), T([511], i64), None, 1, -100), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 6, ((T([1, 512, 3072], f16), 3.0), {}) +cnt: 6, ((T([1, 512, 3072], f16), 2.0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([1, 511, 50257], f16), [1, 511, 50257], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([1, 511, 50257], f16), [1, 512, 50257], 1, 0, -1, 1), {}) +Operator: aten.split.Tensor +cnt: 6, ((T([1, 512, 2304], f16), 768, 2), {}) +Operator: aten.sum.SymInt +cnt: 12, ((T([512, 768], f16), [0], True), {}) +cnt: 6, ((T([512, 3072], f16), [0], True), {}) +cnt: 6, ((T([512, 2304], f16), [0], True), {}) +Operator: aten.tanh.default +cnt: 6, ((T([1, 512, 3072], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 6, ((T([1, 512, 3072], f16), T([1, 512, 3072], f16)), {}) +Operator: aten.where.self +cnt: 12, ((T([1, 1, 512, 512], b8), 
T([1, 12, 512, 512], f16), T([], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/ElectraForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/ElectraForCausalLM_training.txt new file mode 100644 index 000000000..adbb45be6 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/ElectraForCausalLM_training.txt @@ -0,0 +1,92 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([511, 30522], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([511, 30522], f16), T([511, 30522], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([1, 4, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([1, 4, 512, 512], f16), T([1, 4, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([1, 1, 1, 512], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 12, ((T([4, 512, 512], f16), [1, 4, 512, 512]), {}) +cnt: 12, ((T([4, 512, 64], f16), [1, 4, 512, 64]), {}) +cnt: 24, ((T([1, 512, 4, 64], f16), [1, 512, 256]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([1, 512, 128], f16), T([1, 512, 128], f16)), {}) +cnt: 12, ((T([1, 4, 512, 512], f16), T([1, 1, 1, 512], f16)), {}) +cnt: 72, ((T([1, 512, 256], f16), T([1, 512, 256], f16)), {}) +cnt: 1, ((T([30522, 128], f16), T([30522, 128], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([1, 512, 128], f16), T([1, 512, 128], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([256], f16), T([512, 128], f16), T([128, 256], f16, stride=(1, 128))), {}) +cnt: 48, ((T([256], f16), T([512, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 12, ((T([1024], f16), T([512, 256], f16), T([256, 1024], f16, stride=(1, 256))), {}) +cnt: 12, ((T([256], f16), T([512, 1024], f16), T([1024, 256], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([128], f16), T([512, 256], f16), T([256, 128], f16, stride=(1, 256))), {}) +cnt: 1, ((T([30522], f16), 
T([512, 128], f16), T([128, 30522], f16, stride=(1, 128))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([4, 512, 64], f16, stride=(64, 256, 1)), T([4, 64, 512], f16, stride=(64, 1, 256))), {}) +cnt: 24, ((T([4, 512, 512], f16), T([4, 512, 64], f16, stride=(64, 256, 1))), {}) +cnt: 12, ((T([4, 512, 512], f16, stride=(262144, 1, 512)), T([4, 512, 64], f16, stride=(64, 256, 1))), {}) +cnt: 12, ((T([4, 64, 512], f16, stride=(64, 1, 256)), T([4, 512, 512], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([1, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([1, 512], i64), T([1, 512], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([1, 4, 512, 512], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 128], f16), T([1, 512], i64), 0), {}) +cnt: 1, ((T([2, 128], f16), T([1, 512], i64)), {}) +cnt: 1, ((T([512, 128], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 128], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([1, 512, 128], f16), T([1, 512], i64), 2, -1, False), {}) +cnt: 1, ((T([1, 512, 128], f16), T([1, 512], i64), 30522, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([1, 512, 1024], f16),), {}) +cnt: 1, ((T([1, 512, 128], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([1, 512, 128], f16), T([1, 512, 128], f16)), {}) +cnt: 12, ((T([1, 512, 1024], f16), T([1, 512, 1024], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([512, 30522], f16), T([30522, 128], f16)), {}) +cnt: 1, ((T([30522, 512], f16, stride=(1, 30522)), T([512, 128], f16)), {}) +cnt: 1, ((T([512, 128], f16), T([128, 256], f16)), {}) +cnt: 1, ((T([128, 512], f16, stride=(1, 128)), T([512, 256], f16)), {}) +cnt: 12, ((T([512, 256], f16), T([256, 1024], f16)), {}) +cnt: 12, ((T([256, 512], f16, stride=(1, 256)), T([512, 1024], f16)), {}) +cnt: 12, ((T([512, 1024], f16), T([1024, 256], f16)), {}) +cnt: 12, ((T([1024, 512], f16, stride=(1, 1024)), T([512, 256], f16)), {}) +cnt: 
36, ((T([512, 256], f16), T([256, 256], f16)), {}) +cnt: 36, ((T([256, 512], f16, stride=(1, 256)), T([512, 256], f16)), {}) +cnt: 12, ((T([512, 256], f16, stride=(1, 512)), T([256, 256], f16)), {}) +cnt: 12, ((T([256, 512], f16), T([512, 256], f16)), {}) +cnt: 1, ((T([512, 256], f16), T([256, 128], f16)), {}) +cnt: 1, ((T([256, 512], f16, stride=(1, 256)), T([512, 128], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([1, 1, 1, 512], f16), -65504.0), {}) +Operator: aten.native_layer_norm.default +cnt: 2, ((T([1, 512, 128], f16), [128], T([128], f16), T([128], f16), 1e-12), {}) +cnt: 24, ((T([1, 512, 256], f16), [256], T([256], f16), T([256], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 2, ((T([1, 512, 128], f16), T([1, 512, 128], f16), [128], T([1, 512, 1], f32), T([1, 512, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 24, ((T([1, 512, 256], f16), T([1, 512, 256], f16), [256], T([1, 512, 1], f32), T([1, 512, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([511, 30522], f16), T([511], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([511, 30522], f16), T([511], i64), None, 1, -100), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([1, 1, 1, 512], f16), 1.0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([1, 511, 30522], f16), [1, 511, 30522], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([1, 511, 30522], f16), [1, 512, 30522], 1, 0, -1, 1), {}) +cnt: 1, ((T([1, 512, 30522], f16), [1, 512, 30522], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([512, 30522], f16), [0], True), {}) +cnt: 1, ((T([512, 128], f16), [0], True), {}) +cnt: 49, ((T([512, 256], f16), [0], True), {}) +cnt: 12, ((T([512, 1024], f16), [0], True), {}) +cnt: 12, ((T([512, 256], f16, stride=(1, 512)), [0], True), {}) diff --git 
a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/ElectraForQuestionAnswering_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/ElectraForQuestionAnswering_training.txt new file mode 100644 index 000000000..c2e4a8beb --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/ElectraForQuestionAnswering_training.txt @@ -0,0 +1,94 @@ +Operator: aten._log_softmax.default +cnt: 2, ((T([64, 512], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 2, ((T([64, 512], f16), T([64, 512], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([64, 4, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([64, 4, 512, 512], f16), T([64, 4, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([64, 1, 1, 512], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([64, 4, 512, 64], f16), [256, 512, 64]), {}) +cnt: 12, ((T([64, 4, 64, 512], f16), [256, 64, 512]), {}) +cnt: 12, ((T([256, 512, 512], f16), [64, 4, 512, 512]), {}) +cnt: 12, ((T([256, 512, 64], f16), [64, 4, 512, 64]), {}) +cnt: 24, ((T([64, 512, 4, 64], f16), [64, 512, 256]), {}) +cnt: 12, ((T([64, 512, 256], f16), [32768, 256]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([64, 512, 128], f16), T([64, 512, 128], f16)), {}) +cnt: 12, ((T([64, 4, 512, 512], f16), T([64, 1, 1, 512], f16)), {}) +cnt: 72, ((T([64, 512, 256], f16), T([64, 512, 256], f16)), {}) +cnt: 1, ((T([], f16), T([], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([64, 512, 128], f16), T([1, 512, 128], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([256], f16), T([32768, 128], f16), T([128, 256], f16, stride=(1, 128))), {}) +cnt: 48, ((T([256], f16), T([32768, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 12, ((T([1024], f16), T([32768, 256], f16), T([256, 1024], f16, stride=(1, 256))), {}) +cnt: 12, ((T([256], f16), T([32768, 1024], f16), T([1024, 256], 
f16, stride=(1, 1024))), {}) +cnt: 1, ((T([2], f16), T([32768, 256], f16), T([256, 2], f16, stride=(1, 256))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([256, 512, 64], f16), T([256, 64, 512], f16)), {}) +cnt: 12, ((T([256, 512, 512], f16), T([256, 512, 64], f16)), {}) +cnt: 12, ((T([256, 512, 512], f16, stride=(262144, 1, 512)), T([256, 512, 64], f16)), {}) +cnt: 12, ((T([256, 512, 64], f16), T([256, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 12, ((T([256, 64, 512], f16, stride=(32768, 1, 64)), T([256, 512, 512], f16)), {}) +cnt: 12, ((T([256, 512, 512], f16), T([256, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 512, 1], f16), T([64, 512, 1], f16)], 2), {}) +Operator: aten.clamp.default +cnt: 2, ((T([64], i64), 0, 512), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 512], i64),), {}) +cnt: 2, ((T([64], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 512], i64), T([64, 512], i64)), {}) +cnt: 2, ((T([64], i64), T([64], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([64, 4, 512, 512], f16), 8.0), {}) +cnt: 2, ((T([], f16), 2), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 128], f16), T([64, 512], i64), 0), {}) +cnt: 1, ((T([2, 128], f16), T([64, 512], i64, stride=(0, 1))), {}) +cnt: 1, ((T([512, 128], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 128], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([64, 512, 128], f16), T([64, 512], i64, stride=(0, 1)), 2, -1, False), {}) +cnt: 1, ((T([64, 512, 128], f16), T([64, 512], i64), 30522, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([64, 512, 1024], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([64, 512, 1024], f16), T([64, 512, 1024], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([32768, 2], f16), T([2, 256], f16)), {}) +cnt: 1, ((T([2, 32768], f16, stride=(1, 2)), T([32768, 256], f16)), {}) +cnt: 12, ((T([32768, 256], f16), T([256, 
1024], f16)), {}) +cnt: 12, ((T([256, 32768], f16, stride=(1, 256)), T([32768, 1024], f16)), {}) +cnt: 12, ((T([32768, 1024], f16), T([1024, 256], f16)), {}) +cnt: 12, ((T([1024, 32768], f16, stride=(1, 1024)), T([32768, 256], f16)), {}) +cnt: 48, ((T([32768, 256], f16), T([256, 256], f16)), {}) +cnt: 48, ((T([256, 32768], f16, stride=(1, 256)), T([32768, 256], f16)), {}) +cnt: 1, ((T([32768, 256], f16), T([256, 128], f16)), {}) +cnt: 1, ((T([256, 32768], f16, stride=(1, 256)), T([32768, 128], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([64, 1, 1, 512], f16), -65504.0), {}) +Operator: aten.native_layer_norm.default +cnt: 1, ((T([64, 512, 128], f16), [128], T([128], f16), T([128], f16), 1e-12), {}) +cnt: 24, ((T([64, 512, 256], f16), [256], T([256], f16), T([256], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 24, ((T([64, 512, 256], f16), T([64, 512, 256], f16), [256], T([64, 512, 1], f32), T([64, 512, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +cnt: 1, ((T([64, 512, 128], f16), T([64, 512, 128], f16), [128], T([64, 512, 1], f32), T([64, 512, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 2, ((T([], f16), T([64, 512], f16), T([64], i64), None, 1, 512, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 2, ((T([64, 512], f16), T([64], i64), None, 1, 512), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([64, 1, 1, 512], f16), 1.0), {}) +Operator: aten.split.Tensor +cnt: 1, ((T([64, 512, 2], f16), 1, -1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32768, 2], f16), [0], True), {}) +cnt: 61, ((T([32768, 256], f16), [0], True), {}) +cnt: 12, ((T([32768, 1024], f16), [0], True), {}) +cnt: 1, ((T([64, 512, 128], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GPT2ForSequenceClassification_training.txt 
b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GPT2ForSequenceClassification_training.txt new file mode 100644 index 000000000..4be61bd96 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GPT2ForSequenceClassification_training.txt @@ -0,0 +1,106 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([4, 2], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([4, 2], f16), T([4, 2], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([4, 12, 1024, 1024], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([4, 12, 1024, 1024], f16), T([4, 12, 1024, 1024], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 12, ((T([1, 1, 1024, 1024], u8),), {'dtype': torch.bool}) +cnt: 12, ((T([], f16),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([4, 12, 1024, 64], f16), [48, 1024, 64]), {}) +cnt: 12, ((T([4, 12, 64, 1024], f16), [48, 64, 1024]), {}) +cnt: 12, ((T([48, 1024, 1024], f16), [4, 12, 1024, 1024]), {}) +cnt: 12, ((T([48, 1024, 64], f16), [4, 12, 1024, 64]), {}) +cnt: 1, ((T([4096, 2], f16), [4, 1024, 2]), {}) +cnt: 24, ((T([4, 1024, 12, 64], f16), [4, 1024, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([4, 1024, 768], f16), T([1, 1024, 768], f16)), {}) +cnt: 48, ((T([4, 1024, 768], f16), T([4, 1024, 768], f16)), {}) +cnt: 36, ((T([4, 1024, 3072], f16), T([4, 1024, 3072], f16)), {}) +cnt: 12, ((T([4, 1024, 3072], f16), 1.0), {}) +Operator: aten.addmm.default +cnt: 12, ((T([2304], f16), T([4096, 768], f16), T([768, 2304], f16)), {}) +cnt: 12, ((T([768], f16), T([4096, 768], f16), T([768, 768], f16)), {}) +cnt: 12, ((T([3072], f16), T([4096, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768], f16), T([4096, 3072], f16), T([3072, 768], f16)), {}) +Operator: aten.bmm.default +cnt: 12, ((T([48, 1024, 64], f16), T([48, 64, 1024], f16)), {}) +cnt: 12, ((T([48, 1024, 1024], f16), 
T([48, 1024, 64], f16)), {}) +cnt: 12, ((T([48, 1024, 1024], f16, stride=(1048576, 1, 1024)), T([48, 1024, 64], f16)), {}) +cnt: 12, ((T([48, 1024, 64], f16), T([48, 64, 1024], f16, stride=(65536, 1, 64))), {}) +cnt: 12, ((T([48, 64, 1024], f16, stride=(65536, 1, 64)), T([48, 1024, 1024], f16)), {}) +cnt: 12, ((T([48, 1024, 1024], f16), T([48, 1024, 64], f16, stride=(65536, 1, 1024))), {}) +Operator: aten.cat.default +cnt: 12, (([T([4, 1024, 768], f16), T([4, 1024, 768], f16, stride=(786432, 1, 1024)), T([4, 1024, 768], f16)], 2), {}) +Operator: aten.clone.default +cnt: 1, ((T([4, 1024], i64),), {}) +cnt: 1, ((T([4], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([4, 1024], i64), T([4, 1024], i64)), {}) +cnt: 1, ((T([4], i64), T([4], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([4, 12, 1024, 1024], f16), T([], f16)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50257, 768], f16), T([4, 1024], i64)), {}) +cnt: 1, ((T([1024, 768], f16), T([1, 1024], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024], i64), 1024, -1, False), {}) +cnt: 1, ((T([4, 1024, 768], f16), T([4, 1024], i64), 50257, -1, False), {}) +Operator: aten.index.Tensor +cnt: 1, ((T([4, 1024, 2], f16), [T([4], i64), T([4], i64)]), {}) +Operator: aten.index_put.default +cnt: 1, ((T([4, 1024, 2], f16), [T([4], i64), T([4], i64)], T([4, 2], f16), True), {}) +Operator: aten.mm.default +cnt: 1, ((T([4096, 768], f16), T([768, 2], f16, stride=(1, 768))), {}) +cnt: 1, ((T([2, 4096], f16, stride=(1, 2)), T([4096, 768], f16)), {}) +cnt: 1, ((T([4096, 2], f16), T([2, 768], f16)), {}) +cnt: 12, ((T([4096, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072, 4096], f16, stride=(1, 3072)), T([4096, 768], f16)), {}) +cnt: 12, ((T([4096, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 12, ((T([768, 4096], f16, stride=(1, 768)), T([4096, 3072], f16)), {}) +cnt: 12, ((T([4096, 768], f16), T([768, 768], f16, 
stride=(1, 768))), {}) +cnt: 12, ((T([768, 4096], f16, stride=(1, 768)), T([4096, 768], f16)), {}) +cnt: 12, ((T([4096, 2304], f16), T([2304, 768], f16, stride=(1, 2304))), {}) +cnt: 12, ((T([768, 4096], f16, stride=(1, 768)), T([4096, 2304], f16)), {}) +Operator: aten.mul.Scalar +cnt: 12, ((T([4, 1024, 3072], f16), 3.0), {}) +Operator: aten.mul.Tensor +cnt: 24, ((T([4, 1024, 3072], f16), 0.5), {}) +cnt: 24, ((T([4, 1024, 3072], f16), 0.044715), {}) +cnt: 24, ((T([4, 1024, 3072], f16), 0.7978845608028654), {}) +cnt: 48, ((T([4, 1024, 3072], f16), T([4, 1024, 3072], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([4, 1024, 768], f16), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([4, 1024, 768], f16), T([4, 1024, 768], f16), [768], T([4, 1024, 1], f32), T([4, 1024, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([4, 1024], i64), 0), {}) +Operator: aten.new_zeros.default +cnt: 1, ((T([4, 2], f16), [4, 1024, 2]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([4, 2], f16), T([4], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([4, 2], f16), T([4], i64), None, 1, -100), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 12, ((T([4, 1024, 3072], f16), 3.0), {}) +cnt: 12, ((T([4, 1024, 3072], f16), 2.0), {}) +Operator: aten.split.Tensor +cnt: 12, ((T([4, 1024, 2304], f16), 768, 2), {}) +Operator: aten.sub.Tensor +cnt: 1, ((T([4], i64), 1), {}) +Operator: aten.sum.SymInt +cnt: 24, ((T([4096, 768], f16), [0], True), {}) +cnt: 12, ((T([4096, 3072], f16), [0], True), {}) +cnt: 12, ((T([4096, 2304], f16), [0], True), {}) +cnt: 1, ((T([4, 1024, 768], f16), [0], True), {}) +Operator: aten.sum.dim_IntList +cnt: 1, ((T([4, 1024], b8), [-1]), {}) +Operator: aten.tanh.default +cnt: 12, ((T([4, 1024, 3072], f16),), {}) 
+Operator: aten.tanh_backward.default +cnt: 12, ((T([4, 1024, 3072], f16), T([4, 1024, 3072], f16)), {}) +Operator: aten.where.self +cnt: 24, ((T([1, 1, 1024, 1024], b8), T([4, 12, 1024, 1024], f16), T([], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GPTNeoForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GPTNeoForCausalLM_training.txt new file mode 100644 index 000000000..013350f4b --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GPTNeoForCausalLM_training.txt @@ -0,0 +1,96 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([127, 50257], f32), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([127, 50257], f32), T([127, 50257], f32), 1, f32), {}) +Operator: aten._softmax.default +cnt: 24, ((T([1, 16, 128, 128], f32), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([1, 16, 128, 128], f32), T([1, 16, 128, 128], f32), -1, f32), {}) +Operator: aten._to_copy.default +cnt: 48, ((T([1, 16, 128, 128], f16, stride=(262144, 128, 2048, 1)),), {'dtype': f32}) +cnt: 24, ((T([1, 1, 128, 128], u8, stride=(4194304, 4194304, 2048, 1)),), {'dtype': torch.bool}) +cnt: 24, ((T([], f32),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 24, ((T([1, 16, 128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([1, 128, 50257], f16),), {'dtype': f32}) +cnt: 1, ((T([1, 128, 50257], f32),), {'dtype': f16}) +cnt: 1, ((T([], f32),), {'dtype': f16}) +cnt: 1, ((T([], f16),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([1, 128, 50257], f32),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 24, ((T([1, 16, 128, 128], f16),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 24, ((T([1, 16, 128, 128], f32, stride=(262144, 16384, 1, 128)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 24, ((T([1, 16, 128, 128], f32),), {'dtype': f16, 'layout': 
torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 72, ((T([128, 2048], f16), [1, 128, 2048]), {}) +cnt: 24, ((T([16, 128, 128], f32), [1, 16, 128, 128]), {}) +cnt: 24, ((T([16, 128, 128], f16), [1, 16, 128, 128]), {}) +cnt: 1, ((T([128, 50257], f16), [1, 128, 50257]), {}) +cnt: 48, ((T([1, 128, 16, 128], f16), [1, 128, 2048]), {}) +Operator: aten.add.Tensor +cnt: 145, ((T([1, 128, 2048], f16), T([1, 128, 2048], f16)), {}) +cnt: 72, ((T([1, 128, 8192], f16), T([1, 128, 8192], f16)), {}) +cnt: 24, ((T([1, 128, 8192], f16), 1.0), {}) +cnt: 1, ((T([50257, 2048], f16), T([50257, 2048], f16)), {}) +Operator: aten.addmm.default +cnt: 24, ((T([2048], f16), T([128, 2048], f16), T([2048, 2048], f16, stride=(1, 2048))), {}) +cnt: 24, ((T([8192], f16), T([128, 2048], f16), T([2048, 8192], f16, stride=(1, 2048))), {}) +cnt: 24, ((T([2048], f16), T([128, 8192], f16), T([8192, 2048], f16, stride=(1, 8192))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([16, 128, 128], f32, stride=(128, 2048, 1)), T([16, 128, 128], f32, stride=(128, 1, 2048))), {}) +cnt: 24, ((T([16, 128, 128], f16), T([16, 128, 128], f16, stride=(128, 2048, 1))), {}) +cnt: 24, ((T([16, 128, 128], f16, stride=(16384, 1, 128)), T([16, 128, 128], f16, stride=(128, 2048, 1))), {}) +cnt: 24, ((T([16, 128, 128], f16, stride=(128, 2048, 1)), T([16, 128, 128], f16, stride=(128, 1, 2048))), {}) +cnt: 24, ((T([16, 128, 128], f32, stride=(128, 1, 2048)), T([16, 128, 128], f32)), {}) +cnt: 24, ((T([16, 128, 128], f32), T([16, 128, 128], f32, stride=(128, 2048, 1))), {}) +Operator: aten.clone.default +cnt: 2, ((T([1, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([1, 128], i64), T([1, 128], i64)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50257, 2048], f16), T([1, 128], i64)), {}) +cnt: 1, ((T([2048, 2048], f16), T([1, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 128, 2048], f16), T([1, 128], i64), 2048, -1, False), {}) +cnt: 1, ((T([1, 
128, 2048], f16), T([1, 128], i64), 50257, -1, False), {}) +Operator: aten.mm.default +cnt: 72, ((T([128, 2048], f16), T([2048, 2048], f16, stride=(1, 2048))), {}) +cnt: 1, ((T([128, 2048], f16), T([2048, 50257], f16, stride=(1, 2048))), {}) +cnt: 1, ((T([50257, 128], f16, stride=(1, 50257)), T([128, 2048], f16)), {}) +cnt: 1, ((T([128, 50257], f16), T([50257, 2048], f16)), {}) +cnt: 24, ((T([128, 2048], f16), T([2048, 8192], f16)), {}) +cnt: 24, ((T([2048, 128], f16, stride=(1, 2048)), T([128, 8192], f16)), {}) +cnt: 24, ((T([128, 8192], f16), T([8192, 2048], f16)), {}) +cnt: 24, ((T([8192, 128], f16, stride=(1, 8192)), T([128, 2048], f16)), {}) +cnt: 72, ((T([128, 2048], f16), T([2048, 2048], f16)), {}) +cnt: 72, ((T([2048, 128], f16, stride=(1, 2048)), T([128, 2048], f16)), {}) +cnt: 24, ((T([2048, 128], f16), T([128, 2048], f16)), {}) +cnt: 24, ((T([128, 2048], f16, stride=(1, 128)), T([2048, 2048], f16)), {}) +Operator: aten.mul.Scalar +cnt: 24, ((T([1, 128, 8192], f16), 3.0), {}) +Operator: aten.mul.Tensor +cnt: 48, ((T([1, 128, 8192], f16), 0.5), {}) +cnt: 48, ((T([1, 128, 8192], f16), 0.044715), {}) +cnt: 48, ((T([1, 128, 8192], f16), 0.7978845608028654), {}) +cnt: 96, ((T([1, 128, 8192], f16), T([1, 128, 8192], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 49, ((T([1, 128, 2048], f16), [2048], T([2048], f16), T([2048], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 49, ((T([1, 128, 2048], f16), T([1, 128, 2048], f16), [2048], T([1, 128, 1], f32), T([1, 128, 1], f32), T([2048], f16), T([2048], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f32), T([127, 50257], f32), T([127], i64), None, 1, -100, T([], f32)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([127, 50257], f32), T([127], i64), None, 1, -100), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 24, ((T([1, 128, 8192], f16), 3.0), {}) +cnt: 24, ((T([1, 128, 8192], f16), 2.0), {}) +Operator: 
aten.slice_backward.default +cnt: 1, ((T([1, 127, 50257], f32), [1, 127, 50257], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([1, 127, 50257], f32), [1, 128, 50257], 1, 0, -1, 1), {}) +Operator: aten.sum.SymInt +cnt: 48, ((T([128, 2048], f16), [0], True), {}) +cnt: 24, ((T([128, 8192], f16), [0], True), {}) +Operator: aten.tanh.default +cnt: 24, ((T([1, 128, 8192], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 24, ((T([1, 128, 8192], f16), T([1, 128, 8192], f16)), {}) +Operator: aten.where.self +cnt: 48, ((T([1, 1, 128, 128], b8), T([1, 16, 128, 128], f32), T([], f32)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GPTNeoForSequenceClassification_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GPTNeoForSequenceClassification_training.txt new file mode 100644 index 000000000..a537c2d6c --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GPTNeoForSequenceClassification_training.txt @@ -0,0 +1,101 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([1, 2], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([1, 2], f16), T([1, 2], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([1, 16, 128, 128], f32), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([1, 16, 128, 128], f32), T([1, 16, 128, 128], f32), -1, f32), {}) +Operator: aten._to_copy.default +cnt: 48, ((T([1, 16, 128, 128], f16, stride=(262144, 128, 2048, 1)),), {'dtype': f32}) +cnt: 24, ((T([1, 1, 128, 128], u8, stride=(4194304, 4194304, 2048, 1)),), {'dtype': torch.bool}) +cnt: 24, ((T([], f32),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 24, ((T([1, 16, 128, 128], f32),), {'dtype': f16}) +cnt: 24, ((T([1, 16, 128, 128], f16),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 24, ((T([1, 16, 128, 128], f32, stride=(262144, 16384, 1, 128)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) 
+cnt: 24, ((T([1, 16, 128, 128], f32),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 72, ((T([128, 2048], f16), [1, 128, 2048]), {}) +cnt: 24, ((T([16, 128, 128], f32), [1, 16, 128, 128]), {}) +cnt: 24, ((T([16, 128, 128], f16), [1, 16, 128, 128]), {}) +cnt: 1, ((T([128, 2], f16), [1, 128, 2]), {}) +cnt: 48, ((T([1, 128, 16, 128], f16), [1, 128, 2048]), {}) +Operator: aten.add.Tensor +cnt: 145, ((T([1, 128, 2048], f16), T([1, 128, 2048], f16)), {}) +cnt: 72, ((T([1, 128, 8192], f16), T([1, 128, 8192], f16)), {}) +cnt: 24, ((T([1, 128, 8192], f16), 1.0), {}) +Operator: aten.addmm.default +cnt: 24, ((T([2048], f16), T([128, 2048], f16), T([2048, 2048], f16, stride=(1, 2048))), {}) +cnt: 24, ((T([8192], f16), T([128, 2048], f16), T([2048, 8192], f16, stride=(1, 2048))), {}) +cnt: 24, ((T([2048], f16), T([128, 8192], f16), T([8192, 2048], f16, stride=(1, 8192))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([16, 128, 128], f32, stride=(128, 2048, 1)), T([16, 128, 128], f32, stride=(128, 1, 2048))), {}) +cnt: 24, ((T([16, 128, 128], f16), T([16, 128, 128], f16, stride=(128, 2048, 1))), {}) +cnt: 24, ((T([16, 128, 128], f16, stride=(16384, 1, 128)), T([16, 128, 128], f16, stride=(128, 2048, 1))), {}) +cnt: 24, ((T([16, 128, 128], f16, stride=(128, 2048, 1)), T([16, 128, 128], f16, stride=(128, 1, 2048))), {}) +cnt: 24, ((T([16, 128, 128], f32, stride=(128, 1, 2048)), T([16, 128, 128], f32)), {}) +cnt: 24, ((T([16, 128, 128], f32), T([16, 128, 128], f32, stride=(128, 2048, 1))), {}) +Operator: aten.clone.default +cnt: 1, ((T([1, 128], i64),), {}) +cnt: 1, ((T([1], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([1, 128], i64), T([1, 128], i64)), {}) +cnt: 1, ((T([1], i64), T([1], i64)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50257, 2048], f16), T([1, 128], i64)), {}) +cnt: 1, ((T([2048, 2048], f16), T([1, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 128, 
2048], f16), T([1, 128], i64), 2048, -1, False), {}) +cnt: 1, ((T([1, 128, 2048], f16), T([1, 128], i64), 50257, -1, False), {}) +Operator: aten.index.Tensor +cnt: 1, ((T([1, 128, 2], f16), [T([1], i64), T([1], i64)]), {}) +Operator: aten.index_put.default +cnt: 1, ((T([1, 128, 2], f16), [T([1], i64), T([1], i64)], T([1, 2], f16), True), {}) +Operator: aten.mm.default +cnt: 72, ((T([128, 2048], f16), T([2048, 2048], f16, stride=(1, 2048))), {}) +cnt: 1, ((T([128, 2048], f16), T([2048, 2], f16, stride=(1, 2048))), {}) +cnt: 1, ((T([2, 128], f16, stride=(1, 2)), T([128, 2048], f16)), {}) +cnt: 1, ((T([128, 2], f16), T([2, 2048], f16)), {}) +cnt: 24, ((T([128, 2048], f16), T([2048, 8192], f16)), {}) +cnt: 24, ((T([2048, 128], f16, stride=(1, 2048)), T([128, 8192], f16)), {}) +cnt: 24, ((T([128, 8192], f16), T([8192, 2048], f16)), {}) +cnt: 24, ((T([8192, 128], f16, stride=(1, 8192)), T([128, 2048], f16)), {}) +cnt: 72, ((T([128, 2048], f16), T([2048, 2048], f16)), {}) +cnt: 72, ((T([2048, 128], f16, stride=(1, 2048)), T([128, 2048], f16)), {}) +cnt: 24, ((T([2048, 128], f16), T([128, 2048], f16)), {}) +cnt: 24, ((T([128, 2048], f16, stride=(1, 128)), T([2048, 2048], f16)), {}) +Operator: aten.mul.Scalar +cnt: 24, ((T([1, 128, 8192], f16), 3.0), {}) +Operator: aten.mul.Tensor +cnt: 48, ((T([1, 128, 8192], f16), 0.5), {}) +cnt: 48, ((T([1, 128, 8192], f16), 0.044715), {}) +cnt: 48, ((T([1, 128, 8192], f16), 0.7978845608028654), {}) +cnt: 96, ((T([1, 128, 8192], f16), T([1, 128, 8192], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 49, ((T([1, 128, 2048], f16), [2048], T([2048], f16), T([2048], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 49, ((T([1, 128, 2048], f16), T([1, 128, 2048], f16), [2048], T([1, 128, 1], f32), T([1, 128, 1], f32), T([2048], f16), T([2048], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([1, 128], i64), 0), {}) +Operator: aten.new_zeros.default +cnt: 1, ((T([1, 2], f16), [1, 128, 
2]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([1, 2], f16), T([1], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([1, 2], f16), T([1], i64), None, 1, -100), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 24, ((T([1, 128, 8192], f16), 3.0), {}) +cnt: 24, ((T([1, 128, 8192], f16), 2.0), {}) +Operator: aten.sub.Tensor +cnt: 1, ((T([1], i64), 1), {}) +Operator: aten.sum.SymInt +cnt: 48, ((T([128, 2048], f16), [0], True), {}) +cnt: 24, ((T([128, 8192], f16), [0], True), {}) +Operator: aten.sum.dim_IntList +cnt: 1, ((T([1, 128], b8), [-1]), {}) +Operator: aten.tanh.default +cnt: 24, ((T([1, 128, 8192], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 24, ((T([1, 128, 8192], f16), T([1, 128, 8192], f16)), {}) +Operator: aten.where.self +cnt: 48, ((T([1, 1, 128, 128], b8), T([1, 16, 128, 128], f32), T([], f32)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GoogleFnet_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GoogleFnet_training.txt new file mode 100644 index 000000000..c234ce838 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/GoogleFnet_training.txt @@ -0,0 +1,83 @@ +Operator: aten._fft_c2c.default +cnt: 12, ((T([1, 512, 768], c32), [1, 2], 0, True), {}) +cnt: 12, ((T([1, 512, 768], c32), [1, 2], 0, False), {}) +Operator: aten._log_softmax.default +cnt: 1, ((T([512, 32000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([512, 32000], f16), T([512, 32000], f16), 1, f16), {}) +Operator: aten._to_copy.default +cnt: 12, ((T([1, 512, 768], f16),), {'dtype': c32}) +Operator: aten.add.Tensor +cnt: 28, ((T([1, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 24, ((T([1, 512, 768], f16), T([1, 512, 768], f16, stride=(786432, 1536, 2))), {}) +cnt: 36, ((T([1, 512, 3072], f16), T([1, 512, 3072], f16)), {}) +cnt: 12, ((T([1, 
512, 3072], f16), 1.0), {}) +cnt: 1, ((T([1, 512, 768], f16), 1.0), {}) +cnt: 1, ((T([32000, 768], f16), T([32000, 768], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([1, 512, 768], f16), T([1, 512, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 2, ((T([768], f16), T([512, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([512, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([512, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([768], f16), T([1, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 1, ((T([32000], f16), T([512, 768], f16), T([768, 32000], f16, stride=(1, 768))), {}) +Operator: aten.clone.default +cnt: 2, ((T([1, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([1, 512], i64), T([1, 512], i64)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([32000, 768], f16), T([1, 512], i64), 3), {}) +cnt: 1, ((T([4, 768], f16), T([1, 512], i64)), {}) +cnt: 1, ((T([512, 768], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 4, -1, False), {}) +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 32000, 3, False), {}) +Operator: aten.mm.default +cnt: 1, ((T([512, 32000], f16), T([32000, 768], f16)), {}) +cnt: 1, ((T([32000, 512], f16, stride=(1, 32000)), T([512, 768], f16)), {}) +cnt: 2, ((T([512, 768], f16), T([768, 768], f16)), {}) +cnt: 2, ((T([768, 512], f16, stride=(1, 768)), T([512, 768], f16)), {}) +cnt: 12, ((T([512, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 512], f16, stride=(1, 768)), T([512, 3072], f16)), {}) +cnt: 12, ((T([512, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 512], f16, stride=(1, 3072)), T([512, 768], f16)), {}) +Operator: aten.mul.Scalar +cnt: 1, ((T([1, 512, 768], f16), 3.0), {}) +cnt: 12, ((T([1, 512, 3072], f16), 3.0), {}) 
+Operator: aten.mul.Tensor +cnt: 24, ((T([1, 512, 3072], f16), 0.5), {}) +cnt: 24, ((T([1, 512, 3072], f16), 0.044715), {}) +cnt: 24, ((T([1, 512, 3072], f16), 0.7978845608028654), {}) +cnt: 48, ((T([1, 512, 3072], f16), T([1, 512, 3072], f16)), {}) +cnt: 2, ((T([1, 512, 768], f16), 0.5), {}) +cnt: 2, ((T([1, 512, 768], f16), 0.044715), {}) +cnt: 2, ((T([1, 512, 768], f16), 0.7978845608028654), {}) +cnt: 4, ((T([1, 512, 768], f16), T([1, 512, 768], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([1, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([1, 512, 768], f16), T([1, 512, 768], f16), [768], T([1, 512, 1], f32), T([1, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([512, 32000], f16), T([512], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([512, 32000], f16), T([512], i64), None, 1, -100), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 12, ((T([1, 512, 3072], f16), 3.0), {}) +cnt: 1, ((T([1, 512, 768], f16), 3.0), {}) +cnt: 1, ((T([1, 512, 768], f16), 2.0), {}) +cnt: 12, ((T([1, 512, 3072], f16), 2.0), {}) +Operator: aten.select_backward.default +cnt: 12, ((T([1, 512, 768], f16), [1, 512, 768, 2], 3, 0), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([512, 32000], f16), [0], True), {}) +cnt: 14, ((T([512, 768], f16), [0], True), {}) +cnt: 12, ((T([512, 3072], f16), [0], True), {}) +Operator: aten.tanh.default +cnt: 12, ((T([1, 512, 3072], f16),), {}) +cnt: 1, ((T([1, 768], f16),), {}) +cnt: 1, ((T([1, 512, 768], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 12, ((T([1, 512, 3072], f16), T([1, 512, 3072], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/LayoutLMForMaskedLM_training.txt 
b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/LayoutLMForMaskedLM_training.txt new file mode 100644 index 000000000..e10fea336 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/LayoutLMForMaskedLM_training.txt @@ -0,0 +1,90 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([8192, 30522], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([8192, 30522], f16), T([8192, 30522], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([16, 12, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([16, 12, 512, 512], f16), T([16, 12, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([16, 1, 1, 512], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([16, 12, 512, 64], f16), [192, 512, 64]), {}) +cnt: 12, ((T([16, 12, 64, 512], f16), [192, 64, 512]), {}) +cnt: 12, ((T([192, 512, 512], f16), [16, 12, 512, 512]), {}) +cnt: 12, ((T([192, 512, 64], f16), [16, 12, 512, 64]), {}) +cnt: 24, ((T([16, 512, 12, 64], f16), [16, 512, 768]), {}) +cnt: 12, ((T([16, 512, 768], f16), [8192, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([16, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 79, ((T([16, 512, 768], f16), T([16, 512, 768], f16)), {}) +cnt: 12, ((T([16, 12, 512, 512], f16), T([16, 1, 1, 512], f16)), {}) +cnt: 2, ((T([1024, 768], f16), T([1024, 768], f16)), {}) +cnt: 1, ((T([30522, 768], f16), T([30522, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 49, ((T([768], f16), T([8192, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([8192, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([8192, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([768], f16), T([16, 768], f16, stride=(393216, 1)), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 1, ((T([30522], f16), T([8192, 768], f16), T([768, 30522], f16, 
stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([192, 512, 64], f16), T([192, 64, 512], f16)), {}) +cnt: 12, ((T([192, 512, 512], f16), T([192, 512, 64], f16)), {}) +cnt: 12, ((T([192, 512, 512], f16, stride=(262144, 1, 512)), T([192, 512, 64], f16)), {}) +cnt: 12, ((T([192, 512, 64], f16), T([192, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 12, ((T([192, 64, 512], f16, stride=(32768, 1, 64)), T([192, 512, 512], f16)), {}) +cnt: 12, ((T([192, 512, 512], f16), T([192, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.clone.default +cnt: 2, ((T([16, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([16, 512], i64), T([16, 512], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([16, 12, 512, 512], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 768], f16), T([16, 512], i64), 0), {}) +cnt: 1, ((T([512, 768], f16), T([1, 512], i64)), {}) +cnt: 4, ((T([1024, 768], f16), T([16, 512], i64, stride=(2048, 4))), {}) +cnt: 2, ((T([1024, 768], f16), T([16, 512], i64)), {}) +cnt: 1, ((T([2, 768], f16), T([16, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([16, 512, 768], f16), T([16, 512], i64), 2, -1, False), {}) +cnt: 2, ((T([16, 512, 768], f16), T([16, 512], i64), 1024, -1, False), {}) +cnt: 4, ((T([16, 512, 768], f16), T([16, 512], i64, stride=(2048, 4)), 1024, -1, False), {}) +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([16, 512, 768], f16), T([16, 512], i64), 30522, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([16, 512, 3072], f16),), {}) +cnt: 1, ((T([16, 512, 768], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([16, 512, 768], f16), T([16, 512, 768], f16)), {}) +cnt: 12, ((T([16, 512, 3072], f16), T([16, 512, 3072], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([8192, 30522], f16), T([30522, 768], f16)), {}) +cnt: 1, ((T([30522, 8192], f16, stride=(1, 30522)), T([8192, 768], f16)), {}) +cnt: 49, 
((T([8192, 768], f16), T([768, 768], f16)), {}) +cnt: 49, ((T([768, 8192], f16, stride=(1, 768)), T([8192, 768], f16)), {}) +cnt: 12, ((T([8192, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 8192], f16, stride=(1, 768)), T([8192, 3072], f16)), {}) +cnt: 12, ((T([8192, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 8192], f16, stride=(1, 3072)), T([8192, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([16, 1, 1, 512], f16), -65504.0), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([16, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([16, 512, 768], f16), T([16, 512, 768], f16), [768], T([16, 512, 1], f32), T([16, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([8192, 30522], f16), T([8192], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([8192, 30522], f16), T([8192], i64), None, 1, -100), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([16, 1, 1, 512], f16), 1.0), {}) +Operator: aten.sub.Tensor +cnt: 2, ((T([16, 512], i64, stride=(2048, 4)), T([16, 512], i64, stride=(2048, 4))), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([8192, 30522], f16), [0], True), {}) +cnt: 61, ((T([8192, 768], f16), [0], True), {}) +cnt: 12, ((T([8192, 3072], f16), [0], True), {}) +cnt: 1, ((T([16, 512, 768], f16), [0], True), {}) +Operator: aten.tanh.default +cnt: 1, ((T([16, 768], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/LayoutLMForSequenceClassification_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/LayoutLMForSequenceClassification_training.txt new file mode 100644 index 000000000..3d06f1496 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/LayoutLMForSequenceClassification_training.txt @@ -0,0 +1,98 @@ +Operator: 
aten._log_softmax.default +cnt: 1, ((T([16, 2], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([16, 2], f16), T([16, 2], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([16, 12, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([16, 12, 512, 512], f16), T([16, 12, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([16, 1, 1, 512], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([16, 12, 512, 64], f16), [192, 512, 64]), {}) +cnt: 12, ((T([16, 12, 64, 512], f16), [192, 64, 512]), {}) +cnt: 12, ((T([192, 512, 512], f16), [16, 12, 512, 512]), {}) +cnt: 12, ((T([192, 512, 64], f16), [16, 12, 512, 64]), {}) +cnt: 24, ((T([16, 512, 12, 64], f16), [16, 512, 768]), {}) +cnt: 12, ((T([16, 512, 768], f16), [8192, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([16, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 79, ((T([16, 512, 768], f16), T([16, 512, 768], f16)), {}) +cnt: 12, ((T([16, 12, 512, 512], f16), T([16, 1, 1, 512], f16)), {}) +cnt: 2, ((T([1024, 768], f16), T([1024, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([768], f16), T([8192, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([8192, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([8192, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([768], f16), T([16, 768], f16, stride=(393216, 1)), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 1, ((T([2], f16), T([16, 768], f16), T([768, 2], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([192, 512, 64], f16), T([192, 64, 512], f16)), {}) +cnt: 12, ((T([192, 512, 512], f16), T([192, 512, 64], f16)), {}) +cnt: 12, ((T([192, 512, 512], f16, stride=(262144, 1, 512)), T([192, 512, 64], f16)), {}) +cnt: 12, ((T([192, 512, 64], f16), T([192, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 
12, ((T([192, 64, 512], f16, stride=(32768, 1, 64)), T([192, 512, 512], f16)), {}) +cnt: 12, ((T([192, 512, 512], f16), T([192, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.clone.default +cnt: 1, ((T([16, 512], i64),), {}) +cnt: 1, ((T([16], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([16, 512], i64), T([16, 512], i64)), {}) +cnt: 1, ((T([16], i64), T([16], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([16, 12, 512, 512], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 768], f16), T([16, 512], i64), 0), {}) +cnt: 1, ((T([512, 768], f16), T([1, 512], i64)), {}) +cnt: 4, ((T([1024, 768], f16), T([16, 512], i64, stride=(2048, 4))), {}) +cnt: 2, ((T([1024, 768], f16), T([16, 512], i64)), {}) +cnt: 1, ((T([2, 768], f16), T([16, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([16, 512, 768], f16), T([16, 512], i64), 2, -1, False), {}) +cnt: 2, ((T([16, 512, 768], f16), T([16, 512], i64), 1024, -1, False), {}) +cnt: 4, ((T([16, 512, 768], f16), T([16, 512], i64, stride=(2048, 4)), 1024, -1, False), {}) +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([16, 512, 768], f16), T([16, 512], i64), 30522, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([16, 512, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([16, 512, 3072], f16), T([16, 512, 3072], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([16, 2], f16), T([2, 768], f16)), {}) +cnt: 1, ((T([2, 16], f16, stride=(1, 2)), T([16, 768], f16)), {}) +cnt: 1, ((T([16, 768], f16), T([768, 768], f16)), {}) +cnt: 1, ((T([768, 16], f16, stride=(1, 768)), T([16, 768], f16, stride=(393216, 1))), {}) +cnt: 12, ((T([8192, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 8192], f16, stride=(1, 768)), T([8192, 3072], f16)), {}) +cnt: 12, ((T([8192, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 8192], f16, stride=(1, 3072)), T([8192, 768], f16)), {}) +cnt: 48, 
((T([8192, 768], f16), T([768, 768], f16)), {}) +cnt: 48, ((T([768, 8192], f16, stride=(1, 768)), T([8192, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([16, 1, 1, 512], f16), -65504.0), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([16, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([16, 512, 768], f16), T([16, 512, 768], f16), [768], T([16, 512, 1], f32), T([16, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([16, 2], f16), T([16], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([16, 2], f16), T([16], i64), None, 1, -100), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([16, 1, 1, 512], f16), 1.0), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([16, 768], f16), [16, 512, 768], 1, 0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([16, 512, 768], f16), [16, 512, 768], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.sub.Tensor +cnt: 2, ((T([16, 512], i64, stride=(2048, 4)), T([16, 512], i64, stride=(2048, 4))), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([16, 2], f16), [0], True), {}) +cnt: 1, ((T([16, 768], f16), [0], True), {}) +cnt: 60, ((T([8192, 768], f16), [0], True), {}) +cnt: 12, ((T([8192, 3072], f16), [0], True), {}) +cnt: 1, ((T([16, 512, 768], f16), [0], True), {}) +Operator: aten.tanh.default +cnt: 1, ((T([16, 768], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 1, ((T([16, 768], f16), T([16, 768], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/M2M100ForConditionalGeneration_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/M2M100ForConditionalGeneration_training.txt new file mode 100644 index 000000000..bafa9de2d --- /dev/null +++ 
b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/M2M100ForConditionalGeneration_training.txt @@ -0,0 +1,88 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([256, 128112], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([256, 128112], f16), T([256, 128112], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 36, ((T([32, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 36, ((T([32, 128, 128], f16), T([32, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 2, ((T([2, 128], b8),), {'dtype': i32}) +cnt: 2, ((T([2, 128], i64),), {'dtype': i32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 2, ((T([2, 128], i32),), {'dtype': i64}) +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([2, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 108, ((T([2, 128, 16, 64], f16), [2, 128, 1024]), {}) +cnt: 1, ((T([256, 128112], f16), [2, 128, 128112]), {}) +cnt: 36, ((T([2, 16, 128, 64], f16), [32, 128, 64]), {}) +cnt: 36, ((T([2, 128, 1024], f16), [256, 1024]), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([2, 128], i32), 0), {}) +cnt: 2, ((T([2, 128], i64), 1), {}) +cnt: 193, ((T([2, 128, 1024], f16), T([2, 128, 1024], f16)), {}) +cnt: 1, ((T([128], i64), 1), {}) +cnt: 12, ((T([2, 16, 128, 128], f16), T([2, 1, 128, 128], f16)), {}) +cnt: 2, ((T([128112, 1024], f16), T([128112, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 144, ((T([1024], f16), T([256, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([4096], f16), T([256, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([1024], f16), T([256, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +Operator: aten.any.default +cnt: 24, ((T([2, 128, 1024], b8),), {}) +Operator: aten.bmm.default +cnt: 72, ((T([32, 128, 64], f16), T([32, 64, 128], f16, 
stride=(8192, 1, 64))), {}) +cnt: 72, ((T([32, 128, 128], f16), T([32, 128, 64], f16)), {}) +cnt: 36, ((T([32, 128, 128], f16, stride=(16384, 1, 128)), T([32, 128, 64], f16)), {}) +cnt: 36, ((T([32, 64, 128], f16, stride=(8192, 1, 64)), T([32, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 3, ((T([2, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 3, ((T([2, 128], i64), T([2, 128], i64)), {}) +Operator: aten.cumsum.default +cnt: 2, ((T([2, 128], i32), 1), {}) +Operator: aten.embedding.default +cnt: 2, ((T([128112, 1024], f16), T([2, 128], i64), 1), {}) +Operator: aten.embedding_dense_backward.default +cnt: 2, ((T([2, 128, 1024], f16), T([2, 128], i64), 128112, 1, False), {}) +Operator: aten.index_select.default +cnt: 2, ((T([1026, 1024], f16), 0, T([256], i64)), {}) +Operator: aten.isinf.default +cnt: 12, ((T([2, 128, 1024], f16),), {}) +Operator: aten.isnan.default +cnt: 12, ((T([2, 128, 1024], f16),), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([256, 1024], f16), T([1024, 128112], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([128112, 256], f16, stride=(1, 128112)), T([256, 1024], f16)), {}) +cnt: 1, ((T([256, 128112], f16), T([128112, 1024], f16)), {}) +cnt: 24, ((T([256, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 24, ((T([1024, 256], f16, stride=(1, 1024)), T([256, 4096], f16)), {}) +cnt: 24, ((T([256, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 24, ((T([4096, 256], f16, stride=(1, 4096)), T([256, 1024], f16)), {}) +cnt: 144, ((T([256, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 144, ((T([1024, 256], f16, stride=(1, 1024)), T([256, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([2, 128, 1024], f16), 32.0), {}) +cnt: 2, ((T([2, 128], i32), T([2, 128], i32)), {}) +cnt: 72, ((T([2, 128, 1024], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 62, 
((T([2, 128, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 62, ((T([2, 128, 1024], f16), T([2, 128, 1024], f16), [1024], T([2, 128, 1], f32), T([2, 128, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 2, ((T([2, 128], i64), 1), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([256, 128112], f16), T([256], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([256, 128112], f16), T([256], i64), None, 1, -100), {}) +Operator: aten.relu.default +cnt: 24, ((T([2, 128, 4096], f16),), {}) +Operator: aten.sum.SymInt +cnt: 168, ((T([256, 1024], f16), [0], True), {}) +cnt: 24, ((T([256, 4096], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 24, ((T([2, 128, 4096], f16), T([2, 128, 4096], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MBartForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MBartForCausalLM_training.txt new file mode 100644 index 000000000..288b2cd2c --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MBartForCausalLM_training.txt @@ -0,0 +1,73 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([2048, 50265], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([2048, 50265], f16), T([2048, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([256, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([256, 128, 128], f16), T([256, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([16, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([16, 128, 16, 64], f16), [16, 128, 1024]), {}) 
+cnt: 1, ((T([2048, 50265], f16), [16, 128, 50265]), {}) +cnt: 12, ((T([16, 16, 128, 64], f16), [256, 128, 64]), {}) +cnt: 12, ((T([16, 128, 1024], f16), [2048, 1024]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([128], i64), 1), {}) +cnt: 1, ((T([16, 128], i64, stride=(0, 1)), 2), {}) +cnt: 73, ((T([16, 128, 1024], f16), T([16, 128, 1024], f16)), {}) +cnt: 12, ((T([16, 16, 128, 128], f16), T([16, 1, 128, 128], f16)), {}) +cnt: 1, ((T([50265, 1024], f16), T([50265, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([1024], f16), T([2048, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 12, ((T([4096], f16), T([2048, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 12, ((T([1024], f16), T([2048, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([256, 128, 64], f16), T([256, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 24, ((T([256, 128, 128], f16), T([256, 128, 64], f16)), {}) +cnt: 12, ((T([256, 128, 128], f16, stride=(16384, 1, 128)), T([256, 128, 64], f16)), {}) +cnt: 12, ((T([256, 64, 128], f16, stride=(8192, 1, 64)), T([256, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([16, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([16, 128], i64), T([16, 128], i64)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50265, 1024], f16), T([16, 128], i64), 1), {}) +cnt: 1, ((T([1026, 1024], f16), T([16, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([16, 128, 1024], f16), T([16, 128], i64), 1026, -1, False), {}) +cnt: 1, ((T([16, 128, 1024], f16), T([16, 128], i64), 50265, 1, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([16, 128, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([16, 128, 4096], f16), T([16, 128, 4096], f16)), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], 
b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 1024], f16), T([1024, 50265], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([50265, 2048], f16, stride=(1, 50265)), T([2048, 1024], f16)), {}) +cnt: 1, ((T([2048, 50265], f16), T([50265, 1024], f16)), {}) +cnt: 12, ((T([2048, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 12, ((T([1024, 2048], f16, stride=(1, 1024)), T([2048, 4096], f16)), {}) +cnt: 12, ((T([2048, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 12, ((T([4096, 2048], f16, stride=(1, 4096)), T([2048, 1024], f16)), {}) +cnt: 48, ((T([2048, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 48, ((T([1024, 2048], f16, stride=(1, 1024)), T([2048, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([16, 128, 1024], f16), 1.0), {}) +cnt: 24, ((T([16, 128, 1024], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([16, 128, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([16, 128, 1024], f16), T([16, 128, 1024], f16), [1024], T([16, 128, 1], f32), T([16, 128, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([2048, 50265], f16), T([2048], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([2048, 50265], f16), T([2048], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 60, ((T([2048, 1024], f16), [0], True), {}) +cnt: 12, ((T([2048, 4096], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MBartForConditionalGeneration_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MBartForConditionalGeneration_training.txt new file mode 100644 index 000000000..2ca11dd08 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MBartForConditionalGeneration_training.txt @@ -0,0 +1,94 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([1024, 50265], 
f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([1024, 50265], f16), T([1024, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 36, ((T([128, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 36, ((T([128, 128, 128], f16), T([128, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([8, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 108, ((T([8, 128, 16, 64], f16), [8, 128, 1024]), {}) +cnt: 1, ((T([1024, 50265], f16), [8, 128, 50265]), {}) +cnt: 36, ((T([8, 16, 128, 64], f16), [128, 128, 64]), {}) +cnt: 36, ((T([8, 128, 1024], f16), [1024, 1024]), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([8, 128], i64, stride=(0, 1)), 2), {}) +cnt: 193, ((T([8, 128, 1024], f16), T([8, 128, 1024], f16)), {}) +cnt: 1, ((T([128], i64), 1), {}) +cnt: 12, ((T([8, 16, 128, 128], f16), T([8, 1, 128, 128], f16)), {}) +cnt: 1, ((T([8, 128, 50265], f16), T([1, 50265], f16)), {}) +cnt: 2, ((T([50265, 1024], f16), T([50265, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 144, ((T([1024], f16), T([1024, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([4096], f16), T([1024, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([1024], f16), T([1024, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +Operator: aten.any.default +cnt: 24, ((T([8, 128, 1024], b8),), {}) +Operator: aten.bmm.default +cnt: 72, ((T([128, 128, 64], f16), T([128, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 72, ((T([128, 128, 128], f16), T([128, 128, 64], f16)), {}) +cnt: 36, ((T([128, 128, 128], f16, stride=(16384, 1, 128)), T([128, 128, 64], f16)), {}) +cnt: 36, ((T([128, 64, 128], f16, stride=(8192, 1, 64)), T([128, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 3, ((T([8, 128], 
i64),), {}) +cnt: 1, ((T([8, 127], i64, stride=(128, 1)),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([8, 128], i64), T([8, 128], i64)), {}) +cnt: 1, ((T([8, 127], i64, stride=(128, 1)), T([8, 127], i64)), {}) +cnt: 1, ((T([8], i64, stride=(128,)), T([8], i64)), {}) +Operator: aten.embedding.default +cnt: 2, ((T([50265, 1024], f16), T([8, 128], i64), 1), {}) +cnt: 2, ((T([1026, 1024], f16), T([8, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 2, ((T([8, 128, 1024], f16), T([8, 128], i64), 1026, -1, False), {}) +cnt: 2, ((T([8, 128, 1024], f16), T([8, 128], i64), 50265, 1, False), {}) +Operator: aten.eq.Scalar +cnt: 1, ((T([8, 128], i64), -100), {}) +Operator: aten.gather.default +cnt: 1, ((T([8, 128], i64), 1, T([8, 1], i64)), {}) +Operator: aten.gelu.default +cnt: 24, ((T([8, 128, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 24, ((T([8, 128, 4096], f16), T([8, 128, 4096], f16)), {}) +Operator: aten.isinf.default +cnt: 12, ((T([8, 128, 1024], f16),), {}) +Operator: aten.isnan.default +cnt: 12, ((T([8, 128, 1024], f16),), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([8, 128], i64), T([8, 128], b8), 1), {}) +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([1024, 1024], f16), T([1024, 50265], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([50265, 1024], f16, stride=(1, 50265)), T([1024, 1024], f16)), {}) +cnt: 1, ((T([1024, 50265], f16), T([50265, 1024], f16)), {}) +cnt: 24, ((T([1024, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 24, ((T([1024, 1024], f16, stride=(1, 1024)), T([1024, 4096], f16)), {}) +cnt: 24, ((T([1024, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 24, ((T([4096, 1024], f16, stride=(1, 4096)), T([1024, 1024], f16)), {}) +cnt: 144, ((T([1024, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 144, ((T([1024, 1024], f16, stride=(1, 1024)), T([1024, 1024], f16)), {}) +Operator: 
aten.mul.Tensor +cnt: 4, ((T([8, 128, 1024], f16), 1.0), {}) +cnt: 72, ((T([8, 128, 1024], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 64, ((T([8, 128, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 64, ((T([8, 128, 1024], f16), T([8, 128, 1024], f16), [1024], T([8, 128, 1], f32), T([8, 128, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([8, 128], i64), 1), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([1024, 50265], f16), T([1024], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([1024, 50265], f16), T([1024], i64), None, 1, -100), {}) +Operator: aten.sub.Tensor +cnt: 1, ((T([8], i64), 1), {}) +Operator: aten.sum.SymInt +cnt: 168, ((T([1024, 1024], f16), [0], True), {}) +cnt: 24, ((T([1024, 4096], f16), [0], True), {}) +Operator: aten.sum.dim_IntList +cnt: 1, ((T([8, 128], b8), [1]), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MegatronBertForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MegatronBertForCausalLM_training.txt new file mode 100644 index 000000000..efe2661fc --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MegatronBertForCausalLM_training.txt @@ -0,0 +1,85 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([254, 29056], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([254, 29056], f16), T([254, 29056], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([2, 16, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([2, 16, 128, 128], f16), T([2, 16, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([2, 1, 1, 128], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 72, ((T([2, 16, 128, 64], f16), [32, 128, 
64]), {}) +cnt: 24, ((T([2, 16, 64, 128], f16), [32, 64, 128]), {}) +cnt: 24, ((T([32, 128, 128], f16), [2, 16, 128, 128]), {}) +cnt: 24, ((T([32, 128, 64], f16), [2, 16, 128, 64]), {}) +cnt: 48, ((T([2, 128, 16, 64], f16), [2, 128, 1024]), {}) +cnt: 24, ((T([2, 128, 1024], f16), [256, 1024]), {}) +Operator: aten.add.Tensor +cnt: 145, ((T([2, 128, 1024], f16), T([2, 128, 1024], f16)), {}) +cnt: 24, ((T([2, 16, 128, 128], f16), T([2, 1, 1, 128], f16)), {}) +cnt: 1, ((T([29056, 1024], f16), T([29056, 1024], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([2, 128, 1024], f16), T([1, 128, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 97, ((T([1024], f16), T([256, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([4096], f16), T([256, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([1024], f16), T([256, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +cnt: 1, ((T([29056], f16), T([256, 1024], f16), T([1024, 29056], f16, stride=(1, 1024))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([32, 128, 64], f16), T([32, 64, 128], f16)), {}) +cnt: 24, ((T([32, 128, 128], f16), T([32, 128, 64], f16)), {}) +cnt: 24, ((T([32, 128, 128], f16, stride=(16384, 1, 128)), T([32, 128, 64], f16)), {}) +cnt: 24, ((T([32, 128, 64], f16), T([32, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 24, ((T([32, 64, 128], f16, stride=(8192, 1, 64)), T([32, 128, 128], f16)), {}) +cnt: 24, ((T([32, 128, 128], f16), T([32, 128, 64], f16, stride=(8192, 1, 128))), {}) +Operator: aten.clone.default +cnt: 2, ((T([2, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([2, 128], i64), T([2, 128], i64)), {}) +Operator: aten.div.Tensor +cnt: 48, ((T([2, 16, 128, 128], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([29056, 1024], f16), T([2, 128], i64), 0), {}) +cnt: 1, ((T([2, 1024], f16), T([2, 128], i64)), {}) +cnt: 1, ((T([512, 1024], f16), T([1, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default 
+cnt: 1, ((T([1, 128, 1024], f16), T([1, 128], i64), 512, -1, False), {}) +cnt: 1, ((T([2, 128, 1024], f16), T([2, 128], i64), 2, -1, False), {}) +cnt: 1, ((T([2, 128, 1024], f16), T([2, 128], i64), 29056, 0, False), {}) +Operator: aten.gelu.default +cnt: 24, ((T([2, 128, 4096], f16),), {}) +cnt: 1, ((T([2, 128, 1024], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([2, 128, 1024], f16), T([2, 128, 1024], f16)), {}) +cnt: 24, ((T([2, 128, 4096], f16), T([2, 128, 4096], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([256, 29056], f16), T([29056, 1024], f16)), {}) +cnt: 1, ((T([29056, 256], f16, stride=(1, 29056)), T([256, 1024], f16)), {}) +cnt: 97, ((T([256, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 97, ((T([1024, 256], f16, stride=(1, 1024)), T([256, 1024], f16)), {}) +cnt: 24, ((T([256, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 24, ((T([1024, 256], f16, stride=(1, 1024)), T([256, 4096], f16)), {}) +cnt: 24, ((T([256, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 24, ((T([4096, 256], f16, stride=(1, 4096)), T([256, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([2, 1, 1, 128], f16), -65504.0), {}) +Operator: aten.native_layer_norm.default +cnt: 50, ((T([2, 128, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 50, ((T([2, 128, 1024], f16), T([2, 128, 1024], f16), [1024], T([2, 128, 1], f32), T([2, 128, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([254, 29056], f16), T([254], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([254, 29056], f16), T([254], i64), None, 1, -100), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([2, 1, 1, 128], f16), 1.0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([2, 127, 29056], f16), [2, 127, 29056], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([2, 127, 29056], f16), [2, 128, 29056], 
1, 0, -1, 1), {}) +cnt: 1, ((T([2, 128, 29056], f16), [2, 128, 29056], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([256, 29056], f16), [0], True), {}) +cnt: 121, ((T([256, 1024], f16), [0], True), {}) +cnt: 24, ((T([256, 4096], f16), [0], True), {}) +cnt: 1, ((T([2, 128, 1024], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MegatronBertForQuestionAnswering_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MegatronBertForQuestionAnswering_training.txt new file mode 100644 index 000000000..5c1861e54 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MegatronBertForQuestionAnswering_training.txt @@ -0,0 +1,88 @@ +Operator: aten._log_softmax.default +cnt: 2, ((T([8, 128], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 2, ((T([8, 128], f16), T([8, 128], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([8, 16, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([8, 16, 128, 128], f16), T([8, 16, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([8, 1, 1, 128], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 72, ((T([8, 16, 128, 64], f16), [128, 128, 64]), {}) +cnt: 24, ((T([8, 16, 64, 128], f16), [128, 64, 128]), {}) +cnt: 24, ((T([128, 128, 128], f16), [8, 16, 128, 128]), {}) +cnt: 24, ((T([128, 128, 64], f16), [8, 16, 128, 64]), {}) +cnt: 48, ((T([8, 128, 16, 64], f16), [8, 128, 1024]), {}) +cnt: 24, ((T([8, 128, 1024], f16), [1024, 1024]), {}) +Operator: aten.add.Tensor +cnt: 145, ((T([8, 128, 1024], f16), T([8, 128, 1024], f16)), {}) +cnt: 24, ((T([8, 16, 128, 128], f16), T([8, 1, 1, 128], f16)), {}) +cnt: 1, ((T([], f16), T([], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([8, 128, 1024], f16), T([1, 128, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 96, ((T([1024], f16), T([1024, 1024], f16), 
T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([4096], f16), T([1024, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([1024], f16), T([1024, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +cnt: 1, ((T([2], f16), T([1024, 1024], f16), T([1024, 2], f16, stride=(1, 1024))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([128, 128, 64], f16), T([128, 64, 128], f16)), {}) +cnt: 24, ((T([128, 128, 128], f16), T([128, 128, 64], f16)), {}) +cnt: 24, ((T([128, 128, 128], f16, stride=(16384, 1, 128)), T([128, 128, 64], f16)), {}) +cnt: 24, ((T([128, 128, 64], f16), T([128, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 24, ((T([128, 64, 128], f16, stride=(8192, 1, 64)), T([128, 128, 128], f16)), {}) +cnt: 24, ((T([128, 128, 128], f16), T([128, 128, 64], f16, stride=(8192, 1, 128))), {}) +Operator: aten.cat.default +cnt: 1, (([T([8, 128, 1], f16), T([8, 128, 1], f16)], 2), {}) +Operator: aten.clamp.default +cnt: 2, ((T([8], i64), 0, 128), {}) +Operator: aten.clone.default +cnt: 1, ((T([8, 128], i64),), {}) +cnt: 2, ((T([8], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([8, 128], i64), T([8, 128], i64)), {}) +cnt: 2, ((T([8], i64), T([8], i64)), {}) +Operator: aten.div.Tensor +cnt: 48, ((T([8, 16, 128, 128], f16), 8.0), {}) +cnt: 2, ((T([], f16), 2), {}) +Operator: aten.embedding.default +cnt: 1, ((T([29056, 1024], f16), T([8, 128], i64), 0), {}) +cnt: 1, ((T([2, 1024], f16), T([8, 128], i64)), {}) +cnt: 1, ((T([512, 1024], f16), T([1, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 128, 1024], f16), T([1, 128], i64), 512, -1, False), {}) +cnt: 1, ((T([8, 128, 1024], f16), T([8, 128], i64), 2, -1, False), {}) +cnt: 1, ((T([8, 128, 1024], f16), T([8, 128], i64), 29056, 0, False), {}) +Operator: aten.gelu.default +cnt: 24, ((T([8, 128, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 24, ((T([8, 128, 4096], f16), T([8, 128, 4096], f16)), {}) +Operator: aten.mm.default +cnt: 
1, ((T([1024, 2], f16), T([2, 1024], f16)), {}) +cnt: 1, ((T([2, 1024], f16, stride=(1, 2)), T([1024, 1024], f16)), {}) +cnt: 24, ((T([1024, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 24, ((T([1024, 1024], f16, stride=(1, 1024)), T([1024, 4096], f16)), {}) +cnt: 24, ((T([1024, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 24, ((T([4096, 1024], f16, stride=(1, 4096)), T([1024, 1024], f16)), {}) +cnt: 96, ((T([1024, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 96, ((T([1024, 1024], f16, stride=(1, 1024)), T([1024, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([8, 1, 1, 128], f16), -65504.0), {}) +Operator: aten.native_layer_norm.default +cnt: 49, ((T([8, 128, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 49, ((T([8, 128, 1024], f16), T([8, 128, 1024], f16), [1024], T([8, 128, 1], f32), T([8, 128, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 2, ((T([], f16), T([8, 128], f16), T([8], i64), None, 1, 128, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 2, ((T([8, 128], f16), T([8], i64), None, 1, 128), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([8, 1, 1, 128], f16), 1.0), {}) +Operator: aten.split.Tensor +cnt: 1, ((T([8, 128, 2], f16), 1, -1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([1024, 2], f16), [0], True), {}) +cnt: 120, ((T([1024, 1024], f16), [0], True), {}) +cnt: 24, ((T([1024, 4096], f16), [0], True), {}) +cnt: 1, ((T([8, 128, 1024], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MobileBertForMaskedLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MobileBertForMaskedLM_training.txt new file mode 100644 index 000000000..e6b91aa01 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MobileBertForMaskedLM_training.txt @@ -0,0 +1,112 @@ +Operator: aten._log_softmax.default +cnt: 1, 
((T([2048, 30522], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([2048, 30522], f16), T([2048, 30522], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([16, 4, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([16, 4, 128, 128], f16), T([16, 4, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([16, 1, 1, 128], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 72, ((T([16, 4, 128, 32], f16), [64, 128, 32]), {}) +cnt: 24, ((T([16, 4, 32, 128], f16), [64, 32, 128]), {}) +cnt: 24, ((T([64, 128, 128], f16), [16, 4, 128, 128]), {}) +cnt: 24, ((T([64, 128, 32], f16), [16, 4, 128, 32]), {}) +cnt: 1, ((T([2048, 30522], f16), [16, 128, 30522]), {}) +cnt: 48, ((T([16, 128, 4, 32], f16), [16, 128, 128]), {}) +cnt: 24, ((T([16, 128, 128], f16), [2048, 128]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([16, 128, 512], f16), T([1, 128, 512], f16)), {}) +cnt: 97, ((T([16, 128, 512], f16), T([16, 128, 512], f16)), {}) +cnt: 25, ((T([16, 128, 512], f16), T([512], f16)), {}) +cnt: 168, ((T([16, 128, 128], f16), T([128], f16)), {}) +cnt: 24, ((T([16, 4, 128, 128], f16), T([16, 1, 1, 128], f16)), {}) +cnt: 241, ((T([16, 128, 128], f16), T([16, 128, 128], f16)), {}) +cnt: 1, ((T([16, 128, 128], f16, stride=(49152, 384, 1)), T([16, 128, 128], f16)), {}) +cnt: 1, ((T([30522, 128], f16, stride=(1, 30522)), T([30522, 128], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([16, 128, 30522], f16), T([30522], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([512], f16), T([2048, 384], f16), T([384, 512], f16, stride=(1, 384))), {}) +cnt: 168, ((T([128], f16), T([2048, 512], f16), T([512, 128], f16, stride=(1, 512))), {}) +cnt: 72, ((T([128], f16), T([2048, 128], f16), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 120, ((T([512], f16), T([2048, 128], f16), T([128, 512], f16, stride=(1, 128))), {}) +cnt: 1, ((T([512], f16), T([2048, 512], f16), 
T([512, 512], f16, stride=(1, 512))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([64, 128, 32], f16), T([64, 32, 128], f16)), {}) +cnt: 24, ((T([64, 128, 128], f16), T([64, 128, 32], f16)), {}) +cnt: 24, ((T([64, 128, 128], f16, stride=(16384, 1, 128)), T([64, 128, 32], f16)), {}) +cnt: 24, ((T([64, 128, 32], f16), T([64, 32, 128], f16, stride=(4096, 1, 32))), {}) +cnt: 24, ((T([64, 32, 128], f16, stride=(4096, 1, 32)), T([64, 128, 128], f16)), {}) +cnt: 24, ((T([64, 128, 128], f16), T([64, 128, 32], f16, stride=(4096, 1, 128))), {}) +Operator: aten.cat.default +cnt: 1, (([T([16, 128, 128], f16), T([16, 128, 128], f16), T([16, 128, 128], f16)], 2), {}) +cnt: 1, (([T([128, 30522], f16, stride=(1, 128)), T([384, 30522], f16)],), {}) +Operator: aten.clone.default +cnt: 2, ((T([16, 128], i64),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([16, 127, 128], f16, stride=(16384, 128, 1)), [0, 0, 0, 1, 0, 0], 0.0), {}) +cnt: 1, ((T([16, 127, 128], f16, stride=(16384, 128, 1)), [0, 0, 1, 0, 0, 0], 0.0), {}) +cnt: 1, ((T([16, 128, 128], f16, stride=(49152, 384, 1)), [0, 0, -1, 0, 0, 0]), {}) +cnt: 1, ((T([16, 128, 128], f16, stride=(49152, 384, 1)), [0, 0, 0, -1, 0, 0]), {}) +Operator: aten.copy_.default +cnt: 2, ((T([16, 128], i64), T([16, 128], i64)), {}) +cnt: 1, ((T([30522, 128], f16), T([30522, 128], f16, stride=(1, 30522))), {}) +Operator: aten.div.Tensor +cnt: 48, ((T([16, 4, 128, 128], f16), 5.656854249492381), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 128], f16), T([16, 128], i64), 0), {}) +cnt: 1, ((T([512, 512], f16), T([1, 128], i64)), {}) +cnt: 1, ((T([2, 512], f16), T([16, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([16, 128, 512], f16), T([16, 128], i64), 2, -1, False), {}) +cnt: 1, ((T([1, 128, 512], f16), T([1, 128], i64), 512, -1, False), {}) +cnt: 1, ((T([16, 128, 128], f16), T([16, 128], i64), 30522, 0, False), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 512], f16), T([512, 30522], 
f16)), {}) +cnt: 1, ((T([512, 2048], f16, stride=(1, 512)), T([2048, 30522], f16)), {}) +cnt: 1, ((T([2048, 30522], f16), T([30522, 512], f16, stride=(1, 30522))), {}) +cnt: 1, ((T([2048, 512], f16), T([512, 512], f16)), {}) +cnt: 1, ((T([512, 2048], f16, stride=(1, 512)), T([2048, 512], f16)), {}) +cnt: 120, ((T([2048, 512], f16), T([512, 128], f16)), {}) +cnt: 120, ((T([512, 2048], f16, stride=(1, 512)), T([2048, 128], f16)), {}) +cnt: 168, ((T([2048, 128], f16), T([128, 512], f16)), {}) +cnt: 168, ((T([128, 2048], f16, stride=(1, 128)), T([2048, 512], f16)), {}) +cnt: 72, ((T([2048, 128], f16), T([128, 128], f16)), {}) +cnt: 72, ((T([128, 2048], f16, stride=(1, 128)), T([2048, 128], f16)), {}) +cnt: 1, ((T([2048, 512], f16), T([512, 384], f16)), {}) +cnt: 1, ((T([512, 2048], f16, stride=(1, 512)), T([2048, 384], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([16, 1, 1, 128], f16), -65504.0), {}) +cnt: 50, ((T([16, 128, 512], f16), T([512], f16)), {}) +cnt: 336, ((T([16, 128, 128], f16), T([128], f16)), {}) +cnt: 25, ((T([16, 128, 512], f16), T([16, 128, 512], f16)), {}) +cnt: 168, ((T([16, 128, 128], f16), T([16, 128, 128], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 1, ((T([16, 128, 512], f16), [512], T([512], f16), T([512], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 1, ((T([16, 128, 512], f16), T([16, 128, 512], f16), [512], T([16, 128, 1], f32), T([16, 128, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 1, ((T([30522, 128], f16, stride=(1, 30522)), [30522, 128], [128, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([2048, 30522], f16), T([2048], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([2048, 30522], f16), T([2048], i64), None, 1, -100), {}) +Operator: aten.relu.default +cnt: 97, ((T([16, 128, 512], f16),), {}) 
+Operator: aten.rsub.Scalar +cnt: 1, ((T([16, 1, 1, 128], f16), 1.0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([16, 127, 128], f16), [16, 128, 128], 1, 0, -1, 1), {}) +cnt: 2, ((T([16, 128, 128], f16), [16, 128, 128], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([16, 127, 128], f16), [16, 128, 128], 1, 1, 9223372036854775807, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([16, 128, 30522], f16), [0, 1], True), {}) +cnt: 122, ((T([2048, 512], f16), [0], True), {}) +cnt: 50, ((T([16, 128, 512], f16), [0, 1], True), {}) +cnt: 336, ((T([16, 128, 128], f16), [0, 1], True), {}) +cnt: 240, ((T([2048, 128], f16), [0], True), {}) +cnt: 1, ((T([16, 128, 512], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 97, ((T([16, 128, 512], f16), T([16, 128, 512], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MobileBertForQuestionAnswering_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MobileBertForQuestionAnswering_training.txt new file mode 100644 index 000000000..c5e7b0f51 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/MobileBertForQuestionAnswering_training.txt @@ -0,0 +1,106 @@ +Operator: aten._log_softmax.default +cnt: 2, ((T([32, 128], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 2, ((T([32, 128], f16), T([32, 128], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([32, 4, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([32, 4, 128, 128], f16), T([32, 4, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([32, 1, 1, 128], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 72, ((T([32, 4, 128, 32], f16), [128, 128, 32]), {}) +cnt: 24, ((T([32, 4, 32, 128], f16), [128, 32, 128]), {}) +cnt: 24, ((T([128, 128, 128], f16), [32, 4, 128, 128]), {}) +cnt: 24, ((T([128, 128, 32], f16), [32, 4, 128, 32]), {}) +cnt: 48, 
((T([32, 128, 4, 32], f16), [32, 128, 128]), {}) +cnt: 24, ((T([32, 128, 128], f16), [4096, 128]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([32, 128, 512], f16), T([1, 128, 512], f16)), {}) +cnt: 97, ((T([32, 128, 512], f16), T([32, 128, 512], f16)), {}) +cnt: 25, ((T([32, 128, 512], f16), T([512], f16)), {}) +cnt: 168, ((T([32, 128, 128], f16), T([128], f16)), {}) +cnt: 24, ((T([32, 4, 128, 128], f16), T([32, 1, 1, 128], f16)), {}) +cnt: 241, ((T([32, 128, 128], f16), T([32, 128, 128], f16)), {}) +cnt: 1, ((T([], f16), T([], f16)), {}) +cnt: 1, ((T([32, 128, 128], f16, stride=(49152, 384, 1)), T([32, 128, 128], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([512], f16), T([4096, 384], f16), T([384, 512], f16, stride=(1, 384))), {}) +cnt: 168, ((T([128], f16), T([4096, 512], f16), T([512, 128], f16, stride=(1, 512))), {}) +cnt: 72, ((T([128], f16), T([4096, 128], f16), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 120, ((T([512], f16), T([4096, 128], f16), T([128, 512], f16, stride=(1, 128))), {}) +cnt: 1, ((T([2], f16), T([4096, 512], f16), T([512, 2], f16, stride=(1, 512))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([128, 128, 32], f16), T([128, 32, 128], f16)), {}) +cnt: 24, ((T([128, 128, 128], f16), T([128, 128, 32], f16)), {}) +cnt: 24, ((T([128, 128, 128], f16, stride=(16384, 1, 128)), T([128, 128, 32], f16)), {}) +cnt: 24, ((T([128, 128, 32], f16), T([128, 32, 128], f16, stride=(4096, 1, 32))), {}) +cnt: 24, ((T([128, 32, 128], f16, stride=(4096, 1, 32)), T([128, 128, 128], f16)), {}) +cnt: 24, ((T([128, 128, 128], f16), T([128, 128, 32], f16, stride=(4096, 1, 128))), {}) +Operator: aten.cat.default +cnt: 1, (([T([32, 128, 128], f16), T([32, 128, 128], f16), T([32, 128, 128], f16)], 2), {}) +cnt: 1, (([T([32, 128, 1], f16), T([32, 128, 1], f16)], 2), {}) +Operator: aten.clamp.default +cnt: 2, ((T([32], i64), 0, 128), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 128], i64),), {}) +cnt: 2, ((T([32], i64),), {}) +Operator: 
aten.constant_pad_nd.default +cnt: 1, ((T([32, 127, 128], f16, stride=(16384, 128, 1)), [0, 0, 0, 1, 0, 0], 0.0), {}) +cnt: 1, ((T([32, 127, 128], f16, stride=(16384, 128, 1)), [0, 0, 1, 0, 0, 0], 0.0), {}) +cnt: 1, ((T([32, 128, 128], f16, stride=(49152, 384, 1)), [0, 0, -1, 0, 0, 0]), {}) +cnt: 1, ((T([32, 128, 128], f16, stride=(49152, 384, 1)), [0, 0, 0, -1, 0, 0]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 128], i64), T([32, 128], i64)), {}) +cnt: 2, ((T([32], i64), T([32], i64)), {}) +Operator: aten.div.Tensor +cnt: 48, ((T([32, 4, 128, 128], f16), 5.656854249492381), {}) +cnt: 2, ((T([], f16), 2), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 128], f16), T([32, 128], i64), 0), {}) +cnt: 1, ((T([512, 512], f16), T([1, 128], i64)), {}) +cnt: 1, ((T([2, 512], f16), T([32, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([32, 128, 512], f16), T([32, 128], i64), 2, -1, False), {}) +cnt: 1, ((T([1, 128, 512], f16), T([1, 128], i64), 512, -1, False), {}) +cnt: 1, ((T([32, 128, 128], f16), T([32, 128], i64), 30522, 0, False), {}) +Operator: aten.mm.default +cnt: 1, ((T([4096, 2], f16), T([2, 512], f16)), {}) +cnt: 1, ((T([2, 4096], f16, stride=(1, 2)), T([4096, 512], f16)), {}) +cnt: 120, ((T([4096, 512], f16), T([512, 128], f16)), {}) +cnt: 120, ((T([512, 4096], f16, stride=(1, 512)), T([4096, 128], f16)), {}) +cnt: 168, ((T([4096, 128], f16), T([128, 512], f16)), {}) +cnt: 168, ((T([128, 4096], f16, stride=(1, 128)), T([4096, 512], f16)), {}) +cnt: 72, ((T([4096, 128], f16), T([128, 128], f16)), {}) +cnt: 72, ((T([128, 4096], f16, stride=(1, 128)), T([4096, 128], f16)), {}) +cnt: 1, ((T([4096, 512], f16), T([512, 384], f16)), {}) +cnt: 1, ((T([512, 4096], f16, stride=(1, 512)), T([4096, 384], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([32, 1, 1, 128], f16), -65504.0), {}) +cnt: 50, ((T([32, 128, 512], f16), T([512], f16)), {}) +cnt: 336, ((T([32, 128, 128], f16), T([128], f16)), {}) +cnt: 25, ((T([32, 
128, 512], f16), T([32, 128, 512], f16)), {}) +cnt: 168, ((T([32, 128, 128], f16), T([32, 128, 128], f16)), {}) +Operator: aten.nll_loss_backward.default +cnt: 2, ((T([], f16), T([32, 128], f16), T([32], i64), None, 1, 128, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 2, ((T([32, 128], f16), T([32], i64), None, 1, 128), {}) +Operator: aten.relu.default +cnt: 96, ((T([32, 128, 512], f16),), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([32, 1, 1, 128], f16), 1.0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([32, 127, 128], f16), [32, 128, 128], 1, 0, -1, 1), {}) +cnt: 2, ((T([32, 128, 128], f16), [32, 128, 128], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 127, 128], f16), [32, 128, 128], 1, 1, 9223372036854775807, 1), {}) +Operator: aten.split.Tensor +cnt: 1, ((T([32, 128, 2], f16), 1, -1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([4096, 2], f16), [0], True), {}) +cnt: 50, ((T([32, 128, 512], f16), [0, 1], True), {}) +cnt: 121, ((T([4096, 512], f16), [0], True), {}) +cnt: 336, ((T([32, 128, 128], f16), [0, 1], True), {}) +cnt: 240, ((T([4096, 128], f16), [0], True), {}) +cnt: 1, ((T([32, 128, 512], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 96, ((T([32, 128, 512], f16), T([32, 128, 512], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/OPTForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/OPTForCausalLM_training.txt new file mode 100644 index 000000000..533b18756 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/OPTForCausalLM_training.txt @@ -0,0 +1,103 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([508, 50272], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([508, 50272], f16), T([508, 50272], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([48, 128, 128], f16), -1, True), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, 
((T([48, 128, 128], f32), T([48, 128, 128], f32), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([4, 128], b8),), {'dtype': i64}) +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([4, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([4, 1, 128, 128], b8, stride=(128, 128, 0, 1)),), {'dtype': f16}) +cnt: 1, ((T([4, 1, 128, 128], f16),), {'dtype': torch.bool}) +cnt: 12, ((T([48, 128, 128], f32),), {'dtype': f16}) +cnt: 12, ((T([48, 128, 128], f16),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([4, 128, 12, 64], f16), [4, 128, 768]), {}) +cnt: 1, ((T([512, 50272], f16), [4, 128, 50272]), {}) +cnt: 12, ((T([4, 12, 128, 64], f16), [48, 128, 64]), {}) +cnt: 12, ((T([4, 128, 768], f16), [512, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([4, 128], i64), 2), {}) +cnt: 1, ((T([128], i64), 1), {}) +cnt: 1, ((T([4, 1, 128, 128], f16), T([4, 1, 128, 128], f16)), {}) +cnt: 49, ((T([4, 128, 768], f16), T([4, 128, 768], f16)), {}) +cnt: 12, ((T([4, 12, 128, 128], f16), T([4, 1, 128, 128], f16)), {}) +cnt: 24, ((T([512, 768], f16), T([512, 768], f16)), {}) +cnt: 1, ((T([50272, 768], f16), T([50272, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([768], f16), T([512, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([512, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([512, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([48, 128, 64], f16), T([48, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 24, ((T([48, 128, 128], f16), T([48, 128, 64], f16)), {}) +cnt: 12, ((T([48, 128, 128], f16, stride=(16384, 1, 128)), T([48, 128, 64], f16)), {}) +cnt: 12, ((T([48, 64, 128], f16, stride=(8192, 1, 64)), T([48, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([4, 128], i64),), 
{}) +Operator: aten.copy_.default +cnt: 2, ((T([4, 128], i64), T([4, 128], i64)), {}) +Operator: aten.cumsum.default +cnt: 1, ((T([4, 128], i64), 1), {}) +Operator: aten.div.Scalar +cnt: 12, ((T([4, 12, 128, 128], f16), 2), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50272, 768], f16), T([4, 128], i64), 1), {}) +cnt: 1, ((T([2050, 768], f16), T([4, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([4, 128, 768], f16), T([4, 128], i64), 2050, -1, False), {}) +cnt: 1, ((T([4, 128, 768], f16), T([4, 128], i64), 50272, 1, False), {}) +Operator: aten.eq.Tensor +cnt: 12, ((T([4, 12, 128, 128], f16), T([], f32)), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +cnt: 12, ((T([4, 12, 128, 128], f16), T([], f32)), {}) +Operator: aten.masked_fill.Scalar +cnt: 1, ((T([4, 1, 128, 128], f16), T([4, 1, 128, 128], b8), -65504.0), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +cnt: 12, ((T([4, 12, 128, 128], f16), T([4, 12, 128, 128], b8), 0), {}) +Operator: aten.maximum.default +cnt: 12, ((T([4, 12, 128, 128], f16), T([], f32)), {}) +Operator: aten.mm.default +cnt: 1, ((T([512, 768], f16), T([768, 50272], f16, stride=(1, 768))), {}) +cnt: 1, ((T([50272, 512], f16, stride=(1, 50272)), T([512, 768], f16)), {}) +cnt: 1, ((T([512, 50272], f16), T([50272, 768], f16)), {}) +cnt: 12, ((T([512, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 512], f16, stride=(1, 768)), T([512, 3072], f16)), {}) +cnt: 12, ((T([512, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 512], f16, stride=(1, 3072)), T([512, 768], f16)), {}) +cnt: 48, ((T([512, 768], f16), T([768, 768], f16)), {}) +cnt: 48, ((T([768, 512], f16, stride=(1, 768)), T([512, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([4, 128], i64), T([4, 128], i64)), {}) +cnt: 24, ((T([4, 128, 768], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 13, ((T([4, 128, 768], f16), 
[768], T([768], f16), T([768], f16), 1e-05), {}) +cnt: 12, ((T([512, 768], f16), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 13, ((T([4, 128, 768], f16), T([4, 128, 768], f16), [768], T([4, 128, 1], f32), T([4, 128, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +cnt: 12, ((T([512, 768], f16), T([512, 768], f16), [768], T([512, 1], f32), T([512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([508, 50272], f16), T([508], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([508, 50272], f16), T([508], i64), None, 1, -100), {}) +Operator: aten.relu.default +cnt: 12, ((T([512, 3072], f16),), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([4, 1, 128, 128], f16), 1.0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([4, 127, 50272], f16), [4, 127, 50272], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([4, 127, 50272], f16), [4, 128, 50272], 1, 0, -1, 1), {}) +Operator: aten.sub.Tensor +cnt: 1, ((T([4, 128], i64), 1), {}) +Operator: aten.sum.SymInt +cnt: 60, ((T([512, 768], f16), [0], True), {}) +cnt: 12, ((T([512, 3072], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 12, ((T([512, 3072], f16), T([512, 3072], f16), 0), {}) +Operator: aten.where.self +cnt: 12, ((T([4, 12, 128, 128], b8), T([4, 12, 128, 128], f16), T([4, 12, 128, 128], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PLBartForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PLBartForCausalLM_training.txt new file mode 100644 index 000000000..7617876fd --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PLBartForCausalLM_training.txt @@ -0,0 +1,73 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([2048, 50005], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default 
+cnt: 1, ((T([2048, 50005], f16), T([2048, 50005], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 6, ((T([192, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 6, ((T([192, 128, 128], f16), T([192, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([16, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 18, ((T([16, 128, 12, 64], f16), [16, 128, 768]), {}) +cnt: 1, ((T([2048, 50005], f16), [16, 128, 50005]), {}) +cnt: 6, ((T([16, 12, 128, 64], f16), [192, 128, 64]), {}) +cnt: 6, ((T([16, 128, 768], f16), [2048, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([128], i64), 1), {}) +cnt: 1, ((T([16, 128], i64, stride=(0, 1)), 2), {}) +cnt: 37, ((T([16, 128, 768], f16), T([16, 128, 768], f16)), {}) +cnt: 6, ((T([16, 12, 128, 128], f16), T([16, 1, 128, 128], f16)), {}) +cnt: 1, ((T([50005, 768], f16), T([50005, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 24, ((T([768], f16), T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 6, ((T([3072], f16), T([2048, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 6, ((T([768], f16), T([2048, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([192, 128, 64], f16), T([192, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 12, ((T([192, 128, 128], f16), T([192, 128, 64], f16)), {}) +cnt: 6, ((T([192, 128, 128], f16, stride=(16384, 1, 128)), T([192, 128, 64], f16)), {}) +cnt: 6, ((T([192, 64, 128], f16, stride=(8192, 1, 64)), T([192, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([16, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([16, 128], i64), T([16, 128], i64)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50005, 768], f16), T([16, 128], i64), 1), {}) +cnt: 1, ((T([1026, 768], f16), 
T([16, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([16, 128, 768], f16), T([16, 128], i64), 1026, -1, False), {}) +cnt: 1, ((T([16, 128, 768], f16), T([16, 128], i64), 50005, 1, False), {}) +Operator: aten.gelu.default +cnt: 6, ((T([16, 128, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 6, ((T([16, 128, 3072], f16), T([16, 128, 3072], f16)), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 768], f16), T([768, 50005], f16, stride=(1, 768))), {}) +cnt: 1, ((T([50005, 2048], f16, stride=(1, 50005)), T([2048, 768], f16)), {}) +cnt: 1, ((T([2048, 50005], f16), T([50005, 768], f16)), {}) +cnt: 6, ((T([2048, 768], f16), T([768, 3072], f16)), {}) +cnt: 6, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 3072], f16)), {}) +cnt: 6, ((T([2048, 3072], f16), T([3072, 768], f16)), {}) +cnt: 6, ((T([3072, 2048], f16, stride=(1, 3072)), T([2048, 768], f16)), {}) +cnt: 24, ((T([2048, 768], f16), T([768, 768], f16)), {}) +cnt: 24, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([16, 128, 768], f16), 27.712812921102035), {}) +cnt: 12, ((T([16, 128, 768], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 13, ((T([16, 128, 768], f16), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 13, ((T([16, 128, 768], f16), T([16, 128, 768], f16), [768], T([16, 128, 1], f32), T([16, 128, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([2048, 50005], f16), T([2048], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([2048, 50005], f16), T([2048], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 30, ((T([2048, 768], 
f16), [0], True), {}) +cnt: 6, ((T([2048, 3072], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PLBartForConditionalGeneration_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PLBartForConditionalGeneration_training.txt new file mode 100644 index 000000000..55115055a --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PLBartForConditionalGeneration_training.txt @@ -0,0 +1,94 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([1024, 50005], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([1024, 50005], f16), T([1024, 50005], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 18, ((T([96, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 18, ((T([96, 128, 128], f16), T([96, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([8, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 54, ((T([8, 128, 12, 64], f16), [8, 128, 768]), {}) +cnt: 1, ((T([1024, 50005], f16), [8, 128, 50005]), {}) +cnt: 18, ((T([8, 12, 128, 64], f16), [96, 128, 64]), {}) +cnt: 18, ((T([8, 128, 768], f16), [1024, 768]), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([8, 128], i64, stride=(0, 1)), 2), {}) +cnt: 97, ((T([8, 128, 768], f16), T([8, 128, 768], f16)), {}) +cnt: 1, ((T([128], i64), 1), {}) +cnt: 6, ((T([8, 12, 128, 128], f16), T([8, 1, 128, 128], f16)), {}) +cnt: 1, ((T([8, 128, 50005], f16), T([1, 50005], f16)), {}) +cnt: 2, ((T([50005, 768], f16), T([50005, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 72, ((T([768], f16), T([1024, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([1024, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([1024, 3072], f16), T([3072, 
768], f16, stride=(1, 3072))), {}) +Operator: aten.any.default +cnt: 12, ((T([8, 128, 768], b8),), {}) +Operator: aten.bmm.default +cnt: 36, ((T([96, 128, 64], f16), T([96, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 36, ((T([96, 128, 128], f16), T([96, 128, 64], f16)), {}) +cnt: 18, ((T([96, 128, 128], f16, stride=(16384, 1, 128)), T([96, 128, 64], f16)), {}) +cnt: 18, ((T([96, 64, 128], f16, stride=(8192, 1, 64)), T([96, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 3, ((T([8, 128], i64),), {}) +cnt: 1, ((T([8, 127], i64, stride=(128, 1)),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([8, 128], i64), T([8, 128], i64)), {}) +cnt: 1, ((T([8, 127], i64, stride=(128, 1)), T([8, 127], i64)), {}) +cnt: 1, ((T([8], i64, stride=(128,)), T([8], i64)), {}) +Operator: aten.embedding.default +cnt: 2, ((T([50005, 768], f16), T([8, 128], i64), 1), {}) +cnt: 2, ((T([1026, 768], f16), T([8, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 2, ((T([8, 128, 768], f16), T([8, 128], i64), 1026, -1, False), {}) +cnt: 2, ((T([8, 128, 768], f16), T([8, 128], i64), 50005, 1, False), {}) +Operator: aten.eq.Scalar +cnt: 1, ((T([8, 128], i64), -100), {}) +Operator: aten.gather.default +cnt: 1, ((T([8, 128], i64), 1, T([8, 1], i64)), {}) +Operator: aten.gelu.default +cnt: 12, ((T([8, 128, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([8, 128, 3072], f16), T([8, 128, 3072], f16)), {}) +Operator: aten.isinf.default +cnt: 6, ((T([8, 128, 768], f16),), {}) +Operator: aten.isnan.default +cnt: 6, ((T([8, 128, 768], f16),), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([8, 128], i64), T([8, 128], b8), 1), {}) +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([1024, 768], f16), T([768, 50005], f16, stride=(1, 768))), {}) +cnt: 1, ((T([50005, 1024], f16, stride=(1, 50005)), T([1024, 768], f16)), {}) +cnt: 
1, ((T([1024, 50005], f16), T([50005, 768], f16)), {}) +cnt: 12, ((T([1024, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 1024], f16, stride=(1, 768)), T([1024, 3072], f16)), {}) +cnt: 12, ((T([1024, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 1024], f16, stride=(1, 3072)), T([1024, 768], f16)), {}) +cnt: 72, ((T([1024, 768], f16), T([768, 768], f16)), {}) +cnt: 72, ((T([768, 1024], f16, stride=(1, 768)), T([1024, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([8, 128, 768], f16), 27.712812921102035), {}) +cnt: 36, ((T([8, 128, 768], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 32, ((T([8, 128, 768], f16), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 32, ((T([8, 128, 768], f16), T([8, 128, 768], f16), [768], T([8, 128, 1], f32), T([8, 128, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([8, 128], i64), 1), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([1024, 50005], f16), T([1024], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([1024, 50005], f16), T([1024], i64), None, 1, -100), {}) +Operator: aten.sub.Tensor +cnt: 1, ((T([8], i64), 1), {}) +Operator: aten.sum.SymInt +cnt: 84, ((T([1024, 768], f16), [0], True), {}) +cnt: 12, ((T([1024, 3072], f16), [0], True), {}) +Operator: aten.sum.dim_IntList +cnt: 1, ((T([8, 128], b8), [1]), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PegasusForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PegasusForCausalLM_training.txt new file mode 100644 index 000000000..1341c2798 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PegasusForCausalLM_training.txt @@ -0,0 +1,72 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([1024, 50265], f16), 1, False), {}) +Operator: 
aten._log_softmax_backward_data.default +cnt: 1, ((T([1024, 50265], f16), T([1024, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([128, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([128, 128, 128], f16), T([128, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([8, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([8, 128, 16, 64], f16), [8, 128, 1024]), {}) +cnt: 1, ((T([1024, 50265], f16), [8, 128, 50265]), {}) +cnt: 12, ((T([8, 16, 128, 64], f16), [128, 128, 64]), {}) +cnt: 12, ((T([8, 128, 1024], f16), [1024, 1024]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([128], i64), 1), {}) +cnt: 1, ((T([8, 128, 1024], f16), T([128, 1024], f16)), {}) +cnt: 12, ((T([8, 16, 128, 128], f16), T([8, 1, 128, 128], f16)), {}) +cnt: 72, ((T([8, 128, 1024], f16), T([8, 128, 1024], f16)), {}) +cnt: 1, ((T([50265, 1024], f16), T([50265, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([1024], f16), T([1024, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 12, ((T([4096], f16), T([1024, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 12, ((T([1024], f16), T([1024, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([128, 128, 64], f16), T([128, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 24, ((T([128, 128, 128], f16), T([128, 128, 64], f16)), {}) +cnt: 12, ((T([128, 128, 128], f16, stride=(16384, 1, 128)), T([128, 128, 64], f16)), {}) +cnt: 12, ((T([128, 64, 128], f16, stride=(8192, 1, 64)), T([128, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([8, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([8, 128], i64), T([8, 128], i64)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50265, 1024], f16), 
T([8, 128], i64), 0), {}) +cnt: 1, ((T([1024, 1024], f16), T([128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([8, 128, 1024], f16), T([8, 128], i64), 50265, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([8, 128, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([8, 128, 4096], f16), T([8, 128, 4096], f16)), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([1024, 1024], f16), T([1024, 50265], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([50265, 1024], f16, stride=(1, 50265)), T([1024, 1024], f16)), {}) +cnt: 1, ((T([1024, 50265], f16), T([50265, 1024], f16)), {}) +cnt: 12, ((T([1024, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 12, ((T([1024, 1024], f16, stride=(1, 1024)), T([1024, 4096], f16)), {}) +cnt: 12, ((T([1024, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 12, ((T([4096, 1024], f16, stride=(1, 4096)), T([1024, 1024], f16)), {}) +cnt: 48, ((T([1024, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 48, ((T([1024, 1024], f16, stride=(1, 1024)), T([1024, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([8, 128, 1024], f16), 1.0), {}) +cnt: 24, ((T([8, 128, 1024], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([8, 128, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([8, 128, 1024], f16), T([8, 128, 1024], f16), [1024], T([8, 128, 1], f32), T([8, 128, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([1024, 50265], f16), T([1024], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([1024, 50265], f16), T([1024], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 60, ((T([1024, 1024], f16), [0], 
True), {}) +cnt: 12, ((T([1024, 4096], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PegasusForConditionalGeneration_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PegasusForConditionalGeneration_training.txt new file mode 100644 index 000000000..970513d4b --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/PegasusForConditionalGeneration_training.txt @@ -0,0 +1,79 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([512, 50265], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([512, 50265], f16), T([512, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 36, ((T([64, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 36, ((T([64, 128, 128], f16), T([64, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([4, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 108, ((T([4, 128, 16, 64], f16), [4, 128, 1024]), {}) +cnt: 1, ((T([512, 50265], f16), [4, 128, 50265]), {}) +cnt: 36, ((T([4, 16, 128, 64], f16), [64, 128, 64]), {}) +cnt: 36, ((T([4, 128, 1024], f16), [512, 1024]), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([4, 128, 1024], f16), T([128, 1024], f16)), {}) +cnt: 191, ((T([4, 128, 1024], f16), T([4, 128, 1024], f16)), {}) +cnt: 1, ((T([128], i64), 1), {}) +cnt: 12, ((T([4, 16, 128, 128], f16), T([4, 1, 128, 128], f16)), {}) +cnt: 1, ((T([4, 128, 50265], f16), T([1, 50265], f16)), {}) +cnt: 2, ((T([50265, 1024], f16), T([50265, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 144, ((T([1024], f16), T([512, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([4096], f16), T([512, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([1024], f16), T([512, 4096], f16), 
T([4096, 1024], f16, stride=(1, 4096))), {}) +Operator: aten.any.default +cnt: 24, ((T([4, 128, 1024], b8),), {}) +Operator: aten.bmm.default +cnt: 72, ((T([64, 128, 64], f16), T([64, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 72, ((T([64, 128, 128], f16), T([64, 128, 64], f16)), {}) +cnt: 36, ((T([64, 128, 128], f16, stride=(16384, 1, 128)), T([64, 128, 64], f16)), {}) +cnt: 36, ((T([64, 64, 128], f16, stride=(8192, 1, 64)), T([64, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 3, ((T([4, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 3, ((T([4, 128], i64), T([4, 128], i64)), {}) +Operator: aten.embedding.default +cnt: 2, ((T([50265, 1024], f16), T([4, 128], i64), 0), {}) +cnt: 2, ((T([1024, 1024], f16), T([128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 2, ((T([4, 128, 1024], f16), T([4, 128], i64), 50265, 0, False), {}) +Operator: aten.gelu.default +cnt: 24, ((T([4, 128, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 24, ((T([4, 128, 4096], f16), T([4, 128, 4096], f16)), {}) +Operator: aten.isinf.default +cnt: 12, ((T([4, 128, 1024], f16),), {}) +Operator: aten.isnan.default +cnt: 12, ((T([4, 128, 1024], f16),), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([512, 1024], f16), T([1024, 50265], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([50265, 512], f16, stride=(1, 50265)), T([512, 1024], f16)), {}) +cnt: 1, ((T([512, 50265], f16), T([50265, 1024], f16)), {}) +cnt: 24, ((T([512, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 24, ((T([1024, 512], f16, stride=(1, 1024)), T([512, 4096], f16)), {}) +cnt: 24, ((T([512, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 24, ((T([4096, 512], f16, stride=(1, 4096)), T([512, 1024], f16)), {}) +cnt: 144, ((T([512, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 144, ((T([1024, 512], f16, stride=(1, 
1024)), T([512, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([4, 128, 1024], f16), 1.0), {}) +cnt: 72, ((T([4, 128, 1024], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 62, ((T([4, 128, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 62, ((T([4, 128, 1024], f16), T([4, 128, 1024], f16), [1024], T([4, 128, 1], f32), T([4, 128, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([512, 50265], f16), T([512], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([512, 50265], f16), T([512], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 168, ((T([512, 1024], f16), [0], True), {}) +cnt: 24, ((T([512, 4096], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/RobertaForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/RobertaForCausalLM_training.txt new file mode 100644 index 000000000..25b78750d --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/RobertaForCausalLM_training.txt @@ -0,0 +1,94 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([508, 30522], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([508, 30522], f16), T([508, 30522], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([4, 12, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([4, 12, 128, 128], f16), T([4, 12, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([4, 1, 1, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([4, 128], b8),), {'dtype': i32}) +cnt: 1, ((T([4, 128], i64),), {'dtype': i32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([4, 128], i32),), {'dtype': i64}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([4, 12, 128, 64], 
f16), [48, 128, 64]), {}) +cnt: 12, ((T([4, 12, 64, 128], f16), [48, 64, 128]), {}) +cnt: 12, ((T([48, 128, 128], f16), [4, 12, 128, 128]), {}) +cnt: 12, ((T([48, 128, 64], f16), [4, 12, 128, 64]), {}) +cnt: 24, ((T([4, 128, 12, 64], f16), [4, 128, 768]), {}) +cnt: 12, ((T([4, 128, 768], f16), [512, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([4, 128], i32), 0), {}) +cnt: 1, ((T([4, 128], i64), 0), {}) +cnt: 73, ((T([4, 128, 768], f16), T([4, 128, 768], f16)), {}) +cnt: 12, ((T([4, 12, 128, 128], f16), T([4, 1, 1, 128], f16)), {}) +cnt: 1, ((T([30522, 768], f16), T([30522, 768], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([4, 128, 768], f16), T([4, 128, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 49, ((T([768], f16), T([512, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([512, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([512, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([30522], f16), T([512, 768], f16), T([768, 30522], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([48, 128, 64], f16), T([48, 64, 128], f16)), {}) +cnt: 12, ((T([48, 128, 128], f16), T([48, 128, 64], f16)), {}) +cnt: 12, ((T([48, 128, 128], f16, stride=(16384, 1, 128)), T([48, 128, 64], f16)), {}) +cnt: 12, ((T([48, 128, 64], f16), T([48, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 12, ((T([48, 64, 128], f16, stride=(8192, 1, 64)), T([48, 128, 128], f16)), {}) +cnt: 12, ((T([48, 128, 128], f16), T([48, 128, 64], f16, stride=(8192, 1, 128))), {}) +Operator: aten.clone.default +cnt: 2, ((T([4, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([4, 128], i64), T([4, 128], i64)), {}) +Operator: aten.cumsum.default +cnt: 1, ((T([4, 128], i32), 1), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([4, 12, 128, 128], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 768], f16), T([4, 128], i64), 0), {}) +cnt: 1, ((T([2, 768], 
f16), T([4, 128], i64, stride=(0, 1))), {}) +cnt: 1, ((T([512, 768], f16), T([4, 128], i64), 0), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([4, 128, 768], f16), T([4, 128], i64), 512, 0, False), {}) +cnt: 1, ((T([4, 128, 768], f16), T([4, 128], i64, stride=(0, 1)), 2, -1, False), {}) +cnt: 1, ((T([4, 128, 768], f16), T([4, 128], i64), 30522, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([4, 128, 3072], f16),), {}) +cnt: 1, ((T([4, 128, 768], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([4, 128, 768], f16), T([4, 128, 768], f16)), {}) +cnt: 12, ((T([4, 128, 3072], f16), T([4, 128, 3072], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([512, 30522], f16), T([30522, 768], f16)), {}) +cnt: 1, ((T([30522, 512], f16, stride=(1, 30522)), T([512, 768], f16)), {}) +cnt: 49, ((T([512, 768], f16), T([768, 768], f16)), {}) +cnt: 49, ((T([768, 512], f16, stride=(1, 768)), T([512, 768], f16)), {}) +cnt: 12, ((T([512, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 512], f16, stride=(1, 768)), T([512, 3072], f16)), {}) +cnt: 12, ((T([512, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 512], f16, stride=(1, 3072)), T([512, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([4, 1, 1, 128], f16), -65504.0), {}) +cnt: 1, ((T([4, 128], i32), T([4, 128], i32)), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([4, 128, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([4, 128, 768], f16), T([4, 128, 768], f16), [768], T([4, 128, 1], f32), T([4, 128, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([4, 128], i64), 0), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([508, 30522], f16), T([508], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([508, 30522], f16), T([508], i64), None, 1, -100), {}) 
+Operator: aten.rsub.Scalar +cnt: 1, ((T([4, 1, 1, 128], f16), 1.0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([4, 127, 30522], f16), [4, 127, 30522], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([4, 127, 30522], f16), [4, 128, 30522], 1, 0, -1, 1), {}) +cnt: 1, ((T([4, 128, 30522], f16), [4, 128, 30522], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([512, 30522], f16), [0], True), {}) +cnt: 61, ((T([512, 768], f16), [0], True), {}) +cnt: 12, ((T([512, 3072], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/RobertaForQuestionAnswering_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/RobertaForQuestionAnswering_training.txt new file mode 100644 index 000000000..02cf28ea0 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/RobertaForQuestionAnswering_training.txt @@ -0,0 +1,97 @@ +Operator: aten._log_softmax.default +cnt: 2, ((T([64, 128], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 2, ((T([64, 128], f16), T([64, 128], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([64, 12, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([64, 12, 128, 128], f16), T([64, 12, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([64, 1, 1, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([64, 128], b8),), {'dtype': i32}) +cnt: 1, ((T([64, 128], i64),), {'dtype': i32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([64, 128], i32),), {'dtype': i64}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([64, 12, 128, 64], f16), [768, 128, 64]), {}) +cnt: 12, ((T([64, 12, 64, 128], f16), [768, 64, 128]), {}) +cnt: 12, ((T([768, 128, 128], f16), [64, 12, 128, 128]), {}) +cnt: 12, ((T([768, 128, 64], f16), [64, 12, 128, 64]), {}) +cnt: 24, ((T([64, 128, 12, 64], f16), [64, 128, 768]), {}) +cnt: 12, ((T([64, 128, 768], f16), [8192, 
768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([64, 128], i32), 0), {}) +cnt: 1, ((T([64, 128], i64), 0), {}) +cnt: 73, ((T([64, 128, 768], f16), T([64, 128, 768], f16)), {}) +cnt: 12, ((T([64, 12, 128, 128], f16), T([64, 1, 1, 128], f16)), {}) +cnt: 1, ((T([], f16), T([], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([64, 128, 768], f16), T([64, 128, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([768], f16), T([8192, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([8192, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([8192, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([2], f16), T([8192, 768], f16), T([768, 2], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([768, 128, 64], f16), T([768, 64, 128], f16)), {}) +cnt: 12, ((T([768, 128, 128], f16), T([768, 128, 64], f16)), {}) +cnt: 12, ((T([768, 128, 128], f16, stride=(16384, 1, 128)), T([768, 128, 64], f16)), {}) +cnt: 12, ((T([768, 128, 64], f16), T([768, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 12, ((T([768, 64, 128], f16, stride=(8192, 1, 64)), T([768, 128, 128], f16)), {}) +cnt: 12, ((T([768, 128, 128], f16), T([768, 128, 64], f16, stride=(8192, 1, 128))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 128, 1], f16), T([64, 128, 1], f16)], 2), {}) +Operator: aten.clamp.default +cnt: 2, ((T([64], i64), 0, 128), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 128], i64),), {}) +cnt: 2, ((T([64], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 128], i64), T([64, 128], i64)), {}) +cnt: 2, ((T([64], i64), T([64], i64)), {}) +Operator: aten.cumsum.default +cnt: 1, ((T([64, 128], i32), 1), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([64, 12, 128, 128], f16), 8.0), {}) +cnt: 2, ((T([], f16), 2), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 768], f16), T([64, 128], i64), 0), {}) +cnt: 1, ((T([2, 768], f16), T([64, 128], i64, stride=(0, 
1))), {}) +cnt: 1, ((T([512, 768], f16), T([64, 128], i64), 0), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([64, 128, 768], f16), T([64, 128], i64), 512, 0, False), {}) +cnt: 1, ((T([64, 128, 768], f16), T([64, 128], i64, stride=(0, 1)), 2, -1, False), {}) +cnt: 1, ((T([64, 128, 768], f16), T([64, 128], i64), 30522, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([64, 128, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([64, 128, 3072], f16), T([64, 128, 3072], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([8192, 2], f16), T([2, 768], f16)), {}) +cnt: 1, ((T([2, 8192], f16, stride=(1, 2)), T([8192, 768], f16)), {}) +cnt: 12, ((T([8192, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 8192], f16, stride=(1, 768)), T([8192, 3072], f16)), {}) +cnt: 12, ((T([8192, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 8192], f16, stride=(1, 3072)), T([8192, 768], f16)), {}) +cnt: 48, ((T([8192, 768], f16), T([768, 768], f16)), {}) +cnt: 48, ((T([768, 8192], f16, stride=(1, 768)), T([8192, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([64, 1, 1, 128], f16), -65504.0), {}) +cnt: 1, ((T([64, 128], i32), T([64, 128], i32)), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([64, 128, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([64, 128, 768], f16), T([64, 128, 768], f16), [768], T([64, 128, 1], f32), T([64, 128, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([64, 128], i64), 0), {}) +Operator: aten.nll_loss_backward.default +cnt: 2, ((T([], f16), T([64, 128], f16), T([64], i64), None, 1, 128, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 2, ((T([64, 128], f16), T([64], i64), None, 1, 128), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([64, 1, 1, 128], f16), 1.0), {}) +Operator: aten.split.Tensor +cnt: 1, ((T([64, 128, 2], f16), 
1, -1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([8192, 2], f16), [0], True), {}) +cnt: 60, ((T([8192, 768], f16), [0], True), {}) +cnt: 12, ((T([8192, 3072], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/Speech2Text2ForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/Speech2Text2ForCausalLM_training.txt new file mode 100644 index 000000000..a816e067e --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/Speech2Text2ForCausalLM_training.txt @@ -0,0 +1,82 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([8192, 10000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([8192, 10000], f16), T([8192, 10000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 6, ((T([256, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 6, ((T([256, 128, 128], f16), T([256, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([64, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([64, 128], b8),), {'dtype': i32}) +cnt: 1, ((T([64, 128], i64),), {'dtype': i32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([64, 128], i32),), {'dtype': i64}) +Operator: aten._unsafe_view.default +cnt: 18, ((T([64, 128, 4, 64], f16), [64, 128, 256]), {}) +cnt: 1, ((T([8192, 10000], f16), [64, 128, 10000]), {}) +cnt: 6, ((T([64, 4, 128, 64], f16), [256, 128, 64]), {}) +cnt: 6, ((T([64, 128, 256], f16), [8192, 256]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([128], i64), 1), {}) +cnt: 1, ((T([64, 128], i32), 0), {}) +cnt: 1, ((T([64, 128], i64), 1), {}) +cnt: 37, ((T([64, 128, 256], f16), T([64, 128, 256], f16)), {}) +cnt: 6, ((T([64, 4, 128, 128], f16), T([64, 1, 128, 128], f16)), {}) +cnt: 1, ((T([10000, 256], f16), T([10000, 256], f16)), {}) +Operator: 
aten.addmm.default +cnt: 24, ((T([256], f16), T([8192, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 6, ((T([2048], f16), T([8192, 256], f16), T([256, 2048], f16, stride=(1, 256))), {}) +cnt: 6, ((T([256], f16), T([8192, 2048], f16), T([2048, 256], f16, stride=(1, 2048))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([256, 128, 64], f16), T([256, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 12, ((T([256, 128, 128], f16), T([256, 128, 64], f16)), {}) +cnt: 6, ((T([256, 128, 128], f16, stride=(16384, 1, 128)), T([256, 128, 64], f16)), {}) +cnt: 6, ((T([256, 64, 128], f16, stride=(8192, 1, 64)), T([256, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([64, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([64, 128], i64), T([64, 128], i64)), {}) +Operator: aten.cumsum.default +cnt: 1, ((T([64, 128], i32), 1), {}) +Operator: aten.embedding.default +cnt: 1, ((T([10000, 256], f16), T([64, 128], i64), 1), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([64, 128, 256], f16), T([64, 128], i64), 10000, 1, False), {}) +Operator: aten.index_select.default +cnt: 1, ((T([1026, 256], f16), 0, T([8192], i64)), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([8192, 256], f16), T([256, 10000], f16, stride=(1, 256))), {}) +cnt: 1, ((T([10000, 8192], f16, stride=(1, 10000)), T([8192, 256], f16)), {}) +cnt: 1, ((T([8192, 10000], f16), T([10000, 256], f16)), {}) +cnt: 6, ((T([8192, 256], f16), T([256, 2048], f16)), {}) +cnt: 6, ((T([256, 8192], f16, stride=(1, 256)), T([8192, 2048], f16)), {}) +cnt: 6, ((T([8192, 2048], f16), T([2048, 256], f16)), {}) +cnt: 6, ((T([2048, 8192], f16, stride=(1, 2048)), T([8192, 256], f16)), {}) +cnt: 24, ((T([8192, 256], f16), T([256, 256], f16)), {}) +cnt: 24, ((T([256, 8192], f16, stride=(1, 256)), T([8192, 256], f16)), {}) 
+Operator: aten.mul.Tensor +cnt: 2, ((T([64, 128, 256], f16), 16.0), {}) +cnt: 1, ((T([64, 128], i32), T([64, 128], i32)), {}) +cnt: 12, ((T([64, 128, 256], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 12, ((T([64, 128, 256], f16), [256], T([256], f16), T([256], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 12, ((T([64, 128, 256], f16), T([64, 128, 256], f16), [256], T([64, 128, 1], f32), T([64, 128, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([64, 128], i64), 1), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([8192, 10000], f16), T([8192], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([8192, 10000], f16), T([8192], i64), None, 1, -100), {}) +Operator: aten.relu.default +cnt: 6, ((T([64, 128, 2048], f16),), {}) +Operator: aten.sum.SymInt +cnt: 30, ((T([8192, 256], f16), [0], True), {}) +cnt: 6, ((T([8192, 2048], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 6, ((T([64, 128, 2048], f16), T([64, 128, 2048], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/TrOCRForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/TrOCRForCausalLM_training.txt new file mode 100644 index 000000000..97c3b304c --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/TrOCRForCausalLM_training.txt @@ -0,0 +1,73 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([1024, 50265], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([1024, 50265], f16), T([1024, 50265], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([128, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([128, 128, 128], f16), T([128, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) 
+cnt: 1, ((T([8, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([8, 128, 16, 64], f16), [8, 128, 1024]), {}) +cnt: 1, ((T([1024, 50265], f16), [8, 128, 50265]), {}) +cnt: 12, ((T([8, 16, 128, 64], f16), [128, 128, 64]), {}) +cnt: 12, ((T([8, 128, 1024], f16), [1024, 1024]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([8, 128], i64, stride=(0, 1)), 2), {}) +cnt: 73, ((T([8, 128, 1024], f16), T([8, 128, 1024], f16)), {}) +cnt: 1, ((T([128], i64), 1), {}) +cnt: 12, ((T([8, 16, 128, 128], f16), T([8, 1, 128, 128], f16)), {}) +cnt: 1, ((T([50265, 1024], f16), T([50265, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([1024], f16), T([1024, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 12, ((T([4096], f16), T([1024, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 12, ((T([1024], f16), T([1024, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([128, 128, 64], f16), T([128, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 24, ((T([128, 128, 128], f16), T([128, 128, 64], f16)), {}) +cnt: 12, ((T([128, 128, 128], f16, stride=(16384, 1, 128)), T([128, 128, 64], f16)), {}) +cnt: 12, ((T([128, 64, 128], f16, stride=(8192, 1, 64)), T([128, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([8, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([8, 128], i64), T([8, 128], i64)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50265, 1024], f16), T([8, 128], i64), 1), {}) +cnt: 1, ((T([514, 1024], f16), T([8, 128], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([8, 128, 1024], f16), T([8, 128], i64), 514, -1, False), {}) +cnt: 1, ((T([8, 128, 1024], f16), T([8, 128], i64), 50265, 1, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([8, 128, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([8, 
128, 4096], f16), T([8, 128, 4096], f16)), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([1024, 1024], f16), T([1024, 50265], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([50265, 1024], f16, stride=(1, 50265)), T([1024, 1024], f16)), {}) +cnt: 1, ((T([1024, 50265], f16), T([50265, 1024], f16)), {}) +cnt: 12, ((T([1024, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 12, ((T([1024, 1024], f16, stride=(1, 1024)), T([1024, 4096], f16)), {}) +cnt: 12, ((T([1024, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 12, ((T([4096, 1024], f16, stride=(1, 4096)), T([1024, 1024], f16)), {}) +cnt: 48, ((T([1024, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 48, ((T([1024, 1024], f16, stride=(1, 1024)), T([1024, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([8, 128, 1024], f16), 1.0), {}) +cnt: 24, ((T([8, 128, 1024], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([8, 128, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([8, 128, 1024], f16), T([8, 128, 1024], f16), [1024], T([8, 128, 1], f32), T([8, 128, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([1024, 50265], f16), T([1024], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([1024, 50265], f16), T([1024], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 60, ((T([1024, 1024], f16), [0], True), {}) +cnt: 12, ((T([1024, 4096], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/XGLMForCausalLM_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/XGLMForCausalLM_training.txt new file mode 100644 index 000000000..a8317b48f --- /dev/null +++ 
b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/XGLMForCausalLM_training.txt @@ -0,0 +1,88 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([256, 256008], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([256, 256008], f16), T([256, 256008], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([32, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([32, 128, 128], f16), T([32, 128, 128], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([128, 128], f32),), {'dtype': f16}) +cnt: 1, ((T([2, 1, 128, 128], f16, stride=(0, 16384, 128, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([2, 128], b8),), {'dtype': i32}) +cnt: 1, ((T([2, 128], i64),), {'dtype': i32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([2, 128], i32),), {'dtype': i64}) +Operator: aten._unsafe_view.default +cnt: 72, ((T([2, 128, 16, 64], f16), [2, 128, 1024]), {}) +cnt: 1, ((T([256, 256008], f16), [2, 128, 256008]), {}) +cnt: 24, ((T([2, 16, 128, 64], f16), [32, 128, 64]), {}) +cnt: 24, ((T([2, 128, 1024], f16), [256, 1024]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([128], i64), 1), {}) +cnt: 1, ((T([2, 128], i32), 0), {}) +cnt: 1, ((T([2, 128], i64), 1), {}) +cnt: 145, ((T([2, 128, 1024], f16), T([2, 128, 1024], f16)), {}) +cnt: 24, ((T([2, 16, 128, 128], f16), T([2, 1, 128, 128], f16)), {}) +cnt: 1, ((T([256008, 1024], f16), T([256008, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 96, ((T([1024], f16), T([256, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([4096], f16), T([256, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([1024], f16), T([256, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +Operator: aten.bmm.default +cnt: 48, ((T([32, 128, 64], f16), T([32, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 48, ((T([32, 128, 128], f16), T([32, 128, 64], f16)), {}) 
+cnt: 24, ((T([32, 128, 128], f16, stride=(16384, 1, 128)), T([32, 128, 64], f16)), {}) +cnt: 24, ((T([32, 64, 128], f16, stride=(8192, 1, 64)), T([32, 128, 128], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([2, 128], i64),), {}) +cnt: 1, ((T([2, 127], i64, stride=(128, 1)),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([2, 128], i64), T([2, 128], i64)), {}) +cnt: 1, ((T([2, 127], i64, stride=(128, 1)), T([2, 127], i64)), {}) +Operator: aten.cumsum.default +cnt: 1, ((T([2, 128], i32), 1), {}) +Operator: aten.embedding.default +cnt: 1, ((T([256008, 1024], f16), T([2, 128], i64), 1), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([2, 128, 1024], f16), T([2, 128], i64), 256008, 1, False), {}) +Operator: aten.fill_.Tensor +cnt: 1, ((T([2], i64, stride=(128,)), T([], i64)), {}) +Operator: aten.gelu.default +cnt: 24, ((T([2, 128, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 24, ((T([2, 128, 4096], f16), T([2, 128, 4096], f16)), {}) +Operator: aten.index_select.default +cnt: 1, ((T([2050, 1024], f16), 0, T([256], i64)), {}) +Operator: aten.lt.Tensor +cnt: 1, ((T([128], i64), T([128, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([128, 128], f32), T([128, 128], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([256, 1024], f16), T([1024, 256008], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([256008, 256], f16, stride=(1, 256008)), T([256, 1024], f16)), {}) +cnt: 1, ((T([256, 256008], f16), T([256008, 1024], f16)), {}) +cnt: 24, ((T([256, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 24, ((T([1024, 256], f16, stride=(1, 1024)), T([256, 4096], f16)), {}) +cnt: 24, ((T([256, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 24, ((T([4096, 256], f16, stride=(1, 4096)), T([256, 1024], f16)), {}) +cnt: 96, ((T([256, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 96, ((T([1024, 256], f16, stride=(1, 1024)), T([256, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([2, 128, 1024], f16), 32.0), {}) +cnt: 1, 
((T([2, 128], i32), T([2, 128], i32)), {}) +cnt: 48, ((T([2, 128, 1024], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 49, ((T([2, 128, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 49, ((T([2, 128, 1024], f16), T([2, 128, 1024], f16), [1024], T([2, 128, 1], f32), T([2, 128, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([2, 128], i64), 1), {}) +Operator: aten.new_zeros.default +cnt: 1, ((T([2, 128], i64), [2, 128]), {'dtype': i64, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([256, 256008], f16), T([256], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([256, 256008], f16), T([256], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 120, ((T([256, 1024], f16), [0], True), {}) +cnt: 24, ((T([256, 4096], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/XLNetLMHeadModel_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/XLNetLMHeadModel_training.txt new file mode 100644 index 000000000..f3056de63 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/XLNetLMHeadModel_training.txt @@ -0,0 +1,105 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([2048, 32000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([2048, 32000], f16), T([2048, 32000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 24, ((T([4, 16, 512, 512], f16), 3, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([4, 16, 512, 512], f16), T([4, 16, 512, 512], f16), 3, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([1024, 4, 1024], f32, stride=(1024, 0, 1)),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 24, ((T([1024, 4, 1024], 
f32),), {'dtype': f16, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 24, ((T([512, 4, 64, 16, 1], f16), [1, 2048, 1024]), {}) +cnt: 24, ((T([64, 16, 1024, 1, 1], f16), [1, 1024, 1024]), {}) +cnt: 24, ((T([4, 16, 512, 1, 64], f16), [64, 512, 64]), {}) +cnt: 24, ((T([1024, 4, 1, 16, 64], f16), [1, 4096, 1024]), {}) +cnt: 72, ((T([512, 4, 1, 16, 64], f16), [1, 2048, 1024]), {}) +Operator: aten.add.Tensor +cnt: 48, ((T([512, 4, 16, 64], f16), T([16, 64], f16)), {}) +cnt: 24, ((T([4, 16, 512, 512], f16), T([4, 16, 512, 512], f16)), {}) +cnt: 24, ((T([4, 16, 512, 512], f16), 0), {}) +cnt: 144, ((T([512, 4, 1024], f16), T([512, 4, 1024], f16)), {}) +cnt: 24, ((T([512, 4, 16, 64], f16, stride=(64, 524288, 32768, 1)), T([512, 4, 16, 64], f16, stride=(64, 524288, 32768, 1))), {}) +cnt: 1, ((T([32000, 1024], f16), T([32000, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 24, ((T([4096], f16), T([2048, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 24, ((T([1024], f16), T([2048, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +cnt: 1, ((T([32000], f16), T([2048, 1024], f16), T([1024, 32000], f16, stride=(1, 1024))), {}) +Operator: aten.bmm.default +cnt: 96, ((T([1, 2048, 1024], f16), T([1, 1024, 1024], f16)), {}) +cnt: 24, ((T([1, 4096, 1024], f16), T([1, 1024, 1024], f16)), {}) +cnt: 24, ((T([64, 512, 64], f16, stride=(64, 4096, 1)), T([64, 64, 512], f16, stride=(64, 1, 4096))), {}) +cnt: 24, ((T([64, 512, 64], f16, stride=(64, 4096, 1)), T([64, 64, 1024], f16, stride=(64, 1, 4096))), {}) +cnt: 48, ((T([64, 512, 512], f16), T([64, 512, 64], f16, stride=(64, 4096, 1))), {}) +cnt: 96, ((T([1, 1024, 2048], f16, stride=(2097152, 1, 1024)), T([1, 2048, 1024], f16)), {}) +cnt: 96, ((T([1, 2048, 1024], f16), T([1, 1024, 1024], f16, stride=(1048576, 1, 1024))), {}) +cnt: 24, ((T([64, 512, 512], f16, stride=(262144, 1, 512)), T([64, 512, 64], f16)), {}) +cnt: 24, ((T([64, 512, 64], f16), T([64, 64, 512], f16, stride=(64, 1, 4096))), {}) 
+cnt: 24, ((T([64, 64, 512], f16, stride=(64, 1, 4096)), T([64, 512, 1024], f16)), {}) +cnt: 24, ((T([64, 512, 1024], f16), T([64, 1024, 64], f16, stride=(64, 4096, 1))), {}) +cnt: 24, ((T([64, 64, 512], f16, stride=(64, 1, 4096)), T([64, 512, 512], f16)), {}) +cnt: 24, ((T([1, 1024, 4096], f16, stride=(4194304, 1, 1024)), T([1, 4096, 1024], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([1024, 512], f32), T([1024, 512], f32)], -1), {}) +Operator: aten.clone.default +cnt: 2, ((T([4, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([4, 512], i64), T([4, 512], i64)), {}) +cnt: 24, ((T([1024, 16, 64], f16), T([1024, 16, 64], f16, stride=(1, 1024, 16384))), {}) +Operator: aten.cos.default +cnt: 1, ((T([1024, 512], f32),), {}) +Operator: aten.div.Tensor +cnt: 1, ((T([512], f32), 1024), {}) +Operator: aten.embedding.default +cnt: 1, ((T([32000, 1024], f16), T([512, 4], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([512, 4, 1024], f16), T([512, 4], i64), 32000, -1, False), {}) +Operator: aten.gelu.default +cnt: 24, ((T([512, 4, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 24, ((T([512, 4, 4096], f16), T([512, 4, 4096], f16)), {}) +Operator: aten.index_add.default +cnt: 24, ((T([4, 16, 512, 1023], f16), 3, T([512], i64), T([4, 16, 512, 512], f16)), {}) +Operator: aten.index_select.default +cnt: 24, ((T([4, 16, 512, 1023], f16, stride=(8388608, 524288, 1023, 1)), 3, T([512], i64)), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 32000], f16), T([32000, 1024], f16)), {}) +cnt: 1, ((T([32000, 2048], f16, stride=(1, 32000)), T([2048, 1024], f16)), {}) +cnt: 24, ((T([2048, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 24, ((T([1024, 2048], f16, stride=(1, 1024)), T([2048, 4096], f16)), {}) +cnt: 24, ((T([2048, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 24, ((T([4096, 2048], f16, stride=(1, 4096)), T([2048, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([512], f32), 1), {}) +cnt: 1, ((T([1024, 1], 
f32), T([1, 512], f32)), {}) +cnt: 48, ((T([4, 16, 512, 512], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 48, ((T([512, 4, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 1, ((T([512, 4, 1024], f16, stride=(1024, 524288, 1)), T([512, 4, 1024], f16), [1024], T([512, 4, 1], f32), T([512, 4, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +cnt: 47, ((T([512, 4, 1024], f16), T([512, 4, 1024], f16), [1024], T([512, 4, 1], f32), T([512, 4, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 24, ((T([1024, 16, 64], f16, stride=(1, 1024, 16384)), [1024, 16, 64], [1024, 64, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.new_zeros.default +cnt: 24, ((T([4, 16, 512, 512], f16), [4, 16, 512, 1023]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([2048, 32000], f16), T([2048], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([2048, 32000], f16), T([2048], i64), None, 1, -100), {}) +Operator: aten.pow.Scalar +cnt: 1, ((10000, T([512], f32)), {}) +Operator: aten.reciprocal.default +cnt: 1, ((T([512], f32),), {}) +Operator: aten.sin.default +cnt: 1, ((T([1024, 512], f32),), {}) +Operator: aten.slice_backward.default +cnt: 24, ((T([4, 16, 1023, 512], f16), [4, 16, 1023, 512], 3, 0, 9223372036854775807, 1), {}) +cnt: 24, ((T([4, 16, 1023, 512], f16), [4, 16, 1024, 512], 2, 1, 9223372036854775807, 1), {}) +cnt: 24, ((T([4, 16, 1024, 512], f16), [4, 16, 1024, 512], 1, 0, 9223372036854775807, 1), {}) +cnt: 24, ((T([4, 16, 1024, 512], f16), [4, 16, 1024, 512], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([2048, 32000], f16), [0], True), {}) +cnt: 24, ((T([2048, 1024], f16), [0], True), {}) +cnt: 24, ((T([2048, 4096], f16), [0], 
True), {}) +cnt: 48, ((T([512, 4, 16, 64], f16, stride=(64, 524288, 32768, 1)), [0, 1], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/hf_train/YituTechConvBert_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/YituTechConvBert_training.txt new file mode 100644 index 000000000..d1a6dcccd --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/hf_train/YituTechConvBert_training.txt @@ -0,0 +1,119 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([512, 30522], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([512, 30522], f16), T([512, 30522], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([3072, 9, 1], f16), 1, False), {}) +cnt: 12, ((T([1, 6, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([1, 6, 512, 512], f16), T([1, 6, 512, 512], f16), -1, f16), {}) +cnt: 12, ((T([3072, 9, 1], f16), T([3072, 9, 1], f16), 1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([1, 1, 1, 512], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 12, ((T([1, 512, 54], f16), [1, 512, 54]), {}) +cnt: 12, ((T([1, 512, 384, 9], f16), [3072, 64, 9]), {}) +cnt: 12, ((T([3072, 64, 1], f16), [3072, 64, 1]), {}) +cnt: 12, ((T([6, 512, 512], f16), [1, 6, 512, 512]), {}) +cnt: 12, ((T([6, 512, 64], f16), [1, 6, 512, 64]), {}) +cnt: 12, ((T([512, 384], f16), [3072, 64, 1]), {}) +cnt: 24, ((T([1, 512, 6, 64], f16), [1, 512, 384]), {}) +Operator: aten.add.Tensor +cnt: 86, ((T([1, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 12, ((T([1, 512, 54], f16), T([54], f16)), {}) +cnt: 12, ((T([1, 6, 512, 512], f16), T([1, 1, 1, 512], f16)), {}) +cnt: 12, ((T([1, 512, 384], f16), T([1, 512, 384], f16)), {}) +cnt: 12, ((T([1, 512, 768], f16), T([1, 512, 768], f16, stride=(393216, 1, 512))), {}) +cnt: 1, ((T([30522, 768], f16), T([30522, 768], f16)), {}) +Operator: aten.add_.Tensor +cnt: 12, ((T([1, 384, 512], f16), 
T([384, 1], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([384], f16), T([512, 768], f16), T([768, 384], f16, stride=(1, 768))), {}) +cnt: 13, ((T([768], f16), T([512, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([512, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([512, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([30522], f16), T([512, 768], f16), T([768, 30522], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([1, 512, 384], f16, stride=(512, 1, 512)), T([1, 384, 54], f16, stride=(384, 1, 384))), {}) +cnt: 12, ((T([3072, 64, 9], f16), T([3072, 9, 1], f16)), {}) +cnt: 12, ((T([6, 512, 64], f16, stride=(64, 384, 1)), T([6, 64, 512], f16, stride=(64, 1, 384))), {}) +cnt: 24, ((T([6, 512, 512], f16), T([6, 512, 64], f16, stride=(64, 384, 1))), {}) +cnt: 12, ((T([6, 512, 512], f16, stride=(262144, 1, 512)), T([6, 512, 64], f16, stride=(64, 768, 1))), {}) +cnt: 12, ((T([6, 512, 64], f16, stride=(64, 768, 1)), T([6, 64, 512], f16, stride=(64, 1, 384))), {}) +cnt: 12, ((T([6, 64, 512], f16, stride=(64, 1, 384)), T([6, 512, 512], f16)), {}) +cnt: 12, ((T([3072, 9, 64], f16, stride=(576, 1, 9)), T([3072, 64, 1], f16)), {}) +cnt: 12, ((T([3072, 64, 1], f16), T([3072, 1, 9], f16)), {}) +cnt: 12, ((T([1, 384, 512], f16), T([1, 512, 54], f16)), {}) +cnt: 12, ((T([1, 512, 54], f16), T([1, 54, 384], f16)), {}) +Operator: aten.cat.default +cnt: 12, (([T([1, 512, 6, 64], f16), T([1, 512, 6, 64], f16)], 2), {}) +Operator: aten.clone.default +cnt: 2, ((T([1, 512], i64),), {}) +Operator: aten.convolution.default +cnt: 12, ((T([1, 768, 512], f16, stride=(393216, 1, 768)), T([768, 1, 9], f16), None, [1], [4], [1], False, [0], 768), {}) +cnt: 12, ((T([1, 768, 512], f16), T([384, 768, 1], f16), None, [1], [0], [1], False, [0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 12, ((T([1, 384, 512], f16, stride=(196608, 1, 384)), T([1, 768, 512], 
f16), T([384, 768, 1], f16), [0], [1], [0], [1], False, [0], 1, [True, True, False]), {}) +cnt: 12, ((T([1, 768, 512], f16), T([1, 768, 512], f16, stride=(393216, 1, 768)), T([768, 1, 9], f16), [0], [1], [4], [1], False, [0], 768, [True, True, False]), {}) +Operator: aten.copy_.default +cnt: 2, ((T([1, 512], i64), T([1, 512], i64)), {}) +cnt: 12, ((T([54, 384], f16), T([54, 384], f16, stride=(1, 54))), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([1, 6, 512, 512], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 768], f16), T([1, 512], i64), 0), {}) +cnt: 1, ((T([512, 768], f16), T([1, 512], i64)), {}) +cnt: 1, ((T([2, 768], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 2, -1, False), {}) +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 30522, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([1, 512, 3072], f16),), {}) +cnt: 1, ((T([1, 512, 768], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 12, ((T([1, 512, 3072], f16), T([1, 512, 3072], f16)), {}) +Operator: aten.im2col.default +cnt: 12, ((T([1, 384, 512, 1], f16), [9, 1], [1, 1], [4, 0], [1, 1]), {}) +Operator: aten.im2col_backward.default +cnt: 12, ((T([1, 3456, 512], f16, stride=(1769472, 1, 3456)), [512, 1], [9, 1], [1, 1], [4, 0], [1, 1]), {}) +Operator: aten.mm.default +cnt: 1, ((T([512, 30522], f16), T([30522, 768], f16)), {}) +cnt: 1, ((T([30522, 512], f16, stride=(1, 30522)), T([512, 768], f16)), {}) +cnt: 13, ((T([512, 768], f16), T([768, 768], f16)), {}) +cnt: 13, ((T([768, 512], f16, stride=(1, 768)), T([512, 768], f16)), {}) +cnt: 12, ((T([512, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 512], f16, stride=(1, 768)), T([512, 3072], f16)), {}) +cnt: 12, ((T([512, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 512], 
f16, stride=(1, 3072)), T([512, 768], f16)), {}) +cnt: 24, ((T([512, 384], f16, stride=(1, 512)), T([384, 768], f16)), {}) +cnt: 24, ((T([384, 512], f16), T([512, 768], f16)), {}) +cnt: 24, ((T([512, 384], f16), T([384, 768], f16)), {}) +cnt: 24, ((T([384, 512], f16, stride=(1, 384)), T([512, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([1, 1, 1, 512], f16), -65504.0), {}) +cnt: 12, ((T([1, 512, 384], f16, stride=(196608, 1, 512)), T([1, 512, 384], f16)), {}) +cnt: 12, ((T([1, 512, 384], f16), T([1, 512, 384], f16, stride=(196608, 1, 512))), {}) +cnt: 12, ((T([1, 512, 384], f16), T([1, 512, 384], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([1, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([1, 512, 768], f16), T([1, 512, 768], f16), [768], T([1, 512, 1], f32), T([1, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 12, ((T([54, 384], f16, stride=(1, 54)), [54, 384], [384, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([512, 30522], f16), T([512], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([512, 30522], f16), T([512], i64), None, 1, -100), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([1, 1, 1, 512], f16), 1.0), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([512, 30522], f16), [0], True), {}) +cnt: 25, ((T([512, 768], f16), [0], True), {}) +cnt: 12, ((T([512, 3072], f16), [0], True), {}) +cnt: 24, ((T([512, 384], f16, stride=(1, 512)), [0], True), {}) +cnt: 12, ((T([1, 512, 54], f16), [0, 1], True), {}) +cnt: 12, ((T([1, 384, 54], f16), [0], True), {}) +cnt: 12, ((T([1, 384, 512], f16, stride=(196608, 1, 384)), [0, 2], True), {}) +cnt: 24, ((T([512, 384], f16), [0], True), {}) diff --git 
a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/adv_inception_v3_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/adv_inception_v3_training.txt new file mode 100644 index 000000000..c11cd6890 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/adv_inception_v3_training.txt @@ -0,0 +1,239 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16)), {}) +cnt: 3, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16)), {}) +cnt: 3, ((T([128, 1280, 8, 8], f16), T([128, 1280, 8, 8], f16)), {}) +cnt: 14, ((T([128, 768, 17, 17], f16), T([128, 768, 17, 17], f16)), {}) +cnt: 5, ((T([128, 288, 35, 35], f16), T([128, 288, 35, 35], f16)), {}) +cnt: 3, ((T([128, 256, 35, 35], f16), T([128, 256, 35, 35], f16)), {}) +cnt: 3, ((T([128, 192, 35, 35], f16), T([128, 192, 35, 35], f16)), {}) +Operator: aten.add_.Tensor +cnt: 94, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([128, 192, 35, 35], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 256, 35, 35], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 4, ((T([128, 768, 17, 17], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), [3, 3], [1, 1], [1, 1]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([128, 1280, 8, 8], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 4, ((T([128, 
768, 17, 17], f16), T([128, 768, 17, 17], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), T([128, 288, 35, 35], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 256, 35, 35], f16), T([128, 256, 35, 35], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([128, 192, 35, 35], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +Operator: aten.cat.default +cnt: 1, (([T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), T([128, 96, 35, 35], f16), T([128, 32, 35, 35], f16)], 1), {}) +cnt: 2, (([T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), T([128, 96, 35, 35], f16), T([128, 64, 35, 35], f16)], 1), {}) +cnt: 1, (([T([128, 384, 17, 17], f16), T([128, 96, 17, 17], f16), T([128, 288, 17, 17], f16)], 1), {}) +cnt: 4, (([T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16)], 1), {}) +cnt: 1, (([T([128, 320, 8, 8], f16), T([128, 192, 8, 8], f16), T([128, 768, 8, 8], f16)], 1), {}) +cnt: 4, (([T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16)], 1), {}) +cnt: 2, (([T([128, 320, 8, 8], f16), T([128, 768, 8, 8], f16), T([128, 768, 8, 8], f16), T([128, 192, 8, 8], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 299, 299], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 299, 299], f16), T([32, 3, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([32, 32, 3, 3], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 73, 73], f16), T([80, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([192, 80, 3, 3], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 192, 35, 35], f16), T([64, 
192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([48, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([64, 48, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 64, 35, 35], f16), T([96, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([32, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 256, 35, 35], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 35, 35], f16), T([48, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 288, 35, 35], f16), T([64, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), T([48, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), T([384, 288, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 12, ((T([128, 768, 17, 17], f16), T([192, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 768, 17, 17], f16), T([128, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 17, 17], f16), T([192, 128, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 17, 17], f16), T([192, 128, 1, 
7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 768, 17, 17], f16), T([160, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([160, 160, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 160, 17, 17], f16), T([192, 160, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([160, 160, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 160, 17, 17], f16), T([192, 160, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([192, 192, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([192, 192, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([320, 192, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([192, 192, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([320, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([384, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([384, 384, 1, 3], f16), None, [1, 1], [0, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([384, 384, 3, 1], f16), None, [1, 1], [1, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([448, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([384, 448, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([192, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([320, 2048, 1, 
1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([384, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([448, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([192, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 192, 8, 8], f16), T([128, 2048, 8, 8], f16), T([192, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), T([384, 384, 3, 1], f16), [0], [1, 1], [1, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), T([384, 384, 1, 3], f16), [0], [1, 1], [0, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 384, 8, 8], f16), T([128, 448, 8, 8], f16), T([384, 448, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 448, 8, 8], f16), T([128, 2048, 8, 8], f16), T([448, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 8, 8], f16), T([128, 2048, 8, 8], f16), T([384, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 8, 8], f16), T([128, 2048, 8, 8], f16), T([320, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 8, 8], f16), T([128, 1280, 8, 8], f16), T([192, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 448, 8, 8], f16), T([128, 1280, 8, 8], f16), T([448, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 8, 8], f16), T([128, 1280, 8, 8], f16), 
T([384, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 8, 8], f16), T([128, 1280, 8, 8], f16), T([320, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 8, 8], f16), T([128, 192, 17, 17], f16), T([192, 192, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([192, 192, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([192, 192, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 12, ((T([128, 192, 17, 17], f16), T([128, 768, 17, 17], f16), T([192, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 8, 8], f16), T([128, 192, 17, 17], f16), T([320, 192, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 192, 17, 17], f16), T([128, 160, 17, 17], f16), T([192, 160, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), T([160, 160, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), T([160, 160, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([128, 768, 17, 17], f16), T([160, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 192, 17, 17], f16), T([128, 160, 17, 17], f16), T([192, 160, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([128, 128, 17, 17], f16), T([192, 
128, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), T([128, 128, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), T([128, 128, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 768, 17, 17], f16), T([128, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([128, 128, 17, 17], f16), T([192, 128, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 17, 17], f16), T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 96, 35, 35], f16), T([128, 64, 35, 35], f16), T([96, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 64, 35, 35], f16), T([128, 288, 35, 35], f16), T([64, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 17, 17], f16), T([128, 288, 35, 35], f16), T([384, 288, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 96, 35, 35], f16), T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 64, 35, 35], f16), T([128, 48, 35, 35], f16), T([64, 48, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 35, 35], f16), T([128, 288, 35, 35], f16), T([48, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 64, 35, 35], f16), T([128, 256, 35, 35], f16), T([64, 256, 1, 1], f16), [0], [1, 
1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 35, 35], f16), T([128, 256, 35, 35], f16), T([48, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 35, 35], f16), T([128, 192, 35, 35], f16), T([32, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 64, 35, 35], f16), T([128, 192, 35, 35], f16), T([64, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 35, 35], f16), T([128, 192, 35, 35], f16), T([48, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([128, 80, 73, 73], f16), T([192, 80, 3, 3], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([128, 64, 73, 73], f16), T([80, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([128, 32, 147, 147], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([128, 32, 149, 149], f16), T([32, 32, 3, 3], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([128, 3, 299, 299], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 299, 299], f16), T([128, 3, 299, 299], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2048, 8, 8], f16, stride=(2048, 1, 0, 0)), 64), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 64, 147, 147], f16), [3, 3], [2, 2]), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), [3, 3], [2, 2]), {}) +cnt: 1, 
((T([128, 288, 35, 35], f16), [3, 3], [2, 2]), {}) +cnt: 1, ((T([128, 768, 17, 17], f16), [3, 3], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 768, 8, 8], f16, stride=(81920, 64, 8, 1)), T([128, 768, 17, 17], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 768, 8, 8], i64)), {}) +cnt: 1, ((T([128, 288, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 288, 35, 35], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 288, 17, 17], i64)), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([128, 192, 71, 71], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 192, 35, 35], i64)), {}) +cnt: 1, ((T([128, 64, 73, 73], f16), T([128, 64, 147, 147], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 64, 73, 73], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 2048, 8, 8], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 32, 149, 149], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 12, ((T([128, 64, 35, 35], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), True, 0.1, 0.001), {}) +cnt: 7, ((T([128, 96, 35, 35], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 
0.001), {}) +cnt: 1, ((T([128, 32, 35, 35], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 384, 17, 17], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 96, 17, 17], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 0.001), {}) +cnt: 26, ((T([128, 192, 17, 17], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 6, ((T([128, 128, 17, 17], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 0.001), {}) +cnt: 12, ((T([128, 160, 17, 17], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 320, 8, 8], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 192, 8, 8], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 12, ((T([128, 384, 8, 8], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), True, 0.1, 0.001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 3, ((T([128, 192, 8, 8], f16), T([128, 192, 8, 8], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 12, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([128, 448, 8, 8], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([128, 320, 8, 8], f16), T([128, 320, 8, 8], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), True, 0.001, [True, True, True]), {}) +cnt: 26, ((T([128, 192, 
17, 17], f16), T([128, 192, 17, 17], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 12, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 0.001, [True, True, True]), {}) +cnt: 6, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 17, 17], f16), T([128, 96, 17, 17], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 0.001, [True, True, True]), {}) +cnt: 7, ((T([128, 96, 35, 35], f16), T([128, 96, 35, 35], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 0.001, [True, True, True]), {}) +cnt: 12, ((T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 384, 17, 17], f16), T([128, 384, 17, 17], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([128, 48, 35, 35], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 35, 35], f16), T([128, 32, 35, 35], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([128, 192, 71, 71], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([128, 80, 73, 73], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([128, 64, 147, 147], f16), T([64], f16), 
T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([128, 32, 147, 147], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([128, 32, 149, 149], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 32, 149, 149], f16),), {}) +cnt: 1, ((T([128, 32, 147, 147], f16),), {}) +cnt: 1, ((T([128, 64, 147, 147], f16),), {}) +cnt: 1, ((T([128, 80, 73, 73], f16),), {}) +cnt: 1, ((T([128, 192, 71, 71], f16),), {}) +cnt: 12, ((T([128, 64, 35, 35], f16),), {}) +cnt: 3, ((T([128, 48, 35, 35], f16),), {}) +cnt: 7, ((T([128, 96, 35, 35], f16),), {}) +cnt: 1, ((T([128, 32, 35, 35], f16),), {}) +cnt: 1, ((T([128, 384, 17, 17], f16),), {}) +cnt: 1, ((T([128, 96, 17, 17], f16),), {}) +cnt: 26, ((T([128, 192, 17, 17], f16),), {}) +cnt: 6, ((T([128, 128, 17, 17], f16),), {}) +cnt: 12, ((T([128, 160, 17, 17], f16),), {}) +cnt: 3, ((T([128, 320, 8, 8], f16),), {}) +cnt: 3, ((T([128, 192, 8, 8], f16),), {}) +cnt: 12, ((T([128, 384, 8, 8], f16),), {}) +cnt: 2, ((T([128, 448, 8, 8], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([128, 192, 8, 8], f16, stride=(131072, 64, 8, 1)), T([128, 192, 8, 8], f16), 0), {}) +cnt: 8, ((T([128, 384, 8, 8], f16, stride=(131072, 64, 8, 1)), T([128, 384, 8, 8], f16), 0), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), 0), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([128, 448, 8, 8], f16), 0), {}) +cnt: 2, ((T([128, 
320, 8, 8], f16, stride=(131072, 64, 8, 1)), T([128, 320, 8, 8], f16), 0), {}) +cnt: 1, ((T([128, 192, 8, 8], f16, stride=(81920, 64, 8, 1)), T([128, 192, 8, 8], f16), 0), {}) +cnt: 10, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), 0), {}) +cnt: 1, ((T([128, 320, 8, 8], f16, stride=(81920, 64, 8, 1)), T([128, 320, 8, 8], f16), 0), {}) +cnt: 16, ((T([128, 192, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 192, 17, 17], f16), 0), {}) +cnt: 12, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), 0), {}) +cnt: 6, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), 0), {}) +cnt: 1, ((T([128, 96, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 96, 17, 17], f16), 0), {}) +cnt: 4, ((T([128, 96, 35, 35], f16), T([128, 96, 35, 35], f16), 0), {}) +cnt: 4, ((T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 384, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 384, 17, 17], f16), 0), {}) +cnt: 6, ((T([128, 64, 35, 35], f16, stride=(352800, 1225, 35, 1)), T([128, 64, 35, 35], f16), 0), {}) +cnt: 2, ((T([128, 96, 35, 35], f16, stride=(352800, 1225, 35, 1)), T([128, 96, 35, 35], f16), 0), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([128, 48, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 32, 35, 35], f16, stride=(313600, 1225, 35, 1)), T([128, 32, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 96, 35, 35], f16, stride=(313600, 1225, 35, 1)), T([128, 96, 35, 35], f16), 0), {}) +cnt: 2, ((T([128, 64, 35, 35], f16, stride=(313600, 1225, 35, 1)), T([128, 64, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([128, 192, 71, 71], f16), 0), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([128, 80, 73, 73], f16), 0), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([128, 64, 147, 147], f16), 0), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([128, 32, 147, 147], f16), 0), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([128, 32, 149, 149], f16), 0), {}) diff --git 
a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/beit_base_patch16_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/beit_base_patch16_224_training.txt new file mode 100644 index 000000000..c4df651ef --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/beit_base_patch16_224_training.txt @@ -0,0 +1,100 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([64, 12, 197, 197], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([64, 12, 197, 197], f16), T([64, 12, 197, 197], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([64, 12, 197, 64], f16), [768, 197, 64]), {}) +cnt: 12, ((T([64, 12, 64, 197], f16), [768, 64, 197]), {}) +cnt: 12, ((T([768, 197, 197], f16), [64, 12, 197, 197]), {}) +cnt: 12, ((T([768, 197, 64], f16), [64, 12, 197, 64]), {}) +cnt: 12, ((T([64, 197, 12, 64], f16), [64, 197, 768]), {}) +cnt: 12, ((T([64, 197, 3, 12, 64], f16), [64, 197, 2304]), {}) +Operator: aten.add.Tensor +cnt: 12, ((T([64, 12, 197, 197], f16), T([1, 12, 197, 197], f16)), {}) +cnt: 48, ((T([64, 197, 768], f16), T([64, 197, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 12, ((T([2304], f16), T([12608, 768], f16), T([768, 2304], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([12608, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([12608, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([12608, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([1000], f16), T([64, 768], f16), T([768, 1000], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([768, 197, 64], f16), T([768, 64, 197], f16)), {}) +cnt: 12, ((T([768, 197, 197], f16), T([768, 197, 64], 
f16)), {}) +cnt: 12, ((T([768, 197, 197], f16, stride=(38809, 1, 197)), T([768, 197, 64], f16)), {}) +cnt: 12, ((T([768, 197, 64], f16), T([768, 64, 197], f16, stride=(12608, 1, 64))), {}) +cnt: 12, ((T([768, 64, 197], f16, stride=(12608, 1, 64)), T([768, 197, 197], f16)), {}) +cnt: 12, ((T([768, 197, 197], f16), T([768, 197, 64], f16, stride=(12608, 1, 197))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 1, 768], f16, stride=(0, 768, 1)), T([64, 196, 768], f16, stride=(150528, 1, 196))], 1), {}) +cnt: 12, (([T([768], f16), T([768], f16), T([768], f16)],), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([768, 3, 16, 16], f16), T([768], f16), [16, 16], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 768, 14, 14], f16, stride=(151296, 1, 10752, 768)), T([64, 3, 224, 224], f16), T([768, 3, 16, 16], f16), [768], [16, 16], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 196, 768], f16, stride=(768, 0, 1)), 196), {}) +Operator: aten.gelu.default +cnt: 12, ((T([64, 197, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([64, 197, 3072], f16), T([64, 197, 3072], f16)), {}) +Operator: aten.index.Tensor +cnt: 12, ((T([732, 12], f16), [T([38809], i64)]), {}) +Operator: aten.index_put.default +cnt: 12, ((T([732, 12], f16), [T([38809], i64)], T([38809, 12], f16, stride=(1, 38809)), True), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 196, 768], f16, stride=(151296, 768, 1)), [1]), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 768], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 768], f16)), {}) +cnt: 12, ((T([12608, 768], 
f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 12608], f16, stride=(1, 768)), T([12608, 3072], f16)), {}) +cnt: 12, ((T([12608, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 12608], f16, stride=(1, 3072)), T([12608, 768], f16)), {}) +cnt: 12, ((T([12608, 768], f16), T([768, 768], f16)), {}) +cnt: 12, ((T([768, 12608], f16, stride=(1, 768)), T([12608, 768], f16)), {}) +cnt: 12, ((T([12608, 2304], f16), T([2304, 768], f16)), {}) +cnt: 12, ((T([2304, 12608], f16, stride=(1, 2304)), T([12608, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 12, ((T([64, 12, 197, 64], f16, stride=(453888, 64, 2304, 1)), 0.125), {}) +cnt: 24, ((T([768], f16), T([64, 197, 768], f16)), {}) +cnt: 24, ((T([64, 197, 768], f16), T([768], f16)), {}) +cnt: 24, ((T([64, 197, 768], f16), T([64, 197, 768], f16)), {}) +cnt: 12, ((T([64, 12, 197, 64], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 24, ((T([64, 197, 768], f16), [768], T([768], f16), T([768], f16), 1e-06), {}) +cnt: 1, ((T([64, 768], f16), [768], T([768], f16), T([768], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 1, ((T([64, 768], f16), T([64, 768], f16), [768], T([64, 1], f32), T([64, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +cnt: 24, ((T([64, 197, 768], f16), T([64, 197, 768], f16), [768], T([64, 197, 1], f32), T([64, 197, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.new_zeros.default +cnt: 12, ((T([38809, 12], f16, stride=(1, 38809)), [732, 12]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([64, 196, 768], f16), [64, 197, 768], 1, 1, 9223372036854775807, 1), {}) +cnt: 1, ((T([64, 197, 768], f16), [64, 197, 
768], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.stack.default +cnt: 12, (([T([64, 12, 197, 64], f16), T([64, 12, 197, 64], f16, stride=(151296, 12608, 1, 197)), T([64, 12, 197, 64], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 24, ((T([64, 197, 768], f16), [0, 1], True), {}) +cnt: 24, ((T([12608, 768], f16), [0], True), {}) +cnt: 12, ((T([12608, 3072], f16), [0], True), {}) +cnt: 12, ((T([64, 12, 197, 197], f16), [0], True), {}) +cnt: 12, ((T([12608, 2304], f16), [0], True), {}) +cnt: 1, ((T([64, 1, 768], f16, stride=(151296, 768, 1)), [0], True), {}) +Operator: aten.unbind.int +cnt: 12, ((T([3, 64, 12, 197, 64], f16, stride=(768, 453888, 64, 2304, 1)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/botnet26t_256_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/botnet26t_256_training.txt new file mode 100644 index 000000000..4f2a25afb --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/botnet26t_256_training.txt @@ -0,0 +1,244 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 2, ((T([512, 256, 256], f16), -1, False), {}) +cnt: 1, ((T([512, 64, 64], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 1, ((T([512, 64, 64], f16), T([512, 64, 64], f16), -1, f16), {}) +cnt: 2, ((T([512, 256, 256], f16), T([512, 256, 256], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 3, ((T([128, 256, 16, 16], f16), [512, 64, 256]), {}) +cnt: 2, ((T([512, 256, 256], f16), [512, 256, 256]), {}) +cnt: 2, ((T([512, 16, 16, 64], f16), [131072, 64]), {}) +cnt: 4, ((T([131072, 31], f16), [512, 16, 16, 31]), {}) +cnt: 2, ((T([512, 16, 16, 16, 16], f16), [512, 256, 256]), {}) +cnt: 1, ((T([512, 256, 64], f16), [512, 256, 
64]), {}) +cnt: 3, ((T([512, 64, 256], f16), [128, 256, 16, 16]), {}) +cnt: 3, ((T([128, 512, 16, 16], f16), [512, 128, 256]), {}) +cnt: 2, ((T([512, 16, 16, 128], f16), [131072, 128]), {}) +cnt: 1, ((T([512, 256, 128], f16), [512, 256, 128]), {}) +cnt: 3, ((T([512, 128, 256], f16), [128, 512, 16, 16]), {}) +cnt: 3, ((T([128, 512, 8, 8], f16), [512, 128, 64]), {}) +cnt: 1, ((T([512, 64, 64], f16), [512, 64, 64]), {}) +cnt: 2, ((T([512, 8, 8, 128], f16), [32768, 128]), {}) +cnt: 2, ((T([32768, 15], f16), [512, 8, 8, 15]), {}) +cnt: 1, ((T([512, 8, 8, 8, 8], f16), [512, 64, 64]), {}) +cnt: 1, ((T([512, 64, 128], f16), [512, 64, 128]), {}) +cnt: 3, ((T([512, 128, 64], f16), [128, 512, 8, 8]), {}) +cnt: 1, ((T([512, 8, 8, 128], f16), [512, 64, 128]), {}) +cnt: 1, ((T([512, 16, 16, 128], f16), [512, 256, 128]), {}) +cnt: 1, ((T([512, 16, 16, 64], f16), [512, 256, 64]), {}) +Operator: aten.add.Tensor +cnt: 31, ((T([], i64), 1), {}) +cnt: 4, ((T([128, 256, 64, 64], f16), T([128, 256, 64, 64], f16)), {}) +cnt: 4, ((T([128, 512, 32, 32], f16), T([128, 512, 32, 32], f16)), {}) +cnt: 4, ((T([128, 1024, 16, 16], f16), T([128, 1024, 16, 16], f16)), {}) +cnt: 2, ((T([512, 16, 16, 16, 16], f16, stride=(8432, 31, 527, 1, 0)), T([512, 16, 16, 16, 16], f16, stride=(8432, 527, 31, 0, 1))), {}) +cnt: 2, ((T([512, 256, 256], f16), T([512, 256, 256], f16)), {}) +cnt: 3, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16)), {}) +cnt: 1, ((T([512, 8, 8, 8, 8], f16, stride=(1080, 15, 135, 1, 0)), T([512, 8, 8, 8, 8], f16, stride=(1080, 135, 15, 0, 1))), {}) +cnt: 1, ((T([512, 64, 64], f16), T([512, 64, 64], f16)), {}) +cnt: 1, ((T([512, 8, 8, 128], f16, stride=(8192, 128, 1024, 1)), T([512, 8, 8, 128], f16)), {}) +cnt: 1, ((T([512, 64, 128], f16), T([512, 64, 128], f16)), {}) +cnt: 1, ((T([512, 16, 16, 128], f16, stride=(32768, 128, 2048, 1)), T([512, 16, 16, 128], f16)), {}) +cnt: 1, ((T([512, 256, 128], f16), T([512, 256, 128], f16)), {}) +cnt: 1, ((T([512, 16, 16, 64], f16, 
stride=(16384, 64, 1024, 1)), T([512, 16, 16, 64], f16)), {}) +cnt: 1, ((T([512, 256, 64], f16), T([512, 256, 64], f16)), {}) +cnt: 1, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([128, 512, 16, 16], f16), [2, 2], [2, 2]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([128, 512, 8, 8], f16), T([128, 512, 16, 16], f16), [2, 2], [2, 2], [0, 0], False, True, None), {}) +Operator: aten.bmm.default +cnt: 2, ((T([512, 256, 64], f16, stride=(16384, 1, 256)), T([512, 64, 256], f16)), {}) +cnt: 2, ((T([512, 256, 256], f16), T([512, 256, 64], f16, stride=(16384, 1, 256))), {}) +cnt: 2, ((T([512, 256, 128], f16, stride=(32768, 1, 256)), T([512, 128, 256], f16)), {}) +cnt: 2, ((T([512, 256, 256], f16), T([512, 256, 128], f16, stride=(32768, 1, 256))), {}) +cnt: 2, ((T([512, 64, 128], f16, stride=(8192, 1, 64)), T([512, 128, 64], f16)), {}) +cnt: 2, ((T([512, 64, 64], f16), T([512, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 1, ((T([512, 64, 64], f16, stride=(4096, 1, 64)), T([512, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 1, ((T([512, 128, 64], f16), T([512, 64, 64], f16)), {}) +cnt: 1, ((T([512, 256, 256], f16, stride=(65536, 1, 256)), T([512, 256, 128], f16, stride=(32768, 1, 256))), {}) +cnt: 1, ((T([512, 128, 256], f16), T([512, 256, 256], f16)), {}) +cnt: 1, ((T([512, 256, 256], f16, stride=(65536, 1, 256)), T([512, 256, 64], f16, stride=(16384, 1, 256))), {}) +cnt: 1, ((T([512, 64, 256], f16), T([512, 256, 256], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([128, 512, 8, 8], f16), T([128, 512, 8, 8], f16), T([128, 512, 8, 8], f16)], 1), {}) +cnt: 1, (([T([128, 512, 16, 16], f16), T([128, 512, 16, 16], f16), T([128, 512, 16, 16], f16)], 1), {}) +cnt: 1, (([T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16)], 1), {}) 
+Operator: aten.clone.default +cnt: 1, ((T([128, 3, 256, 256], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 4, ((T([8192, 16, 31], f16), [0, 1], 0.0), {}) +cnt: 4, ((T([8192, 512], f16), [0, 15], 0.0), {}) +cnt: 2, ((T([4096, 8, 15], f16), [0, 1], 0.0), {}) +cnt: 2, ((T([4096, 128], f16), [0, 7], 0.0), {}) +cnt: 2, ((T([4096, 135], f16), [0, -7]), {}) +cnt: 2, ((T([4096, 8, 16], f16), [0, -1]), {}) +cnt: 4, ((T([8192, 527], f16), [0, -15]), {}) +cnt: 4, ((T([8192, 16, 32], f16), [0, -1]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 256, 256], f16), T([24, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([32, 24, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 64, 64], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 64, 64, 64], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 64, 64, 64], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 64, 64], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 64, 64], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 32, 32], f16), T([512, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 64, 64], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 
128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([256, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 256, 16, 16], f16), T([1024, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([256, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([768, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([1536, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 512, 8, 8], f16), T([2048, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([2048, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([512, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 8, 8], f16), T([1536, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 2, ((T([128, 2048, 8, 8], f16), T([128, 512, 8, 8], f16), T([2048, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1536, 8, 8], f16), T([128, 512, 8, 8], f16), T([1536, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 8, 8], f16), T([128, 2048, 8, 8], f16), T([512, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 
0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([128, 1024, 16, 16], f16), T([2048, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1536, 16, 16], f16), T([128, 512, 16, 16], f16), T([1536, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 1024, 16, 16], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16), T([128, 256, 16, 16], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 768, 16, 16], f16), T([128, 256, 16, 16], f16), T([768, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([128, 1024, 16, 16], f16), T([256, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([128, 512, 32, 32], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([128, 256, 32, 32], f16), T([256, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([128, 512, 32, 32], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 512, 32, 32], f16), T([128, 128, 32, 32], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 512, 32, 32], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, 
[0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([128, 256, 64, 64], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 128, 64, 64], f16), T([128, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 256, 64, 64], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 256, 64, 64], f16), T([128, 64, 64, 64], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 64, 64], f16), T([128, 256, 64, 64], f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([128, 32, 128, 128], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 24, 128, 128], f16), T([32, 24, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([128, 3, 256, 256], f16), T([24, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 256, 256], f16), T([128, 3, 256, 256], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2048, 8, 8], f16, stride=(2048, 1, 0, 0)), 64), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: 
aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 64, 128, 128], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 64, 64, 64], f16), T([128, 64, 128, 128], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([128, 64, 64, 64], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 2048, 8, 8], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 2, ((T([131072, 64], f16), T([64, 31], f16, stride=(1, 64))), {}) +cnt: 2, ((T([131072, 128], f16), T([128, 31], f16, stride=(1, 128))), {}) +cnt: 2, ((T([32768, 128], f16), T([128, 15], f16, stride=(1, 128))), {}) +cnt: 1, ((T([128, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 2048], f16)), {}) +cnt: 2, ((T([15, 32768], f16, stride=(1, 15)), T([32768, 128], f16)), {}) +cnt: 2, ((T([32768, 15], f16), T([15, 128], f16)), {}) +cnt: 2, ((T([31, 131072], f16, stride=(1, 31)), T([131072, 128], f16)), {}) +cnt: 2, ((T([131072, 31], f16), T([31, 128], f16)), {}) +cnt: 2, ((T([31, 131072], f16, stride=(1, 31)), T([131072, 64], f16)), {}) +cnt: 2, ((T([131072, 31], f16), T([31, 64], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([512, 256, 256], f16), 0.125), {}) +cnt: 2, ((T([512, 256, 256], f16), 0.08838834764831845), {}) +cnt: 2, ((T([512, 64, 64], f16), 0.08838834764831845), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 24, 128, 128], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) 
+cnt: 1, ((T([128, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 512, 32, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 1024, 16, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 2048, 8, 8], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 3, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 512, 8, 8], f16), T([128, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 1024, 16, 16], f16), T([128, 1024, 16, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), 
T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([128, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 512, 32, 32], f16), T([128, 512, 32, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 256, 64, 64], f16), T([128, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([128, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([128, 24, 128, 128], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 24, 128, 128], f16),), {}) +cnt: 1, 
((T([128, 32, 128, 128], f16),), {}) +cnt: 1, ((T([128, 64, 128, 128], f16),), {}) +cnt: 4, ((T([128, 64, 64, 64], f16),), {}) +cnt: 2, ((T([128, 256, 64, 64], f16),), {}) +cnt: 1, ((T([128, 128, 64, 64], f16),), {}) +cnt: 3, ((T([128, 128, 32, 32], f16),), {}) +cnt: 2, ((T([128, 512, 32, 32], f16),), {}) +cnt: 1, ((T([128, 256, 32, 32], f16),), {}) +cnt: 3, ((T([128, 256, 16, 16], f16),), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16),), {}) +cnt: 1, ((T([128, 512, 16, 16], f16),), {}) +cnt: 3, ((T([128, 512, 8, 8], f16),), {}) +cnt: 2, ((T([128, 2048, 8, 8], f16),), {}) +Operator: aten.slice_backward.default +cnt: 2, ((T([4096, 8, 8], f16), [4096, 8, 15], 2, 7, 9223372036854775807, 1), {}) +cnt: 2, ((T([4096, 8, 15], f16), [4096, 9, 15], 1, 0, 8, 1), {}) +cnt: 2, ((T([4096, 9, 15], f16), [4096, 9, 15], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([8192, 16, 16], f16), [8192, 16, 31], 2, 15, 9223372036854775807, 1), {}) +cnt: 4, ((T([8192, 16, 31], f16), [8192, 17, 31], 1, 0, 16, 1), {}) +cnt: 4, ((T([8192, 17, 31], f16), [8192, 17, 31], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.split_with_sizes.default +cnt: 1, ((T([128, 768, 16, 16], f16), [256, 256, 256], 1), {}) +cnt: 1, ((T([128, 1536, 16, 16], f16), [512, 512, 512], 1), {}) +cnt: 1, ((T([128, 1536, 8, 8], f16), [512, 512, 512], 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 1, ((T([512, 8, 8, 8, 8], f16, stride=(4096, 64, 1, 512, 8)), [2], True), {}) +cnt: 1, ((T([512, 8, 8, 8, 8], f16, stride=(4096, 512, 8, 64, 1)), [2], True), {}) +cnt: 2, ((T([512, 16, 16, 16, 16], f16, stride=(65536, 256, 1, 4096, 16)), [2], True), {}) +cnt: 2, ((T([512, 16, 16, 16, 16], f16, stride=(65536, 4096, 16, 256, 1)), [2], True), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16), 0), {}) +cnt: 3, ((T([128, 512, 8, 8], f16), T([128, 512, 8, 8], f16), 0), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 512, 16, 16], 
f16), 0), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16), T([128, 1024, 16, 16], f16), 0), {}) +cnt: 3, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16), 0), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([128, 256, 32, 32], f16), 0), {}) +cnt: 2, ((T([128, 512, 32, 32], f16), T([128, 512, 32, 32], f16), 0), {}) +cnt: 3, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16), 0), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 128, 64, 64], f16), 0), {}) +cnt: 2, ((T([128, 256, 64, 64], f16), T([128, 256, 64, 64], f16), 0), {}) +cnt: 4, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16), 0), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([128, 64, 128, 128], f16), 0), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 128, 128], f16), 0), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([128, 24, 128, 128], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/cait_m36_384_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/cait_m36_384_training.txt new file mode 100644 index 000000000..b49e97575 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/cait_m36_384_training.txt @@ -0,0 +1,149 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([2, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([2, 1000], f16), T([2, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 36, ((T([2, 16, 576, 576], f16, stride=(5308416, 1, 9216, 16)), -1, False), {}) +cnt: 2, ((T([2, 16, 1, 577], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 2, ((T([2, 16, 1, 577], f16), T([2, 16, 1, 577], f16), -1, f16), {}) +cnt: 36, ((T([2, 16, 576, 576], f16, stride=(5308416, 1, 9216, 16)), T([2, 16, 576, 576], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 108, ((T([2, 16, 576, 48], f16), [32, 576, 48]), {}) +cnt: 36, ((T([2, 16, 48, 576], f16), [32, 48, 576]), {}) +cnt: 36, ((T([32, 576, 576], f16), [2, 
16, 576, 576]), {}) +cnt: 144, ((T([2, 576, 576, 16], f16), [663552, 16]), {}) +cnt: 72, ((T([663552, 16], f16), [2, 576, 576, 16]), {}) +cnt: 72, ((T([2, 16, 576, 576], f16), [32, 576, 576]), {}) +cnt: 36, ((T([32, 576, 48], f16), [2, 16, 576, 48]), {}) +cnt: 36, ((T([2, 576, 16, 48], f16), [2, 576, 768]), {}) +cnt: 2, ((T([2, 16, 48, 577], f16), [32, 48, 577]), {}) +cnt: 2, ((T([32, 1, 577], f16), [2, 16, 1, 577]), {}) +cnt: 2, ((T([2, 16, 577, 48], f16), [32, 577, 48]), {}) +cnt: 2, ((T([32, 1, 48], f16), [2, 16, 1, 48]), {}) +cnt: 2, ((T([2, 577, 16, 48], f16), [2, 577, 768]), {}) +cnt: 2, ((T([2, 577, 768], f16), [1154, 768]), {}) +cnt: 36, ((T([2, 576, 3, 16, 48], f16), [2, 576, 2304]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([2, 576, 768], f16, stride=(442368, 1, 576)), T([1, 576, 768], f16)), {}) +cnt: 72, ((T([2, 576, 576, 16], f16), T([16], f16)), {}) +cnt: 72, ((T([2, 576, 768], f16, stride=(442368, 1, 576)), T([2, 576, 768], f16)), {}) +cnt: 1, ((T([2, 1, 768], f16, stride=(0, 768, 1)), T([2, 1, 768], f16)), {}) +cnt: 4, ((T([2, 1, 768], f16), T([2, 1, 768], f16)), {}) +cnt: 1, ((T([2, 1, 768], f16, stride=(443136, 768, 1)), T([2, 1, 768], f16)), {}) +cnt: 4, ((T([2, 577, 768], f16), T([2, 577, 768], f16)), {}) +cnt: 2, ((T([2, 1, 768], f16), T([2, 1, 768], f16, stride=(443136, 768, 1))), {}) +cnt: 1, ((T([2, 576, 768], f16, stride=(443136, 768, 1)), T([2, 576, 768], f16, stride=(443136, 768, 1))), {}) +cnt: 1, ((T([2, 576, 768], f16), T([2, 576, 768], f16, stride=(443136, 768, 1))), {}) +cnt: 72, ((T([2, 576, 768], f16), T([2, 576, 768], f16)), {}) +cnt: 72, ((T([3, 2, 16, 576, 48], f16), T([3, 2, 16, 576, 48], f16)), {}) +Operator: aten.addmm.default +cnt: 36, ((T([2304], f16), T([1152, 768], f16), T([768, 2304], f16, stride=(1, 768))), {}) +cnt: 36, ((T([768], f16), T([1152, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 36, ((T([3072], f16), T([1152, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 36, ((T([768], f16), 
T([1152, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 2, ((T([768], f16), T([2, 768], f16, stride=(443136, 1)), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 4, ((T([768], f16), T([1154, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 2, ((T([768], f16), T([2, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 2, ((T([3072], f16), T([2, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 2, ((T([768], f16), T([2, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([1000], f16), T([2, 768], f16, stride=(443136, 1)), T([768, 1000], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 36, ((T([32, 576, 48], f16), T([32, 48, 576], f16)), {}) +cnt: 36, ((T([32, 576, 576], f16), T([32, 576, 48], f16)), {}) +cnt: 2, ((T([32, 1, 48], f16), T([32, 48, 577], f16)), {}) +cnt: 2, ((T([32, 1, 577], f16), T([32, 577, 48], f16)), {}) +cnt: 2, ((T([32, 577, 1], f16), T([32, 1, 48], f16)), {}) +cnt: 2, ((T([32, 1, 48], f16), T([32, 48, 577], f16, stride=(27696, 1, 48))), {}) +cnt: 2, ((T([32, 48, 1], f16), T([32, 1, 577], f16)), {}) +cnt: 2, ((T([32, 1, 577], f16), T([32, 577, 48], f16, stride=(27696, 1, 577))), {}) +cnt: 36, ((T([32, 576, 576], f16, stride=(331776, 1, 576)), T([32, 576, 48], f16)), {}) +cnt: 36, ((T([32, 576, 48], f16), T([32, 48, 576], f16, stride=(27648, 1, 48))), {}) +cnt: 36, ((T([32, 48, 576], f16, stride=(27648, 1, 48)), T([32, 576, 576], f16)), {}) +cnt: 36, ((T([32, 576, 576], f16), T([32, 576, 48], f16, stride=(27648, 1, 576))), {}) +Operator: aten.cat.default +cnt: 1, (([T([2, 1, 768], f16, stride=(0, 768, 1)), T([2, 576, 768], f16, stride=(442368, 1, 576))], 1), {}) +cnt: 2, (([T([2, 1, 768], f16), T([2, 576, 768], f16, stride=(442368, 1, 576))], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([2, 3, 384, 384], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([2, 3, 384, 384], f16), T([768, 3, 16, 16], f16), T([768], f16), [16, 16], [0, 0], [1, 1], False, [0, 0], 
1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([2, 768, 24, 24], f16, stride=(442368, 1, 18432, 768)), T([2, 3, 384, 384], f16), T([768, 3, 16, 16], f16), [768], [16, 16], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([2, 3, 384, 384], f16), T([2, 3, 384, 384], f16)), {}) +Operator: aten.gelu.default +cnt: 36, ((T([2, 576, 3072], f16),), {}) +cnt: 2, ((T([2, 1, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 2, ((T([2, 1, 3072], f16), T([2, 1, 3072], f16)), {}) +cnt: 36, ((T([2, 576, 3072], f16), T([2, 576, 3072], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([2], i64),), {}) +Operator: aten.mm.default +cnt: 72, ((T([663552, 16], f16), T([16, 16], f16, stride=(1, 16))), {}) +cnt: 1, ((T([2, 1000], f16), T([1000, 768], f16)), {}) +cnt: 1, ((T([1000, 2], f16, stride=(1, 1000)), T([2, 768], f16, stride=(443136, 1))), {}) +cnt: 2, ((T([2, 768], f16), T([768, 3072], f16)), {}) +cnt: 2, ((T([768, 2], f16, stride=(1, 768)), T([2, 3072], f16)), {}) +cnt: 2, ((T([2, 3072], f16), T([3072, 768], f16)), {}) +cnt: 2, ((T([3072, 2], f16, stride=(1, 3072)), T([2, 768], f16)), {}) +cnt: 4, ((T([2, 768], f16), T([768, 768], f16)), {}) +cnt: 2, ((T([768, 2], f16, stride=(1, 768)), T([2, 768], f16)), {}) +cnt: 4, ((T([1154, 768], f16), T([768, 768], f16)), {}) +cnt: 4, ((T([768, 1154], f16, stride=(1, 768)), T([1154, 768], f16)), {}) +cnt: 2, ((T([768, 2], f16, stride=(1, 768)), T([2, 768], f16, stride=(443136, 1))), {}) +cnt: 36, ((T([1152, 768], f16), T([768, 3072], f16)), {}) +cnt: 36, ((T([768, 1152], f16, stride=(1, 768)), T([1152, 3072], f16)), {}) +cnt: 36, ((T([1152, 3072], f16), T([3072, 768], f16)), {}) +cnt: 36, ((T([3072, 1152], f16, stride=(1, 3072)), T([1152, 768], f16)), {}) +cnt: 36, ((T([1152, 768], f16), T([768, 768], f16)), {}) +cnt: 36, ((T([768, 1152], f16, stride=(1, 768)), T([1152, 768], f16)), {}) +cnt: 72, ((T([16, 663552], f16, stride=(1, 16)), 
T([663552, 16], f16)), {}) +cnt: 72, ((T([663552, 16], f16), T([16, 16], f16)), {}) +cnt: 36, ((T([1152, 2304], f16), T([2304, 768], f16)), {}) +cnt: 36, ((T([2304, 1152], f16, stride=(1, 2304)), T([1152, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 36, ((T([2, 16, 576, 48], f16, stride=(1327104, 48, 2304, 1)), 0.14433756729740643), {}) +cnt: 72, ((T([768], f16), T([2, 576, 768], f16)), {}) +cnt: 4, ((T([2, 16, 1, 48], f16), 0.14433756729740643), {}) +cnt: 4, ((T([768], f16), T([2, 1, 768], f16)), {}) +cnt: 1, ((T([2, 1, 768], f16, stride=(443136, 768, 1)), T([768], f16)), {}) +cnt: 1, ((T([2, 1, 768], f16, stride=(443136, 768, 1)), T([2, 1, 768], f16)), {}) +cnt: 3, ((T([2, 1, 768], f16), T([768], f16)), {}) +cnt: 3, ((T([2, 1, 768], f16), T([2, 1, 768], f16)), {}) +cnt: 72, ((T([2, 576, 768], f16), T([768], f16)), {}) +cnt: 72, ((T([2, 576, 768], f16), T([2, 576, 768], f16)), {}) +cnt: 36, ((T([2, 16, 576, 48], f16), 0.14433756729740643), {}) +Operator: aten.native_layer_norm.default +cnt: 72, ((T([2, 576, 768], f16, stride=(442368, 1, 576)), [768], T([768], f16), T([768], f16), 1e-06), {}) +cnt: 3, ((T([2, 577, 768], f16), [768], T([768], f16), T([768], f16), 1e-06), {}) +cnt: 2, ((T([2, 1, 768], f16), [768], T([768], f16), T([768], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 3, ((T([2, 577, 768], f16), T([2, 577, 768], f16), [768], T([2, 577, 1], f32), T([2, 577, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +cnt: 2, ((T([2, 1, 768], f16), T([2, 1, 768], f16), [768], T([2, 1, 1], f32), T([2, 1, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +cnt: 72, ((T([2, 576, 768], f16), T([2, 576, 768], f16, stride=(442368, 1, 576)), [768], T([2, 576, 1], f32), T([2, 576, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([2, 1000], f16), T([2], i64), None, 1, -100, T([], f16)), {}) +Operator: 
aten.nll_loss_forward.default +cnt: 1, ((T([2, 1000], f16), T([2], i64), None, 1, -100), {}) +Operator: aten.select_backward.default +cnt: 3, ((T([2, 768], f16), [2, 577, 768], 1, 0), {}) +cnt: 36, ((T([2, 16, 576, 48], f16), [3, 2, 16, 576, 48], 0, 2), {}) +cnt: 36, ((T([2, 16, 576, 48], f16, stride=(442368, 27648, 1, 576)), [3, 2, 16, 576, 48], 0, 1), {}) +cnt: 36, ((T([2, 16, 576, 48], f16), [3, 2, 16, 576, 48], 0, 0), {}) +Operator: aten.slice_backward.default +cnt: 3, ((T([2, 577, 768], f16), [2, 577, 768], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([2, 1000], f16), [0], True), {}) +cnt: 4, ((T([2, 1, 768], f16), [0, 1], True), {}) +cnt: 6, ((T([2, 768], f16), [0], True), {}) +cnt: 2, ((T([2, 3072], f16), [0], True), {}) +cnt: 4, ((T([1154, 768], f16), [0], True), {}) +cnt: 1, ((T([2, 1, 768], f16), [0], True), {}) +cnt: 72, ((T([2, 576, 768], f16), [0, 1], True), {}) +cnt: 72, ((T([1152, 768], f16), [0], True), {}) +cnt: 36, ((T([1152, 3072], f16), [0], True), {}) +cnt: 72, ((T([2, 576, 576, 16], f16, stride=(5308416, 576, 1, 331776)), [0, 1, 2], True), {}) +cnt: 36, ((T([1152, 2304], f16), [0], True), {}) +cnt: 1, ((T([2, 576, 768], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/coat_lite_mini_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/coat_lite_mini_training.txt new file mode 100644 index 000000000..cba167ebd --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/coat_lite_mini_training.txt @@ -0,0 +1,348 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 2, ((T([128, 8, 3137, 8], f16, stride=(602304, 8, 192, 1)), 2, False), {}) +cnt: 2, ((T([128, 8, 785, 16], f16, stride=(301440, 16, 384, 1)), 2, False), {}) +cnt: 2, ((T([128, 8, 
197, 40], f16, stride=(189120, 40, 960, 1)), 2, False), {}) +cnt: 2, ((T([128, 8, 50, 64], f16, stride=(76800, 64, 1536, 1)), 2, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 2, ((T([128, 8, 50, 64], f16, stride=(25600, 3200, 1, 50)), T([128, 8, 50, 64], f16), 2, f16), {}) +cnt: 2, ((T([128, 8, 197, 40], f16, stride=(63040, 7880, 1, 197)), T([128, 8, 197, 40], f16), 2, f16), {}) +cnt: 2, ((T([128, 8, 785, 16], f16, stride=(100480, 12560, 1, 785)), T([128, 8, 785, 16], f16), 2, f16), {}) +cnt: 2, ((T([128, 8, 3137, 8], f16, stride=(200768, 25096, 1, 3137)), T([128, 8, 3137, 8], f16), 2, f16), {}) +Operator: aten._unsafe_view.default +cnt: 6, ((T([128, 8, 3137, 8], f16), [1024, 3137, 8]), {}) +cnt: 2, ((T([1024, 8, 8], f16), [128, 8, 8, 8]), {}) +cnt: 2, ((T([1024, 3137, 8], f16), [128, 8, 3137, 8]), {}) +cnt: 2, ((T([128, 3137, 8, 8], f16), [128, 3137, 64]), {}) +cnt: 6, ((T([128, 8, 785, 16], f16), [1024, 785, 16]), {}) +cnt: 2, ((T([1024, 16, 16], f16), [128, 8, 16, 16]), {}) +cnt: 2, ((T([1024, 785, 16], f16), [128, 8, 785, 16]), {}) +cnt: 2, ((T([128, 785, 8, 16], f16), [128, 785, 128]), {}) +cnt: 6, ((T([128, 8, 197, 40], f16), [1024, 197, 40]), {}) +cnt: 2, ((T([1024, 40, 40], f16), [128, 8, 40, 40]), {}) +cnt: 2, ((T([1024, 197, 40], f16), [128, 8, 197, 40]), {}) +cnt: 2, ((T([128, 197, 8, 40], f16), [128, 197, 320]), {}) +cnt: 6, ((T([128, 8, 50, 64], f16), [1024, 50, 64]), {}) +cnt: 2, ((T([1024, 64, 64], f16), [128, 8, 64, 64]), {}) +cnt: 2, ((T([1024, 50, 64], f16), [128, 8, 50, 64]), {}) +cnt: 2, ((T([128, 50, 8, 64], f16), [128, 50, 512]), {}) +cnt: 2, ((T([128, 50, 3, 8, 64], f16), [128, 50, 1536]), {}) +cnt: 2, ((T([128, 197, 3, 8, 40], f16), [128, 197, 960]), {}) +cnt: 2, ((T([128, 785, 3, 8, 16], f16), [128, 785, 384]), {}) +cnt: 2, ((T([128, 3137, 3, 8, 8], f16), [128, 3137, 192]), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16, stride=(200768, 1, 3584, 64))), {}) +cnt: 6, ((T([128, 
8, 3137, 8], f16), T([128, 8, 3137, 8], f16)), {}) +cnt: 10, ((T([128, 3137, 64], f16), T([128, 3137, 64], f16)), {}) +cnt: 2, ((T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16, stride=(100480, 1, 3584, 128))), {}) +cnt: 6, ((T([128, 8, 785, 16], f16), T([128, 8, 785, 16], f16)), {}) +cnt: 10, ((T([128, 785, 128], f16), T([128, 785, 128], f16)), {}) +cnt: 2, ((T([128, 320, 14, 14], f16), T([128, 320, 14, 14], f16, stride=(63040, 1, 4480, 320))), {}) +cnt: 6, ((T([128, 8, 197, 40], f16), T([128, 8, 197, 40], f16)), {}) +cnt: 10, ((T([128, 197, 320], f16), T([128, 197, 320], f16)), {}) +cnt: 2, ((T([128, 512, 7, 7], f16), T([128, 512, 7, 7], f16, stride=(25600, 1, 3584, 512))), {}) +cnt: 6, ((T([128, 8, 50, 64], f16), T([128, 8, 50, 64], f16)), {}) +cnt: 10, ((T([128, 50, 512], f16), T([128, 50, 512], f16)), {}) +cnt: 4, ((T([3, 128, 8, 50, 64], f16), T([3, 128, 8, 50, 64], f16)), {}) +cnt: 2, ((T([128, 512, 7, 7], f16, stride=(25600, 1, 3584, 512)), T([128, 512, 7, 7], f16, stride=(25088, 1, 3584, 512))), {}) +cnt: 1, ((T([192, 1, 7, 7], f16), T([192, 1, 7, 7], f16)), {}) +cnt: 2, ((T([192], f16), T([192], f16)), {}) +cnt: 1, ((T([192, 1, 5, 5], f16), T([192, 1, 5, 5], f16)), {}) +cnt: 2, ((T([128, 1, 3, 3], f16), T([128, 1, 3, 3], f16)), {}) +cnt: 2, ((T([128], f16), T([128], f16)), {}) +cnt: 1, ((T([512, 1, 3, 3], f16), T([512, 1, 3, 3], f16)), {}) +cnt: 1, ((T([512], f16), T([512], f16)), {}) +cnt: 4, ((T([3, 128, 8, 197, 40], f16), T([3, 128, 8, 197, 40], f16)), {}) +cnt: 2, ((T([128, 320, 14, 14], f16, stride=(63040, 1, 4480, 320)), T([128, 320, 14, 14], f16, stride=(62720, 1, 4480, 320))), {}) +cnt: 1, ((T([120, 1, 7, 7], f16), T([120, 1, 7, 7], f16)), {}) +cnt: 2, ((T([120], f16), T([120], f16)), {}) +cnt: 1, ((T([120, 1, 5, 5], f16), T([120, 1, 5, 5], f16)), {}) +cnt: 1, ((T([80, 1, 3, 3], f16), T([80, 1, 3, 3], f16)), {}) +cnt: 1, ((T([80], f16), T([80], f16)), {}) +cnt: 1, ((T([320, 1, 3, 3], f16), T([320, 1, 3, 3], f16)), {}) +cnt: 1, ((T([320], 
f16), T([320], f16)), {}) +cnt: 4, ((T([3, 128, 8, 785, 16], f16), T([3, 128, 8, 785, 16], f16)), {}) +cnt: 2, ((T([128, 128, 28, 28], f16, stride=(100480, 1, 3584, 128)), T([128, 128, 28, 28], f16, stride=(100352, 1, 3584, 128))), {}) +cnt: 1, ((T([48, 1, 7, 7], f16), T([48, 1, 7, 7], f16)), {}) +cnt: 2, ((T([48], f16), T([48], f16)), {}) +cnt: 1, ((T([48, 1, 5, 5], f16), T([48, 1, 5, 5], f16)), {}) +cnt: 1, ((T([32, 1, 3, 3], f16), T([32, 1, 3, 3], f16)), {}) +cnt: 1, ((T([32], f16), T([32], f16)), {}) +cnt: 4, ((T([3, 128, 8, 3137, 8], f16), T([3, 128, 8, 3137, 8], f16)), {}) +cnt: 2, ((T([128, 64, 56, 56], f16, stride=(200768, 1, 3584, 64)), T([128, 64, 56, 56], f16, stride=(200704, 1, 3584, 64))), {}) +cnt: 1, ((T([24, 1, 7, 7], f16), T([24, 1, 7, 7], f16)), {}) +cnt: 2, ((T([24], f16), T([24], f16)), {}) +cnt: 1, ((T([24, 1, 5, 5], f16), T([24, 1, 5, 5], f16)), {}) +cnt: 1, ((T([16, 1, 3, 3], f16), T([16, 1, 3, 3], f16)), {}) +cnt: 1, ((T([16], f16), T([16], f16)), {}) +cnt: 1, ((T([64, 1, 3, 3], f16), T([64, 1, 3, 3], f16)), {}) +cnt: 1, ((T([64], f16), T([64], f16)), {}) +Operator: aten.addmm.default +cnt: 2, ((T([192], f16), T([401536, 64], f16), T([64, 192], f16, stride=(1, 64))), {}) +cnt: 2, ((T([64], f16), T([401536, 64], f16), T([64, 64], f16, stride=(1, 64))), {}) +cnt: 2, ((T([512], f16), T([401536, 64], f16), T([64, 512], f16, stride=(1, 64))), {}) +cnt: 2, ((T([64], f16), T([401536, 512], f16), T([512, 64], f16, stride=(1, 512))), {}) +cnt: 2, ((T([384], f16), T([100480, 128], f16), T([128, 384], f16, stride=(1, 128))), {}) +cnt: 2, ((T([128], f16), T([100480, 128], f16), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 2, ((T([1024], f16), T([100480, 128], f16), T([128, 1024], f16, stride=(1, 128))), {}) +cnt: 2, ((T([128], f16), T([100480, 1024], f16), T([1024, 128], f16, stride=(1, 1024))), {}) +cnt: 2, ((T([960], f16), T([25216, 320], f16), T([320, 960], f16, stride=(1, 320))), {}) +cnt: 2, ((T([320], f16), T([25216, 320], f16), T([320, 320], 
f16, stride=(1, 320))), {}) +cnt: 2, ((T([1280], f16), T([25216, 320], f16), T([320, 1280], f16, stride=(1, 320))), {}) +cnt: 2, ((T([320], f16), T([25216, 1280], f16), T([1280, 320], f16, stride=(1, 1280))), {}) +cnt: 2, ((T([1536], f16), T([6400, 512], f16), T([512, 1536], f16, stride=(1, 512))), {}) +cnt: 2, ((T([512], f16), T([6400, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 2, ((T([2048], f16), T([6400, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 2, ((T([512], f16), T([6400, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +cnt: 1, ((T([1000], f16), T([128, 512], f16, stride=(25600, 1)), T([512, 1000], f16, stride=(1, 512))), {}) +Operator: aten.bmm.default +cnt: 4, ((T([1024, 8, 3137], f16, stride=(25096, 1, 8)), T([1024, 3137, 8], f16)), {}) +cnt: 4, ((T([1024, 3137, 8], f16), T([1024, 8, 8], f16)), {}) +cnt: 4, ((T([1024, 16, 785], f16, stride=(12560, 1, 16)), T([1024, 785, 16], f16)), {}) +cnt: 4, ((T([1024, 785, 16], f16), T([1024, 16, 16], f16)), {}) +cnt: 4, ((T([1024, 40, 197], f16, stride=(7880, 1, 40)), T([1024, 197, 40], f16)), {}) +cnt: 4, ((T([1024, 197, 40], f16), T([1024, 40, 40], f16)), {}) +cnt: 4, ((T([1024, 64, 50], f16, stride=(3200, 1, 64)), T([1024, 50, 64], f16)), {}) +cnt: 4, ((T([1024, 50, 64], f16), T([1024, 64, 64], f16)), {}) +cnt: 2, ((T([1024, 50, 64], f16), T([1024, 64, 64], f16, stride=(4096, 1, 64))), {}) +cnt: 2, ((T([1024, 64, 64], f16), T([1024, 64, 50], f16, stride=(3200, 1, 64))), {}) +cnt: 2, ((T([1024, 197, 40], f16), T([1024, 40, 40], f16, stride=(1600, 1, 40))), {}) +cnt: 2, ((T([1024, 40, 40], f16), T([1024, 40, 197], f16, stride=(7880, 1, 40))), {}) +cnt: 2, ((T([1024, 785, 16], f16), T([1024, 16, 16], f16, stride=(256, 1, 16))), {}) +cnt: 2, ((T([1024, 16, 16], f16), T([1024, 16, 785], f16, stride=(12560, 1, 16))), {}) +cnt: 2, ((T([1024, 3137, 8], f16), T([1024, 8, 8], f16, stride=(64, 1, 8))), {}) +cnt: 2, ((T([1024, 8, 8], f16), T([1024, 8, 3137], f16, stride=(25096, 1, 
8))), {}) +Operator: aten.cat.default +cnt: 1, (([T([128, 1, 64], f16, stride=(0, 64, 1)), T([128, 3136, 64], f16)], 1), {}) +cnt: 2, (([T([128, 1, 64], f16, stride=(200768, 64, 1)), T([128, 3136, 64], f16, stride=(200704, 1, 3136))], 1), {}) +cnt: 2, (([T([128, 16, 56, 56], f16), T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)], 1), {}) +cnt: 1, (([T([128, 1, 128], f16, stride=(0, 128, 1)), T([128, 784, 128], f16)], 1), {}) +cnt: 2, (([T([128, 1, 128], f16, stride=(100480, 128, 1)), T([128, 784, 128], f16, stride=(100352, 1, 784))], 1), {}) +cnt: 2, (([T([128, 32, 28, 28], f16), T([128, 48, 28, 28], f16), T([128, 48, 28, 28], f16)], 1), {}) +cnt: 1, (([T([128, 1, 320], f16, stride=(0, 320, 1)), T([128, 196, 320], f16)], 1), {}) +cnt: 2, (([T([128, 1, 320], f16, stride=(63040, 320, 1)), T([128, 196, 320], f16, stride=(62720, 1, 196))], 1), {}) +cnt: 2, (([T([128, 80, 14, 14], f16), T([128, 120, 14, 14], f16), T([128, 120, 14, 14], f16)], 1), {}) +cnt: 1, (([T([128, 1, 512], f16, stride=(0, 512, 1)), T([128, 49, 512], f16)], 1), {}) +cnt: 2, (([T([128, 1, 512], f16, stride=(25600, 512, 1)), T([128, 49, 512], f16, stride=(25088, 1, 49))], 1), {}) +cnt: 2, (([T([128, 128, 7, 7], f16), T([128, 192, 7, 7], f16), T([128, 192, 7, 7], f16)], 1), {}) +cnt: 2, (([T([128, 128, 7, 7], f16, stride=(6272, 1, 896, 128)), T([128, 192, 7, 7], f16, stride=(9408, 1, 1344, 192)), T([128, 192, 7, 7], f16, stride=(9408, 1, 1344, 192))], 1), {}) +cnt: 2, (([T([128, 80, 14, 14], f16, stride=(15680, 1, 1120, 80)), T([128, 120, 14, 14], f16, stride=(23520, 1, 1680, 120)), T([128, 120, 14, 14], f16, stride=(23520, 1, 1680, 120))], 1), {}) +cnt: 2, (([T([128, 32, 28, 28], f16, stride=(25088, 1, 896, 32)), T([128, 48, 28, 28], f16, stride=(37632, 1, 1344, 48)), T([128, 48, 28, 28], f16, stride=(37632, 1, 1344, 48))], 1), {}) +cnt: 2, (([T([128, 16, 56, 56], f16, stride=(50176, 1, 896, 16)), T([128, 24, 56, 56], f16, stride=(75264, 1, 1344, 24)), T([128, 24, 56, 56], f16, stride=(75264, 1, 
1344, 24))], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 2, ((T([128, 8, 3136, 8], f16, stride=(200704, 8, 64, 1)), [0, 0, 1, 0, 0, 0], 0.0), {}) +cnt: 2, ((T([128, 8, 784, 16], f16, stride=(100352, 16, 128, 1)), [0, 0, 1, 0, 0, 0], 0.0), {}) +cnt: 2, ((T([128, 8, 196, 40], f16, stride=(62720, 40, 320, 1)), [0, 0, 1, 0, 0, 0], 0.0), {}) +cnt: 2, ((T([128, 8, 49, 64], f16, stride=(25088, 64, 512, 1)), [0, 0, 1, 0, 0, 0], 0.0), {}) +cnt: 2, ((T([128, 8, 50, 64], f16, stride=(25600, 64, 512, 1)), [0, 0, -1, 0, 0, 0]), {}) +cnt: 2, ((T([128, 8, 197, 40], f16, stride=(63040, 40, 320, 1)), [0, 0, -1, 0, 0, 0]), {}) +cnt: 2, ((T([128, 8, 785, 16], f16, stride=(100480, 16, 128, 1)), [0, 0, -1, 0, 0, 0]), {}) +cnt: 2, ((T([128, 8, 3137, 8], f16, stride=(200768, 8, 64, 1)), [0, 0, -1, 0, 0, 0]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([64, 3, 4, 4], f16), T([64], f16), [4, 4], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 64, 56, 56], f16, stride=(200768, 1, 3584, 64)), T([64, 1, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 2, ((T([128, 16, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([16, 1, 3, 3], f16), T([16], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 16), {}) +cnt: 2, ((T([128, 24, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([24, 1, 5, 5], f16), T([24], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 24), {}) +cnt: 2, ((T([128, 24, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([24, 1, 7, 7], f16), T([24], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 24), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 2, 2], f16), T([128], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 28, 28], f16, stride=(100480, 1, 3584, 128)), T([128, 1, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 128), {}) +cnt: 2, ((T([128, 32, 28, 28], f16, stride=(301440, 
1, 10752, 384)), T([32, 1, 3, 3], f16), T([32], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 2, ((T([128, 48, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([48, 1, 5, 5], f16), T([48], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 48), {}) +cnt: 2, ((T([128, 48, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([48, 1, 7, 7], f16), T([48], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 48), {}) +cnt: 1, ((T([128, 128, 28, 28], f16), T([320, 128, 2, 2], f16), T([320], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 320, 14, 14], f16, stride=(63040, 1, 4480, 320)), T([320, 1, 3, 3], f16), T([320], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 320), {}) +cnt: 2, ((T([128, 80, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([80, 1, 3, 3], f16), T([80], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 80), {}) +cnt: 2, ((T([128, 120, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([120, 1, 5, 5], f16), T([120], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 120), {}) +cnt: 2, ((T([128, 120, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([120, 1, 7, 7], f16), T([120], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 120), {}) +cnt: 1, ((T([128, 320, 14, 14], f16), T([512, 320, 2, 2], f16), T([512], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 512, 7, 7], f16, stride=(25600, 1, 3584, 512)), T([512, 1, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 512), {}) +cnt: 2, ((T([128, 128, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([128, 1, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 128), {}) +cnt: 2, ((T([128, 192, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([192, 1, 5, 5], f16), T([192], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 192), {}) +cnt: 2, ((T([128, 192, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([192, 1, 7, 7], f16), T([192], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 192), {}) +Operator: aten.convolution_backward.default +cnt: 2, ((T([128, 192, 7, 7], 
f16, stride=(25088, 1, 3584, 512)), T([128, 192, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([192, 1, 7, 7], f16), [192], [1, 1], [3, 3], [1, 1], False, [0, 0], 192, [True, True, True]), {}) +cnt: 2, ((T([128, 192, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([128, 192, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([192, 1, 5, 5], f16), [192], [1, 1], [2, 2], [1, 1], False, [0, 0], 192, [True, True, True]), {}) +cnt: 2, ((T([128, 128, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([128, 128, 7, 7], f16, stride=(76800, 1, 10752, 1536)), T([128, 1, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 128, [True, True, True]), {}) +cnt: 2, ((T([128, 512, 7, 7], f16, stride=(25600, 1, 3584, 512)), T([128, 512, 7, 7], f16, stride=(25600, 1, 3584, 512)), T([512, 1, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 512, [True, True, True]), {}) +cnt: 1, ((T([128, 512, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([128, 320, 14, 14], f16), T([512, 320, 2, 2], f16), [512], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 120, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([128, 120, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([120, 1, 7, 7], f16), [120], [1, 1], [3, 3], [1, 1], False, [0, 0], 120, [True, True, True]), {}) +cnt: 2, ((T([128, 120, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([128, 120, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([120, 1, 5, 5], f16), [120], [1, 1], [2, 2], [1, 1], False, [0, 0], 120, [True, True, True]), {}) +cnt: 2, ((T([128, 80, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([128, 80, 14, 14], f16, stride=(189120, 1, 13440, 960)), T([80, 1, 3, 3], f16), [80], [1, 1], [1, 1], [1, 1], False, [0, 0], 80, [True, True, True]), {}) +cnt: 2, ((T([128, 320, 14, 14], f16, stride=(63040, 1, 4480, 320)), T([128, 320, 14, 14], f16, stride=(63040, 1, 4480, 320)), T([320, 1, 3, 3], f16), [320], [1, 1], [1, 1], [1, 1], False, [0, 0], 320, [True, True, True]), {}) +cnt: 1, ((T([128, 320, 14, 
14], f16, stride=(62720, 1, 4480, 320)), T([128, 128, 28, 28], f16), T([320, 128, 2, 2], f16), [320], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 48, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 48, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([48, 1, 7, 7], f16), [48], [1, 1], [3, 3], [1, 1], False, [0, 0], 48, [True, True, True]), {}) +cnt: 2, ((T([128, 48, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 48, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([48, 1, 5, 5], f16), [48], [1, 1], [2, 2], [1, 1], False, [0, 0], 48, [True, True, True]), {}) +cnt: 2, ((T([128, 32, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 32, 28, 28], f16, stride=(301440, 1, 10752, 384)), T([32, 1, 3, 3], f16), [32], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, True]), {}) +cnt: 2, ((T([128, 128, 28, 28], f16, stride=(100480, 1, 3584, 128)), T([128, 128, 28, 28], f16, stride=(100480, 1, 3584, 128)), T([128, 1, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 128, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 64, 56, 56], f16), T([128, 64, 2, 2], f16), [128], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([128, 24, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([24, 1, 7, 7], f16), [24], [1, 1], [3, 3], [1, 1], False, [0, 0], 24, [True, True, True]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([128, 24, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([24, 1, 5, 5], f16), [24], [1, 1], [2, 2], [1, 1], False, [0, 0], 24, [True, True, True]), {}) +cnt: 2, ((T([128, 16, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([128, 16, 56, 56], f16, stride=(602304, 1, 10752, 192)), T([16, 1, 3, 3], f16), [16], [1, 1], [1, 1], [1, 1], False, [0, 0], 16, [True, True, True]), {}) +cnt: 2, ((T([128, 64, 56, 56], f16, stride=(200768, 1, 
3584, 64)), T([128, 64, 56, 56], f16, stride=(200768, 1, 3584, 64)), T([64, 1, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([128, 3, 224, 224], f16), T([64, 3, 4, 4], f16), [64], [4, 4], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.gelu.default +cnt: 2, ((T([128, 3137, 512], f16),), {}) +cnt: 2, ((T([128, 785, 1024], f16),), {}) +cnt: 2, ((T([128, 197, 1280], f16),), {}) +cnt: 2, ((T([128, 50, 2048], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 2, ((T([128, 50, 2048], f16), T([128, 50, 2048], f16)), {}) +cnt: 2, ((T([128, 197, 1280], f16), T([128, 197, 1280], f16)), {}) +cnt: 2, ((T([128, 785, 1024], f16), T([128, 785, 1024], f16)), {}) +cnt: 2, ((T([128, 3137, 512], f16), T([128, 3137, 512], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 512], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 512], f16, stride=(25600, 1))), {}) +cnt: 2, ((T([6400, 512], f16), T([512, 2048], f16)), {}) +cnt: 2, ((T([512, 6400], f16, stride=(1, 512)), T([6400, 2048], f16)), {}) +cnt: 2, ((T([6400, 2048], f16), T([2048, 512], f16)), {}) +cnt: 2, ((T([2048, 6400], f16, stride=(1, 2048)), T([6400, 512], f16)), {}) +cnt: 2, ((T([6400, 512], f16), T([512, 512], f16)), {}) +cnt: 2, ((T([512, 6400], f16, stride=(1, 512)), T([6400, 512], f16)), {}) +cnt: 2, ((T([6400, 1536], f16), T([1536, 512], f16)), {}) +cnt: 2, ((T([1536, 6400], f16, stride=(1, 1536)), T([6400, 512], f16)), {}) +cnt: 2, ((T([25216, 320], f16), T([320, 1280], f16)), {}) +cnt: 2, ((T([320, 25216], f16, stride=(1, 320)), T([25216, 1280], f16)), {}) +cnt: 2, ((T([25216, 1280], f16), T([1280, 320], f16)), {}) +cnt: 2, ((T([1280, 25216], f16, stride=(1, 
1280)), T([25216, 320], f16)), {}) +cnt: 2, ((T([25216, 320], f16), T([320, 320], f16)), {}) +cnt: 2, ((T([320, 25216], f16, stride=(1, 320)), T([25216, 320], f16)), {}) +cnt: 2, ((T([25216, 960], f16), T([960, 320], f16)), {}) +cnt: 2, ((T([960, 25216], f16, stride=(1, 960)), T([25216, 320], f16)), {}) +cnt: 2, ((T([100480, 128], f16), T([128, 1024], f16)), {}) +cnt: 2, ((T([128, 100480], f16, stride=(1, 128)), T([100480, 1024], f16)), {}) +cnt: 2, ((T([100480, 1024], f16), T([1024, 128], f16)), {}) +cnt: 2, ((T([1024, 100480], f16, stride=(1, 1024)), T([100480, 128], f16)), {}) +cnt: 2, ((T([100480, 128], f16), T([128, 128], f16)), {}) +cnt: 2, ((T([128, 100480], f16, stride=(1, 128)), T([100480, 128], f16)), {}) +cnt: 2, ((T([100480, 384], f16), T([384, 128], f16)), {}) +cnt: 2, ((T([384, 100480], f16, stride=(1, 384)), T([100480, 128], f16)), {}) +cnt: 2, ((T([401536, 64], f16), T([64, 512], f16)), {}) +cnt: 2, ((T([64, 401536], f16, stride=(1, 64)), T([401536, 512], f16)), {}) +cnt: 2, ((T([401536, 512], f16), T([512, 64], f16)), {}) +cnt: 2, ((T([512, 401536], f16, stride=(1, 512)), T([401536, 64], f16)), {}) +cnt: 2, ((T([401536, 64], f16), T([64, 64], f16)), {}) +cnt: 2, ((T([64, 401536], f16, stride=(1, 64)), T([401536, 64], f16)), {}) +cnt: 2, ((T([401536, 192], f16), T([192, 64], f16)), {}) +cnt: 2, ((T([192, 401536], f16, stride=(1, 192)), T([401536, 64], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([128, 8, 3136, 8], f16, stride=(602304, 8, 192, 1)), T([128, 8, 3136, 8], f16, stride=(200704, 25088, 1, 3136))), {}) +cnt: 2, ((T([128, 8, 3137, 8], f16), 0.3535533905932738), {}) +cnt: 2, ((T([128, 8, 784, 16], f16, stride=(301440, 16, 384, 1)), T([128, 8, 784, 16], f16, stride=(100352, 12544, 1, 784))), {}) +cnt: 2, ((T([128, 8, 785, 16], f16), 0.25), {}) +cnt: 2, ((T([128, 8, 196, 40], f16, stride=(189120, 40, 960, 1)), T([128, 8, 196, 40], f16, stride=(62720, 7840, 1, 196))), {}) +cnt: 2, ((T([128, 8, 197, 40], f16), 0.15811388300841897), {}) 
+cnt: 2, ((T([128, 8, 49, 64], f16, stride=(76800, 64, 1536, 1)), T([128, 8, 49, 64], f16, stride=(25088, 3136, 1, 49))), {}) +cnt: 2, ((T([128, 8, 50, 64], f16), 0.125), {}) +cnt: 2, ((T([128, 8, 50, 64], f16, stride=(25600, 64, 512, 1)), 0.125), {}) +cnt: 2, ((T([128, 8, 49, 64], f16, stride=(25088, 64, 512, 1)), T([128, 8, 49, 64], f16, stride=(76800, 64, 1536, 1))), {}) +cnt: 2, ((T([128, 8, 49, 64], f16, stride=(25088, 64, 512, 1)), T([128, 8, 49, 64], f16, stride=(25088, 3136, 1, 49))), {}) +cnt: 2, ((T([128, 8, 197, 40], f16, stride=(63040, 40, 320, 1)), 0.15811388300841897), {}) +cnt: 2, ((T([128, 8, 196, 40], f16, stride=(62720, 40, 320, 1)), T([128, 8, 196, 40], f16, stride=(189120, 40, 960, 1))), {}) +cnt: 2, ((T([128, 8, 196, 40], f16, stride=(62720, 40, 320, 1)), T([128, 8, 196, 40], f16, stride=(62720, 7840, 1, 196))), {}) +cnt: 2, ((T([128, 8, 785, 16], f16, stride=(100480, 16, 128, 1)), 0.25), {}) +cnt: 2, ((T([128, 8, 784, 16], f16, stride=(100352, 16, 128, 1)), T([128, 8, 784, 16], f16, stride=(301440, 16, 384, 1))), {}) +cnt: 2, ((T([128, 8, 784, 16], f16, stride=(100352, 16, 128, 1)), T([128, 8, 784, 16], f16, stride=(100352, 12544, 1, 784))), {}) +cnt: 2, ((T([128, 8, 3137, 8], f16, stride=(200768, 8, 64, 1)), 0.3535533905932738), {}) +cnt: 2, ((T([128, 8, 3136, 8], f16, stride=(200704, 8, 64, 1)), T([128, 8, 3136, 8], f16, stride=(602304, 8, 192, 1))), {}) +cnt: 2, ((T([128, 8, 3136, 8], f16, stride=(200704, 8, 64, 1)), T([128, 8, 3136, 8], f16, stride=(200704, 25088, 1, 3136))), {}) +Operator: aten.native_layer_norm.default +cnt: 1, ((T([128, 3136, 64], f16, stride=(200704, 1, 3136)), [64], T([64], f16), T([64], f16), 1e-05), {}) +cnt: 4, ((T([128, 3137, 64], f16), [64], T([64], f16), T([64], f16), 1e-06), {}) +cnt: 1, ((T([128, 784, 128], f16, stride=(100352, 1, 784)), [128], T([128], f16), T([128], f16), 1e-05), {}) +cnt: 4, ((T([128, 785, 128], f16), [128], T([128], f16), T([128], f16), 1e-06), {}) +cnt: 1, ((T([128, 196, 320], f16, 
stride=(62720, 1, 196)), [320], T([320], f16), T([320], f16), 1e-05), {}) +cnt: 4, ((T([128, 197, 320], f16), [320], T([320], f16), T([320], f16), 1e-06), {}) +cnt: 1, ((T([128, 49, 512], f16, stride=(25088, 1, 49)), [512], T([512], f16), T([512], f16), 1e-05), {}) +cnt: 5, ((T([128, 50, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 5, ((T([128, 50, 512], f16), T([128, 50, 512], f16), [512], T([128, 50, 1], f32), T([128, 50, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 1, ((T([128, 49, 512], f16, stride=(25600, 512, 1)), T([128, 49, 512], f16, stride=(25088, 1, 49)), [512], T([128, 49, 1], f32), T([128, 49, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 4, ((T([128, 197, 320], f16), T([128, 197, 320], f16), [320], T([128, 197, 1], f32), T([128, 197, 1], f32), T([320], f16), T([320], f16), [True, True, True]), {}) +cnt: 1, ((T([128, 196, 320], f16, stride=(63040, 320, 1)), T([128, 196, 320], f16, stride=(62720, 1, 196)), [320], T([128, 196, 1], f32), T([128, 196, 1], f32), T([320], f16), T([320], f16), [True, True, True]), {}) +cnt: 4, ((T([128, 785, 128], f16), T([128, 785, 128], f16), [128], T([128, 785, 1], f32), T([128, 785, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 1, ((T([128, 784, 128], f16, stride=(100480, 128, 1)), T([128, 784, 128], f16, stride=(100352, 1, 784)), [128], T([128, 784, 1], f32), T([128, 784, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 4, ((T([128, 3137, 64], f16), T([128, 3137, 64], f16), [64], T([128, 3137, 1], f32), T([128, 3137, 1], f32), T([64], f16), T([64], f16), [True, True, True]), {}) +cnt: 1, ((T([128, 3136, 64], f16, stride=(200768, 64, 1)), T([128, 3136, 64], f16, stride=(200704, 1, 3136)), [64], T([128, 3136, 1], f32), T([128, 3136, 1], f32), T([64], f16), T([64], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], 
f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([128, 512], f16), [128, 50, 512], 1, 0), {}) +cnt: 2, ((T([128, 8, 50, 64], f16), [3, 128, 8, 50, 64], 0, 2), {}) +cnt: 2, ((T([128, 8, 50, 64], f16), [3, 128, 8, 50, 64], 0, 1), {}) +cnt: 2, ((T([128, 8, 50, 64], f16), [3, 128, 8, 50, 64], 0, 0), {}) +cnt: 2, ((T([128, 8, 197, 40], f16), [3, 128, 8, 197, 40], 0, 2), {}) +cnt: 2, ((T([128, 8, 197, 40], f16), [3, 128, 8, 197, 40], 0, 1), {}) +cnt: 2, ((T([128, 8, 197, 40], f16), [3, 128, 8, 197, 40], 0, 0), {}) +cnt: 2, ((T([128, 8, 785, 16], f16), [3, 128, 8, 785, 16], 0, 2), {}) +cnt: 2, ((T([128, 8, 785, 16], f16), [3, 128, 8, 785, 16], 0, 1), {}) +cnt: 2, ((T([128, 8, 785, 16], f16), [3, 128, 8, 785, 16], 0, 0), {}) +cnt: 2, ((T([128, 8, 3137, 8], f16), [3, 128, 8, 3137, 8], 0, 2), {}) +cnt: 2, ((T([128, 8, 3137, 8], f16), [3, 128, 8, 3137, 8], 0, 1), {}) +cnt: 2, ((T([128, 8, 3137, 8], f16), [3, 128, 8, 3137, 8], 0, 0), {}) +Operator: aten.slice_backward.default +cnt: 5, ((T([128, 50, 512], f16), [128, 50, 512], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 49, 64], f16, stride=(25088, 64, 512, 1)), [128, 8, 49, 64], 3, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 49, 64], f16), [128, 8, 50, 64], 2, 1, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 50, 64], f16), [128, 8, 50, 64], 1, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 50, 64], f16), [128, 8, 50, 64], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 49, 512], f16), [128, 50, 512], 1, 1, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 1, 512], f16, stride=(25600, 512, 1)), [128, 50, 512], 1, 0, 1, 1), {}) +cnt: 1, ((T([128, 196, 320], f16, stride=(62720, 1, 196)), [128, 196, 320], 2, 0, 9223372036854775807, 1), {}) +cnt: 3, ((T([128, 196, 320], f16), [128, 197, 320], 1, 1, 
9223372036854775807, 1), {}) +cnt: 5, ((T([128, 197, 320], f16), [128, 197, 320], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 196, 40], f16, stride=(62720, 40, 320, 1)), [128, 8, 196, 40], 3, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 196, 40], f16), [128, 8, 197, 40], 2, 1, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 197, 40], f16), [128, 8, 197, 40], 1, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 197, 40], f16), [128, 8, 197, 40], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 1, 320], f16, stride=(63040, 320, 1)), [128, 197, 320], 1, 0, 1, 1), {}) +cnt: 1, ((T([128, 784, 128], f16, stride=(100352, 1, 784)), [128, 784, 128], 2, 0, 9223372036854775807, 1), {}) +cnt: 3, ((T([128, 784, 128], f16), [128, 785, 128], 1, 1, 9223372036854775807, 1), {}) +cnt: 5, ((T([128, 785, 128], f16), [128, 785, 128], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 784, 16], f16, stride=(100352, 16, 128, 1)), [128, 8, 784, 16], 3, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 784, 16], f16), [128, 8, 785, 16], 2, 1, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 785, 16], f16), [128, 8, 785, 16], 1, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 785, 16], f16), [128, 8, 785, 16], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 1, 128], f16, stride=(100480, 128, 1)), [128, 785, 128], 1, 0, 1, 1), {}) +cnt: 1, ((T([128, 3136, 64], f16, stride=(200704, 1, 3136)), [128, 3136, 64], 2, 0, 9223372036854775807, 1), {}) +cnt: 3, ((T([128, 3136, 64], f16), [128, 3137, 64], 1, 1, 9223372036854775807, 1), {}) +cnt: 5, ((T([128, 3137, 64], f16), [128, 3137, 64], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 3136, 8], f16, stride=(200704, 8, 64, 1)), [128, 8, 3136, 8], 3, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 3136, 8], f16), [128, 8, 3137, 8], 2, 1, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 3137, 8], f16), [128, 8, 3137, 8], 1, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 8, 
3137, 8], f16), [128, 8, 3137, 8], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 1, 64], f16, stride=(200768, 64, 1)), [128, 3137, 64], 1, 0, 1, 1), {}) +Operator: aten.split_with_sizes.default +cnt: 2, ((T([128, 64, 56, 56], f16, stride=(602304, 1, 10752, 192)), [16, 24, 24], 1), {}) +cnt: 2, ((T([128, 128, 28, 28], f16, stride=(301440, 1, 10752, 384)), [32, 48, 48], 1), {}) +cnt: 2, ((T([128, 320, 14, 14], f16, stride=(189120, 1, 13440, 960)), [80, 120, 120], 1), {}) +cnt: 2, ((T([128, 512, 7, 7], f16, stride=(76800, 1, 10752, 1536)), [128, 192, 192], 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 4, ((T([6400, 512], f16), [0], True), {}) +cnt: 2, ((T([6400, 2048], f16), [0], True), {}) +cnt: 2, ((T([6400, 1536], f16), [0], True), {}) +cnt: 1, ((T([128, 1, 512], f16, stride=(25600, 512, 1)), [0], True), {}) +cnt: 4, ((T([25216, 320], f16), [0], True), {}) +cnt: 2, ((T([25216, 1280], f16), [0], True), {}) +cnt: 2, ((T([25216, 960], f16), [0], True), {}) +cnt: 1, ((T([128, 1, 320], f16, stride=(63040, 320, 1)), [0], True), {}) +cnt: 4, ((T([100480, 128], f16), [0], True), {}) +cnt: 2, ((T([100480, 1024], f16), [0], True), {}) +cnt: 2, ((T([100480, 384], f16), [0], True), {}) +cnt: 1, ((T([128, 1, 128], f16, stride=(100480, 128, 1)), [0], True), {}) +cnt: 4, ((T([401536, 64], f16), [0], True), {}) +cnt: 2, ((T([401536, 512], f16), [0], True), {}) +cnt: 2, ((T([401536, 192], f16), [0], True), {}) +cnt: 1, ((T([128, 1, 64], f16, stride=(200768, 64, 1)), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/convmixer_768_32_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/convmixer_768_32_training.txt new file mode 100644 index 000000000..a41c33780 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/convmixer_768_32_training.txt @@ -0,0 +1,45 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([32, 1000], f16), 1, False), {}) 
+Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([32, 1000], f16), T([32, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 64, ((T([32, 768, 32, 32], f16), T([32, 768, 32, 32], f16)), {}) +Operator: aten.add_.Tensor +cnt: 65, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 768], f16), T([768, 1000], f16, stride=(1, 768))), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([768, 3, 7, 7], f16), T([768], f16), [7, 7], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 32, ((T([32, 768, 32, 32], f16), T([768, 1, 7, 7], f16), T([768], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 768), {}) +cnt: 32, ((T([32, 768, 32, 32], f16), T([768, 768, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 32, ((T([32, 768, 32, 32], f16), T([32, 768, 32, 32], f16), T([768, 768, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 32, ((T([32, 768, 32, 32], f16), T([32, 768, 32, 32], f16), T([768, 1, 7, 7], f16), [768], [1, 1], [3, 3], [1, 1], False, [0, 0], 768, [True, True, True]), {}) +cnt: 1, ((T([32, 768, 32, 32], f16), T([32, 3, 224, 224], f16), T([768, 3, 7, 7], f16), [768], [7, 7], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 768, 32, 32], f16, stride=(768, 1, 0, 0)), 1024), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([32], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 768, 32, 32], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16), T([1000, 768], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(1, 1000)), T([32, 768], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 65, ((T([32, 768, 
32, 32], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 65, ((T([32, 768, 32, 32], f16), T([32, 768, 32, 32], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([32, 1000], f16), T([32], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([32, 1000], f16), T([32], i64), None, 1, -100), {}) +Operator: aten.relu.default +cnt: 65, ((T([32, 768, 32, 32], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 65, ((T([32, 768, 32, 32], f16), T([32, 768, 32, 32], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/convnext_base_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/convnext_base_training.txt new file mode 100644 index 000000000..8e67418f5 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/convnext_base_training.txt @@ -0,0 +1,210 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([32, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([32, 1000], f16), T([32, 1000], f16), 1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 3, ((T([100352, 512], f16), [32, 56, 56, 512]), {}) +cnt: 3, ((T([100352, 128], f16), [32, 56, 56, 128]), {}) +cnt: 3, ((T([25088, 1024], f16), [32, 28, 28, 1024]), {}) +cnt: 3, ((T([25088, 256], f16), [32, 28, 28, 256]), {}) +cnt: 27, ((T([6272, 2048], f16), [32, 14, 14, 2048]), {}) +cnt: 27, ((T([6272, 512], f16), [32, 14, 14, 512]), {}) +cnt: 3, ((T([1568, 4096], f16), [32, 7, 7, 4096]), {}) +cnt: 3, ((T([1568, 1024], f16), [32, 7, 7, 1024]), {}) +cnt: 3, ((T([32, 7, 7, 1024], f16), [1568, 1024]), {}) +Operator: aten.add.Tensor +cnt: 3, ((T([32, 56, 56, 
512], f16), T([512], f16)), {}) +cnt: 3, ((T([32, 56, 56, 128], f16), T([128], f16)), {}) +cnt: 7, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128))), {}) +cnt: 1, ((T([32, 1, 56, 56], f16), 1e-06), {}) +cnt: 1, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([128, 1, 1], f16)), {}) +cnt: 3, ((T([32, 28, 28, 1024], f16), T([1024], f16)), {}) +cnt: 3, ((T([32, 28, 28, 256], f16), T([256], f16)), {}) +cnt: 7, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256))), {}) +cnt: 1, ((T([32, 1, 28, 28], f16), 1e-06), {}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([256, 1, 1], f16)), {}) +cnt: 27, ((T([32, 14, 14, 2048], f16), T([2048], f16)), {}) +cnt: 27, ((T([32, 14, 14, 512], f16), T([512], f16)), {}) +cnt: 55, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512))), {}) +cnt: 1, ((T([32, 1, 14, 14], f16), 1e-06), {}) +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([512, 1, 1], f16)), {}) +cnt: 3, ((T([32, 7, 7, 4096], f16), T([4096], f16)), {}) +cnt: 3, ((T([32, 7, 7, 1024], f16), T([1024], f16)), {}) +cnt: 3, ((T([32, 1024, 7, 7], f16, stride=(50176, 1, 7168, 1024)), T([32, 1024, 7, 7], f16, stride=(50176, 1, 7168, 1024))), {}) +cnt: 3, ((T([32, 1024, 7, 7], f16), T([32, 1024, 7, 7], f16, stride=(50176, 1, 7168, 1024))), {}) +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([32, 512, 14, 14], f16)), {}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([32, 256, 28, 28], f16)), {}) +cnt: 1, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([32, 128, 56, 56], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {}) +Operator: aten.as_strided_.default +cnt: 1, ((T([32, 1024, 1, 1], f16), 
[32, 1024, 1, 1], [1024, 1, 1024, 1024]), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([128, 3, 4, 4], f16), T([128], f16), [4, 4], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([128, 1, 7, 7], f16), T([128], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 128), {}) +cnt: 1, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([256, 128, 2, 2], f16), T([256], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([256, 1, 7, 7], f16), T([256], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 256), {}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([512, 256, 2, 2], f16), T([512], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 27, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([512, 1, 7, 7], f16), T([512], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 512), {}) +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([1024, 512, 2, 2], f16), T([1024], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 1024, 7, 7], f16, stride=(50176, 1, 7168, 1024)), T([1024, 1, 7, 7], f16), T([1024], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 1024), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([32, 1024, 7, 7], f16, stride=(50176, 1, 7168, 1024)), T([32, 1024, 7, 7], f16, stride=(50176, 1, 7168, 1024)), T([1024, 1, 7, 7], f16), [1024], [1, 1], [3, 3], [1, 1], False, [0, 0], 1024, [True, True, True]), {}) +cnt: 1, ((T([32, 1024, 7, 7], f16), T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([1024, 512, 2, 2], f16), [1024], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 27, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([512, 1, 7, 7], 
f16), [512], [1, 1], [3, 3], [1, 1], False, [0, 0], 512, [True, True, True]), {}) +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([512, 256, 2, 2], f16), [512], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([256, 1, 7, 7], f16), [256], [1, 1], [3, 3], [1, 1], False, [0, 0], 256, [True, True, True]), {}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([256, 128, 2, 2], f16), [256], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([128, 1, 7, 7], f16), [128], [1, 1], [3, 3], [1, 1], False, [0, 0], 128, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([32, 3, 224, 224], f16), T([128, 3, 4, 4], f16), [128], [4, 4], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +cnt: 1, ((T([32, 1024], f16), T([32, 1024], f16)), {}) +cnt: 1, ((T([1024, 512, 2, 2], f16), T([1024, 512, 2, 2], f16, stride=(2048, 1, 1024, 512))), {}) +cnt: 1, ((T([512, 256, 2, 2], f16), T([512, 256, 2, 2], f16, stride=(1024, 1, 512, 256))), {}) +cnt: 1, ((T([256, 128, 2, 2], f16), T([256, 128, 2, 2], f16, stride=(512, 1, 256, 128))), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 1024, 7, 7], f16, stride=(1024, 1, 0, 0)), 49), {}) +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(196, 0, 14, 1)), 512), {}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(784, 0, 28, 1)), 256), {}) +cnt: 1, ((T([32, 128, 56, 56], f16, stride=(3136, 0, 56, 1)), 128), {}) +Operator: aten.gelu.default +cnt: 3, ((T([32, 56, 56, 
512], f16),), {}) +cnt: 3, ((T([32, 28, 28, 1024], f16),), {}) +cnt: 27, ((T([32, 14, 14, 2048], f16),), {}) +cnt: 3, ((T([32, 7, 7, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 3, ((T([32, 7, 7, 4096], f16), T([32, 7, 7, 4096], f16)), {}) +cnt: 27, ((T([32, 14, 14, 2048], f16), T([32, 14, 14, 2048], f16)), {}) +cnt: 3, ((T([32, 28, 28, 1024], f16), T([32, 28, 28, 1024], f16)), {}) +cnt: 3, ((T([32, 56, 56, 512], f16), T([32, 56, 56, 512], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([32], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 1024, 7, 7], f16, stride=(50176, 1, 7168, 1024)), [-1, -2], True), {}) +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), [1], True), {}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), [1], True), {}) +cnt: 1, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), [1], True), {}) +Operator: aten.mm.default +cnt: 3, ((T([100352, 128], f16), T([128, 512], f16, stride=(1, 128))), {}) +cnt: 3, ((T([100352, 512], f16), T([512, 128], f16, stride=(1, 512))), {}) +cnt: 3, ((T([25088, 256], f16), T([256, 1024], f16, stride=(1, 256))), {}) +cnt: 3, ((T([25088, 1024], f16), T([1024, 256], f16, stride=(1, 1024))), {}) +cnt: 27, ((T([6272, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 27, ((T([6272, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +cnt: 3, ((T([1568, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 3, ((T([1568, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +cnt: 1, ((T([32, 1000], f16), T([1000, 1024], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(1, 1000)), T([32, 1024], f16)), {}) +cnt: 3, ((T([1024, 1568], f16, stride=(1, 1024)), T([1568, 4096], f16)), {}) +cnt: 3, ((T([1568, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 3, ((T([4096, 1568], f16, stride=(1, 4096)), T([1568, 1024], f16)), {}) +cnt: 3, ((T([1568, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 27, ((T([512, 6272], f16, 
stride=(1, 512)), T([6272, 2048], f16)), {}) +cnt: 27, ((T([6272, 512], f16), T([512, 2048], f16)), {}) +cnt: 27, ((T([2048, 6272], f16, stride=(1, 2048)), T([6272, 512], f16)), {}) +cnt: 27, ((T([6272, 2048], f16), T([2048, 512], f16)), {}) +cnt: 3, ((T([256, 25088], f16, stride=(1, 256)), T([25088, 1024], f16)), {}) +cnt: 3, ((T([25088, 256], f16), T([256, 1024], f16)), {}) +cnt: 3, ((T([1024, 25088], f16, stride=(1, 1024)), T([25088, 256], f16)), {}) +cnt: 3, ((T([25088, 1024], f16), T([1024, 256], f16)), {}) +cnt: 3, ((T([128, 100352], f16, stride=(1, 128)), T([100352, 512], f16)), {}) +cnt: 3, ((T([100352, 128], f16), T([128, 512], f16)), {}) +cnt: 3, ((T([512, 100352], f16, stride=(1, 512)), T([100352, 128], f16)), {}) +cnt: 3, ((T([100352, 512], f16), T([512, 128], f16)), {}) +Operator: aten.mul.Scalar +cnt: 1, ((T([32, 1, 14, 14], f16), -0.5), {}) +cnt: 1, ((T([32, 1, 14, 14], f16), 0.00390625), {}) +cnt: 1, ((T([32, 1, 28, 28], f16), -0.5), {}) +cnt: 1, ((T([32, 1, 28, 28], f16), 0.0078125), {}) +cnt: 1, ((T([32, 1, 56, 56], f16), -0.5), {}) +cnt: 1, ((T([32, 1, 56, 56], f16), 0.015625), {}) +Operator: aten.mul.Tensor +cnt: 6, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([1, 128, 1, 1], f16)), {}) +cnt: 2, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([32, 1, 56, 56], f16)), {}) +cnt: 2, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([128, 1, 1], f16)), {}) +cnt: 6, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([1, 256, 1, 1], f16)), {}) +cnt: 2, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([32, 1, 28, 28], f16)), {}) +cnt: 2, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([256, 1, 1], f16)), {}) +cnt: 54, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([1, 512, 1, 1], f16)), {}) +cnt: 2, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([32, 1, 14, 14], f16)), {}) +cnt: 2, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), 
T([512, 1, 1], f16)), {}) +cnt: 3, ((T([32, 1024, 7, 7], f16, stride=(50176, 1, 7168, 1024)), T([1, 1024, 1, 1], f16)), {}) +cnt: 3, ((T([32, 1024, 7, 7], f16), T([32, 1024, 7, 7], f16, stride=(50176, 1, 7168, 1024))), {}) +cnt: 3, ((T([32, 1024, 7, 7], f16), T([1, 1024, 1, 1], f16)), {}) +cnt: 29, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512))), {}) +cnt: 1, ((T([32, 1, 14, 14], f16), T([32, 1, 14, 14], f16)), {}) +cnt: 1, ((T([32, 1, 14, 14], f16), T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512))), {}) +cnt: 5, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256))), {}) +cnt: 1, ((T([32, 1, 28, 28], f16), T([32, 1, 28, 28], f16)), {}) +cnt: 1, ((T([32, 1, 28, 28], f16), T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256))), {}) +cnt: 5, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128))), {}) +cnt: 1, ((T([32, 1, 56, 56], f16), T([32, 1, 56, 56], f16)), {}) +cnt: 1, ((T([32, 1, 56, 56], f16), T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128))), {}) +Operator: aten.native_layer_norm.default +cnt: 1, ((T([32, 56, 56, 128], f16, stride=(401408, 56, 1, 3136)), [128], T([128], f16), T([128], f16), 1e-06), {}) +cnt: 3, ((T([32, 56, 56, 128], f16), [128], T([128], f16), T([128], f16), 1e-06), {}) +cnt: 3, ((T([32, 28, 28, 256], f16), [256], T([256], f16), T([256], f16), 1e-06), {}) +cnt: 27, ((T([32, 14, 14, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {}) +cnt: 3, ((T([32, 7, 7, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-06), {}) +cnt: 1, ((T([32, 1, 1, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 1, ((T([32, 1, 1, 1024], f16), T([32, 1, 1, 1024], f16), [1024], T([32, 1, 1, 1], f32), T([32, 1, 1, 1], f32), T([1024], f16), T([1024], f16), [True, True, 
True]), {}) +cnt: 3, ((T([32, 7, 7, 1024], f16), T([32, 7, 7, 1024], f16), [1024], T([32, 7, 7, 1], f32), T([32, 7, 7, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +cnt: 27, ((T([32, 14, 14, 512], f16), T([32, 14, 14, 512], f16), [512], T([32, 14, 14, 1], f32), T([32, 14, 14, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 3, ((T([32, 28, 28, 256], f16), T([32, 28, 28, 256], f16), [256], T([32, 28, 28, 1], f32), T([32, 28, 28, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +cnt: 3, ((T([32, 56, 56, 128], f16), T([32, 56, 56, 128], f16), [128], T([32, 56, 56, 1], f32), T([32, 56, 56, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 1, ((T([32, 56, 56, 128], f16), T([32, 56, 56, 128], f16, stride=(401408, 56, 1, 3136)), [128], T([32, 56, 56, 1], f32), T([32, 56, 56, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +Operator: aten.neg.default +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)),), {}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)),), {}) +cnt: 1, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)),), {}) +Operator: aten.new_empty_strided.default +cnt: 1, ((T([1024, 512, 2, 2], f16, stride=(2048, 1, 1024, 512)), [1024, 512, 2, 2], [2048, 4, 2, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([512, 256, 2, 2], f16, stride=(1024, 1, 512, 256)), [512, 256, 2, 2], [1024, 4, 2, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([256, 128, 2, 2], f16, stride=(512, 1, 256, 128)), [256, 128, 2, 2], [512, 4, 2, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.new_zeros.default +cnt: 1, ((T([32, 1024], f16), [32768]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([32, 1000], f16), T([32], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([32, 1000], f16), T([32], i64), 
None, 1, -100), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 1, ((T([32, 1, 14, 14], f16), 3), {}) +cnt: 1, ((T([32, 1, 28, 28], f16), 3), {}) +cnt: 1, ((T([32, 1, 56, 56], f16), 3), {}) +Operator: aten.rsqrt.default +cnt: 1, ((T([32, 1, 56, 56], f16),), {}) +cnt: 1, ((T([32, 1, 28, 28], f16),), {}) +cnt: 1, ((T([32, 1, 14, 14], f16),), {}) +Operator: aten.slice_backward.default +cnt: 2, ((T([512], f16), [512], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([256], f16), [256], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128], f16), [128], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.sub.Tensor +cnt: 2, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([32, 1, 56, 56], f16)), {}) +cnt: 2, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([32, 1, 28, 28], f16)), {}) +cnt: 2, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), T([32, 1, 14, 14], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16), [0], True), {}) +cnt: 3, ((T([32, 1024, 7, 7], f16), [0, 2, 3], True), {}) +cnt: 3, ((T([32, 7, 7, 1024], f16, stride=(50176, 7, 1, 49)), [0, 1, 2], True), {}) +cnt: 3, ((T([32, 7, 7, 4096], f16), [0, 1, 2], True), {}) +cnt: 29, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), [0, 2, 3], True), {}) +cnt: 2, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), [1], True), {}) +cnt: 27, ((T([32, 14, 14, 512], f16), [0, 1, 2], True), {}) +cnt: 27, ((T([32, 14, 14, 2048], f16), [0, 1, 2], True), {}) +cnt: 5, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), [0, 2, 3], True), {}) +cnt: 2, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), [1], True), {}) +cnt: 3, ((T([32, 28, 28, 256], f16), [0, 1, 2], True), {}) +cnt: 3, ((T([32, 28, 28, 1024], f16), [0, 1, 2], True), {}) +cnt: 5, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), [0, 2, 3], True), {}) +cnt: 2, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), [1], True), {}) +cnt: 3, ((T([32, 56, 56, 128], f16), 
[0, 1, 2], True), {}) +cnt: 3, ((T([32, 56, 56, 512], f16), [0, 1, 2], True), {}) +Operator: aten.var_mean.correction +cnt: 1, ((T([32, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), [1]), {'correction': 0, 'keepdim': True}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), [1]), {'correction': 0, 'keepdim': True}) +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), [1]), {'correction': 0, 'keepdim': True}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/crossvit_9_240_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/crossvit_9_240_training.txt new file mode 100644 index 000000000..eea124ed3 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/crossvit_9_240_training.txt @@ -0,0 +1,203 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 3, ((T([64, 4, 401, 401], f16), -1, False), {}) +cnt: 9, ((T([64, 4, 197, 197], f16), -1, False), {}) +cnt: 3, ((T([64, 4, 1, 197], f16), -1, False), {}) +cnt: 3, ((T([64, 4, 1, 401], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 3, ((T([64, 4, 1, 401], f16), T([64, 4, 1, 401], f16), -1, f16), {}) +cnt: 3, ((T([64, 4, 1, 197], f16), T([64, 4, 1, 197], f16), -1, f16), {}) +cnt: 9, ((T([64, 4, 197, 197], f16), T([64, 4, 197, 197], f16), -1, f16), {}) +cnt: 3, ((T([64, 4, 401, 401], f16), T([64, 4, 401, 401], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 12, ((T([64, 4, 401, 32], f16), [256, 401, 32]), {}) +cnt: 6, ((T([64, 4, 32, 401], f16), [256, 32, 401]), {}) +cnt: 3, ((T([256, 401, 401], f16), [64, 4, 401, 401]), {}) +cnt: 3, ((T([256, 401, 32], f16), [64, 4, 401, 32]), {}) +cnt: 6, ((T([64, 401, 4, 32], f16), [64, 401, 128]), {}) +cnt: 30, ((T([64, 4, 197, 64], f16), [256, 197, 64]), 
{}) +cnt: 12, ((T([64, 4, 64, 197], f16), [256, 64, 197]), {}) +cnt: 9, ((T([256, 197, 197], f16), [64, 4, 197, 197]), {}) +cnt: 9, ((T([256, 197, 64], f16), [64, 4, 197, 64]), {}) +cnt: 12, ((T([64, 197, 4, 64], f16), [64, 197, 256]), {}) +cnt: 3, ((T([64, 256], f16), [64, 1, 256]), {}) +cnt: 3, ((T([256, 1, 197], f16), [64, 4, 1, 197]), {}) +cnt: 3, ((T([256, 1, 64], f16), [64, 4, 1, 64]), {}) +cnt: 3, ((T([64, 128], f16), [64, 1, 128]), {}) +cnt: 3, ((T([256, 1, 401], f16), [64, 4, 1, 401]), {}) +cnt: 3, ((T([256, 1, 32], f16), [64, 4, 1, 32]), {}) +cnt: 3, ((T([64, 401, 128], f16), [25664, 128]), {}) +cnt: 3, ((T([64, 197, 256], f16), [12608, 256]), {}) +cnt: 9, ((T([64, 197, 3, 4, 64], f16), [64, 197, 768]), {}) +cnt: 3, ((T([64, 401, 3, 4, 32], f16), [64, 401, 384]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([64, 401, 128], f16), T([1, 401, 128], f16)), {}) +cnt: 1, ((T([64, 197, 256], f16), T([1, 197, 256], f16)), {}) +cnt: 27, ((T([64, 401, 128], f16), T([64, 401, 128], f16)), {}) +cnt: 51, ((T([64, 197, 256], f16), T([64, 197, 256], f16)), {}) +cnt: 3, ((T([64, 1, 256], f16), T([256], f16)), {}) +cnt: 3, ((T([64, 1, 256], f16, stride=(50432, 256, 1)), T([64, 1, 256], f16)), {}) +cnt: 3, ((T([64, 1, 128], f16), T([128], f16)), {}) +cnt: 3, ((T([64, 1, 128], f16, stride=(51328, 128, 1)), T([64, 1, 128], f16)), {}) +Operator: aten.addmm.default +cnt: 6, ((T([384], f16), T([25664, 128], f16), T([128, 384], f16, stride=(1, 128))), {}) +cnt: 9, ((T([128], f16), T([25664, 128], f16), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 3, ((T([128], f16), T([25664, 384], f16), T([384, 128], f16, stride=(1, 384))), {}) +cnt: 18, ((T([768], f16), T([12608, 256], f16), T([256, 768], f16, stride=(1, 256))), {}) +cnt: 15, ((T([256], f16), T([12608, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 9, ((T([256], f16), T([12608, 768], f16), T([768, 256], f16, stride=(1, 768))), {}) +cnt: 6, ((T([256], f16), T([64, 128], f16), T([128, 256], f16, stride=(1, 128))), 
{}) +cnt: 6, ((T([128], f16), T([64, 256], f16), T([256, 128], f16, stride=(1, 256))), {}) +cnt: 3, ((T([256], f16), T([64, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 3, ((T([128], f16), T([64, 128], f16), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 1, ((T([1000], f16), T([64, 128], f16, stride=(51328, 1)), T([128, 1000], f16, stride=(1, 128))), {}) +cnt: 1, ((T([1000], f16), T([64, 256], f16, stride=(50432, 1)), T([256, 1000], f16, stride=(1, 256))), {}) +Operator: aten.bmm.default +cnt: 3, ((T([256, 401, 32], f16), T([256, 32, 401], f16)), {}) +cnt: 3, ((T([256, 401, 401], f16), T([256, 401, 32], f16)), {}) +cnt: 9, ((T([256, 197, 64], f16), T([256, 64, 197], f16)), {}) +cnt: 9, ((T([256, 197, 197], f16), T([256, 197, 64], f16)), {}) +cnt: 3, ((T([256, 1, 64], f16), T([256, 64, 197], f16)), {}) +cnt: 3, ((T([256, 1, 197], f16), T([256, 197, 64], f16)), {}) +cnt: 3, ((T([256, 1, 32], f16), T([256, 32, 401], f16)), {}) +cnt: 3, ((T([256, 1, 401], f16), T([256, 401, 32], f16)), {}) +cnt: 3, ((T([256, 401, 1], f16), T([256, 1, 32], f16)), {}) +cnt: 3, ((T([256, 1, 32], f16), T([256, 32, 401], f16, stride=(12832, 1, 32))), {}) +cnt: 3, ((T([256, 32, 1], f16), T([256, 1, 401], f16)), {}) +cnt: 3, ((T([256, 1, 401], f16), T([256, 401, 32], f16, stride=(12832, 1, 401))), {}) +cnt: 3, ((T([256, 197, 1], f16), T([256, 1, 64], f16)), {}) +cnt: 3, ((T([256, 1, 64], f16), T([256, 64, 197], f16, stride=(12608, 1, 64))), {}) +cnt: 3, ((T([256, 64, 1], f16), T([256, 1, 197], f16)), {}) +cnt: 3, ((T([256, 1, 197], f16), T([256, 197, 64], f16, stride=(12608, 1, 197))), {}) +cnt: 9, ((T([256, 197, 197], f16, stride=(38809, 1, 197)), T([256, 197, 64], f16)), {}) +cnt: 9, ((T([256, 197, 64], f16), T([256, 64, 197], f16, stride=(12608, 1, 64))), {}) +cnt: 9, ((T([256, 64, 197], f16, stride=(12608, 1, 64)), T([256, 197, 197], f16)), {}) +cnt: 9, ((T([256, 197, 197], f16), T([256, 197, 64], f16, stride=(12608, 1, 197))), {}) +cnt: 3, ((T([256, 401, 401], f16, 
stride=(160801, 1, 401)), T([256, 401, 32], f16)), {}) +cnt: 3, ((T([256, 401, 32], f16), T([256, 32, 401], f16, stride=(12832, 1, 32))), {}) +cnt: 3, ((T([256, 32, 401], f16, stride=(12832, 1, 32)), T([256, 401, 401], f16)), {}) +cnt: 3, ((T([256, 401, 401], f16), T([256, 401, 32], f16, stride=(12832, 1, 401))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 1, 128], f16, stride=(0, 128, 1)), T([64, 400, 128], f16, stride=(51200, 1, 400))], 1), {}) +cnt: 1, (([T([64, 1, 256], f16, stride=(0, 256, 1)), T([64, 196, 256], f16, stride=(50176, 1, 196))], 1), {}) +cnt: 6, (([T([64, 1, 256], f16), T([64, 196, 256], f16, stride=(50432, 256, 1))], 1), {}) +cnt: 6, (([T([64, 1, 128], f16), T([64, 400, 128], f16, stride=(51328, 128, 1))], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 240, 240], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 240, 240], f16), T([128, 3, 12, 12], f16), T([128], f16), [12, 12], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 3, 224, 224], f16), T([256, 3, 16, 16], f16), T([256], f16), [16, 16], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 256, 14, 14], f16, stride=(50432, 1, 3584, 256)), T([64, 3, 224, 224], f16), T([256, 3, 16, 16], f16), [256], [16, 16], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +cnt: 1, ((T([64, 128, 20, 20], f16, stride=(51328, 1, 2560, 128)), T([64, 3, 240, 240], f16), T([128, 3, 12, 12], f16), [128], [12, 12], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 240, 240], f16), T([64, 3, 240, 240], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([2, 64, 1000], f16, stride=(0, 1000, 1)), 2), {}) +Operator: aten.gelu.default +cnt: 3, ((T([64, 401, 384], f16),), {}) +cnt: 9, ((T([64, 197, 768], f16),), {}) +cnt: 6, ((T([64, 1, 128], f16),), {}) +cnt: 6, ((T([64, 1, 256], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 6, ((T([64, 1, 
128], f16), T([64, 1, 128], f16)), {}) +cnt: 6, ((T([64, 1, 256], f16), T([64, 1, 256], f16)), {}) +cnt: 9, ((T([64, 197, 768], f16), T([64, 197, 768], f16)), {}) +cnt: 3, ((T([64, 401, 384], f16), T([64, 401, 384], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([2, 64, 1000], f16), [0]), {}) +Operator: aten.mm.default +cnt: 3, ((T([64, 256], f16, stride=(50432, 1)), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 3, ((T([64, 128], f16, stride=(51328, 1)), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 1, ((T([64, 1000], f16), T([1000, 256], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 256], f16, stride=(50432, 1))), {}) +cnt: 1, ((T([64, 1000], f16), T([1000, 128], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 128], f16, stride=(51328, 1))), {}) +cnt: 6, ((T([64, 256], f16, stride=(50432, 1)), T([256, 128], f16)), {}) +cnt: 6, ((T([256, 64], f16, stride=(1, 50432)), T([64, 128], f16)), {}) +cnt: 6, ((T([64, 128], f16), T([128, 128], f16)), {}) +cnt: 3, ((T([128, 64], f16, stride=(1, 128)), T([64, 128], f16)), {}) +cnt: 9, ((T([25664, 128], f16), T([128, 128], f16)), {}) +cnt: 9, ((T([128, 25664], f16, stride=(1, 128)), T([25664, 128], f16)), {}) +cnt: 3, ((T([128, 64], f16, stride=(1, 128)), T([64, 128], f16, stride=(51328, 1))), {}) +cnt: 6, ((T([64, 128], f16, stride=(51328, 1)), T([128, 256], f16)), {}) +cnt: 6, ((T([128, 64], f16, stride=(1, 51328)), T([64, 256], f16)), {}) +cnt: 6, ((T([64, 256], f16), T([256, 256], f16)), {}) +cnt: 3, ((T([256, 64], f16, stride=(1, 256)), T([64, 256], f16)), {}) +cnt: 15, ((T([12608, 256], f16), T([256, 256], f16)), {}) +cnt: 15, ((T([256, 12608], f16, stride=(1, 256)), T([12608, 256], f16)), {}) +cnt: 3, ((T([256, 64], f16, stride=(1, 256)), T([64, 256], f16, stride=(50432, 1))), {}) +cnt: 9, ((T([12608, 256], f16), T([256, 768], f16)), {}) +cnt: 9, ((T([256, 12608], f16, stride=(1, 256)), T([12608, 768], 
f16)), {}) +cnt: 18, ((T([12608, 768], f16), T([768, 256], f16)), {}) +cnt: 18, ((T([768, 12608], f16, stride=(1, 768)), T([12608, 256], f16)), {}) +cnt: 3, ((T([25664, 128], f16), T([128, 384], f16)), {}) +cnt: 3, ((T([128, 25664], f16, stride=(1, 128)), T([25664, 384], f16)), {}) +cnt: 6, ((T([25664, 384], f16), T([384, 128], f16)), {}) +cnt: 6, ((T([384, 25664], f16, stride=(1, 384)), T([25664, 128], f16)), {}) +Operator: aten.mul.Tensor +cnt: 6, ((T([64, 4, 401, 401], f16), 0.1767766952966369), {}) +cnt: 18, ((T([64, 4, 197, 197], f16), 0.125), {}) +cnt: 6, ((T([64, 4, 1, 197], f16), 0.125), {}) +cnt: 6, ((T([64, 4, 1, 401], f16), 0.1767766952966369), {}) +Operator: aten.native_layer_norm.default +cnt: 10, ((T([64, 401, 128], f16), [128], T([128], f16), T([128], f16), 1e-06), {}) +cnt: 22, ((T([64, 197, 256], f16), [256], T([256], f16), T([256], f16), 1e-06), {}) +cnt: 3, ((T([64, 1, 128], f16, stride=(51328, 128, 1)), [128], T([128], f16), T([128], f16), 1e-06), {}) +cnt: 3, ((T([64, 1, 256], f16, stride=(50432, 256, 1)), [256], T([256], f16), T([256], f16), 1e-06), {}) +cnt: 3, ((T([64, 1, 256], f16), [256], T([256], f16), T([256], f16), 1e-06), {}) +cnt: 3, ((T([64, 1, 128], f16), [128], T([128], f16), T([128], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 22, ((T([64, 197, 256], f16), T([64, 197, 256], f16), [256], T([64, 197, 1], f32), T([64, 197, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +cnt: 10, ((T([64, 401, 128], f16), T([64, 401, 128], f16), [128], T([64, 401, 1], f32), T([64, 401, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 3, ((T([64, 1, 128], f16), T([64, 1, 128], f16), [128], T([64, 1, 1], f32), T([64, 1, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 3, ((T([64, 1, 256], f16), T([64, 1, 256], f16), [256], T([64, 1, 1], f32), T([64, 1, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +cnt: 3, ((T([64, 1, 256], f16), T([64, 1, 
256], f16, stride=(50432, 256, 1)), [256], T([64, 1, 1], f32), T([64, 1, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +cnt: 3, ((T([64, 1, 128], f16), T([64, 1, 128], f16, stride=(51328, 128, 1)), [128], T([64, 1, 1], f32), T([64, 1, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([64, 256], f16), [64, 197, 256], 1, 0), {}) +cnt: 1, ((T([64, 128], f16), [64, 401, 128], 1, 0), {}) +Operator: aten.slice_backward.default +cnt: 16, ((T([64, 197, 256], f16), [64, 197, 256], 0, 0, 9223372036854775807, 1), {}) +cnt: 16, ((T([64, 401, 128], f16), [64, 401, 128], 0, 0, 9223372036854775807, 1), {}) +cnt: 6, ((T([64, 196, 256], f16, stride=(50432, 256, 1)), [64, 197, 256], 1, 1, 9223372036854775807, 1), {}) +cnt: 3, ((T([64, 1, 128], f16), [64, 1, 128], 0, 0, 9223372036854775807, 1), {}) +cnt: 9, ((T([64, 1, 128], f16), [64, 401, 128], 1, 0, 1, 1), {}) +cnt: 6, ((T([64, 400, 128], f16, stride=(51328, 128, 1)), [64, 401, 128], 1, 1, 9223372036854775807, 1), {}) +cnt: 3, ((T([64, 1, 256], f16), [64, 1, 256], 0, 0, 9223372036854775807, 1), {}) +cnt: 9, ((T([64, 1, 256], f16), [64, 197, 256], 1, 0, 1, 1), {}) +Operator: aten.stack.default +cnt: 1, (([T([64, 1000], f16), T([64, 1000], f16)],), {}) +cnt: 9, (([T([64, 4, 197, 64], f16), T([64, 4, 197, 64], f16, stride=(50432, 12608, 1, 197)), T([64, 4, 197, 64], f16)],), {}) +cnt: 3, (([T([64, 4, 401, 32], f16), T([64, 4, 401, 32], f16, stride=(51328, 12832, 1, 401)), T([64, 4, 401, 32], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 2, ((T([64, 1000], f16), [0], True), {}) +cnt: 6, ((T([64, 256], f16, stride=(50432, 1)), [0], True), {}) +cnt: 3, ((T([64, 128], f16), [0], True), {}) +cnt: 12, ((T([25664, 128], 
f16), [0], True), {}) +cnt: 3, ((T([64, 1, 128], f16), [0, 1], True), {}) +cnt: 6, ((T([64, 128], f16, stride=(51328, 1)), [0], True), {}) +cnt: 3, ((T([64, 256], f16), [0], True), {}) +cnt: 24, ((T([12608, 256], f16), [0], True), {}) +cnt: 3, ((T([64, 1, 256], f16), [0, 1], True), {}) +cnt: 18, ((T([12608, 768], f16), [0], True), {}) +cnt: 6, ((T([25664, 384], f16), [0], True), {}) +cnt: 1, ((T([64, 197, 256], f16), [0], True), {}) +cnt: 1, ((T([64, 1, 256], f16, stride=(50432, 256, 1)), [0], True), {}) +cnt: 1, ((T([64, 401, 128], f16), [0], True), {}) +cnt: 1, ((T([64, 1, 128], f16, stride=(51328, 128, 1)), [0], True), {}) +Operator: aten.unbind.int +cnt: 3, ((T([3, 64, 4, 401, 32], f16, stride=(128, 153984, 32, 384, 1)),), {}) +cnt: 9, ((T([3, 64, 4, 197, 64], f16, stride=(256, 151296, 64, 768, 1)),), {}) +cnt: 1, ((T([2, 64, 1000], f16),), {}) +Operator: aten.upsample_bicubic2d.vec +cnt: 1, ((T([64, 3, 240, 240], f16), [224, 224], False, None), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/cspdarknet53_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/cspdarknet53_training.txt new file mode 100644 index 000000000..9332a617d --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/cspdarknet53_training.txt @@ -0,0 +1,177 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 67, ((T([], i64), 1), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([64, 64, 128, 128], f16, stride=(2097152, 16384, 128, 1))), {}) +cnt: 1, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16, stride=(524288, 4096, 64, 1))), {}) +cnt: 3, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16)), {}) +cnt: 1, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16, stride=(262144, 1024, 32, 1))), {}) +cnt: 15, ((T([64, 128, 32, 32], f16), 
T([64, 128, 32, 32], f16)), {}) +cnt: 1, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16, stride=(131072, 256, 16, 1))), {}) +cnt: 15, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16)), {}) +cnt: 1, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16, stride=(65536, 64, 8, 1))), {}) +cnt: 7, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16)), {}) +cnt: 1, ((T([64, 1024, 8, 8], f16), T([64, 1024, 8, 8], f16)), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([64, 512, 16, 16], f16)), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([64, 256, 32, 32], f16)), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([64, 128, 64, 64], f16)), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([64, 64, 128, 128], f16)), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([64, 128, 128, 128], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([64, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 64, 128, 128], f16, stride=(2097152, 16384, 128, 1)), T([64, 64, 128, 128], f16)], 1), {}) +cnt: 1, (([T([64, 64, 64, 64], f16, stride=(524288, 4096, 64, 1)), T([64, 64, 64, 64], f16)], 1), {}) +cnt: 1, (([T([64, 128, 32, 32], f16, stride=(262144, 1024, 32, 1)), T([64, 128, 32, 32], f16)], 1), {}) +cnt: 1, (([T([64, 256, 16, 16], f16, stride=(131072, 256, 16, 1)), T([64, 256, 16, 16], f16)], 1), {}) +cnt: 1, (([T([64, 512, 8, 8], f16, stride=(65536, 64, 8, 1)), T([64, 512, 8, 8], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 256, 256], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 256, 256], f16), T([32, 3, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 32, 256, 256], f16), T([64, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([128, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 128, 128], f16, stride=(2097152, 16384, 128, 1)), T([32, 64, 1, 1], 
f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([64, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([128, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 128, 64, 64], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 64, 64], f16, stride=(524288, 4096, 64, 1)), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 64, 64, 64], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 64, 64, 64], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([256, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 256, 32, 32], f16), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 32, 32], f16, stride=(262144, 1024, 32, 1)), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([64, 128, 32, 32], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([64, 128, 32, 32], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([512, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 512, 16, 16], f16), T([512, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 16, 16], f16, stride=(131072, 256, 16, 1)), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 
0], 1), {}) +cnt: 8, ((T([64, 256, 16, 16], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([64, 256, 16, 16], f16), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([1024, 512, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 1024, 8, 8], f16), T([1024, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 8, 8], f16, stride=(65536, 64, 8, 1)), T([512, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([64, 512, 8, 8], f16), T([512, 512, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([64, 512, 8, 8], f16), T([512, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 2, ((T([64, 1024, 8, 8], f16), T([64, 1024, 8, 8], f16), T([1024, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16), T([512, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16), T([512, 512, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16, stride=(65536, 64, 8, 1)), T([512, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 1024, 8, 8], f16), T([64, 512, 16, 16], f16), T([1024, 512, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 512, 16, 16], f16), T([64, 512, 16, 16], f16), T([512, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16), T([256, 256, 1, 1], f16), [0], [1, 1], 
[0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16, stride=(131072, 256, 16, 1)), T([256, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([64, 256, 32, 32], f16), T([512, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 256, 32, 32], f16), T([64, 256, 32, 32], f16), T([256, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16, stride=(262144, 1024, 32, 1)), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([64, 128, 64, 64], f16), T([256, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 128, 64, 64], f16), T([64, 128, 64, 64], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16, stride=(524288, 4096, 
64, 1)), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([64, 64, 128, 128], f16), T([128, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([64, 128, 128, 128], f16), T([64, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([64, 64, 128, 128], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([64, 32, 128, 128], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([64, 64, 128, 128], f16, stride=(2097152, 16384, 128, 1)), T([32, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([64, 64, 128, 128], f16), T([128, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([64, 32, 256, 256], f16), T([64, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 256, 256], f16), T([64, 3, 256, 256], f16), T([32, 3, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 256, 256], f16), T([64, 3, 256, 256], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 1024, 8, 8], f16, stride=(1024, 1, 0, 0)), 64), {}) +Operator: aten.leaky_relu_.default +cnt: 1, ((T([64, 32, 256, 256], f16),), {}) +cnt: 4, ((T([64, 64, 128, 128], f16),), {}) +cnt: 1, ((T([64, 128, 128, 128], f16),), {}) +cnt: 1, ((T([64, 32, 128, 128], f16),), {}) +cnt: 3, ((T([64, 128, 64, 64], f16),), {}) +cnt: 5, ((T([64, 64, 64, 64], f16),), {}) +cnt: 3, ((T([64, 256, 32, 32], f16),), {}) 
+cnt: 17, ((T([64, 128, 32, 32], f16),), {}) +cnt: 3, ((T([64, 512, 16, 16], f16),), {}) +cnt: 17, ((T([64, 256, 16, 16], f16),), {}) +cnt: 3, ((T([64, 1024, 8, 8], f16),), {}) +cnt: 9, ((T([64, 512, 8, 8], f16),), {}) +Operator: aten.leaky_relu_backward.default +cnt: 3, ((T([64, 1024, 8, 8], f16), T([64, 1024, 8, 8], f16), 0.01, True), {}) +cnt: 1, ((T([64, 512, 8, 8], f16, stride=(65536, 64, 8, 1)), T([64, 512, 8, 8], f16), 0.01, True), {}) +cnt: 8, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16), 0.01, True), {}) +cnt: 3, ((T([64, 512, 16, 16], f16), T([64, 512, 16, 16], f16), 0.01, True), {}) +cnt: 1, ((T([64, 256, 16, 16], f16, stride=(131072, 256, 16, 1)), T([64, 256, 16, 16], f16), 0.01, True), {}) +cnt: 16, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16), 0.01, True), {}) +cnt: 3, ((T([64, 256, 32, 32], f16), T([64, 256, 32, 32], f16), 0.01, True), {}) +cnt: 1, ((T([64, 128, 32, 32], f16, stride=(262144, 1024, 32, 1)), T([64, 128, 32, 32], f16), 0.01, True), {}) +cnt: 16, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16), 0.01, True), {}) +cnt: 3, ((T([64, 128, 64, 64], f16), T([64, 128, 64, 64], f16), 0.01, True), {}) +cnt: 1, ((T([64, 64, 64, 64], f16, stride=(524288, 4096, 64, 1)), T([64, 64, 64, 64], f16), 0.01, True), {}) +cnt: 4, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16), 0.01, True), {}) +cnt: 3, ((T([64, 64, 128, 128], f16), T([64, 64, 128, 128], f16), 0.01, True), {}) +cnt: 1, ((T([64, 64, 128, 128], f16, stride=(2097152, 16384, 128, 1)), T([64, 64, 128, 128], f16), 0.01, True), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([64, 32, 128, 128], f16), 0.01, True), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([64, 128, 128, 128], f16), 0.01, True), {}) +cnt: 1, ((T([64, 32, 256, 256], f16), T([64, 32, 256, 256], f16), 0.01, True), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 1024, 8, 8], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, 
((T([64, 1000], f16), T([1000, 1024], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 1024], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([64, 32, 256, 256], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([64, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 17, ((T([64, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 17, ((T([64, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 1024, 8, 8], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 9, ((T([64, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 3, ((T([64, 1024, 8, 8], f16), T([64, 1024, 8, 8], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 9, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, 
True, True]), {}) +cnt: 3, ((T([64, 512, 16, 16], f16), T([64, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 17, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([64, 256, 32, 32], f16), T([64, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 17, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([64, 128, 64, 64], f16), T([64, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 64, 128, 128], f16), T([64, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([64, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([64, 128, 128, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 32, 256, 256], f16), T([64, 32, 256, 256], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], 
i64), None, 1, -100), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([64, 512, 8, 8], f16), [64, 1024, 8, 8], 1, 512, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 1024, 8, 8], f16), [64, 1024, 8, 8], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([64, 512, 8, 8], f16, stride=(65536, 64, 8, 1)), [64, 1024, 8, 8], 1, 0, 512, 1), {}) +cnt: 1, ((T([64, 256, 16, 16], f16), [64, 512, 16, 16], 1, 256, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 512, 16, 16], f16), [64, 512, 16, 16], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([64, 256, 16, 16], f16, stride=(131072, 256, 16, 1)), [64, 512, 16, 16], 1, 0, 256, 1), {}) +cnt: 1, ((T([64, 128, 32, 32], f16), [64, 256, 32, 32], 1, 128, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 256, 32, 32], f16), [64, 256, 32, 32], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([64, 128, 32, 32], f16, stride=(262144, 1024, 32, 1)), [64, 256, 32, 32], 1, 0, 128, 1), {}) +cnt: 1, ((T([64, 64, 64, 64], f16), [64, 128, 64, 64], 1, 64, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 128, 64, 64], f16), [64, 128, 64, 64], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([64, 64, 64, 64], f16, stride=(524288, 4096, 64, 1)), [64, 128, 64, 64], 1, 0, 64, 1), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), [64, 128, 128, 128], 1, 64, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 128, 128, 128], f16), [64, 128, 128, 128], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([64, 64, 128, 128], f16, stride=(2097152, 16384, 128, 1)), [64, 128, 128, 128], 1, 0, 64, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/deit_base_distilled_patch16_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/deit_base_distilled_patch16_224_training.txt new file mode 100644 index 000000000..486ee80cd --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/deit_base_distilled_patch16_224_training.txt @@ -0,0 
+1,87 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([64, 12, 198, 198], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([64, 12, 198, 198], f16), T([64, 12, 198, 198], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([64, 12, 198, 64], f16), [768, 198, 64]), {}) +cnt: 12, ((T([64, 12, 64, 198], f16), [768, 64, 198]), {}) +cnt: 12, ((T([768, 198, 198], f16), [64, 12, 198, 198]), {}) +cnt: 12, ((T([768, 198, 64], f16), [64, 12, 198, 64]), {}) +cnt: 12, ((T([64, 198, 12, 64], f16), [64, 198, 768]), {}) +cnt: 12, ((T([64, 198, 3, 12, 64], f16), [64, 198, 2304]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([64, 198, 768], f16), T([1, 198, 768], f16)), {}) +cnt: 49, ((T([64, 198, 768], f16), T([64, 198, 768], f16)), {}) +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16)), {}) +Operator: aten.addmm.default +cnt: 12, ((T([2304], f16), T([12672, 768], f16), T([768, 2304], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([12672, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([12672, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([12672, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 2, ((T([1000], f16), T([64, 768], f16, stride=(152064, 1)), T([768, 1000], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([768, 198, 64], f16), T([768, 64, 198], f16)), {}) +cnt: 12, ((T([768, 198, 198], f16), T([768, 198, 64], f16)), {}) +cnt: 12, ((T([768, 198, 198], f16, stride=(39204, 1, 198)), T([768, 198, 64], f16)), {}) +cnt: 12, ((T([768, 198, 64], f16), T([768, 64, 198], f16, stride=(12672, 1, 64))), {}) +cnt: 12, ((T([768, 64, 198], f16, stride=(12672, 1, 64)), T([768, 198, 198], f16)), {}) +cnt: 12, ((T([768, 198, 
198], f16), T([768, 198, 64], f16, stride=(12672, 1, 198))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 1, 768], f16, stride=(0, 768, 1)), T([64, 1, 768], f16, stride=(0, 768, 1)), T([64, 196, 768], f16, stride=(150528, 1, 196))], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([768, 3, 16, 16], f16), T([768], f16), [16, 16], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 768, 14, 14], f16, stride=(152064, 1, 10752, 768)), T([64, 3, 224, 224], f16), T([768, 3, 16, 16], f16), [768], [16, 16], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([64, 1000], f16), 2), {}) +Operator: aten.gelu.default +cnt: 12, ((T([64, 198, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([64, 198, 3072], f16), T([64, 198, 3072], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mm.default +cnt: 2, ((T([64, 1000], f16), T([1000, 768], f16)), {}) +cnt: 2, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 768], f16, stride=(152064, 1))), {}) +cnt: 12, ((T([12672, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 12672], f16, stride=(1, 768)), T([12672, 3072], f16)), {}) +cnt: 12, ((T([12672, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 12672], f16, stride=(1, 3072)), T([12672, 768], f16)), {}) +cnt: 12, ((T([12672, 768], f16), T([768, 768], f16)), {}) +cnt: 12, ((T([768, 12672], f16, stride=(1, 768)), T([12672, 768], f16)), {}) +cnt: 12, ((T([12672, 2304], f16), T([2304, 768], f16)), {}) +cnt: 12, ((T([2304, 12672], f16, stride=(1, 2304)), T([12672, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 24, ((T([64, 12, 198, 198], f16), 0.125), {}) +Operator: aten.native_layer_norm.default 
+cnt: 25, ((T([64, 198, 768], f16), [768], T([768], f16), T([768], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([64, 198, 768], f16), T([64, 198, 768], f16), [768], T([64, 198, 1], f32), T([64, 198, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([64, 768], f16), [64, 198, 768], 1, 1), {}) +cnt: 1, ((T([64, 768], f16), [64, 198, 768], 1, 0), {}) +Operator: aten.slice_backward.default +cnt: 2, ((T([64, 198, 768], f16), [64, 198, 768], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.stack.default +cnt: 12, (([T([64, 12, 198, 64], f16), T([64, 12, 198, 64], f16, stride=(152064, 12672, 1, 198)), T([64, 12, 198, 64], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 2, ((T([64, 1000], f16), [0], True), {}) +cnt: 24, ((T([12672, 768], f16), [0], True), {}) +cnt: 12, ((T([12672, 3072], f16), [0], True), {}) +cnt: 12, ((T([12672, 2304], f16), [0], True), {}) +cnt: 1, ((T([64, 198, 768], f16), [0], True), {}) +cnt: 2, ((T([64, 1, 768], f16, stride=(152064, 768, 1)), [0], True), {}) +Operator: aten.unbind.int +cnt: 12, ((T([3, 64, 12, 198, 64], f16, stride=(768, 456192, 64, 2304, 1)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/densenet121_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/densenet121_training.txt new file mode 100644 index 000000000..983f9ccb1 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/densenet121_training.txt @@ -0,0 +1,616 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) 
+Operator: aten.add.Tensor +cnt: 121, ((T([], i64), 1), {}) +cnt: 1, ((T([64, 512, 7, 7], f16, stride=(50176, 49, 7, 1)), T([64, 512, 7, 7], f16, stride=(48608, 49, 7, 1))), {}) +cnt: 15, ((T([64, 32, 7, 7], f16, stride=(50176, 49, 7, 1)), T([64, 32, 7, 7], f16, stride=(48608, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(47040, 49, 7, 1))), {}) +cnt: 14, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(47040, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(45472, 49, 7, 1))), {}) +cnt: 13, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(45472, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(43904, 49, 7, 1))), {}) +cnt: 12, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(43904, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(42336, 49, 7, 1))), {}) +cnt: 11, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(42336, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(40768, 49, 7, 1))), {}) +cnt: 10, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(40768, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(39200, 49, 7, 1))), {}) +cnt: 9, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(39200, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(37632, 49, 7, 1))), {}) +cnt: 8, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(37632, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(36064, 49, 7, 1))), {}) +cnt: 7, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(36064, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(34496, 49, 7, 1))), {}) +cnt: 6, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(34496, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, 
stride=(32928, 49, 7, 1))), {}) +cnt: 5, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(32928, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(31360, 49, 7, 1))), {}) +cnt: 4, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(31360, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(29792, 49, 7, 1))), {}) +cnt: 3, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(29792, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(28224, 49, 7, 1))), {}) +cnt: 2, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(28224, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16, stride=(26656, 49, 7, 1))), {}) +cnt: 1, ((T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16, stride=(26656, 49, 7, 1))), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16)), {}) +cnt: 1, ((T([64, 256, 14, 14], f16, stride=(200704, 196, 14, 1)), T([64, 256, 14, 14], f16, stride=(194432, 196, 14, 1))), {}) +cnt: 23, ((T([64, 32, 14, 14], f16, stride=(200704, 196, 14, 1)), T([64, 32, 14, 14], f16, stride=(194432, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(188160, 196, 14, 1))), {}) +cnt: 22, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(188160, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(181888, 196, 14, 1))), {}) +cnt: 21, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(181888, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(175616, 196, 14, 1))), {}) +cnt: 20, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(175616, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(169344, 196, 14, 1))), {}) +cnt: 19, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(169344, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), 
T([64, 256, 14, 14], f16, stride=(163072, 196, 14, 1))), {}) +cnt: 18, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(163072, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(156800, 196, 14, 1))), {}) +cnt: 17, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(156800, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(150528, 196, 14, 1))), {}) +cnt: 16, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(150528, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(144256, 196, 14, 1))), {}) +cnt: 15, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(144256, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(137984, 196, 14, 1))), {}) +cnt: 14, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(137984, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(131712, 196, 14, 1))), {}) +cnt: 13, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(131712, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(125440, 196, 14, 1))), {}) +cnt: 12, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(125440, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(119168, 196, 14, 1))), {}) +cnt: 11, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(119168, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(112896, 196, 14, 1))), {}) +cnt: 10, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(112896, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(106624, 196, 14, 1))), {}) +cnt: 9, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(106624, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, 
stride=(100352, 196, 14, 1))), {}) +cnt: 8, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(100352, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(94080, 196, 14, 1))), {}) +cnt: 7, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(94080, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(87808, 196, 14, 1))), {}) +cnt: 6, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(87808, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(81536, 196, 14, 1))), {}) +cnt: 5, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(81536, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(75264, 196, 14, 1))), {}) +cnt: 4, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(75264, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(68992, 196, 14, 1))), {}) +cnt: 3, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(68992, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(62720, 196, 14, 1))), {}) +cnt: 2, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(62720, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16, stride=(56448, 196, 14, 1))), {}) +cnt: 1, ((T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16, stride=(56448, 196, 14, 1))), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16)), {}) +cnt: 1, ((T([64, 128, 28, 28], f16, stride=(401408, 784, 28, 1)), T([64, 128, 28, 28], f16, stride=(376320, 784, 28, 1))), {}) +cnt: 11, ((T([64, 32, 28, 28], f16, stride=(401408, 784, 28, 1)), T([64, 32, 28, 28], f16, stride=(376320, 784, 28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16, stride=(351232, 784, 28, 1))), {}) +cnt: 10, ((T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16, stride=(351232, 784, 
28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16, stride=(326144, 784, 28, 1))), {}) +cnt: 9, ((T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16, stride=(326144, 784, 28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16, stride=(301056, 784, 28, 1))), {}) +cnt: 8, ((T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16, stride=(301056, 784, 28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16, stride=(275968, 784, 28, 1))), {}) +cnt: 7, ((T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16, stride=(275968, 784, 28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16, stride=(250880, 784, 28, 1))), {}) +cnt: 6, ((T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16, stride=(250880, 784, 28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16, stride=(225792, 784, 28, 1))), {}) +cnt: 5, ((T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16, stride=(225792, 784, 28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16, stride=(200704, 784, 28, 1))), {}) +cnt: 4, ((T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16, stride=(200704, 784, 28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16, stride=(175616, 784, 28, 1))), {}) +cnt: 3, ((T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16, stride=(175616, 784, 28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16, stride=(150528, 784, 28, 1))), {}) +cnt: 2, ((T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16, stride=(150528, 784, 28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16, stride=(125440, 784, 28, 1))), {}) +cnt: 1, ((T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16, stride=(125440, 784, 28, 1))), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16)), {}) +cnt: 1, ((T([64, 64, 56, 56], f16, stride=(802816, 3136, 56, 1)), T([64, 64, 56, 56], f16, stride=(702464, 3136, 56, 1))), {}) +cnt: 5, ((T([64, 32, 
56, 56], f16, stride=(802816, 3136, 56, 1)), T([64, 32, 56, 56], f16, stride=(702464, 3136, 56, 1))), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16, stride=(602112, 3136, 56, 1))), {}) +cnt: 4, ((T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16, stride=(602112, 3136, 56, 1))), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16, stride=(501760, 3136, 56, 1))), {}) +cnt: 3, ((T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16, stride=(501760, 3136, 56, 1))), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16, stride=(401408, 3136, 56, 1))), {}) +cnt: 2, ((T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16, stride=(401408, 3136, 56, 1))), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16, stride=(301056, 3136, 56, 1))), {}) +cnt: 1, ((T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16, stride=(301056, 3136, 56, 1))), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([64, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([64, 128, 56, 56], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), [2, 2], [2, 2]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 14, 14], f16), [2, 2], [2, 2], [0, 0], False, True, None), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 28, 28], f16), [2, 2], [2, 2], [0, 0], False, True, None), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 56, 56], f16), [2, 2], [2, 2], [0, 0], False, True, None), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 64, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 64, 56, 56], f16), T([64, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 64, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 64, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 
56], f16), T([64, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 64, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 64, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 64, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16), T([64, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 
28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 128, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16), T([64, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), 
T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), 
T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 
14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], 
f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 256, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), 
T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16), T([64, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 
7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 512, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 
32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16), T([64, 32, 7, 7], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([128, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 128, 56, 56], f16), T([32, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 96, 56, 56], f16), T([128, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 160, 56, 56], f16), T([128, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), T([128, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 224, 56, 56], f16), T([128, 224, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 12, ((T([64, 128, 28, 28], f16), T([32, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 160, 28, 28], f16), T([128, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 192, 28, 28], f16), T([128, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 224, 28, 28], f16), T([128, 224, 1, 1], f16), None, [1, 1], [0, 0], 
[1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 288, 28, 28], f16), T([128, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 320, 28, 28], f16), T([128, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 352, 28, 28], f16), T([128, 352, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 384, 28, 28], f16), T([128, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 416, 28, 28], f16), T([128, 416, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 448, 28, 28], f16), T([128, 448, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 480, 28, 28], f16), T([128, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 24, ((T([64, 128, 14, 14], f16), T([32, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 288, 14, 14], f16), T([128, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 320, 14, 14], f16), T([128, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 352, 14, 14], f16), T([128, 352, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 384, 14, 14], f16), T([128, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 416, 14, 14], f16), T([128, 416, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 448, 14, 14], f16), T([128, 448, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], 
False, [0, 0], 1), {}) +cnt: 1, ((T([64, 480, 14, 14], f16), T([128, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 544, 14, 14], f16), T([128, 544, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 576, 14, 14], f16), T([128, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 608, 14, 14], f16), T([128, 608, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 640, 14, 14], f16), T([128, 640, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 672, 14, 14], f16), T([128, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 704, 14, 14], f16), T([128, 704, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 736, 14, 14], f16), T([128, 736, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 768, 14, 14], f16), T([128, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 800, 14, 14], f16), T([128, 800, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 832, 14, 14], f16), T([128, 832, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 864, 14, 14], f16), T([128, 864, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 896, 14, 14], f16), T([128, 896, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 928, 14, 14], f16), T([128, 928, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([128, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 992, 14, 14], f16), T([128, 992, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 
0], 1), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 16, ((T([64, 128, 7, 7], f16), T([32, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 544, 7, 7], f16), T([128, 544, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 576, 7, 7], f16), T([128, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 608, 7, 7], f16), T([128, 608, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 640, 7, 7], f16), T([128, 640, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 672, 7, 7], f16), T([128, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 704, 7, 7], f16), T([128, 704, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 736, 7, 7], f16), T([128, 736, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 768, 7, 7], f16), T([128, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 800, 7, 7], f16), T([128, 800, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 832, 7, 7], f16), T([128, 832, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 864, 7, 7], f16), T([128, 864, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 896, 7, 7], f16), T([128, 896, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 928, 7, 7], f16), T([128, 928, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([128, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 992, 7, 
7], f16), T([128, 992, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 32, 7, 7], f16, stride=(50176, 49, 7, 1)), T([64, 128, 7, 7], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 992, 7, 7], f16), T([128, 992, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 15, ((T([64, 32, 7, 7], f16), T([64, 128, 7, 7], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 960, 7, 7], f16), T([128, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 928, 7, 7], f16), T([128, 928, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 896, 7, 7], f16), T([128, 896, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 864, 7, 7], f16), T([128, 864, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 832, 7, 7], f16), T([128, 832, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 800, 7, 7], f16), T([128, 800, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 768, 7, 7], f16), T([128, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 736, 7, 7], f16), T([128, 736, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 704, 7, 7], f16), T([128, 704, 1, 1], 
f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 672, 7, 7], f16), T([128, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 640, 7, 7], f16), T([128, 640, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 608, 7, 7], f16), T([128, 608, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 576, 7, 7], f16), T([128, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 544, 7, 7], f16), T([128, 544, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 7, 7], f16), T([64, 512, 7, 7], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 14, 14], f16, stride=(200704, 196, 14, 1)), T([64, 128, 14, 14], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 992, 14, 14], f16), T([128, 992, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 23, ((T([64, 32, 14, 14], f16), T([64, 128, 14, 14], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 960, 14, 14], f16), T([128, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 928, 14, 14], f16), T([128, 928, 1, 1], f16), [0], [1, 1], [0, 0], 
[1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 896, 14, 14], f16), T([128, 896, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 864, 14, 14], f16), T([128, 864, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 832, 14, 14], f16), T([128, 832, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 800, 14, 14], f16), T([128, 800, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 768, 14, 14], f16), T([128, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 736, 14, 14], f16), T([128, 736, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 704, 14, 14], f16), T([128, 704, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 672, 14, 14], f16), T([128, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 640, 14, 14], f16), T([128, 640, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 608, 14, 14], f16), T([128, 608, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 576, 14, 14], f16), T([128, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 544, 14, 14], f16), T([128, 544, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, 
[True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 512, 14, 14], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 480, 14, 14], f16), T([128, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 448, 14, 14], f16), T([128, 448, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 416, 14, 14], f16), T([128, 416, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 384, 14, 14], f16), T([128, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 352, 14, 14], f16), T([128, 352, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 320, 14, 14], f16), T([128, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 288, 14, 14], f16), T([128, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 14, 14], f16), T([64, 256, 14, 14], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 512, 28, 28], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 28, 28], f16, stride=(401408, 784, 28, 1)), T([64, 128, 28, 28], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 480, 28, 28], f16), T([128, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, 
[True, True, False]), {}) +cnt: 11, ((T([64, 32, 28, 28], f16), T([64, 128, 28, 28], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 448, 28, 28], f16), T([128, 448, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 416, 28, 28], f16), T([128, 416, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 384, 28, 28], f16), T([128, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 352, 28, 28], f16), T([128, 352, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 320, 28, 28], f16), T([128, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 288, 28, 28], f16), T([128, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 256, 28, 28], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 224, 28, 28], f16), T([128, 224, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 192, 28, 28], f16), T([128, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 160, 28, 28], f16), T([128, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 
1, ((T([64, 128, 56, 56], f16), T([64, 256, 56, 56], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 56, 56], f16, stride=(802816, 3136, 56, 1)), T([64, 128, 56, 56], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 224, 56, 56], f16), T([128, 224, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([64, 32, 56, 56], f16), T([64, 128, 56, 56], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 192, 56, 56], f16), T([128, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 160, 56, 56], f16), T([128, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 128, 56, 56], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 96, 56, 56], f16), T([128, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 64, 56, 56], f16), T([128, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 1024, 7, 7], f16, stride=(1024, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([64, 64, 112, 
112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([64, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 1024, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 1024], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 1024], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([64, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([64, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 160, 56, 56], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 224, 56, 56], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 13, ((T([64, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 160, 28, 28], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 224, 28, 28], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), 
T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 288, 28, 28], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 320, 28, 28], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 352, 28, 28], f16), T([352], f16), T([352], f16), T([352], f16), T([352], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 384, 28, 28], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 416, 28, 28], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 448, 28, 28], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 480, 28, 28], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 24, ((T([64, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 288, 14, 14], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 320, 14, 14], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 352, 14, 14], f16), T([352], f16), T([352], f16), T([352], f16), T([352], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 416, 14, 14], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 448, 14, 14], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), 
True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 544, 14, 14], f16), T([544], f16), T([544], f16), T([544], f16), T([544], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 608, 14, 14], f16), T([608], f16), T([608], f16), T([608], f16), T([608], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 640, 14, 14], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 704, 14, 14], f16), T([704], f16), T([704], f16), T([704], f16), T([704], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 736, 14, 14], f16), T([736], f16), T([736], f16), T([736], f16), T([736], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 768, 14, 14], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 800, 14, 14], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 832, 14, 14], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 864, 14, 14], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 896, 14, 14], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 928, 14, 14], f16), T([928], f16), T([928], f16), T([928], f16), T([928], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 992, 14, 14], f16), 
T([992], f16), T([992], f16), T([992], f16), T([992], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 16, ((T([64, 128, 7, 7], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 544, 7, 7], f16), T([544], f16), T([544], f16), T([544], f16), T([544], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 608, 7, 7], f16), T([608], f16), T([608], f16), T([608], f16), T([608], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 640, 7, 7], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 704, 7, 7], f16), T([704], f16), T([704], f16), T([704], f16), T([704], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 736, 7, 7], f16), T([736], f16), T([736], f16), T([736], f16), T([736], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 768, 7, 7], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 800, 7, 7], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 832, 7, 7], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 864, 7, 7], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 896, 7, 7], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 928, 7, 7], f16), T([928], f16), T([928], f16), T([928], f16), T([928], f16), True, 0.1, 1e-05), {}) 
+cnt: 1, ((T([64, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 992, 7, 7], f16), T([992], f16), T([992], f16), T([992], f16), T([992], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([64, 1024, 7, 7], f16), T([64, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 16, ((T([64, 128, 7, 7], f16), T([64, 128, 7, 7], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 992, 7, 7], f16), T([64, 992, 7, 7], f16), T([992], f16), T([992], f16), T([992], f16), T([992], f32), T([992], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([64, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 928, 7, 7], f16), T([64, 928, 7, 7], f16), T([928], f16), T([928], f16), T([928], f16), T([928], f32), T([928], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 896, 7, 7], f16), T([64, 896, 7, 7], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f32), T([896], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 864, 7, 7], f16), T([64, 864, 7, 7], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f32), T([864], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 832, 7, 7], f16), T([64, 832, 7, 7], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f32), T([832], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 800, 7, 7], f16), T([64, 800, 7, 7], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f32), T([800], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, 
((T([64, 768, 7, 7], f16), T([64, 768, 7, 7], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 736, 7, 7], f16), T([64, 736, 7, 7], f16), T([736], f16), T([736], f16), T([736], f16), T([736], f32), T([736], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 704, 7, 7], f16), T([64, 704, 7, 7], f16), T([704], f16), T([704], f16), T([704], f16), T([704], f32), T([704], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 672, 7, 7], f16), T([64, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 640, 7, 7], f16), T([64, 640, 7, 7], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f32), T([640], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 608, 7, 7], f16), T([64, 608, 7, 7], f16), T([608], f16), T([608], f16), T([608], f16), T([608], f32), T([608], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 576, 7, 7], f16), T([64, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 544, 7, 7], f16), T([64, 544, 7, 7], f16), T([544], f16), T([544], f16), T([544], f16), T([544], f32), T([544], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 24, ((T([64, 128, 14, 14], f16), T([64, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 992, 14, 14], f16), T([64, 992, 14, 14], f16), T([992], 
f16), T([992], f16), T([992], f16), T([992], f32), T([992], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([64, 960, 14, 14], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 928, 14, 14], f16), T([64, 928, 14, 14], f16), T([928], f16), T([928], f16), T([928], f16), T([928], f32), T([928], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 896, 14, 14], f16), T([64, 896, 14, 14], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f32), T([896], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 864, 14, 14], f16), T([64, 864, 14, 14], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f32), T([864], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 832, 14, 14], f16), T([64, 832, 14, 14], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f32), T([832], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 800, 14, 14], f16), T([64, 800, 14, 14], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f32), T([800], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 768, 14, 14], f16), T([64, 768, 14, 14], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 736, 14, 14], f16), T([64, 736, 14, 14], f16), T([736], f16), T([736], f16), T([736], f16), T([736], f32), T([736], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 704, 14, 14], f16), T([64, 704, 14, 14], f16), T([704], f16), T([704], f16), T([704], f16), T([704], f32), T([704], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 672, 14, 14], f16), T([64, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 640, 14, 14], f16), T([64, 640, 14, 14], f16), T([640], f16), T([640], f16), T([640], f16), 
T([640], f32), T([640], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 608, 14, 14], f16), T([64, 608, 14, 14], f16), T([608], f16), T([608], f16), T([608], f16), T([608], f32), T([608], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 576, 14, 14], f16), T([64, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 544, 14, 14], f16), T([64, 544, 14, 14], f16), T([544], f16), T([544], f16), T([544], f16), T([544], f32), T([544], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 480, 14, 14], f16), T([64, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 448, 14, 14], f16), T([64, 448, 14, 14], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 416, 14, 14], f16), T([64, 416, 14, 14], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f32), T([416], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 384, 14, 14], f16), T([64, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 352, 14, 14], f16), T([64, 352, 14, 14], f16), T([352], f16), T([352], f16), T([352], f16), T([352], f32), T([352], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 320, 14, 14], f16), T([64, 320, 14, 14], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 288, 14, 14], f16), T([64, 288, 14, 14], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f32), T([288], f32), True, 
1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 13, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 480, 28, 28], f16), T([64, 480, 28, 28], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 448, 28, 28], f16), T([64, 448, 28, 28], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 416, 28, 28], f16), T([64, 416, 28, 28], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f32), T([416], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 384, 28, 28], f16), T([64, 384, 28, 28], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 352, 28, 28], f16), T([64, 352, 28, 28], f16), T([352], f16), T([352], f16), T([352], f16), T([352], f32), T([352], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 320, 28, 28], f16), T([64, 320, 28, 28], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 288, 28, 28], f16), T([64, 288, 28, 28], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f32), T([288], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, 
((T([64, 224, 28, 28], f16), T([64, 224, 28, 28], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 192, 28, 28], f16), T([64, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 160, 28, 28], f16), T([64, 160, 28, 28], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([64, 128, 56, 56], f16), T([64, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 224, 56, 56], f16), T([64, 224, 56, 56], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), T([64, 192, 56, 56], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 160, 56, 56], f16), T([64, 160, 56, 56], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 96, 56, 56], f16), T([64, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), 
T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([64, 64, 112, 112], f16),), {}) +cnt: 1, ((T([64, 64, 56, 56], f16),), {}) +cnt: 7, ((T([64, 128, 56, 56], f16),), {}) +cnt: 1, ((T([64, 96, 56, 56], f16),), {}) +cnt: 1, ((T([64, 160, 56, 56], f16),), {}) +cnt: 1, ((T([64, 192, 56, 56], f16),), {}) +cnt: 1, ((T([64, 224, 56, 56], f16),), {}) +cnt: 1, ((T([64, 256, 56, 56], f16),), {}) +cnt: 13, ((T([64, 128, 28, 28], f16),), {}) +cnt: 1, ((T([64, 160, 28, 28], f16),), {}) +cnt: 1, ((T([64, 192, 28, 28], f16),), {}) +cnt: 1, ((T([64, 224, 28, 28], f16),), {}) +cnt: 1, ((T([64, 256, 28, 28], f16),), {}) +cnt: 1, ((T([64, 288, 28, 28], f16),), {}) +cnt: 1, ((T([64, 320, 28, 28], f16),), {}) +cnt: 1, ((T([64, 352, 28, 28], f16),), {}) +cnt: 1, ((T([64, 384, 28, 28], f16),), {}) +cnt: 1, ((T([64, 416, 28, 28], f16),), {}) +cnt: 1, ((T([64, 448, 28, 28], f16),), {}) +cnt: 1, ((T([64, 480, 28, 28], f16),), {}) +cnt: 1, ((T([64, 512, 28, 28], f16),), {}) +cnt: 1, ((T([64, 256, 14, 14], f16),), {}) +cnt: 24, ((T([64, 128, 14, 14], f16),), {}) +cnt: 1, ((T([64, 288, 14, 14], f16),), {}) +cnt: 1, ((T([64, 320, 14, 14], f16),), {}) +cnt: 1, ((T([64, 352, 14, 14], f16),), {}) +cnt: 1, ((T([64, 384, 14, 14], f16),), {}) +cnt: 1, ((T([64, 416, 14, 14], f16),), {}) +cnt: 1, ((T([64, 448, 14, 14], f16),), {}) +cnt: 1, ((T([64, 480, 14, 14], f16),), {}) +cnt: 1, ((T([64, 512, 14, 14], f16),), {}) +cnt: 1, ((T([64, 544, 14, 14], f16),), {}) +cnt: 1, ((T([64, 576, 14, 14], f16),), {}) +cnt: 1, ((T([64, 608, 14, 14], f16),), {}) +cnt: 1, ((T([64, 640, 14, 14], f16),), {}) +cnt: 1, ((T([64, 672, 14, 14], f16),), {}) +cnt: 1, ((T([64, 704, 14, 14], f16),), {}) +cnt: 1, ((T([64, 736, 14, 14], f16),), {}) +cnt: 1, ((T([64, 768, 14, 14], f16),), {}) +cnt: 1, ((T([64, 800, 14, 14], f16),), {}) +cnt: 1, ((T([64, 832, 14, 14], 
f16),), {}) +cnt: 1, ((T([64, 864, 14, 14], f16),), {}) +cnt: 1, ((T([64, 896, 14, 14], f16),), {}) +cnt: 1, ((T([64, 928, 14, 14], f16),), {}) +cnt: 1, ((T([64, 960, 14, 14], f16),), {}) +cnt: 1, ((T([64, 992, 14, 14], f16),), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16),), {}) +cnt: 1, ((T([64, 512, 7, 7], f16),), {}) +cnt: 16, ((T([64, 128, 7, 7], f16),), {}) +cnt: 1, ((T([64, 544, 7, 7], f16),), {}) +cnt: 1, ((T([64, 576, 7, 7], f16),), {}) +cnt: 1, ((T([64, 608, 7, 7], f16),), {}) +cnt: 1, ((T([64, 640, 7, 7], f16),), {}) +cnt: 1, ((T([64, 672, 7, 7], f16),), {}) +cnt: 1, ((T([64, 704, 7, 7], f16),), {}) +cnt: 1, ((T([64, 736, 7, 7], f16),), {}) +cnt: 1, ((T([64, 768, 7, 7], f16),), {}) +cnt: 1, ((T([64, 800, 7, 7], f16),), {}) +cnt: 1, ((T([64, 832, 7, 7], f16),), {}) +cnt: 1, ((T([64, 864, 7, 7], f16),), {}) +cnt: 1, ((T([64, 896, 7, 7], f16),), {}) +cnt: 1, ((T([64, 928, 7, 7], f16),), {}) +cnt: 1, ((T([64, 960, 7, 7], f16),), {}) +cnt: 1, ((T([64, 992, 7, 7], f16),), {}) +cnt: 1, ((T([64, 1024, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([64, 1024, 7, 7], f16), T([64, 1024, 7, 7], f16), 0), {}) +cnt: 16, ((T([64, 128, 7, 7], f16), T([64, 128, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 992, 7, 7], f16), T([64, 992, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([64, 960, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 928, 7, 7], f16), T([64, 928, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 896, 7, 7], f16), T([64, 896, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 864, 7, 7], f16), T([64, 864, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 832, 7, 7], f16), T([64, 832, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 800, 7, 7], f16), T([64, 800, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 768, 7, 7], f16), T([64, 768, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 736, 7, 7], f16), T([64, 736, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 704, 7, 7], f16), T([64, 704, 7, 7], f16), 0), {}) +cnt: 1, 
((T([64, 672, 7, 7], f16), T([64, 672, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 640, 7, 7], f16), T([64, 640, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 608, 7, 7], f16), T([64, 608, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 576, 7, 7], f16), T([64, 576, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 544, 7, 7], f16), T([64, 544, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16), 0), {}) +cnt: 24, ((T([64, 128, 14, 14], f16), T([64, 128, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 992, 14, 14], f16), T([64, 992, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([64, 960, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 928, 14, 14], f16), T([64, 928, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 896, 14, 14], f16), T([64, 896, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 864, 14, 14], f16), T([64, 864, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 832, 14, 14], f16), T([64, 832, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 800, 14, 14], f16), T([64, 800, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 768, 14, 14], f16), T([64, 768, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 736, 14, 14], f16), T([64, 736, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 704, 14, 14], f16), T([64, 704, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 672, 14, 14], f16), T([64, 672, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 640, 14, 14], f16), T([64, 640, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 608, 14, 14], f16), T([64, 608, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 576, 14, 14], f16), T([64, 576, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 544, 14, 14], f16), T([64, 544, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 480, 14, 14], f16), T([64, 480, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 448, 14, 14], f16), T([64, 448, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 416, 14, 14], f16), T([64, 416, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 384, 14, 14], f16), T([64, 384, 14, 14], 
f16), 0), {}) +cnt: 1, ((T([64, 352, 14, 14], f16), T([64, 352, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 320, 14, 14], f16), T([64, 320, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 288, 14, 14], f16), T([64, 288, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16), 0), {}) +cnt: 13, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 480, 28, 28], f16), T([64, 480, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 448, 28, 28], f16), T([64, 448, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 416, 28, 28], f16), T([64, 416, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 384, 28, 28], f16), T([64, 384, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 352, 28, 28], f16), T([64, 352, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 320, 28, 28], f16), T([64, 320, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 288, 28, 28], f16), T([64, 288, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 224, 28, 28], f16), T([64, 224, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 192, 28, 28], f16), T([64, 192, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 160, 28, 28], f16), T([64, 160, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16), 0), {}) +cnt: 7, ((T([64, 128, 56, 56], f16), T([64, 128, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 224, 56, 56], f16), T([64, 224, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), T([64, 192, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 160, 56, 56], f16), T([64, 160, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 96, 56, 56], f16), T([64, 96, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/dla102_training.txt 
b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/dla102_training.txt new file mode 100644 index 000000000..68226f899 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/dla102_training.txt @@ -0,0 +1,189 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([64, 1024, 7, 7], f16), T([64, 1024, 7, 7], f16, stride=(125440, 49, 7, 1))), {}) +cnt: 1, ((T([64, 1024, 7, 7], f16, stride=(125440, 49, 7, 1)), T([64, 1024, 7, 7], f16)), {}) +cnt: 1, ((T([64, 1024, 7, 7], f16), T([64, 1024, 7, 7], f16)), {}) +cnt: 1, ((T([64, 512, 7, 7], f16, stride=(125440, 49, 7, 1)), T([64, 512, 7, 7], f16)), {}) +cnt: 16, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16)), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16, stride=(551936, 196, 14, 1))), {}) +cnt: 4, ((T([64, 512, 14, 14], f16, stride=(551936, 196, 14, 1)), T([64, 512, 14, 14], f16)), {}) +cnt: 4, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16, stride=(200704, 196, 14, 1))), {}) +cnt: 4, ((T([64, 512, 14, 14], f16, stride=(200704, 196, 14, 1)), T([64, 512, 14, 14], f16)), {}) +cnt: 2, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16, stride=(301056, 196, 14, 1))), {}) +cnt: 4, ((T([64, 512, 14, 14], f16, stride=(301056, 196, 14, 1)), T([64, 512, 14, 14], f16)), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16, stride=(401408, 196, 14, 1))), {}) +cnt: 3, ((T([64, 512, 14, 14], f16, stride=(401408, 196, 14, 1)), T([64, 512, 14, 14], f16)), {}) +cnt: 9, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16)), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16, stride=(903168, 784, 28, 1))), {}) +cnt: 3, ((T([64, 256, 28, 28], f16, stride=(903168, 784, 28, 1)), T([64, 256, 28, 28], f16)), {}) +cnt: 2, ((T([64, 256, 28, 28], f16), T([64, 
256, 28, 28], f16, stride=(401408, 784, 28, 1))), {}) +cnt: 2, ((T([64, 256, 28, 28], f16, stride=(401408, 784, 28, 1)), T([64, 256, 28, 28], f16)), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16, stride=(602112, 784, 28, 1))), {}) +cnt: 2, ((T([64, 256, 28, 28], f16, stride=(602112, 784, 28, 1)), T([64, 256, 28, 28], f16)), {}) +cnt: 3, ((T([64, 128, 56, 56], f16), T([64, 128, 56, 56], f16)), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 128, 56, 56], f16, stride=(802816, 3136, 56, 1))), {}) +cnt: 1, ((T([64, 128, 56, 56], f16, stride=(802816, 3136, 56, 1)), T([64, 128, 56, 56], f16)), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16)), {}) +Operator: aten.add_.Tensor +cnt: 105, ((T([], i64), 1), {}) +cnt: 3, ((T([64, 128, 56, 56], f16), T([64, 128, 56, 56], f16)), {}) +cnt: 12, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16)), {}) +cnt: 24, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16)), {}) +cnt: 3, ((T([64, 1024, 7, 7], f16), T([64, 1024, 7, 7], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 128, 56, 56], f16), T([64, 128, 56, 56], f16)], 1), {}) +cnt: 2, (([T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16), T([64, 128, 28, 28], f16), T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16)], 1), {}) +cnt: 4, (([T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16)], 1), {}) +cnt: 2, (([T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), T([64, 256, 14, 14], f16), T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 1024, 7, 7], f16), 
T([64, 1024, 7, 7], f16), T([64, 512, 7, 7], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([16, 3, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 16, 224, 224], f16), T([16, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 16, 224, 224], f16), T([32, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 32, 56, 56], f16), T([128, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 64, 56, 56], f16), T([128, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([64, 128, 28, 28], f16), T([256, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([128, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([64, 256, 28, 28], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([64, 128, 28, 28], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 512, 28, 28], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 
1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 768, 28, 28], f16), T([256, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 1152, 28, 28], f16), T([256, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 17, ((T([64, 256, 14, 14], f16), T([512, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([256, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 15, ((T([64, 512, 14, 14], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 15, ((T([64, 256, 14, 14], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([64, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 1536, 14, 14], f16), T([512, 1536, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 2048, 14, 14], f16), T([512, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 2816, 14, 14], f16), T([512, 2816, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 512, 7, 7], f16), T([1024, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([512, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([512, 512, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 1024, 7, 7], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([512, 512, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 2560, 7, 7], f16), T([1024, 2560, 1, 1], f16), None, [1, 1], [0, 0], [1, 
1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 1024, 1, 1], f16), T([1000, 1024, 1, 1], f16), T([1000], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 1000, 1, 1], f16), T([64, 1024, 1, 1], f16), T([1000, 1024, 1, 1], f16), [1000], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 1024, 7, 7], f16), T([64, 2560, 7, 7], f16), T([1024, 2560, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 1024, 7, 7], f16), T([64, 512, 7, 7], f16), T([1024, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16), T([512, 512, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 1024, 7, 7], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 14, 14], f16), T([512, 512, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), T([512, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 2816, 14, 14], f16), T([512, 2816, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 17, ((T([64, 512, 14, 14], f16), T([64, 256, 14, 14], f16), T([512, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 15, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 15, ((T([64, 256, 14, 14], f16), T([64, 512, 14, 14], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), 
{}) +cnt: 4, ((T([64, 512, 14, 14], f16), T([64, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 512, 14, 14], f16), T([64, 1536, 14, 14], f16), T([512, 1536, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 2048, 14, 14], f16), T([512, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 28, 28], f16), T([256, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16), T([256, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 1152, 28, 28], f16), T([256, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 9, ((T([64, 256, 28, 28], f16), T([64, 128, 28, 28], f16), T([256, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([64, 128, 28, 28], f16), T([64, 256, 28, 28], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 256, 28, 28], f16), T([64, 512, 28, 28], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 768, 28, 28], f16), T([256, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 56, 56], f16), T([128, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 
128, 56, 56], f16), T([64, 128, 56, 56], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 256, 56, 56], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 128, 56, 56], f16), T([64, 64, 56, 56], f16), T([128, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 128, 56, 56], f16), T([64, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 112, 112], f16), T([64, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 32, 112, 112], f16), T([64, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 32, 56, 56], f16), T([128, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 16, 224, 224], f16), T([32, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 16, 224, 224], f16), T([64, 16, 224, 224], f16), T([16, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 16, 224, 224], f16), T([64, 3, 224, 224], f16), T([16, 3, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 1024, 7, 7], f16, stride=(1024, 1, 0, 0)), 49), {}) +Operator: 
aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([64, 32, 112, 112], f16), [2, 2], [2, 2]), {}) +cnt: 3, ((T([64, 128, 56, 56], f16), [2, 2], [2, 2]), {}) +cnt: 4, ((T([64, 256, 28, 28], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), [2, 2], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 14, 14], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 512, 7, 7], i64)), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 28, 28], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 256, 14, 14], i64)), {}) +cnt: 1, ((T([64, 256, 14, 14], f16, stride=(551936, 196, 14, 1)), T([64, 256, 28, 28], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 256, 14, 14], i64)), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 56, 56], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 128, 28, 28], i64)), {}) +cnt: 1, ((T([64, 128, 28, 28], f16, stride=(903168, 784, 28, 1)), T([64, 128, 56, 56], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 128, 28, 28], i64)), {}) +cnt: 1, ((T([64, 32, 56, 56], f16), T([64, 32, 112, 112], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 32, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 1024, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([64, 16, 224, 224], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([64, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 14, 
((T([64, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 15, ((T([64, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 26, ((T([64, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 31, ((T([64, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([64, 1024, 7, 7], f16), T([64, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 26, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 31, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 14, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 15, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([64, 128, 56, 56], f16), T([64, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, 
[True, True, True]), {}) +cnt: 3, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([64, 16, 224, 224], f16), T([64, 16, 224, 224], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 2, ((T([64, 16, 224, 224], f16),), {}) +cnt: 1, ((T([64, 32, 112, 112], f16),), {}) +cnt: 1, ((T([64, 64, 112, 112], f16),), {}) +cnt: 3, ((T([64, 64, 56, 56], f16),), {}) +cnt: 4, ((T([64, 128, 56, 56], f16),), {}) +cnt: 15, ((T([64, 128, 28, 28], f16),), {}) +cnt: 13, ((T([64, 256, 28, 28], f16),), {}) +cnt: 31, ((T([64, 256, 14, 14], f16),), {}) +cnt: 25, ((T([64, 512, 14, 14], f16),), {}) +cnt: 3, ((T([64, 512, 7, 7], f16),), {}) +cnt: 3, ((T([64, 1024, 7, 7], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 3, ((T([64, 1024, 7, 7], f16), T([64, 1024, 7, 7], f16), 0), {}) +cnt: 3, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16), 0), {}) +cnt: 25, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), 0), {}) +cnt: 31, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16), 0), {}) +cnt: 13, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16), 0), {}) +cnt: 15, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16), 0), {}) +cnt: 4, ((T([64, 128, 56, 56], f16), T([64, 
128, 56, 56], f16), 0), {}) +cnt: 3, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), 0), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), 0), {}) +cnt: 2, ((T([64, 16, 224, 224], f16), T([64, 16, 224, 224], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/dm_nfnet_f0_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/dm_nfnet_f0_training.txt new file mode 100644 index 000000000..683e671e2 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/dm_nfnet_f0_training.txt @@ -0,0 +1,296 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 3, ((T([128, 256, 48, 48], f16), T([128, 256, 48, 48], f16)), {}) +cnt: 6, ((T([128, 512, 24, 24], f16), T([128, 512, 24, 24], f16)), {}) +cnt: 18, ((T([128, 1536, 12, 12], f16), T([128, 1536, 12, 12], f16)), {}) +cnt: 8, ((T([128, 1536, 6, 6], f16), T([128, 1536, 6, 6], f16)), {}) +cnt: 1, ((T([128, 128, 48, 48], f16), T([128, 128, 48, 48], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 3072], f16), T([3072, 1000], f16, stride=(1, 3072))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([128, 256, 48, 48], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([128, 512, 24, 24], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([128, 1536, 12, 12], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([128, 1536, 6, 6], f16), T([128, 1536, 12, 12], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([128, 512, 12, 12], f16), T([128, 512, 24, 24], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([128, 256, 24, 24], f16), T([128, 256, 
48, 48], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 192, 192], f16),), {}) +cnt: 1, ((T([128, 256, 48, 48], f16),), {}) +cnt: 2, ((T([128, 512, 24, 24], f16),), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16),), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([128, 3, 192, 192], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 64, 96, 96], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 768, 24, 24], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 768, 12, 12], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 768, 13, 13], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([128, 768, 25, 25], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([128, 256, 49, 49], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([128, 64, 97, 97], f16), [0, -1, 0, -1]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 193, 193], f16), T([16, 3, 3, 3], f16), T([16], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), T([32, 16, 3, 3], f16), T([32], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([64, 32, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 97, 97], f16), T([128, 64, 3, 3], f16), T([128], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 48, 48], f16), T([256, 128, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 48, 48], f16), T([128, 128, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 48, 48], f16), T([128, 128, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 1, 1], f16), T([256, 128, 1, 
1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 256, 24, 24], f16), T([512, 256, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), T([256, 256, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 49, 49], f16), T([256, 128, 3, 3], f16), T([256], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 2), {}) +cnt: 3, ((T([128, 256, 24, 24], f16), T([256, 128, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 2, ((T([128, 512, 1, 1], f16), T([256, 512, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 256, 1, 1], f16), T([512, 256, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 24, 24], f16), T([256, 512, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 12, 12], f16), T([1536, 512, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 24, 24], f16), T([768, 512, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 768, 25, 25], f16), T([768, 128, 3, 3], f16), T([768], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 6), {}) +cnt: 11, ((T([128, 768, 12, 12], f16), T([768, 128, 3, 3], f16), T([768], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 6), {}) +cnt: 6, ((T([128, 768, 12, 12], f16), T([1536, 768, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([128, 1536, 1, 1], f16), T([768, 1536, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([128, 768, 1, 1], f16), T([1536, 768, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), T([768, 1536, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 
1536, 6, 6], f16), T([1536, 1536, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 768, 13, 13], f16), T([768, 128, 3, 3], f16), T([768], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 6), {}) +cnt: 5, ((T([128, 768, 6, 6], f16), T([768, 128, 3, 3], f16), T([768], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 6), {}) +cnt: 3, ((T([128, 768, 6, 6], f16), T([1536, 768, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), T([768, 1536, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1536, 6, 6], f16), T([3072, 1536, 1, 1], f16), T([3072], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 3072, 6, 6], f16), T([128, 1536, 6, 6], f16), T([3072, 1536, 1, 1], f16), [3072], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 9, ((T([128, 1536, 1, 1], f16), T([128, 768, 1, 1], f16), T([1536, 768, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 9, ((T([128, 768, 1, 1], f16), T([128, 1536, 1, 1], f16), T([768, 1536, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16), T([128, 768, 6, 6], f16), T([1536, 768, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 5, ((T([128, 768, 6, 6], f16), T([128, 768, 6, 6], f16), T([768, 128, 3, 3], f16), [768], [1, 1], [1, 1], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 2, ((T([128, 768, 6, 6], f16), T([128, 1536, 6, 6], f16), T([768, 1536, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 6, 6], f16), T([128, 768, 13, 13], f16), T([768, 128, 3, 3], f16), [768], [2, 2], [0, 0], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 6, ((T([128, 768, 12, 12], f16), T([128, 
1536, 12, 12], f16), T([768, 1536, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1536, 6, 6], f16), T([128, 1536, 6, 6], f16), T([1536, 1536, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), T([128, 768, 12, 12], f16), T([1536, 768, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 11, ((T([128, 768, 12, 12], f16), T([128, 768, 12, 12], f16), T([768, 128, 3, 3], f16), [768], [1, 1], [1, 1], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 12, 12], f16), T([128, 768, 25, 25], f16), T([768, 128, 3, 3], f16), [768], [2, 2], [0, 0], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 24, 24], f16), T([128, 512, 24, 24], f16), T([768, 512, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1536, 12, 12], f16), T([128, 512, 12, 12], f16), T([1536, 512, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 512, 1, 1], f16), T([128, 256, 1, 1], f16), T([512, 256, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 256, 1, 1], f16), T([128, 512, 1, 1], f16), T([256, 512, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 512, 24, 24], f16), T([128, 256, 24, 24], f16), T([512, 256, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 256, 24, 24], f16), T([128, 256, 24, 24], f16), T([256, 128, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 24, 24], f16), T([128, 512, 24, 24], f16), T([256, 512, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 24, 24], f16), 
T([128, 256, 49, 49], f16), T([256, 128, 3, 3], f16), [256], [2, 2], [0, 0], [1, 1], False, [0, 0], 2, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), T([128, 256, 48, 48], f16), T([256, 256, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 128, 1, 1], f16), T([256, 128, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 1, 1], f16), T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), T([128, 128, 48, 48], f16), T([256, 128, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 128, 48, 48], f16), T([128, 128, 48, 48], f16), T([128, 128, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 48, 48], f16), T([128, 128, 48, 48], f16), T([128, 128, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 48, 48], f16), T([128, 64, 97, 97], f16), T([128, 64, 3, 3], f16), [128], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 96, 96], f16), T([128, 32, 96, 96], f16), T([64, 32, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([128, 16, 96, 96], f16), T([32, 16, 3, 3], f16), [32], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), T([128, 3, 193, 193], f16), T([16, 3, 3, 3], f16), [16], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 192, 192], f16), T([128, 3, 192, 192], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 3072, 6, 6], f16, stride=(3072, 1, 0, 0)), 36), {}) +cnt: 3, ((T([128, 1536, 
6, 6], f16, stride=(1536, 1, 0, 0)), 36), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16, stride=(1536, 1, 0, 0)), 144), {}) +cnt: 2, ((T([128, 512, 24, 24], f16, stride=(512, 1, 0, 0)), 576), {}) +cnt: 1, ((T([128, 256, 48, 48], f16, stride=(256, 1, 0, 0)), 2304), {}) +Operator: aten.gelu.default +cnt: 1, ((T([128, 16, 96, 96], f16),), {}) +cnt: 1, ((T([128, 32, 96, 96], f16),), {}) +cnt: 1, ((T([128, 64, 96, 96], f16),), {}) +cnt: 4, ((T([128, 128, 48, 48], f16),), {}) +cnt: 2, ((T([128, 256, 48, 48], f16),), {}) +cnt: 5, ((T([128, 256, 24, 24], f16),), {}) +cnt: 2, ((T([128, 512, 24, 24], f16),), {}) +cnt: 1, ((T([128, 768, 24, 24], f16),), {}) +cnt: 18, ((T([128, 768, 12, 12], f16),), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16),), {}) +cnt: 8, ((T([128, 768, 6, 6], f16),), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16),), {}) +cnt: 1, ((T([128, 3072, 6, 6], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([128, 3072, 6, 6], f16), T([128, 3072, 6, 6], f16)), {}) +cnt: 8, ((T([128, 768, 6, 6], f16), T([128, 768, 6, 6], f16)), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), T([128, 1536, 6, 6], f16)), {}) +cnt: 18, ((T([128, 768, 12, 12], f16), T([128, 768, 12, 12], f16)), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), T([128, 1536, 12, 12], f16)), {}) +cnt: 1, ((T([128, 768, 24, 24], f16), T([128, 768, 24, 24], f16)), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), T([128, 512, 24, 24], f16)), {}) +cnt: 5, ((T([128, 256, 24, 24], f16), T([128, 256, 24, 24], f16)), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), T([128, 256, 48, 48], f16)), {}) +cnt: 4, ((T([128, 128, 48, 48], f16), T([128, 128, 48, 48], f16)), {}) +cnt: 1, ((T([128, 64, 96, 96], f16), T([128, 64, 96, 96], f16)), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([128, 32, 96, 96], f16)), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), T([128, 16, 96, 96], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 256, 48, 48], f16), [2, 3], True), {}) +cnt: 
2, ((T([128, 512, 24, 24], f16), [2, 3], True), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), [2, 3], True), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 3072, 6, 6], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 3072], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 3072], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([16, 1, 1, 1], f16), 0.19245008972987526), {}) +cnt: 2, ((T([32, 1, 1, 1], f16), 0.08333333333333333), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.05892556509887896), {}) +cnt: 2, ((T([128, 1, 1, 1], f16), 0.041666666666666664), {}) +cnt: 2, ((T([128, 128, 48, 48], f16), 1.0), {}) +cnt: 4, ((T([256, 1, 1, 1], f16), 0.08838834764831845), {}) +cnt: 2, ((T([128, 1, 1, 1], f16), 0.08838834764831845), {}) +cnt: 4, ((T([128, 1, 1, 1], f16), 0.02946278254943948), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), T([128, 256, 1, 1], f16)), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), 2.0), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), 0.2), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), 0.9805806756909201), {}) +cnt: 6, ((T([512, 1, 1, 1], f16), 0.0625), {}) +cnt: 2, ((T([256, 1, 1, 1], f16), 0.0625), {}) +cnt: 8, ((T([256, 1, 1, 1], f16), 0.02946278254943948), {}) +cnt: 4, ((T([128, 512, 24, 24], f16), T([128, 512, 1, 1], f16)), {}) +cnt: 4, ((T([128, 512, 24, 24], f16), 2.0), {}) +cnt: 4, ((T([128, 512, 24, 24], f16), 0.2), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), 0.9805806756909201), {}) +cnt: 2, ((T([256, 1, 1, 1], f16), 0.04419417382415922), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), 0.9622504486493761), {}) +cnt: 2, ((T([1536, 1, 1, 1], f16), 0.04419417382415922), {}) +cnt: 2, ((T([768, 1, 1, 1], f16), 0.04419417382415922), {}) +cnt: 36, ((T([768, 1, 1, 1], f16), 0.02946278254943948), {}) +cnt: 18, ((T([1536, 1, 1, 1], f16), 0.03608439182435161), {}) +cnt: 12, ((T([128, 1536, 12, 12], f16), T([128, 1536, 1, 1], f16)), {}) +cnt: 12, ((T([128, 1536, 12, 12], f16), 
2.0), {}) +cnt: 12, ((T([128, 1536, 12, 12], f16), 0.2), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.9805806756909201), {}) +cnt: 16, ((T([768, 1, 1, 1], f16), 0.02551551815399144), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.9622504486493761), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.9449111825230679), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.9284766908852592), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.9128709291752768), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.8980265101338745), {}) +cnt: 2, ((T([1536, 1, 1, 1], f16), 0.02551551815399144), {}) +cnt: 6, ((T([128, 1536, 6, 6], f16), T([128, 1536, 1, 1], f16)), {}) +cnt: 6, ((T([128, 1536, 6, 6], f16), 2.0), {}) +cnt: 6, ((T([128, 1536, 6, 6], f16), 0.2), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), 0.9805806756909201), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), 0.9622504486493761), {}) +cnt: 2, ((T([3072, 1, 1, 1], f16), 0.02551551815399144), {}) +cnt: 1, ((T([128, 3072, 6, 6], f16), 1.7015043497085571), {}) +cnt: 6, ((T([128, 1536, 6, 6], f16), T([128, 1536, 6, 6], f16)), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16), T([], f16)), {}) +cnt: 8, ((T([128, 768, 6, 6], f16), 1.7015043497085571), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), 1.7015043497085571), {}) +cnt: 18, ((T([128, 768, 12, 12], f16), 1.7015043497085571), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), 1.7015043497085571), {}) +cnt: 12, ((T([128, 1536, 12, 12], f16), T([128, 1536, 12, 12], f16)), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), T([], f16)), {}) +cnt: 1, ((T([128, 768, 24, 24], f16), 1.7015043497085571), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), 1.7015043497085571), {}) +cnt: 4, ((T([128, 512, 24, 24], f16), T([128, 512, 24, 24], f16)), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), T([], f16)), {}) +cnt: 5, ((T([128, 256, 24, 24], f16), 1.7015043497085571), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), 1.7015043497085571), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), T([128, 256, 48, 48], f16)), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), 
T([], f16)), {}) +cnt: 4, ((T([128, 128, 48, 48], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 64, 96, 96], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), 1.7015043497085571), {}) +Operator: aten.mul_.Tensor +cnt: 1, ((T([128, 16, 96, 96], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 64, 96, 96], f16), 1.7015043497085571), {}) +cnt: 4, ((T([128, 128, 48, 48], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), T([], f16)), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), 1.7015043497085571), {}) +cnt: 5, ((T([128, 256, 24, 24], f16), 1.7015043497085571), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), T([], f16)), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 768, 24, 24], f16), 1.7015043497085571), {}) +cnt: 18, ((T([128, 768, 12, 12], f16), 1.7015043497085571), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), T([], f16)), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), 1.7015043497085571), {}) +cnt: 8, ((T([128, 768, 6, 6], f16), 1.7015043497085571), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16), T([], f16)), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 3072, 6, 6], f16), 1.7015043497085571), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([1, 16, 27], f16), T([16], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 32, 144], f16), T([32], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 64, 288], f16), T([64], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 128, 576], f16), T([128], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 2, ((T([1, 256, 128], f16), T([256], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 128, 128], f16), T([128], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 2, ((T([1, 128, 1152], f16), T([128], f16), None, None, None, True, 
0.0, 1e-05), {}) +cnt: 3, ((T([1, 512, 256], f16), T([512], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 256, 256], f16), T([256], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 4, ((T([1, 256, 1152], f16), T([256], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 256, 512], f16), T([256], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 1536, 512], f16), T([1536], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 768, 512], f16), T([768], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 18, ((T([1, 768, 1152], f16), T([768], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 9, ((T([1, 1536, 768], f16), T([1536], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 8, ((T([1, 768, 1536], f16), T([768], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 1536, 1536], f16), T([1536], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 3072, 1536], f16), T([3072], f16), None, None, None, True, 0.0, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([1, 3072, 1536], f16), T([1, 3072, 1536], f16), T([3072], f16), None, None, T([3072], f32), T([3072], f32), True, 1e-05, [True, True, False]), {}) +cnt: 9, ((T([1, 1536, 768], f16), T([1, 1536, 768], f16), T([1536], f16), None, None, T([1536], f32), T([1536], f32), True, 1e-05, [True, True, False]), {}) +cnt: 18, ((T([1, 768, 1152], f16), T([1, 768, 1152], f16), T([768], f16), None, None, T([768], f32), T([768], f32), True, 1e-05, [True, True, False]), {}) +cnt: 8, ((T([1, 768, 1536], f16), T([1, 768, 1536], f16), T([768], f16), None, None, T([768], f32), T([768], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 1536, 1536], f16), T([1, 1536, 1536], f16), T([1536], f16), None, None, T([1536], f32), T([1536], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 768, 512], f16), T([1, 768, 512], f16), T([768], f16), None, None, T([768], f32), T([768], f32), True, 1e-05, [True, 
True, False]), {}) +cnt: 1, ((T([1, 1536, 512], f16), T([1, 1536, 512], f16), T([1536], f16), None, None, T([1536], f32), T([1536], f32), True, 1e-05, [True, True, False]), {}) +cnt: 3, ((T([1, 512, 256], f16), T([1, 512, 256], f16), T([512], f16), None, None, T([512], f32), T([512], f32), True, 1e-05, [True, True, False]), {}) +cnt: 4, ((T([1, 256, 1152], f16), T([1, 256, 1152], f16), T([256], f16), None, None, T([256], f32), T([256], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 256, 512], f16), T([1, 256, 512], f16), T([256], f16), None, None, T([256], f32), T([256], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 256, 256], f16), T([1, 256, 256], f16), T([256], f16), None, None, T([256], f32), T([256], f32), True, 1e-05, [True, True, False]), {}) +cnt: 2, ((T([1, 256, 128], f16), T([1, 256, 128], f16), T([256], f16), None, None, T([256], f32), T([256], f32), True, 1e-05, [True, True, False]), {}) +cnt: 2, ((T([1, 128, 1152], f16), T([1, 128, 1152], f16), T([128], f16), None, None, T([128], f32), T([128], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 128, 128], f16), T([1, 128, 128], f16), T([128], f16), None, None, T([128], f32), T([128], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 128, 576], f16), T([1, 128, 576], f16), T([128], f16), None, None, T([128], f32), T([128], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 64, 288], f16), T([1, 64, 288], f16), T([64], f16), None, None, T([64], f32), T([64], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 32, 144], f16), T([1, 32, 144], f16), T([32], f16), None, None, T([32], f32), T([32], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 16, 27], f16), T([1, 16, 27], f16), T([16], f16), None, None, T([16], f32), T([16], f32), True, 1e-05, [True, True, False]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: 
aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 128, 1, 1], f16),), {}) +cnt: 2, ((T([128, 256, 1, 1], f16),), {}) +cnt: 9, ((T([128, 768, 1, 1], f16),), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([128, 256, 1, 1], f16),), {}) +cnt: 2, ((T([128, 512, 1, 1], f16),), {}) +cnt: 9, ((T([128, 1536, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 9, ((T([128, 1536, 1, 1], f16), T([128, 1536, 1, 1], f16)), {}) +cnt: 2, ((T([128, 512, 1, 1], f16), T([128, 512, 1, 1], f16)), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16), [2, 3], True), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), [2, 3], True), {}) +Operator: aten.sum.default +cnt: 3, ((T([128, 1536, 6, 6], f16),), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16),), {}) +cnt: 2, ((T([128, 512, 24, 24], f16),), {}) +cnt: 1, ((T([128, 256, 48, 48], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 9, ((T([128, 768, 1, 1], f16), T([128, 768, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 128, 1, 1], f16), T([128, 128, 1, 1], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/dpn107_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/dpn107_training.txt new file mode 100644 index 000000000..d1572e4cd --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/dpn107_training.txt @@ -0,0 +1,545 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([32, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([32, 1000], f16), T([32, 1000], f16), 1, f16), {}) +Operator: 
aten.add.Tensor +cnt: 111, ((T([], i64), 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16, stride=(928256, 3136, 56, 1)), T([32, 256, 56, 56], f16, stride=(865536, 3136, 56, 1))), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16, stride=(865536, 3136, 56, 1))), {}) +cnt: 1, ((T([32, 512, 28, 28], f16, stride=(501760, 784, 28, 1)), T([32, 512, 28, 28], f16, stride=(451584, 784, 28, 1))), {}) +cnt: 7, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16, stride=(451584, 784, 28, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16, stride=(225792, 196, 14, 1)), T([32, 1024, 14, 14], f16, stride=(213248, 196, 14, 1))), {}) +cnt: 19, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(213248, 196, 14, 1))), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16, stride=(112896, 49, 7, 1)), T([32, 2048, 7, 7], f16, stride=(106624, 49, 7, 1))), {}) +cnt: 2, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16, stride=(106624, 49, 7, 1))), {}) +cnt: 3, ((T([32, 2176, 7, 7], f16), T([32, 2176, 7, 7], f16)), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16, stride=(131712, 49, 7, 1)), T([32, 2048, 7, 7], f16, stride=(125440, 49, 7, 1))), {}) +cnt: 1, ((T([32, 512, 7, 7], f16, stride=(131712, 49, 7, 1)), T([32, 512, 7, 7], f16, stride=(125440, 49, 7, 1))), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16, stride=(119168, 49, 7, 1))), {}) +cnt: 1, ((T([32, 384, 7, 7], f16, stride=(25088, 49, 7, 1)), T([32, 384, 7, 7], f16, stride=(119168, 49, 7, 1))), {}) +cnt: 1, ((T([32, 2304, 7, 7], f16), T([32, 2304, 7, 7], f16)), {}) +cnt: 1, ((T([32, 2432, 14, 14], f16), T([32, 2432, 14, 14], f16)), {}) +cnt: 20, ((T([32, 1088, 14, 14], f16), T([32, 1088, 14, 14], f16)), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16, stride=(476672, 196, 14, 1)), T([32, 1024, 14, 14], f16, stride=(464128, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1344, 14, 14], f16, stride=(476672, 196, 14, 1)), T([32, 1344, 14, 14], f16, stride=(464128, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 
1024, 14, 14], f16, stride=(451584, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1280, 14, 14], f16, stride=(263424, 196, 14, 1)), T([32, 1280, 14, 14], f16, stride=(451584, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(439040, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1216, 14, 14], f16, stride=(250880, 196, 14, 1)), T([32, 1216, 14, 14], f16, stride=(439040, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(426496, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1152, 14, 14], f16, stride=(238336, 196, 14, 1)), T([32, 1152, 14, 14], f16, stride=(426496, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(413952, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1088, 14, 14], f16, stride=(225792, 196, 14, 1)), T([32, 1088, 14, 14], f16, stride=(413952, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(401408, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16, stride=(213248, 196, 14, 1)), T([32, 1024, 14, 14], f16, stride=(401408, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(388864, 196, 14, 1))), {}) +cnt: 1, ((T([32, 960, 14, 14], f16, stride=(200704, 196, 14, 1)), T([32, 960, 14, 14], f16, stride=(388864, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(376320, 196, 14, 1))), {}) +cnt: 1, ((T([32, 896, 14, 14], f16, stride=(188160, 196, 14, 1)), T([32, 896, 14, 14], f16, stride=(376320, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(363776, 196, 14, 1))), {}) +cnt: 1, ((T([32, 832, 14, 14], f16, stride=(175616, 196, 14, 1)), T([32, 832, 14, 14], f16, stride=(363776, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(351232, 196, 14, 1))), {}) +cnt: 1, ((T([32, 768, 14, 14], f16, stride=(163072, 196, 14, 1)), T([32, 768, 14, 14], f16, 
stride=(351232, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(338688, 196, 14, 1))), {}) +cnt: 1, ((T([32, 704, 14, 14], f16, stride=(150528, 196, 14, 1)), T([32, 704, 14, 14], f16, stride=(338688, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(326144, 196, 14, 1))), {}) +cnt: 1, ((T([32, 640, 14, 14], f16, stride=(137984, 196, 14, 1)), T([32, 640, 14, 14], f16, stride=(326144, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(313600, 196, 14, 1))), {}) +cnt: 1, ((T([32, 576, 14, 14], f16, stride=(125440, 196, 14, 1)), T([32, 576, 14, 14], f16, stride=(313600, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(301056, 196, 14, 1))), {}) +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(112896, 196, 14, 1)), T([32, 512, 14, 14], f16, stride=(301056, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(288512, 196, 14, 1))), {}) +cnt: 1, ((T([32, 448, 14, 14], f16, stride=(100352, 196, 14, 1)), T([32, 448, 14, 14], f16, stride=(288512, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(275968, 196, 14, 1))), {}) +cnt: 1, ((T([32, 384, 14, 14], f16, stride=(87808, 196, 14, 1)), T([32, 384, 14, 14], f16, stride=(275968, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(263424, 196, 14, 1))), {}) +cnt: 1, ((T([32, 320, 14, 14], f16, stride=(75264, 196, 14, 1)), T([32, 320, 14, 14], f16, stride=(263424, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(250880, 196, 14, 1))), {}) +cnt: 1, ((T([32, 256, 14, 14], f16, stride=(62720, 196, 14, 1)), T([32, 256, 14, 14], f16, stride=(250880, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16, stride=(238336, 196, 14, 1))), {}) +cnt: 1, ((T([32, 192, 14, 14], 
f16, stride=(50176, 196, 14, 1)), T([32, 192, 14, 14], f16, stride=(238336, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1152, 14, 14], f16), T([32, 1152, 14, 14], f16)), {}) +cnt: 1, ((T([32, 1152, 28, 28], f16), T([32, 1152, 28, 28], f16)), {}) +cnt: 8, ((T([32, 576, 28, 28], f16), T([32, 576, 28, 28], f16)), {}) +cnt: 1, ((T([32, 512, 28, 28], f16, stride=(903168, 784, 28, 1)), T([32, 512, 28, 28], f16, stride=(852992, 784, 28, 1))), {}) +cnt: 1, ((T([32, 576, 28, 28], f16, stride=(903168, 784, 28, 1)), T([32, 576, 28, 28], f16, stride=(852992, 784, 28, 1))), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16, stride=(802816, 784, 28, 1))), {}) +cnt: 1, ((T([32, 512, 28, 28], f16, stride=(451584, 784, 28, 1)), T([32, 512, 28, 28], f16, stride=(802816, 784, 28, 1))), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16, stride=(752640, 784, 28, 1))), {}) +cnt: 1, ((T([32, 448, 28, 28], f16, stride=(401408, 784, 28, 1)), T([32, 448, 28, 28], f16, stride=(752640, 784, 28, 1))), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16, stride=(702464, 784, 28, 1))), {}) +cnt: 1, ((T([32, 384, 28, 28], f16, stride=(351232, 784, 28, 1)), T([32, 384, 28, 28], f16, stride=(702464, 784, 28, 1))), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16, stride=(652288, 784, 28, 1))), {}) +cnt: 1, ((T([32, 320, 28, 28], f16, stride=(301056, 784, 28, 1)), T([32, 320, 28, 28], f16, stride=(652288, 784, 28, 1))), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16, stride=(602112, 784, 28, 1))), {}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(250880, 784, 28, 1)), T([32, 256, 28, 28], f16, stride=(602112, 784, 28, 1))), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16, stride=(551936, 784, 28, 1))), {}) +cnt: 1, ((T([32, 192, 28, 28], f16, stride=(200704, 784, 28, 1)), T([32, 192, 28, 28], f16, stride=(551936, 784, 28, 1))), {}) +cnt: 1, ((T([32, 640, 28, 28], f16), T([32, 640, 28, 28], f16)), {}) 
+cnt: 1, ((T([32, 376, 56, 56], f16), T([32, 376, 56, 56], f16)), {}) +cnt: 4, ((T([32, 276, 56, 56], f16), T([32, 276, 56, 56], f16)), {}) +cnt: 1, ((T([32, 256, 56, 56], f16, stride=(1179136, 3136, 56, 1)), T([32, 256, 56, 56], f16, stride=(1116416, 3136, 56, 1))), {}) +cnt: 1, ((T([32, 100, 56, 56], f16, stride=(1179136, 3136, 56, 1)), T([32, 100, 56, 56], f16, stride=(1116416, 3136, 56, 1))), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16, stride=(1053696, 3136, 56, 1))), {}) +cnt: 1, ((T([32, 80, 56, 56], f16, stride=(313600, 3136, 56, 1)), T([32, 80, 56, 56], f16, stride=(1053696, 3136, 56, 1))), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16, stride=(990976, 3136, 56, 1))), {}) +cnt: 1, ((T([32, 60, 56, 56], f16, stride=(250880, 3136, 56, 1)), T([32, 60, 56, 56], f16, stride=(990976, 3136, 56, 1))), {}) +cnt: 1, ((T([32, 296, 56, 56], f16), T([32, 296, 56, 56], f16)), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([32, 40, 56, 56], f16, stride=(928256, 3136, 56, 1)), T([32, 20, 56, 56], f16, stride=(865536, 3136, 56, 1))], 1), {}) +cnt: 1, (([T([32, 256, 56, 56], f16), T([32, 60, 56, 56], f16)], 1), {}) +cnt: 1, (([T([32, 60, 56, 56], f16), T([32, 20, 56, 56], f16, stride=(865536, 3136, 56, 1))], 1), {}) +cnt: 1, (([T([32, 256, 56, 56], f16), T([32, 80, 56, 56], f16)], 1), {}) +cnt: 1, (([T([32, 80, 56, 56], f16), T([32, 20, 56, 56], f16, stride=(865536, 3136, 56, 1))], 1), {}) +cnt: 1, (([T([32, 256, 56, 56], f16), T([32, 100, 56, 56], f16)], 1), {}) +cnt: 1, (([T([32, 100, 56, 56], f16), T([32, 20, 56, 56], f16, stride=(865536, 3136, 56, 1))], 1), {}) +cnt: 1, (([T([32, 256, 56, 56], f16), T([32, 120, 56, 56], f16)], 1), {}) +cnt: 1, (([T([32, 128, 28, 28], f16, stride=(501760, 784, 28, 1)), T([32, 64, 28, 28], f16, stride=(451584, 784, 28, 1))], 1), {}) +cnt: 1, (([T([32, 512, 28, 28], f16), T([32, 192, 28, 28], f16)], 1), {}) +cnt: 1, (([T([32, 
192, 28, 28], f16), T([32, 64, 28, 28], f16, stride=(451584, 784, 28, 1))], 1), {}) +cnt: 1, (([T([32, 512, 28, 28], f16), T([32, 256, 28, 28], f16)], 1), {}) +cnt: 1, (([T([32, 256, 28, 28], f16), T([32, 64, 28, 28], f16, stride=(451584, 784, 28, 1))], 1), {}) +cnt: 1, (([T([32, 512, 28, 28], f16), T([32, 320, 28, 28], f16)], 1), {}) +cnt: 1, (([T([32, 320, 28, 28], f16), T([32, 64, 28, 28], f16, stride=(451584, 784, 28, 1))], 1), {}) +cnt: 1, (([T([32, 512, 28, 28], f16), T([32, 384, 28, 28], f16)], 1), {}) +cnt: 1, (([T([32, 384, 28, 28], f16), T([32, 64, 28, 28], f16, stride=(451584, 784, 28, 1))], 1), {}) +cnt: 1, (([T([32, 512, 28, 28], f16), T([32, 448, 28, 28], f16)], 1), {}) +cnt: 1, (([T([32, 448, 28, 28], f16), T([32, 64, 28, 28], f16, stride=(451584, 784, 28, 1))], 1), {}) +cnt: 1, (([T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)], 1), {}) +cnt: 1, (([T([32, 512, 28, 28], f16), T([32, 64, 28, 28], f16, stride=(451584, 784, 28, 1))], 1), {}) +cnt: 1, (([T([32, 512, 28, 28], f16), T([32, 576, 28, 28], f16)], 1), {}) +cnt: 1, (([T([32, 576, 28, 28], f16), T([32, 64, 28, 28], f16, stride=(451584, 784, 28, 1))], 1), {}) +cnt: 1, (([T([32, 512, 28, 28], f16), T([32, 640, 28, 28], f16)], 1), {}) +cnt: 1, (([T([32, 128, 14, 14], f16, stride=(225792, 196, 14, 1)), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 192, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 192, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 256, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 256, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 320, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 320, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 384, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 384, 14, 
14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 448, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 448, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 512, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 512, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 576, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 576, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 640, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 640, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 704, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 704, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 768, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 768, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 832, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 832, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 896, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 896, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 960, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 960, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 1088, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 1088, 14, 14], f16), T([32, 64, 14, 
14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 1152, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 1152, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 1216, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 1216, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 1280, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 1280, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 1344, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 1344, 14, 14], f16), T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1))], 1), {}) +cnt: 1, (([T([32, 1024, 14, 14], f16), T([32, 1408, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 256, 7, 7], f16, stride=(112896, 49, 7, 1)), T([32, 128, 7, 7], f16, stride=(106624, 49, 7, 1))], 1), {}) +cnt: 1, (([T([32, 2048, 7, 7], f16), T([32, 384, 7, 7], f16)], 1), {}) +cnt: 1, (([T([32, 384, 7, 7], f16), T([32, 128, 7, 7], f16, stride=(106624, 49, 7, 1))], 1), {}) +cnt: 1, (([T([32, 2048, 7, 7], f16), T([32, 512, 7, 7], f16)], 1), {}) +cnt: 1, (([T([32, 512, 7, 7], f16), T([32, 128, 7, 7], f16, stride=(106624, 49, 7, 1))], 1), {}) +cnt: 1, (([T([32, 2048, 7, 7], f16), T([32, 640, 7, 7], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([128, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([296, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([200, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 200, 56, 56], f16), T([200, 4, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 50), {}) +cnt: 4, ((T([32, 200, 56, 56], f16), 
T([276, 200, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 316, 56, 56], f16), T([200, 316, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 336, 56, 56], f16), T([200, 336, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 356, 56, 56], f16), T([200, 356, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 376, 56, 56], f16), T([640, 376, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 376, 56, 56], f16), T([400, 376, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 400, 56, 56], f16), T([400, 8, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 50), {}) +cnt: 8, ((T([32, 400, 28, 28], f16), T([576, 400, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 704, 28, 28], f16), T([400, 704, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([32, 400, 28, 28], f16), T([400, 8, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 50), {}) +cnt: 1, ((T([32, 768, 28, 28], f16), T([400, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 832, 28, 28], f16), T([400, 832, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 896, 28, 28], f16), T([400, 896, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 960, 28, 28], f16), T([400, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 28, 28], f16), T([400, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1088, 28, 28], f16), T([400, 1088, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1152, 28, 28], f16), T([1152, 1152, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1152, 28, 28], f16), 
T([800, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 800, 28, 28], f16), T([800, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 50), {}) +cnt: 20, ((T([32, 800, 14, 14], f16), T([1088, 800, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1216, 14, 14], f16), T([800, 1216, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 19, ((T([32, 800, 14, 14], f16), T([800, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 50), {}) +cnt: 1, ((T([32, 1280, 14, 14], f16), T([800, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1344, 14, 14], f16), T([800, 1344, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1408, 14, 14], f16), T([800, 1408, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1472, 14, 14], f16), T([800, 1472, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1536, 14, 14], f16), T([800, 1536, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1600, 14, 14], f16), T([800, 1600, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1664, 14, 14], f16), T([800, 1664, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1728, 14, 14], f16), T([800, 1728, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1792, 14, 14], f16), T([800, 1792, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1856, 14, 14], f16), T([800, 1856, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1920, 14, 14], f16), T([800, 1920, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1984, 14, 14], f16), T([800, 1984, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 
2048, 14, 14], f16), T([800, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2112, 14, 14], f16), T([800, 2112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2176, 14, 14], f16), T([800, 2176, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16), T([800, 2240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2304, 14, 14], f16), T([800, 2304, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2368, 14, 14], f16), T([800, 2368, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2432, 14, 14], f16), T([2304, 2432, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2432, 14, 14], f16), T([1600, 2432, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1600, 14, 14], f16), T([1600, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 50), {}) +cnt: 3, ((T([32, 1600, 7, 7], f16), T([2176, 1600, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2432, 7, 7], f16), T([1600, 2432, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 1600, 7, 7], f16), T([1600, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 50), {}) +cnt: 1, ((T([32, 2560, 7, 7], f16), T([1600, 2560, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2688, 1, 1], f16), T([1000, 2688, 1, 1], f16), T([1000], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([32, 1000, 1, 1], f16), T([32, 2688, 1, 1], f16), T([1000, 2688, 1, 1], f16), [1000], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 2176, 7, 7], f16), T([32, 1600, 7, 7], f16), T([2176, 1600, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 
1, [True, True, False]), {}) +cnt: 2, ((T([32, 1600, 7, 7], f16), T([32, 1600, 7, 7], f16), T([1600, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 50, [True, True, False]), {}) +cnt: 1, ((T([32, 1600, 7, 7], f16), T([32, 2560, 7, 7], f16), T([1600, 2560, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1600, 7, 7], f16), T([32, 2432, 7, 7], f16), T([1600, 2432, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1600, 7, 7], f16), T([32, 1600, 14, 14], f16), T([1600, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 50, [True, True, False]), {}) +cnt: 1, ((T([32, 1600, 14, 14], f16), T([32, 2432, 14, 14], f16), T([1600, 2432, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2304, 7, 7], f16), T([32, 2432, 14, 14], f16), T([2304, 2432, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 20, ((T([32, 1088, 14, 14], f16), T([32, 800, 14, 14], f16), T([1088, 800, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 19, ((T([32, 800, 14, 14], f16), T([32, 800, 14, 14], f16), T([800, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 50, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 2368, 14, 14], f16), T([800, 2368, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 2304, 14, 14], f16), T([800, 2304, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 2240, 14, 14], f16), T([800, 2240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 2176, 14, 14], f16), T([800, 2176, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, 
True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 2112, 14, 14], f16), T([800, 2112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 2048, 14, 14], f16), T([800, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1984, 14, 14], f16), T([800, 1984, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1920, 14, 14], f16), T([800, 1920, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1856, 14, 14], f16), T([800, 1856, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1792, 14, 14], f16), T([800, 1792, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1728, 14, 14], f16), T([800, 1728, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1664, 14, 14], f16), T([800, 1664, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1600, 14, 14], f16), T([800, 1600, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1536, 14, 14], f16), T([800, 1536, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1472, 14, 14], f16), T([800, 1472, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1408, 14, 14], f16), T([800, 1408, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, 
False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1344, 14, 14], f16), T([800, 1344, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1280, 14, 14], f16), T([800, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 1216, 14, 14], f16), T([800, 1216, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 14, 14], f16), T([32, 800, 28, 28], f16), T([800, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 50, [True, True, False]), {}) +cnt: 1, ((T([32, 800, 28, 28], f16), T([32, 1152, 28, 28], f16), T([800, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1152, 14, 14], f16), T([32, 1152, 28, 28], f16), T([1152, 1152, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([32, 576, 28, 28], f16), T([32, 400, 28, 28], f16), T([576, 400, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([32, 400, 28, 28], f16), T([32, 400, 28, 28], f16), T([400, 8, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 50, [True, True, False]), {}) +cnt: 1, ((T([32, 400, 28, 28], f16), T([32, 1088, 28, 28], f16), T([400, 1088, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 400, 28, 28], f16), T([32, 1024, 28, 28], f16), T([400, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 400, 28, 28], f16), T([32, 960, 28, 28], f16), T([400, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 400, 28, 28], f16), T([32, 896, 28, 28], f16), T([400, 896, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) 
+cnt: 1, ((T([32, 400, 28, 28], f16), T([32, 832, 28, 28], f16), T([400, 832, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 400, 28, 28], f16), T([32, 768, 28, 28], f16), T([400, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 400, 28, 28], f16), T([32, 704, 28, 28], f16), T([400, 704, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 400, 28, 28], f16), T([32, 400, 56, 56], f16), T([400, 8, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 50, [True, True, False]), {}) +cnt: 1, ((T([32, 400, 56, 56], f16), T([32, 376, 56, 56], f16), T([400, 376, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 640, 28, 28], f16), T([32, 376, 56, 56], f16), T([640, 376, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([32, 276, 56, 56], f16), T([32, 200, 56, 56], f16), T([276, 200, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([32, 200, 56, 56], f16), T([32, 200, 56, 56], f16), T([200, 4, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 50, [True, True, False]), {}) +cnt: 1, ((T([32, 200, 56, 56], f16), T([32, 356, 56, 56], f16), T([200, 356, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 200, 56, 56], f16), T([32, 336, 56, 56], f16), T([200, 336, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 200, 56, 56], f16), T([32, 316, 56, 56], f16), T([200, 316, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 200, 56, 56], f16), T([32, 128, 56, 56], f16), T([200, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 296, 56, 56], 
f16), T([32, 128, 56, 56], f16), T([296, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([32, 3, 224, 224], f16), T([128, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 2688, 7, 7], f16, stride=(2688, 1, 0, 0)), 49), {}) +Operator: aten.elu.default +cnt: 1, ((T([32, 2688, 7, 7], f16), 1.0), {}) +Operator: aten.elu_backward.default +cnt: 1, ((T([32, 2688, 7, 7], f16), 1.0, 1, 1, False, T([32, 2688, 7, 7], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([32], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([32, 128, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 128, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([32, 128, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 2688, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([32, 128, 112, 112], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 0.001), {}) +cnt: 8, ((T([32, 200, 56, 56], f16), T([200], f16), T([200], f16), T([200], f16), T([200], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 316, 56, 56], f16), T([316], f16), T([316], f16), T([316], f16), T([316], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 336, 56, 56], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 356, 56, 56], f16), T([356], f16), T([356], f16), T([356], f16), T([356], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([32, 376, 56, 56], f16), T([376], f16), T([376], f16), 
T([376], f16), T([376], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 400, 56, 56], f16), T([400], f16), T([400], f16), T([400], f16), T([400], f16), True, 0.1, 0.001), {}) +cnt: 15, ((T([32, 400, 28, 28], f16), T([400], f16), T([400], f16), T([400], f16), T([400], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 704, 28, 28], f16), T([704], f16), T([704], f16), T([704], f16), T([704], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 768, 28, 28], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 832, 28, 28], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 896, 28, 28], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 960, 28, 28], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1024, 28, 28], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1088, 28, 28], f16), T([1088], f16), T([1088], f16), T([1088], f16), T([1088], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([32, 1152, 28, 28], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 800, 28, 28], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f16), True, 0.1, 0.001), {}) +cnt: 39, ((T([32, 800, 14, 14], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1216, 14, 14], f16), T([1216], f16), T([1216], f16), T([1216], f16), T([1216], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1280, 14, 14], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1344, 14, 14], f16), T([1344], f16), T([1344], f16), T([1344], f16), T([1344], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1408, 14, 14], f16), T([1408], f16), T([1408], f16), T([1408], f16), T([1408], 
f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1472, 14, 14], f16), T([1472], f16), T([1472], f16), T([1472], f16), T([1472], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1536, 14, 14], f16), T([1536], f16), T([1536], f16), T([1536], f16), T([1536], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([32, 1600, 14, 14], f16), T([1600], f16), T([1600], f16), T([1600], f16), T([1600], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1664, 14, 14], f16), T([1664], f16), T([1664], f16), T([1664], f16), T([1664], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1728, 14, 14], f16), T([1728], f16), T([1728], f16), T([1728], f16), T([1728], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1792, 14, 14], f16), T([1792], f16), T([1792], f16), T([1792], f16), T([1792], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1856, 14, 14], f16), T([1856], f16), T([1856], f16), T([1856], f16), T([1856], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1920, 14, 14], f16), T([1920], f16), T([1920], f16), T([1920], f16), T([1920], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 1984, 14, 14], f16), T([1984], f16), T([1984], f16), T([1984], f16), T([1984], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 2048, 14, 14], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 2112, 14, 14], f16), T([2112], f16), T([2112], f16), T([2112], f16), T([2112], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 2176, 14, 14], f16), T([2176], f16), T([2176], f16), T([2176], f16), T([2176], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16), T([2240], f16), T([2240], f16), T([2240], f16), T([2240], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 2304, 14, 14], f16), T([2304], f16), T([2304], f16), T([2304], f16), T([2304], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 2368, 14, 14], f16), T([2368], f16), T([2368], f16), T([2368], f16), T([2368], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([32, 2432, 14, 14], f16), T([2432], f16), T([2432], f16), T([2432], 
f16), T([2432], f16), True, 0.1, 0.001), {}) +cnt: 5, ((T([32, 1600, 7, 7], f16), T([1600], f16), T([1600], f16), T([1600], f16), T([1600], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 2432, 7, 7], f16), T([2432], f16), T([2432], f16), T([2432], f16), T([2432], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 2560, 7, 7], f16), T([2560], f16), T([2560], f16), T([2560], f16), T([2560], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([32, 2688, 7, 7], f16), T([2688], f16), T([2688], f16), T([2688], f16), T([2688], f16), True, 0.1, 0.001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([32, 2688, 7, 7], f16), T([32, 2688, 7, 7], f16), T([2688], f16), T([2688], f16), T([2688], f16), T([2688], f32), T([2688], f32), True, 0.001, [True, True, True]), {}) +cnt: 5, ((T([32, 1600, 7, 7], f16), T([32, 1600, 7, 7], f16), T([1600], f16), T([1600], f16), T([1600], f16), T([1600], f32), T([1600], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 2560, 7, 7], f16), T([32, 2560, 7, 7], f16), T([2560], f16), T([2560], f16), T([2560], f16), T([2560], f32), T([2560], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 2432, 7, 7], f16), T([32, 2432, 7, 7], f16), T([2432], f16), T([2432], f16), T([2432], f16), T([2432], f32), T([2432], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([32, 1600, 14, 14], f16), T([32, 1600, 14, 14], f16), T([1600], f16), T([1600], f16), T([1600], f16), T([1600], f32), T([1600], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([32, 2432, 14, 14], f16), T([32, 2432, 14, 14], f16), T([2432], f16), T([2432], f16), T([2432], f16), T([2432], f32), T([2432], f32), True, 0.001, [True, True, True]), {}) +cnt: 39, ((T([32, 800, 14, 14], f16), T([32, 800, 14, 14], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f32), T([800], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 2368, 14, 14], f16), T([32, 2368, 14, 14], f16), T([2368], f16), T([2368], f16), T([2368], f16), T([2368], f32), 
T([2368], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 2304, 14, 14], f16), T([32, 2304, 14, 14], f16), T([2304], f16), T([2304], f16), T([2304], f16), T([2304], f32), T([2304], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16), T([32, 2240, 14, 14], f16), T([2240], f16), T([2240], f16), T([2240], f16), T([2240], f32), T([2240], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 2176, 14, 14], f16), T([32, 2176, 14, 14], f16), T([2176], f16), T([2176], f16), T([2176], f16), T([2176], f32), T([2176], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 2112, 14, 14], f16), T([32, 2112, 14, 14], f16), T([2112], f16), T([2112], f16), T([2112], f16), T([2112], f32), T([2112], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 2048, 14, 14], f16), T([32, 2048, 14, 14], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1984, 14, 14], f16), T([32, 1984, 14, 14], f16), T([1984], f16), T([1984], f16), T([1984], f16), T([1984], f32), T([1984], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1920, 14, 14], f16), T([32, 1920, 14, 14], f16), T([1920], f16), T([1920], f16), T([1920], f16), T([1920], f32), T([1920], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1856, 14, 14], f16), T([32, 1856, 14, 14], f16), T([1856], f16), T([1856], f16), T([1856], f16), T([1856], f32), T([1856], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1792, 14, 14], f16), T([32, 1792, 14, 14], f16), T([1792], f16), T([1792], f16), T([1792], f16), T([1792], f32), T([1792], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1728, 14, 14], f16), T([32, 1728, 14, 14], f16), T([1728], f16), T([1728], f16), T([1728], f16), T([1728], f32), T([1728], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1664, 14, 14], f16), T([32, 1664, 14, 14], f16), T([1664], f16), T([1664], 
f16), T([1664], f16), T([1664], f32), T([1664], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1536, 14, 14], f16), T([32, 1536, 14, 14], f16), T([1536], f16), T([1536], f16), T([1536], f16), T([1536], f32), T([1536], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1472, 14, 14], f16), T([32, 1472, 14, 14], f16), T([1472], f16), T([1472], f16), T([1472], f16), T([1472], f32), T([1472], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1408, 14, 14], f16), T([32, 1408, 14, 14], f16), T([1408], f16), T([1408], f16), T([1408], f16), T([1408], f32), T([1408], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1344, 14, 14], f16), T([32, 1344, 14, 14], f16), T([1344], f16), T([1344], f16), T([1344], f16), T([1344], f32), T([1344], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1280, 14, 14], f16), T([32, 1280, 14, 14], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1216, 14, 14], f16), T([32, 1216, 14, 14], f16), T([1216], f16), T([1216], f16), T([1216], f16), T([1216], f32), T([1216], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 800, 28, 28], f16), T([32, 800, 28, 28], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f32), T([800], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([32, 1152, 28, 28], f16), T([32, 1152, 28, 28], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f32), T([1152], f32), True, 0.001, [True, True, True]), {}) +cnt: 15, ((T([32, 400, 28, 28], f16), T([32, 400, 28, 28], f16), T([400], f16), T([400], f16), T([400], f16), T([400], f32), T([400], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1088, 28, 28], f16), T([32, 1088, 28, 28], f16), T([1088], f16), T([1088], f16), T([1088], f16), T([1088], f32), T([1088], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 1024, 28, 28], f16), T([32, 1024, 28, 28], f16), 
T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 960, 28, 28], f16), T([32, 960, 28, 28], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 896, 28, 28], f16), T([32, 896, 28, 28], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f32), T([896], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 832, 28, 28], f16), T([32, 832, 28, 28], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f32), T([832], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 768, 28, 28], f16), T([32, 768, 28, 28], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 704, 28, 28], f16), T([32, 704, 28, 28], f16), T([704], f16), T([704], f16), T([704], f16), T([704], f32), T([704], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 400, 56, 56], f16), T([32, 400, 56, 56], f16), T([400], f16), T([400], f16), T([400], f16), T([400], f32), T([400], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([32, 376, 56, 56], f16), T([32, 376, 56, 56], f16), T([376], f16), T([376], f16), T([376], f16), T([376], f32), T([376], f32), True, 0.001, [True, True, True]), {}) +cnt: 8, ((T([32, 200, 56, 56], f16), T([32, 200, 56, 56], f16), T([200], f16), T([200], f16), T([200], f16), T([200], f32), T([200], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 356, 56, 56], f16), T([32, 356, 56, 56], f16), T([356], f16), T([356], f16), T([356], f16), T([356], f32), T([356], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 336, 56, 56], f16), T([32, 336, 56, 56], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 316, 56, 56], f16), T([32, 316, 56, 56], f16), T([316], f16), T([316], f16), 
T([316], f16), T([316], f32), T([316], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([32, 128, 112, 112], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 0.001, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([32, 1000], f16), T([32], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([32, 1000], f16), T([32], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([32, 128, 112, 112], f16),), {}) +cnt: 2, ((T([32, 128, 56, 56], f16),), {}) +cnt: 8, ((T([32, 200, 56, 56], f16),), {}) +cnt: 1, ((T([32, 316, 56, 56], f16),), {}) +cnt: 1, ((T([32, 336, 56, 56], f16),), {}) +cnt: 1, ((T([32, 356, 56, 56], f16),), {}) +cnt: 2, ((T([32, 376, 56, 56], f16),), {}) +cnt: 1, ((T([32, 400, 56, 56], f16),), {}) +cnt: 15, ((T([32, 400, 28, 28], f16),), {}) +cnt: 1, ((T([32, 704, 28, 28], f16),), {}) +cnt: 1, ((T([32, 768, 28, 28], f16),), {}) +cnt: 1, ((T([32, 832, 28, 28], f16),), {}) +cnt: 1, ((T([32, 896, 28, 28], f16),), {}) +cnt: 1, ((T([32, 960, 28, 28], f16),), {}) +cnt: 1, ((T([32, 1024, 28, 28], f16),), {}) +cnt: 1, ((T([32, 1088, 28, 28], f16),), {}) +cnt: 2, ((T([32, 1152, 28, 28], f16),), {}) +cnt: 1, ((T([32, 800, 28, 28], f16),), {}) +cnt: 39, ((T([32, 800, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1216, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1280, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1344, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1408, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1472, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1536, 14, 14], f16),), {}) +cnt: 2, ((T([32, 1600, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1664, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1728, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1792, 14, 14], f16),), 
{}) +cnt: 1, ((T([32, 1856, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1920, 14, 14], f16),), {}) +cnt: 1, ((T([32, 1984, 14, 14], f16),), {}) +cnt: 1, ((T([32, 2048, 14, 14], f16),), {}) +cnt: 1, ((T([32, 2112, 14, 14], f16),), {}) +cnt: 1, ((T([32, 2176, 14, 14], f16),), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16),), {}) +cnt: 1, ((T([32, 2304, 14, 14], f16),), {}) +cnt: 1, ((T([32, 2368, 14, 14], f16),), {}) +cnt: 2, ((T([32, 2432, 14, 14], f16),), {}) +cnt: 5, ((T([32, 1600, 7, 7], f16),), {}) +cnt: 1, ((T([32, 2432, 7, 7], f16),), {}) +cnt: 1, ((T([32, 2560, 7, 7], f16),), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([32, 128, 7, 7], f16, stride=(131712, 49, 7, 1)), [32, 128, 7, 7], 3, 0, 9223372036854775807, 1), {}) +cnt: 3, ((T([32, 128, 7, 7], f16), [32, 128, 7, 7], 2, 0, 9223372036854775807, 1), {}) +cnt: 3, ((T([32, 128, 7, 7], f16), [32, 2176, 7, 7], 1, 2048, 9223372036854775807, 1), {}) +cnt: 6, ((T([32, 2176, 7, 7], f16), [32, 2176, 7, 7], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16, stride=(131712, 49, 7, 1)), [32, 2048, 7, 7], 3, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([32, 2048, 7, 7], f16), [32, 2048, 7, 7], 2, 0, 9223372036854775807, 1), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), [32, 2176, 7, 7], 1, 0, 2048, 1), {}) +cnt: 1, ((T([32, 128, 7, 7], f16, stride=(25088, 49, 7, 1)), [32, 128, 7, 7], 3, 0, 9223372036854775807, 1), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), [32, 2048, 7, 7], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 128, 7, 7], f16, stride=(18816, 49, 7, 1)), [32, 128, 7, 7], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 256, 7, 7], f16, stride=(18816, 49, 7, 1)), [32, 256, 7, 7], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 256, 7, 7], f16), [32, 256, 7, 7], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 256, 7, 7], f16), [32, 2304, 7, 7], 1, 2048, 9223372036854775807, 1), {}) +cnt: 2, ((T([32, 2304, 7, 7], f16), [32, 2304, 7, 7], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, 
((T([32, 2048, 7, 7], f16), [32, 2304, 7, 7], 1, 0, 2048, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(476672, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 20, ((T([32, 64, 14, 14], f16), [32, 64, 14, 14], 2, 0, 9223372036854775807, 1), {}) +cnt: 20, ((T([32, 64, 14, 14], f16), [32, 1088, 14, 14], 1, 1024, 9223372036854775807, 1), {}) +cnt: 40, ((T([32, 1088, 14, 14], f16), [32, 1088, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16, stride=(476672, 196, 14, 1)), [32, 1024, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 21, ((T([32, 1024, 14, 14], f16), [32, 1024, 14, 14], 2, 0, 9223372036854775807, 1), {}) +cnt: 20, ((T([32, 1024, 14, 14], f16), [32, 1088, 14, 14], 1, 0, 1024, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(263424, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 20, ((T([32, 1024, 14, 14], f16), [32, 1024, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(250880, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(238336, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(225792, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(213248, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(200704, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(188160, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(175616, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(163072, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(150528, 196, 14, 1)), 
[32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(137984, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(125440, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(112896, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(100352, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(87808, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(75264, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(62720, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(50176, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 14, 14], f16, stride=(37632, 196, 14, 1)), [32, 64, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 128, 14, 14], f16, stride=(37632, 196, 14, 1)), [32, 128, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 128, 14, 14], f16), [32, 128, 14, 14], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 128, 14, 14], f16), [32, 1152, 14, 14], 1, 1024, 9223372036854775807, 1), {}) +cnt: 2, ((T([32, 1152, 14, 14], f16), [32, 1152, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), [32, 1152, 14, 14], 1, 0, 1024, 1), {}) +cnt: 1, ((T([32, 64, 28, 28], f16, stride=(903168, 784, 28, 1)), [32, 64, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 8, ((T([32, 64, 28, 28], f16), [32, 64, 28, 28], 2, 0, 9223372036854775807, 1), {}) +cnt: 8, ((T([32, 64, 28, 28], f16), [32, 576, 28, 28], 1, 512, 9223372036854775807, 1), {}) +cnt: 16, ((T([32, 576, 28, 28], f16), [32, 576, 28, 28], 0, 0, 9223372036854775807, 1), {}) 
+cnt: 1, ((T([32, 512, 28, 28], f16, stride=(903168, 784, 28, 1)), [32, 512, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 9, ((T([32, 512, 28, 28], f16), [32, 512, 28, 28], 2, 0, 9223372036854775807, 1), {}) +cnt: 8, ((T([32, 512, 28, 28], f16), [32, 576, 28, 28], 1, 0, 512, 1), {}) +cnt: 1, ((T([32, 64, 28, 28], f16, stride=(451584, 784, 28, 1)), [32, 64, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 8, ((T([32, 512, 28, 28], f16), [32, 512, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 28, 28], f16, stride=(401408, 784, 28, 1)), [32, 64, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 28, 28], f16, stride=(351232, 784, 28, 1)), [32, 64, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 28, 28], f16, stride=(301056, 784, 28, 1)), [32, 64, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 28, 28], f16, stride=(250880, 784, 28, 1)), [32, 64, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 28, 28], f16, stride=(200704, 784, 28, 1)), [32, 64, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 64, 28, 28], f16, stride=(150528, 784, 28, 1)), [32, 64, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 128, 28, 28], f16, stride=(150528, 784, 28, 1)), [32, 128, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 128, 28, 28], f16), [32, 128, 28, 28], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 128, 28, 28], f16), [32, 640, 28, 28], 1, 512, 9223372036854775807, 1), {}) +cnt: 2, ((T([32, 640, 28, 28], f16), [32, 640, 28, 28], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), [32, 640, 28, 28], 1, 0, 512, 1), {}) +cnt: 1, ((T([32, 20, 56, 56], f16, stride=(1179136, 3136, 56, 1)), [32, 20, 56, 56], 3, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([32, 20, 56, 56], f16), [32, 20, 56, 56], 2, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([32, 20, 56, 56], f16), [32, 276, 56, 56], 1, 256, 9223372036854775807, 1), {}) +cnt: 
8, ((T([32, 276, 56, 56], f16), [32, 276, 56, 56], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16, stride=(1179136, 3136, 56, 1)), [32, 256, 56, 56], 3, 0, 9223372036854775807, 1), {}) +cnt: 5, ((T([32, 256, 56, 56], f16), [32, 256, 56, 56], 2, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), [32, 276, 56, 56], 1, 0, 256, 1), {}) +cnt: 1, ((T([32, 20, 56, 56], f16, stride=(313600, 3136, 56, 1)), [32, 20, 56, 56], 3, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), [32, 256, 56, 56], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 20, 56, 56], f16, stride=(250880, 3136, 56, 1)), [32, 20, 56, 56], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 20, 56, 56], f16, stride=(188160, 3136, 56, 1)), [32, 20, 56, 56], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 40, 56, 56], f16, stride=(188160, 3136, 56, 1)), [32, 40, 56, 56], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 40, 56, 56], f16), [32, 40, 56, 56], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 40, 56, 56], f16), [32, 296, 56, 56], 1, 256, 9223372036854775807, 1), {}) +cnt: 2, ((T([32, 296, 56, 56], f16), [32, 296, 56, 56], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), [32, 296, 56, 56], 1, 0, 256, 1), {}) +Operator: aten.threshold_backward.default +cnt: 5, ((T([32, 1600, 7, 7], f16), T([32, 1600, 7, 7], f16), 0), {}) +cnt: 1, ((T([32, 2560, 7, 7], f16), T([32, 2560, 7, 7], f16), 0), {}) +cnt: 1, ((T([32, 2432, 7, 7], f16), T([32, 2432, 7, 7], f16), 0), {}) +cnt: 2, ((T([32, 1600, 14, 14], f16), T([32, 1600, 14, 14], f16), 0), {}) +cnt: 2, ((T([32, 2432, 14, 14], f16), T([32, 2432, 14, 14], f16), 0), {}) +cnt: 39, ((T([32, 800, 14, 14], f16), T([32, 800, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 2368, 14, 14], f16), T([32, 2368, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 2304, 14, 14], f16), T([32, 2304, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16), T([32, 2240, 14, 14], f16), 
0), {}) +cnt: 1, ((T([32, 2176, 14, 14], f16), T([32, 2176, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 2112, 14, 14], f16), T([32, 2112, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 2048, 14, 14], f16), T([32, 2048, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1984, 14, 14], f16), T([32, 1984, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1920, 14, 14], f16), T([32, 1920, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1856, 14, 14], f16), T([32, 1856, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1792, 14, 14], f16), T([32, 1792, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1728, 14, 14], f16), T([32, 1728, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1664, 14, 14], f16), T([32, 1664, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1536, 14, 14], f16), T([32, 1536, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1472, 14, 14], f16), T([32, 1472, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1408, 14, 14], f16), T([32, 1408, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1344, 14, 14], f16), T([32, 1344, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1280, 14, 14], f16), T([32, 1280, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 1216, 14, 14], f16), T([32, 1216, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 800, 28, 28], f16), T([32, 800, 28, 28], f16), 0), {}) +cnt: 2, ((T([32, 1152, 28, 28], f16), T([32, 1152, 28, 28], f16), 0), {}) +cnt: 15, ((T([32, 400, 28, 28], f16), T([32, 400, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 1088, 28, 28], f16), T([32, 1088, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 1024, 28, 28], f16), T([32, 1024, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 960, 28, 28], f16), T([32, 960, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 896, 28, 28], f16), T([32, 896, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 832, 28, 28], f16), T([32, 832, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 768, 28, 28], f16), T([32, 768, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 704, 28, 28], f16), T([32, 704, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 400, 56, 56], f16), T([32, 400, 56, 56], f16), 0), {}) +cnt: 2, ((T([32, 376, 56, 56], f16), T([32, 376, 56, 
56], f16), 0), {}) +cnt: 8, ((T([32, 200, 56, 56], f16), T([32, 200, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 356, 56, 56], f16), T([32, 356, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 336, 56, 56], f16), T([32, 336, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 316, 56, 56], f16), T([32, 316, 56, 56], f16), 0), {}) +cnt: 2, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([32, 128, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/eca_botnext26ts_256_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/eca_botnext26ts_256_training.txt new file mode 100644 index 000000000..ab778074a --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/eca_botnext26ts_256_training.txt @@ -0,0 +1,288 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 2, ((T([512, 256, 256], f16), -1, False), {}) +cnt: 1, ((T([512, 64, 64], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 1, ((T([512, 64, 64], f16), T([512, 64, 64], f16), -1, f16), {}) +cnt: 2, ((T([512, 256, 256], f16), T([512, 256, 256], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 4, ((T([128, 64, 16, 16], f16), [512, 16, 256]), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), [512, 64, 256]), {}) +cnt: 2, ((T([512, 256, 256], f16), [512, 256, 256]), {}) +cnt: 4, ((T([512, 16, 16, 16], f16), [131072, 16]), {}) +cnt: 4, ((T([131072, 31], f16), [512, 16, 16, 31]), {}) +cnt: 2, ((T([512, 16, 16, 16, 16], f16), [512, 256, 256]), {}) +cnt: 1, ((T([512, 256, 64], f16), [512, 256, 64]), {}) +cnt: 2, ((T([512, 64, 256], f16), [128, 256, 16, 16]), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), [512, 128, 256]), {}) +cnt: 1, ((T([512, 256, 128], f16), [512, 256, 128]), {}) 
+cnt: 2, ((T([512, 128, 256], f16), [128, 512, 16, 16]), {}) +cnt: 2, ((T([128, 64, 8, 8], f16), [512, 16, 64]), {}) +cnt: 1, ((T([128, 512, 8, 8], f16), [512, 128, 64]), {}) +cnt: 1, ((T([512, 64, 64], f16), [512, 64, 64]), {}) +cnt: 2, ((T([512, 8, 8, 16], f16), [32768, 16]), {}) +cnt: 2, ((T([32768, 15], f16), [512, 8, 8, 15]), {}) +cnt: 1, ((T([512, 8, 8, 8, 8], f16), [512, 64, 64]), {}) +cnt: 1, ((T([512, 64, 128], f16), [512, 64, 128]), {}) +cnt: 2, ((T([512, 128, 64], f16), [128, 512, 8, 8]), {}) +cnt: 1, ((T([512, 8, 8, 16], f16), [512, 64, 16]), {}) +cnt: 1, ((T([512, 16, 64], f16), [128, 64, 8, 8]), {}) +cnt: 2, ((T([512, 16, 16, 16], f16), [512, 256, 16]), {}) +cnt: 2, ((T([512, 16, 256], f16), [128, 64, 16, 16]), {}) +Operator: aten.add.Tensor +cnt: 31, ((T([], i64), 1), {}) +cnt: 4, ((T([128, 256, 64, 64], f16), T([128, 256, 64, 64], f16)), {}) +cnt: 4, ((T([128, 512, 32, 32], f16), T([128, 512, 32, 32], f16)), {}) +cnt: 4, ((T([128, 1024, 16, 16], f16), T([128, 1024, 16, 16], f16)), {}) +cnt: 2, ((T([512, 16, 16, 16, 16], f16, stride=(8432, 31, 527, 1, 0)), T([512, 16, 16, 16, 16], f16, stride=(8432, 527, 31, 0, 1))), {}) +cnt: 2, ((T([512, 256, 256], f16), T([512, 256, 256], f16)), {}) +cnt: 3, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16)), {}) +cnt: 1, ((T([512, 8, 8, 8, 8], f16, stride=(1080, 15, 135, 1, 0)), T([512, 8, 8, 8, 8], f16, stride=(1080, 135, 15, 0, 1))), {}) +cnt: 1, ((T([512, 64, 64], f16), T([512, 64, 64], f16)), {}) +cnt: 1, ((T([512, 8, 8, 16], f16, stride=(1024, 16, 128, 1)), T([512, 8, 8, 16], f16)), {}) +cnt: 1, ((T([512, 64, 16], f16), T([512, 64, 16], f16)), {}) +cnt: 2, ((T([512, 16, 16, 16], f16, stride=(4096, 16, 256, 1)), T([512, 16, 16, 16], f16)), {}) +cnt: 2, ((T([512, 256, 16], f16), T([512, 256, 16], f16)), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16)), {}) +cnt: 2, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16)), {}) +cnt: 3, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 
64], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([128, 512, 16, 16], f16), [2, 2], [2, 2]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([128, 512, 8, 8], f16), T([128, 512, 16, 16], f16), [2, 2], [2, 2], [0, 0], False, True, None), {}) +Operator: aten.bmm.default +cnt: 2, ((T([512, 256, 16], f16, stride=(4096, 1, 256)), T([512, 16, 256], f16)), {}) +cnt: 1, ((T([512, 256, 256], f16), T([512, 256, 64], f16, stride=(16384, 1, 256))), {}) +cnt: 1, ((T([512, 256, 256], f16), T([512, 256, 128], f16, stride=(32768, 1, 256))), {}) +cnt: 1, ((T([512, 64, 16], f16, stride=(1024, 1, 64)), T([512, 16, 64], f16)), {}) +cnt: 1, ((T([512, 64, 64], f16), T([512, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 1, ((T([512, 64, 64], f16, stride=(4096, 1, 64)), T([512, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 1, ((T([512, 64, 128], f16, stride=(8192, 1, 64)), T([512, 128, 64], f16)), {}) +cnt: 1, ((T([512, 16, 64], f16), T([512, 64, 64], f16)), {}) +cnt: 1, ((T([512, 64, 64], f16), T([512, 64, 16], f16, stride=(1024, 1, 64))), {}) +cnt: 1, ((T([512, 256, 256], f16, stride=(65536, 1, 256)), T([512, 256, 128], f16, stride=(32768, 1, 256))), {}) +cnt: 1, ((T([512, 256, 128], f16, stride=(32768, 1, 256)), T([512, 128, 256], f16)), {}) +cnt: 2, ((T([512, 16, 256], f16), T([512, 256, 256], f16)), {}) +cnt: 2, ((T([512, 256, 256], f16), T([512, 256, 16], f16, stride=(4096, 1, 256))), {}) +cnt: 1, ((T([512, 256, 256], f16, stride=(65536, 1, 256)), T([512, 256, 64], f16, stride=(16384, 1, 256))), {}) +cnt: 1, ((T([512, 256, 64], f16, stride=(16384, 1, 256)), T([512, 64, 256], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([128, 64, 8, 8], f16), T([128, 64, 8, 8], f16), T([128, 512, 8, 8], f16)], 1), {}) +cnt: 1, (([T([128, 64, 16, 16], f16), T([128, 64, 16, 16], f16), T([128, 512, 16, 16], f16)], 1), {}) +cnt: 1, (([T([128, 
64, 16, 16], f16), T([128, 64, 16, 16], f16), T([128, 256, 16, 16], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 256, 256], f16),), {}) +cnt: 1, ((T([128, 24, 128, 128], f16),), {}) +cnt: 1, ((T([128, 32, 128, 128], f16),), {}) +cnt: 1, ((T([128, 64, 128, 128], f16),), {}) +cnt: 4, ((T([128, 64, 64, 64], f16),), {}) +cnt: 2, ((T([128, 256, 64, 64], f16),), {}) +cnt: 1, ((T([128, 128, 64, 64], f16),), {}) +cnt: 3, ((T([128, 128, 32, 32], f16),), {}) +cnt: 2, ((T([128, 512, 32, 32], f16),), {}) +cnt: 1, ((T([128, 256, 32, 32], f16),), {}) +cnt: 3, ((T([128, 256, 16, 16], f16),), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16),), {}) +cnt: 1, ((T([128, 512, 16, 16], f16),), {}) +cnt: 3, ((T([128, 512, 8, 8], f16),), {}) +cnt: 2, ((T([128, 2048, 8, 8], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 4, ((T([8192, 16, 31], f16), [0, 1], 0.0), {}) +cnt: 4, ((T([8192, 512], f16), [0, 15], 0.0), {}) +cnt: 2, ((T([4096, 8, 15], f16), [0, 1], 0.0), {}) +cnt: 2, ((T([4096, 128], f16), [0, 7], 0.0), {}) +cnt: 2, ((T([4096, 135], f16), [0, -7]), {}) +cnt: 2, ((T([4096, 8, 16], f16), [0, -1]), {}) +cnt: 4, ((T([8192, 527], f16), [0, -15]), {}) +cnt: 4, ((T([8192, 16, 32], f16), [0, -1]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 256, 256], f16), T([24, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([32, 24, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 64, 64], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 64, 64, 64], f16), T([64, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 4), {}) +cnt: 2, ((T([128, 1, 64], f16), T([1, 1, 3], f16), None, [1], [1], [1], False, [0], 1), {}) +cnt: 3, ((T([128, 64, 64, 64], f16), T([256, 64, 1, 1], f16), 
None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 64, 64], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 64, 64], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 2, ((T([128, 1, 128], f16), T([1, 1, 5], f16), None, [1], [2], [1], False, [0], 1), {}) +cnt: 2, ((T([128, 128, 32, 32], f16), T([512, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 64, 64], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([256, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 16), {}) +cnt: 1, ((T([128, 1, 256], f16), T([1, 1, 5], f16), None, [1], [2], [1], False, [0], 1), {}) +cnt: 2, ((T([128, 256, 16, 16], f16), T([1024, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([256, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([384, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([640, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, 
[0, 0], 1), {}) +cnt: 2, ((T([128, 512, 8, 8], f16), T([2048, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([2048, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([512, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 8, 8], f16), T([640, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 2, ((T([128, 2048, 8, 8], f16), T([128, 512, 8, 8], f16), T([2048, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 640, 8, 8], f16), T([128, 512, 8, 8], f16), T([640, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 8, 8], f16), T([128, 2048, 8, 8], f16), T([512, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([128, 1024, 16, 16], f16), T([2048, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 640, 16, 16], f16), T([128, 512, 16, 16], f16), T([640, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 1024, 16, 16], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16), T([128, 256, 16, 16], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 16, 16], f16), T([128, 256, 16, 16], f16), T([384, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([128, 1024, 16, 16], f16), T([256, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 
1, [True, True, False]), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([128, 512, 32, 32], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1, 256], f16), T([128, 1, 256], f16), T([1, 1, 5], f16), [0], [1], [2], [1], False, [0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([128, 256, 32, 32], f16), T([256, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 16, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([128, 512, 32, 32], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 512, 32, 32], f16), T([128, 128, 32, 32], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 1, 128], f16), T([128, 1, 128], f16), T([1, 1, 5], f16), [0], [1], [2], [1], False, [0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16), T([128, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 512, 32, 32], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([128, 256, 64, 64], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 128, 64, 64], f16), T([128, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 256, 64, 64], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 256, 64, 64], f16), T([128, 64, 64, 64], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 1, 64], f16), 
T([128, 1, 64], f16), T([1, 1, 3], f16), [0], [1], [1], [1], False, [0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16), T([64, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 4, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 64, 64], f16), T([128, 256, 64, 64], f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([128, 32, 128, 128], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 24, 128, 128], f16), T([32, 24, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([128, 3, 256, 256], f16), T([24, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 256, 256], f16), T([128, 3, 256, 256], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2048, 8, 8], f16, stride=(2048, 1, 0, 0)), 64), {}) +cnt: 1, ((T([128, 256, 16, 16], f16, stride=(256, 1, 0, 0)), 256), {}) +cnt: 2, ((T([128, 128, 32, 32], f16, stride=(128, 1, 0, 0)), 1024), {}) +cnt: 2, ((T([128, 64, 64, 64], f16, stride=(64, 1, 0, 0)), 4096), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 64, 128, 128], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 64, 64, 64], f16), T([128, 64, 128, 128], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([128, 64, 64, 64], i64)), {}) +Operator: aten.mean.dim +cnt: 2, ((T([128, 64, 64, 64], f16), [2, 3]), {}) +cnt: 2, ((T([128, 128, 32, 32], 
f16), [2, 3]), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), [2, 3]), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 4, ((T([131072, 16], f16), T([16, 31], f16, stride=(1, 16))), {}) +cnt: 2, ((T([32768, 16], f16), T([16, 15], f16, stride=(1, 16))), {}) +cnt: 1, ((T([128, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 2048], f16)), {}) +cnt: 2, ((T([15, 32768], f16, stride=(1, 15)), T([32768, 16], f16)), {}) +cnt: 2, ((T([32768, 15], f16), T([15, 16], f16)), {}) +cnt: 4, ((T([31, 131072], f16, stride=(1, 31)), T([131072, 16], f16)), {}) +cnt: 4, ((T([131072, 31], f16), T([31, 16], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16, stride=(64, 1, 0, 0))), {}) +cnt: 4, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16, stride=(128, 1, 0, 0))), {}) +cnt: 2, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16, stride=(256, 1, 0, 0))), {}) +cnt: 4, ((T([512, 256, 256], f16), 0.25), {}) +cnt: 2, ((T([512, 64, 64], f16), 0.25), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16)), {}) +cnt: 2, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16)), {}) +cnt: 2, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 24, 128, 128], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128], 
f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 512, 32, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 1024, 16, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 2048, 8, 8], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 3, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 512, 8, 8], f16), T([128, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 1024, 16, 16], f16), T([128, 1024, 16, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, 
True, True]), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([128, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 512, 32, 32], f16), T([128, 512, 32, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 256, 64, 64], f16), T([128, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([128, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([128, 24, 128, 128], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.sigmoid.default +cnt: 2, ((T([128, 1, 64], f16),), {}) +cnt: 2, ((T([128, 1, 128], f16),), {}) +cnt: 1, ((T([128, 1, 
256], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 1, ((T([128, 1, 256], f16), T([128, 1, 256], f16)), {}) +cnt: 2, ((T([128, 1, 128], f16), T([128, 1, 128], f16)), {}) +cnt: 2, ((T([128, 1, 64], f16), T([128, 1, 64], f16)), {}) +Operator: aten.silu_.default +cnt: 1, ((T([128, 24, 128, 128], f16),), {}) +cnt: 1, ((T([128, 32, 128, 128], f16),), {}) +cnt: 1, ((T([128, 64, 128, 128], f16),), {}) +cnt: 4, ((T([128, 64, 64, 64], f16),), {}) +cnt: 2, ((T([128, 256, 64, 64], f16),), {}) +cnt: 1, ((T([128, 128, 64, 64], f16),), {}) +cnt: 3, ((T([128, 128, 32, 32], f16),), {}) +cnt: 2, ((T([128, 512, 32, 32], f16),), {}) +cnt: 1, ((T([128, 256, 32, 32], f16),), {}) +cnt: 3, ((T([128, 256, 16, 16], f16),), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16),), {}) +cnt: 1, ((T([128, 512, 16, 16], f16),), {}) +cnt: 3, ((T([128, 512, 8, 8], f16),), {}) +cnt: 2, ((T([128, 2048, 8, 8], f16),), {}) +Operator: aten.silu_backward.default +cnt: 2, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16)), {}) +cnt: 3, ((T([128, 512, 8, 8], f16), T([128, 512, 8, 8], f16)), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 512, 16, 16], f16)), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16), T([128, 1024, 16, 16], f16)), {}) +cnt: 3, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16)), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([128, 256, 32, 32], f16)), {}) +cnt: 2, ((T([128, 512, 32, 32], f16), T([128, 512, 32, 32], f16)), {}) +cnt: 3, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16)), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 128, 64, 64], f16)), {}) +cnt: 2, ((T([128, 256, 64, 64], f16), T([128, 256, 64, 64], f16)), {}) +cnt: 4, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16)), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([128, 64, 128, 128], f16)), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 128, 128], f16)), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([128, 24, 128, 128], f16)), {}) +Operator: aten.slice_backward.default +cnt: 2, 
((T([4096, 8, 8], f16), [4096, 8, 15], 2, 7, 9223372036854775807, 1), {}) +cnt: 2, ((T([4096, 8, 15], f16), [4096, 9, 15], 1, 0, 8, 1), {}) +cnt: 2, ((T([4096, 9, 15], f16), [4096, 9, 15], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([8192, 16, 16], f16), [8192, 16, 31], 2, 15, 9223372036854775807, 1), {}) +cnt: 4, ((T([8192, 16, 31], f16), [8192, 17, 31], 1, 0, 16, 1), {}) +cnt: 4, ((T([8192, 17, 31], f16), [8192, 17, 31], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.split_with_sizes.default +cnt: 1, ((T([128, 384, 16, 16], f16), [64, 64, 256], 1), {}) +cnt: 1, ((T([128, 640, 16, 16], f16), [64, 64, 512], 1), {}) +cnt: 1, ((T([128, 640, 8, 8], f16), [64, 64, 512], 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 1, ((T([512, 8, 8, 8, 8], f16, stride=(4096, 64, 1, 512, 8)), [2], True), {}) +cnt: 1, ((T([512, 8, 8, 8, 8], f16, stride=(4096, 512, 8, 64, 1)), [2], True), {}) +cnt: 2, ((T([512, 16, 16, 16, 16], f16, stride=(65536, 256, 1, 4096, 16)), [2], True), {}) +cnt: 2, ((T([512, 16, 16, 16, 16], f16, stride=(65536, 4096, 16, 256, 1)), [2], True), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 128, 32, 32], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 64, 64, 64], f16), [2, 3], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/eca_halonext26ts_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/eca_halonext26ts_training.txt new file mode 100644 index 000000000..714fcdbba --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/eca_halonext26ts_training.txt @@ -0,0 +1,343 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 1, ((T([1024, 4, 64, 144], f16), -1, False), {}) +cnt: 1, ((T([1024, 4, 16, 144], f16), -1, 
False), {}) +cnt: 1, ((T([1024, 1, 64, 144], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 1, ((T([1024, 1, 64, 144], f16), T([1024, 1, 64, 144], f16), -1, f16), {}) +cnt: 1, ((T([1024, 4, 16, 144], f16), T([1024, 4, 16, 144], f16), -1, f16), {}) +cnt: 1, ((T([1024, 4, 64, 144], f16), T([1024, 4, 64, 144], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 1, ((T([1024, 16, 8, 8, 2, 2], f16), [1024, 16, 64, 4]), {}) +cnt: 1, ((T([128, 384, 2, 2, 12, 12], f16), [1024, 48, 4, 144]), {}) +cnt: 1, ((T([1024, 4, 64, 16], f16), [4096, 64, 16]), {}) +cnt: 2, ((T([1024, 4, 16, 144], f16), [4096, 16, 144]), {}) +cnt: 1, ((T([4096, 64, 144], f16), [1024, 4, 64, 144]), {}) +cnt: 1, ((T([1024, 4, 64, 16], f16), [4096, 8, 8, 16]), {}) +cnt: 2, ((T([262144, 23], f16), [4096, 8, 8, 23]), {}) +cnt: 1, ((T([4096, 8, 8, 16], f16), [262144, 16]), {}) +cnt: 1, ((T([4096, 8, 8, 12, 12], f16), [1024, 4, 64, 144]), {}) +cnt: 1, ((T([1024, 4, 144, 32], f16), [4096, 144, 32]), {}) +cnt: 1, ((T([4096, 64, 32], f16), [1024, 4, 64, 32]), {}) +cnt: 1, ((T([1024, 32, 64, 4], f16), [32768, 8, 8, 2, 2]), {}) +cnt: 1, ((T([1024, 16, 4, 4, 2, 2], f16), [1024, 16, 16, 4]), {}) +cnt: 1, ((T([128, 640, 2, 2, 12, 12], f16), [1024, 80, 4, 144]), {}) +cnt: 1, ((T([1024, 4, 16, 16], f16), [4096, 16, 16]), {}) +cnt: 1, ((T([4096, 16, 144], f16), [1024, 4, 16, 144]), {}) +cnt: 1, ((T([1024, 4, 16, 16], f16), [4096, 4, 4, 16]), {}) +cnt: 2, ((T([65536, 23], f16), [4096, 4, 4, 23]), {}) +cnt: 1, ((T([4096, 4, 4, 16], f16), [65536, 16]), {}) +cnt: 1, ((T([4096, 4, 4, 12, 12], f16), [1024, 4, 16, 144]), {}) +cnt: 1, ((T([1024, 4, 144, 64], f16), [4096, 144, 64]), {}) +cnt: 1, ((T([4096, 16, 64], f16), [1024, 4, 16, 64]), {}) +cnt: 1, ((T([1024, 64, 16, 4], f16), [65536, 4, 4, 2, 2]), {}) +cnt: 1, ((T([1024, 64, 144], f16), [1024, 1, 64, 144]), {}) +cnt: 2, ((T([1024, 8, 8, 16], f16), [65536, 16]), {}) +cnt: 2, ((T([65536, 23], f16), [1024, 8, 8, 23]), {}) +cnt: 1, 
((T([1024, 8, 8, 12, 12], f16), [1024, 1, 64, 144]), {}) +cnt: 1, ((T([1024, 64, 64], f16), [1024, 1, 64, 64]), {}) +cnt: 1, ((T([1024, 64, 64, 1], f16), [65536, 8, 8, 1, 1]), {}) +cnt: 1, ((T([1024, 8, 8, 16], f16), [1024, 1, 64, 16]), {}) +cnt: 1, ((T([1024, 80, 1, 144], f16), [128, 640, 1, 1, 12, 12]), {}) +cnt: 1, ((T([1024, 16, 1, 8, 1, 8], f16), [128, 128, 8, 8]), {}) +cnt: 1, ((T([65536, 4, 4, 2, 2], f16), [1024, 64, 16, 4]), {}) +cnt: 1, ((T([1024, 4, 16, 64], f16), [4096, 16, 64]), {}) +cnt: 1, ((T([4096, 4, 4, 16], f16), [1024, 4, 16, 16]), {}) +cnt: 1, ((T([1024, 80, 4, 144], f16), [128, 640, 2, 2, 12, 12]), {}) +cnt: 1, ((T([1024, 16, 2, 4, 2, 4], f16), [128, 128, 8, 8]), {}) +cnt: 1, ((T([32768, 8, 8, 2, 2], f16), [1024, 32, 64, 4]), {}) +cnt: 1, ((T([1024, 4, 64, 32], f16), [4096, 64, 32]), {}) +cnt: 1, ((T([4096, 8, 8, 16], f16), [1024, 4, 64, 16]), {}) +cnt: 1, ((T([1024, 48, 4, 144], f16), [128, 384, 2, 2, 12, 12]), {}) +cnt: 1, ((T([1024, 16, 2, 8, 2, 8], f16), [128, 128, 16, 16]), {}) +Operator: aten.add.Tensor +cnt: 31, ((T([], i64), 1), {}) +cnt: 4, ((T([128, 256, 64, 64], f16), T([128, 256, 64, 64], f16)), {}) +cnt: 4, ((T([128, 512, 32, 32], f16), T([128, 512, 32, 32], f16)), {}) +cnt: 4, ((T([128, 1024, 16, 16], f16), T([128, 1024, 16, 16], f16)), {}) +cnt: 1, ((T([4096, 8, 8, 12, 12], f16, stride=(1656, 23, 207, 1, 0)), T([4096, 8, 8, 12, 12], f16, stride=(1656, 207, 23, 0, 1))), {}) +cnt: 1, ((T([1024, 4, 64, 144], f16), T([1024, 4, 64, 144], f16)), {}) +cnt: 1, ((T([4096, 4, 4, 12, 12], f16, stride=(460, 23, 115, 1, 0)), T([4096, 4, 4, 12, 12], f16, stride=(460, 115, 23, 0, 1))), {}) +cnt: 1, ((T([1024, 4, 16, 144], f16), T([1024, 4, 16, 144], f16)), {}) +cnt: 3, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16)), {}) +cnt: 1, ((T([1024, 8, 8, 12, 12], f16, stride=(1656, 23, 207, 1, 0)), T([1024, 8, 8, 12, 12], f16, stride=(1656, 207, 23, 0, 1))), {}) +cnt: 1, ((T([1024, 1, 64, 144], f16), T([1024, 1, 64, 144], f16)), {}) +cnt: 1, 
((T([1024, 8, 8, 16], f16, stride=(1024, 16, 128, 1)), T([1024, 8, 8, 16], f16)), {}) +cnt: 1, ((T([1024, 1, 64, 16], f16), T([1024, 1, 64, 16], f16)), {}) +cnt: 1, ((T([128, 512, 8, 8], f16), T([128, 512, 8, 8], f16)), {}) +cnt: 1, ((T([4096, 4, 4, 16], f16, stride=(256, 16, 64, 1)), T([4096, 4, 4, 16], f16)), {}) +cnt: 1, ((T([1024, 4, 16, 16], f16), T([1024, 4, 16, 16], f16)), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 512, 16, 16], f16)), {}) +cnt: 1, ((T([4096, 8, 8, 16], f16, stride=(1024, 16, 128, 1)), T([4096, 8, 8, 16], f16)), {}) +cnt: 1, ((T([1024, 4, 64, 16], f16), T([1024, 4, 64, 16], f16)), {}) +cnt: 2, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16)), {}) +cnt: 2, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16)), {}) +cnt: 3, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.bmm.default +cnt: 1, ((T([4096, 64, 16], f16), T([4096, 16, 144], f16)), {}) +cnt: 1, ((T([4096, 64, 144], f16), T([4096, 144, 32], f16)), {}) +cnt: 1, ((T([4096, 16, 16], f16), T([4096, 16, 144], f16)), {}) +cnt: 1, ((T([4096, 16, 144], f16), T([4096, 144, 64], f16)), {}) +cnt: 1, ((T([1024, 64, 16], f16, stride=(1024, 1, 64)), T([1024, 16, 144], f16, stride=(11520, 144, 1))), {}) +cnt: 1, ((T([1024, 64, 144], f16), T([1024, 144, 64], f16, stride=(11520, 1, 144))), {}) +cnt: 1, ((T([1024, 144, 64], f16, stride=(9216, 1, 144)), T([1024, 64, 64], f16, stride=(4096, 1, 64))), {}) +cnt: 1, ((T([1024, 64, 64], f16, stride=(4096, 1, 64)), T([1024, 64, 144], f16, stride=(11520, 144, 1))), {}) +cnt: 1, ((T([1024, 16, 64], f16), T([1024, 64, 144], f16)), {}) +cnt: 1, ((T([1024, 64, 144], f16), T([1024, 144, 16], f16, stride=(11520, 1, 144))), {}) +cnt: 1, ((T([4096, 144, 16], f16, stride=(2304, 1, 144)), T([4096, 16, 64], f16)), {}) +cnt: 1, ((T([4096, 16, 64], f16), T([4096, 64, 144], f16, stride=(9216, 1, 64))), 
{}) +cnt: 1, ((T([4096, 16, 16], f16, stride=(256, 1, 16)), T([4096, 16, 144], f16)), {}) +cnt: 1, ((T([4096, 16, 144], f16), T([4096, 144, 16], f16, stride=(2304, 1, 144))), {}) +cnt: 1, ((T([4096, 144, 64], f16, stride=(9216, 1, 144)), T([4096, 64, 32], f16)), {}) +cnt: 1, ((T([4096, 64, 32], f16), T([4096, 32, 144], f16, stride=(4608, 1, 32))), {}) +cnt: 1, ((T([4096, 16, 64], f16, stride=(1024, 1, 16)), T([4096, 64, 144], f16)), {}) +cnt: 1, ((T([4096, 64, 144], f16), T([4096, 144, 16], f16, stride=(2304, 1, 144))), {}) +Operator: aten.cat.default +cnt: 1, (([T([1024, 1, 144, 16], f16, stride=(2304, 2304, 1, 144)), T([1024, 1, 144, 64], f16)], 3), {}) +cnt: 1, (([T([1024, 4, 144, 16], f16, stride=(9216, 2304, 1, 144)), T([1024, 4, 144, 64], f16)], 3), {}) +cnt: 1, (([T([1024, 4, 144, 16], f16, stride=(9216, 2304, 1, 144)), T([1024, 4, 144, 32], f16)], 3), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 256, 256], f16),), {}) +cnt: 1, ((T([128, 24, 128, 128], f16),), {}) +cnt: 1, ((T([128, 32, 128, 128], f16),), {}) +cnt: 1, ((T([128, 64, 128, 128], f16),), {}) +cnt: 4, ((T([128, 64, 64, 64], f16),), {}) +cnt: 2, ((T([128, 256, 64, 64], f16),), {}) +cnt: 1, ((T([128, 128, 64, 64], f16),), {}) +cnt: 3, ((T([128, 128, 32, 32], f16),), {}) +cnt: 2, ((T([128, 512, 32, 32], f16),), {}) +cnt: 1, ((T([128, 256, 32, 32], f16),), {}) +cnt: 3, ((T([128, 256, 16, 16], f16),), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16),), {}) +cnt: 1, ((T([128, 512, 16, 16], f16),), {}) +cnt: 3, ((T([128, 512, 8, 8], f16),), {}) +cnt: 2, ((T([128, 2048, 8, 8], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([128, 384, 16, 16], f16), [2, 2, 2, 2], 0.0), {}) +cnt: 2, ((T([32768, 8, 23], f16), [0, 1], 0.0), {}) +cnt: 2, ((T([32768, 192], f16), [0, 15], 0.0), {}) +cnt: 1, ((T([128, 640, 16, 16], f16), [2, 2, 2, 2], 0.0), {}) +cnt: 2, ((T([16384, 4, 23], f16), [0, 1], 0.0), {}) +cnt: 2, ((T([16384, 96], f16), [0, 19], 0.0), {}) +cnt: 1, ((T([128, 640, 8, 8], f16), [2, 2, 
2, 2], 0.0), {}) +cnt: 2, ((T([8192, 8, 23], f16), [0, 1], 0.0), {}) +cnt: 2, ((T([8192, 192], f16), [0, 15], 0.0), {}) +cnt: 2, ((T([8192, 207], f16), [0, -15]), {}) +cnt: 2, ((T([8192, 8, 24], f16), [0, -1]), {}) +cnt: 1, ((T([128, 640, 12, 12], f16), [-2, -2, -2, -2]), {}) +cnt: 2, ((T([16384, 115], f16), [0, -19]), {}) +cnt: 2, ((T([16384, 4, 24], f16), [0, -1]), {}) +cnt: 1, ((T([128, 640, 20, 20], f16), [-2, -2, -2, -2]), {}) +cnt: 2, ((T([32768, 207], f16), [0, -15]), {}) +cnt: 2, ((T([32768, 8, 24], f16), [0, -1]), {}) +cnt: 1, ((T([128, 384, 20, 20], f16), [-2, -2, -2, -2]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 256, 256], f16), T([24, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([32, 24, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 64, 64], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 64, 64, 64], f16), T([64, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 4), {}) +cnt: 2, ((T([128, 1, 64], f16), T([1, 1, 3], f16), None, [1], [1], [1], False, [0], 1), {}) +cnt: 3, ((T([128, 64, 64, 64], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 64, 64], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 64, 64], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 2, ((T([128, 1, 128], f16), T([1, 1, 5], f16), None, [1], [2], [1], False, [0], 1), {}) +cnt: 2, ((T([128, 128, 32, 32], f16), T([512, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, 
((T([128, 256, 64, 64], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([256, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 16), {}) +cnt: 1, ((T([128, 1, 256], f16), T([1, 1, 5], f16), None, [1], [2], [1], False, [0], 1), {}) +cnt: 2, ((T([128, 256, 16, 16], f16), T([1024, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([256, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([384, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([640, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 512, 8, 8], f16), T([2048, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([2048, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([512, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 
512, 8, 8], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 8, 8], f16), T([640, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 2, ((T([128, 2048, 8, 8], f16), T([128, 512, 8, 8], f16), T([2048, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 640, 8, 8], f16), T([128, 512, 8, 8], f16), T([640, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 8, 8], f16), T([128, 512, 8, 8], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 8, 8], f16), T([128, 2048, 8, 8], f16), T([512, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([128, 1024, 16, 16], f16), T([2048, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 640, 16, 16], f16), T([128, 512, 16, 16], f16), T([640, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 8, 8], f16), T([128, 512, 16, 16], f16), T([128, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 1024, 16, 16], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16), T([128, 256, 16, 16], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 16, 16], f16), T([128, 256, 16, 16], f16), T([384, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 16, 16], f16), T([128, 256, 16, 16], f16), T([128, 256, 1, 1], 
f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([128, 1024, 16, 16], f16), T([256, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1024, 16, 16], f16), T([128, 512, 32, 32], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1, 256], f16), T([128, 1, 256], f16), T([1, 1, 5], f16), [0], [1], [2], [1], False, [0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([128, 256, 32, 32], f16), T([256, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 16, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([128, 512, 32, 32], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 512, 32, 32], f16), T([128, 128, 32, 32], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 1, 128], f16), T([128, 1, 128], f16), T([1, 1, 5], f16), [0], [1], [2], [1], False, [0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16), T([128, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 512, 32, 32], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 32, 32], f16), T([128, 256, 64, 64], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 32, 32], f16), T([128, 128, 64, 64], f16), T([128, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 256, 64, 64], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, 
True, False]), {}) +cnt: 3, ((T([128, 256, 64, 64], f16), T([128, 64, 64, 64], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 1, 64], f16), T([128, 1, 64], f16), T([1, 1, 3], f16), [0], [1], [1], [1], False, [0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16), T([64, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 4, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 64, 64], f16), T([128, 256, 64, 64], f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([128, 32, 128, 128], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 24, 128, 128], f16), T([32, 24, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([128, 3, 256, 256], f16), T([24, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 256, 256], f16), T([128, 3, 256, 256], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2048, 8, 8], f16, stride=(2048, 1, 0, 0)), 64), {}) +cnt: 1, ((T([128, 256, 16, 16], f16, stride=(256, 1, 0, 0)), 256), {}) +cnt: 2, ((T([128, 128, 32, 32], f16, stride=(128, 1, 0, 0)), 1024), {}) +cnt: 2, ((T([128, 64, 64, 64], f16, stride=(64, 1, 0, 0)), 4096), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 64, 128, 128], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 64, 64, 64], 
f16), T([128, 64, 128, 128], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([128, 64, 64, 64], i64)), {}) +Operator: aten.mean.dim +cnt: 2, ((T([128, 64, 64, 64], f16), [2, 3]), {}) +cnt: 2, ((T([128, 128, 32, 32], f16), [2, 3]), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), [2, 3]), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 2, ((T([262144, 16], f16), T([16, 23], f16, stride=(1, 16))), {}) +cnt: 4, ((T([65536, 16], f16), T([16, 23], f16, stride=(1, 16))), {}) +cnt: 1, ((T([128, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 2048], f16)), {}) +cnt: 4, ((T([23, 65536], f16, stride=(1, 23)), T([65536, 16], f16)), {}) +cnt: 4, ((T([65536, 23], f16), T([23, 16], f16)), {}) +cnt: 2, ((T([23, 262144], f16, stride=(1, 23)), T([262144, 16], f16)), {}) +cnt: 2, ((T([262144, 23], f16), T([23, 16], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16, stride=(64, 1, 0, 0))), {}) +cnt: 4, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16, stride=(128, 1, 0, 0))), {}) +cnt: 2, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16, stride=(256, 1, 0, 0))), {}) +cnt: 2, ((T([1024, 4, 64, 144], f16), 0.25), {}) +cnt: 2, ((T([1024, 4, 16, 144], f16), 0.25), {}) +cnt: 2, ((T([1024, 1, 64, 144], f16), 0.25), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16)), {}) +cnt: 2, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16)), {}) +cnt: 2, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 24, 128, 128], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 4, 
((T([128, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 512, 32, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 1024, 16, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 2048, 8, 8], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 3, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 512, 8, 8], f16), T([128, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 1024, 16, 16], f16), T([128, 1024, 16, 16], f16), 
T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([128, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 512, 32, 32], f16), T([128, 512, 32, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 256, 64, 64], f16), T([128, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([128, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([128, 24, 128, 128], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], 
i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.sigmoid.default +cnt: 2, ((T([128, 1, 64], f16),), {}) +cnt: 2, ((T([128, 1, 128], f16),), {}) +cnt: 1, ((T([128, 1, 256], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 1, ((T([128, 1, 256], f16), T([128, 1, 256], f16)), {}) +cnt: 2, ((T([128, 1, 128], f16), T([128, 1, 128], f16)), {}) +cnt: 2, ((T([128, 1, 64], f16), T([128, 1, 64], f16)), {}) +Operator: aten.silu_.default +cnt: 1, ((T([128, 24, 128, 128], f16),), {}) +cnt: 1, ((T([128, 32, 128, 128], f16),), {}) +cnt: 1, ((T([128, 64, 128, 128], f16),), {}) +cnt: 4, ((T([128, 64, 64, 64], f16),), {}) +cnt: 2, ((T([128, 256, 64, 64], f16),), {}) +cnt: 1, ((T([128, 128, 64, 64], f16),), {}) +cnt: 3, ((T([128, 128, 32, 32], f16),), {}) +cnt: 2, ((T([128, 512, 32, 32], f16),), {}) +cnt: 1, ((T([128, 256, 32, 32], f16),), {}) +cnt: 3, ((T([128, 256, 16, 16], f16),), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16),), {}) +cnt: 1, ((T([128, 512, 16, 16], f16),), {}) +cnt: 3, ((T([128, 512, 8, 8], f16),), {}) +cnt: 2, ((T([128, 2048, 8, 8], f16),), {}) +Operator: aten.silu_backward.default +cnt: 2, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16)), {}) +cnt: 3, ((T([128, 512, 8, 8], f16), T([128, 512, 8, 8], f16)), {}) +cnt: 1, ((T([128, 512, 16, 16], f16), T([128, 512, 16, 16], f16)), {}) +cnt: 2, ((T([128, 1024, 16, 16], f16), T([128, 1024, 16, 16], f16)), {}) +cnt: 3, ((T([128, 256, 16, 16], f16), T([128, 256, 16, 16], f16)), {}) +cnt: 1, ((T([128, 256, 32, 32], f16), T([128, 256, 32, 32], f16)), {}) +cnt: 2, ((T([128, 512, 32, 32], f16), T([128, 512, 32, 32], f16)), {}) +cnt: 3, ((T([128, 128, 32, 32], f16), T([128, 128, 32, 32], f16)), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 128, 64, 64], f16)), {}) +cnt: 2, ((T([128, 256, 64, 64], f16), T([128, 256, 64, 64], f16)), {}) +cnt: 4, ((T([128, 64, 64, 64], f16), T([128, 64, 64, 64], 
f16)), {}) +cnt: 1, ((T([128, 64, 128, 128], f16), T([128, 64, 128, 128], f16)), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 128, 128], f16)), {}) +cnt: 1, ((T([128, 24, 128, 128], f16), T([128, 24, 128, 128], f16)), {}) +Operator: aten.slice_backward.default +cnt: 2, ((T([8192, 8, 12], f16), [8192, 8, 23], 2, 11, 9223372036854775807, 1), {}) +cnt: 2, ((T([8192, 8, 23], f16), [8192, 9, 23], 1, 0, 8, 1), {}) +cnt: 2, ((T([8192, 9, 23], f16), [8192, 9, 23], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([16384, 4, 12], f16), [16384, 4, 23], 2, 11, 9223372036854775807, 1), {}) +cnt: 2, ((T([16384, 4, 23], f16), [16384, 5, 23], 1, 0, 4, 1), {}) +cnt: 2, ((T([16384, 5, 23], f16), [16384, 5, 23], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([32768, 8, 12], f16), [32768, 8, 23], 2, 11, 9223372036854775807, 1), {}) +cnt: 2, ((T([32768, 8, 23], f16), [32768, 9, 23], 1, 0, 8, 1), {}) +cnt: 2, ((T([32768, 9, 23], f16), [32768, 9, 23], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.split_with_sizes.default +cnt: 1, ((T([1024, 4, 144, 48], f16, stride=(27648, 144, 1, 576)), [16, 32], -1), {}) +cnt: 1, ((T([1024, 4, 144, 80], f16, stride=(46080, 144, 1, 576)), [16, 64], -1), {}) +cnt: 1, ((T([1024, 1, 144, 80], f16, stride=(11520, 144, 1, 144)), [16, 64], -1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 1, ((T([1024, 8, 12, 8, 12], f16, stride=(9216, 144, 1, 1152, 12)), [2], True), {}) +cnt: 1, ((T([1024, 8, 12, 8, 12], f16, stride=(9216, 1152, 12, 144, 1)), [2], True), {}) +cnt: 1, ((T([4096, 4, 12, 4, 12], f16, stride=(2304, 144, 1, 576, 12)), [2], True), {}) +cnt: 1, ((T([4096, 4, 12, 4, 12], f16, stride=(2304, 576, 12, 144, 1)), [2], True), {}) +cnt: 1, ((T([4096, 8, 12, 8, 12], f16, stride=(9216, 144, 1, 1152, 12)), [2], True), {}) +cnt: 1, ((T([4096, 8, 12, 8, 12], f16, stride=(9216, 1152, 12, 144, 1)), [2], True), {}) +cnt: 1, ((T([128, 256, 16, 16], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 128, 32, 32], 
f16), [2, 3], True), {}) +cnt: 2, ((T([128, 64, 64, 64], f16), [2, 3], True), {}) +Operator: aten.unfold_backward.default +cnt: 1, ((T([128, 640, 1, 1, 12, 12], f16), [128, 640, 1, 12, 12], 3, 12, 8), {}) +cnt: 1, ((T([128, 640, 1, 12, 12], f16), [128, 640, 12, 12], 2, 12, 8), {}) +cnt: 1, ((T([128, 640, 2, 2, 12, 12], f16), [128, 640, 2, 20, 12], 3, 12, 8), {}) +cnt: 1, ((T([128, 640, 2, 20, 12], f16), [128, 640, 20, 20], 2, 12, 8), {}) +cnt: 1, ((T([128, 384, 2, 2, 12, 12], f16), [128, 384, 2, 20, 12], 3, 12, 8), {}) +cnt: 1, ((T([128, 384, 2, 20, 12], f16), [128, 384, 20, 20], 2, 12, 8), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/ecaresnet101d_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/ecaresnet101d_training.txt new file mode 100644 index 000000000..21e66cff1 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/ecaresnet101d_training.txt @@ -0,0 +1,195 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 5, ((T([64, 2048, 7, 7], f16), T([64, 2048, 7, 7], f16)), {}) +cnt: 46, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16)), {}) +cnt: 8, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16)), {}) +cnt: 6, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16)), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 106, ((T([], i64), 1), {}) +cnt: 3, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16)), {}) +cnt: 4, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16)), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16)), {}) +cnt: 3, ((T([64, 2048, 7, 7], f16), T([64, 2048, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([64, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), 
{}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([64, 256, 56, 56], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([64, 1024, 7, 7], f16), T([64, 1024, 14, 14], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 28, 28], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 256, 56, 56], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([32, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 64, 56, 56], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([64, 64, 56, 56], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 1, 256], f16), T([1, 1, 5], f16), None, [1], [2], [1], False, [0], 1), {}) +cnt: 2, ((T([64, 256, 56, 56], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([128, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([64, 128, 28, 28], f16), T([512, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], 
False, [0, 0], 1), {}) +cnt: 4, ((T([64, 1, 512], f16), T([1, 1, 5], f16), None, [1], [2], [1], False, [0], 1), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([512, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 512, 28, 28], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 128, 28, 28], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([256, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 23, ((T([64, 256, 14, 14], f16), T([1024, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 23, ((T([64, 1, 1024], f16), T([1, 1, 5], f16), None, [1], [2], [1], False, [0], 1), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([1024, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 22, ((T([64, 1024, 14, 14], f16), T([256, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 22, ((T([64, 256, 14, 14], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([512, 512, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 512, 7, 7], f16), T([2048, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 1, 2048], f16), T([1, 1, 7], f16), None, [1], [3], [1], False, [0], 1), {}) +cnt: 1, ((T([64, 1024, 7, 7], f16), T([2048, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 2048, 7, 7], f16), T([512, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 512, 7, 7], f16), T([512, 
512, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([64, 1, 2048], f16), T([64, 1, 2048], f16), T([1, 1, 7], f16), [0], [1], [3], [1], False, [0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 2048, 7, 7], f16), T([64, 512, 7, 7], f16), T([2048, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16), T([512, 512, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 512, 7, 7], f16), T([64, 2048, 7, 7], f16), T([512, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 2048, 7, 7], f16), T([64, 1024, 7, 7], f16), T([2048, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 14, 14], f16), T([512, 512, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 23, ((T([64, 1, 1024], f16), T([64, 1, 1024], f16), T([1, 1, 5], f16), [0], [1], [2], [1], False, [0], 1, [True, True, False]), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16), T([64, 256, 14, 14], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 22, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 22, ((T([64, 256, 14, 14], f16), T([64, 1024, 14, 14], f16), T([256, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16), T([64, 512, 14, 14], f16), T([1024, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 
0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 14, 14], f16), T([64, 256, 28, 28], f16), T([256, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 512, 28, 28], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([64, 1, 512], f16), T([64, 1, 512], f16), T([1, 1, 5], f16), [0], [1], [2], [1], False, [0], 1, [True, True, False]), {}) +cnt: 4, ((T([64, 512, 28, 28], f16), T([64, 128, 28, 28], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 128, 28, 28], f16), T([64, 512, 28, 28], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), T([64, 256, 28, 28], f16), T([512, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 28, 28], f16), T([64, 128, 56, 56], f16), T([128, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 256, 56, 56], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 1, 256], f16), T([64, 1, 256], f16), T([1, 1, 5], f16), [0], [1], [2], [1], False, [0], 1, [True, True, False]), {}) +cnt: 4, ((T([64, 256, 56, 56], f16), T([64, 64, 56, 56], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 64, 56, 56], f16), T([64, 256, 56, 56], 
f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 32, 112, 112], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), T([32, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 4, ((T([64, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16, stride=(1024, 1, 0, 0)), 196), {}) +cnt: 4, ((T([64, 512, 28, 28], f16, stride=(512, 1, 0, 0)), 784), {}) +cnt: 3, ((T([64, 256, 56, 56], f16, stride=(256, 1, 0, 0)), 3136), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([64, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([64, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 3, ((T([64, 256, 56, 56], f16), [2, 3]), {}) +cnt: 4, ((T([64, 512, 28, 28], f16), [2, 3]), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16), [2, 3]), {}) +cnt: 3, ((T([64, 2048, 7, 7], f16), [2, 3]), {}) +cnt: 1, ((T([64, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 2048], f16)), {}) 
+Operator: aten.mul.Tensor +cnt: 6, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16, stride=(256, 1, 0, 0))), {}) +cnt: 8, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16, stride=(512, 1, 0, 0))), {}) +cnt: 46, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16, stride=(1024, 1, 0, 0))), {}) +cnt: 6, ((T([64, 2048, 7, 7], f16), T([64, 2048, 7, 7], f16, stride=(2048, 1, 0, 0))), {}) +cnt: 3, ((T([64, 2048, 7, 7], f16), T([64, 2048, 7, 7], f16)), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16)), {}) +cnt: 4, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16)), {}) +cnt: 3, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([64, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([64, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([64, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([64, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 45, ((T([64, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 24, ((T([64, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([512], f16), T([512], f16), 
T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([64, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([64, 2048, 7, 7], f16), T([64, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 24, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 45, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 256, 56, 56], f16), 
T([64, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 2, ((T([64, 32, 112, 112], f16),), {}) +cnt: 1, ((T([64, 64, 112, 112], f16),), {}) +cnt: 6, ((T([64, 64, 56, 56], f16),), {}) +cnt: 3, ((T([64, 256, 56, 56], f16),), {}) +cnt: 1, ((T([64, 128, 56, 56], f16),), {}) +cnt: 7, ((T([64, 128, 28, 28], f16),), {}) +cnt: 4, ((T([64, 512, 28, 28], f16),), {}) +cnt: 1, ((T([64, 256, 28, 28], f16),), {}) +cnt: 45, ((T([64, 256, 14, 14], f16),), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16),), {}) +cnt: 1, ((T([64, 512, 14, 14], f16),), {}) +cnt: 5, ((T([64, 512, 7, 7], f16),), {}) +cnt: 3, ((T([64, 2048, 7, 7], f16),), {}) +Operator: aten.sigmoid.default +cnt: 3, ((T([64, 1, 256], f16),), {}) +cnt: 4, ((T([64, 1, 512], f16),), {}) +cnt: 23, ((T([64, 1, 1024], f16),), {}) +cnt: 3, ((T([64, 1, 2048], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 3, ((T([64, 1, 2048], f16), T([64, 1, 2048], f16)), {}) +cnt: 23, ((T([64, 1, 1024], f16), T([64, 1, 1024], f16)), {}) +cnt: 4, ((T([64, 1, 512], f16), T([64, 1, 512], f16)), {}) +cnt: 3, ((T([64, 1, 256], f16), T([64, 1, 256], f16)), {}) +Operator: 
aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 3, ((T([64, 2048, 7, 7], f16), [2, 3], True), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16), [2, 3], True), {}) +cnt: 4, ((T([64, 512, 28, 28], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 256, 56, 56], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 3, ((T([64, 2048, 7, 7], f16), T([64, 2048, 7, 7], f16), 0), {}) +cnt: 5, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), 0), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16), 0), {}) +cnt: 45, ((T([64, 256, 14, 14], f16), T([64, 256, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 256, 28, 28], f16), 0), {}) +cnt: 4, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16), 0), {}) +cnt: 7, ((T([64, 128, 28, 28], f16), T([64, 128, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 128, 56, 56], f16), 0), {}) +cnt: 3, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16), 0), {}) +cnt: 6, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), 0), {}) +cnt: 2, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/ese_vovnet19b_dw_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/ese_vovnet19b_dw_training.txt new file mode 100644 index 000000000..f81cd27ec --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/ese_vovnet19b_dw_training.txt @@ -0,0 +1,182 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 23, ((T([], i64), 1), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16)), {}) +cnt: 2, 
((T([128, 224, 7, 7], f16, stride=(70560, 49, 7, 1)), T([128, 224, 7, 7], f16)), {}) +cnt: 1, ((T([128, 768, 7, 7], f16, stride=(70560, 49, 7, 1)), T([128, 768, 7, 7], f16)), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([128, 768, 14, 14], f16)), {}) +cnt: 2, ((T([128, 192, 14, 14], f16, stride=(213248, 196, 14, 1)), T([128, 192, 14, 14], f16)), {}) +cnt: 1, ((T([128, 512, 14, 14], f16, stride=(213248, 196, 14, 1)), T([128, 512, 14, 14], f16)), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16)), {}) +cnt: 2, ((T([128, 160, 28, 28], f16, stride=(577024, 784, 28, 1)), T([128, 160, 28, 28], f16)), {}) +cnt: 1, ((T([128, 256, 28, 28], f16, stride=(577024, 784, 28, 1)), T([128, 256, 28, 28], f16)), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), {}) +cnt: 2, ((T([128, 128, 56, 56], f16, stride=(1404928, 3136, 56, 1)), T([128, 128, 56, 56], f16)), {}) +cnt: 1, ((T([128, 64, 56, 56], f16, stride=(1404928, 3136, 56, 1)), T([128, 64, 56, 56], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {}) +Operator: aten.cat.default +cnt: 1, (([T([128, 64, 56, 56], f16), T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16)], 1), {}) +cnt: 1, (([T([128, 256, 28, 28], f16), T([128, 160, 28, 28], f16), T([128, 160, 28, 28], f16), T([128, 160, 28, 28], f16)], 1), {}) +cnt: 1, (([T([128, 512, 14, 14], f16), T([128, 192, 14, 14], f16), T([128, 192, 14, 14], f16), T([128, 192, 14, 14], f16)], 1), {}) +cnt: 1, (([T([128, 768, 7, 7], f16), T([128, 224, 7, 7], f16), T([128, 224, 7, 7], f16), T([128, 224, 7, 7], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([64, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([64, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], 
False, [0, 0], 64), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([64, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 128, 56, 56], f16), T([128, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 128), {}) +cnt: 3, ((T([128, 128, 56, 56], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 448, 56, 56], f16), T([256, 448, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([256, 256, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 28, 28], f16), T([160, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 160, 28, 28], f16), T([160, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 160), {}) +cnt: 3, ((T([128, 160, 28, 28], f16), T([160, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 736, 28, 28], f16), T([512, 736, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 1, 1], f16), T([512, 512, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 14, 14], f16), T([192, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 192, 14, 14], f16), T([192, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 192), {}) +cnt: 3, ((T([128, 192, 14, 14], f16), T([192, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1088, 14, 14], f16), T([768, 1088, 1, 1], f16), None, [1, 1], 
[0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 768, 1, 1], f16), T([768, 768, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), T([224, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 224, 7, 7], f16), T([224, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 224), {}) +cnt: 3, ((T([128, 224, 7, 7], f16), T([224, 224, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1440, 7, 7], f16), T([1024, 1440, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 1, 1], f16), T([1024, 1024, 1, 1], f16), T([1024], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1024, 1, 1], f16), T([128, 1024, 1, 1], f16), T([1024, 1024, 1, 1], f16), [1024], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 1440, 7, 7], f16), T([1024, 1440, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 224, 7, 7], f16), T([128, 224, 7, 7], f16), T([224, 224, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 224, 7, 7], f16), T([128, 224, 7, 7], f16), T([224, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 224, [True, True, False]), {}) +cnt: 1, ((T([128, 224, 7, 7], f16), T([128, 768, 7, 7], f16), T([224, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 768, 1, 1], f16), T([128, 768, 1, 1], f16), T([768, 768, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([128, 1088, 14, 14], f16), T([768, 1088, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 192, 14, 14], f16), 
T([128, 192, 14, 14], f16), T([192, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 192, 14, 14], f16), T([128, 192, 14, 14], f16), T([192, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 192, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), T([128, 512, 14, 14], f16), T([192, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 1, 1], f16), T([128, 512, 1, 1], f16), T([512, 512, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([128, 736, 28, 28], f16), T([512, 736, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 160, 28, 28], f16), T([128, 160, 28, 28], f16), T([160, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 160, 28, 28], f16), T([128, 160, 28, 28], f16), T([160, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 160, [True, True, False]), {}) +cnt: 1, ((T([128, 160, 28, 28], f16), T([128, 256, 28, 28], f16), T([160, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16), T([256, 256, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 448, 56, 56], f16), T([256, 448, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), T([128, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 128, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 64, 
56, 56], f16), T([128, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 112, 112], f16), T([64, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), T([64, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 3, 224, 224], f16), T([64, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 2, ((T([128, 1024, 7, 7], f16, stride=(1024, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 768, 14, 14], f16, stride=(768, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 512, 28, 28], f16, stride=(512, 1, 0, 0)), 784), {}) +cnt: 1, ((T([128, 256, 56, 56], f16, stride=(256, 1, 0, 0)), 3136), {}) +Operator: aten.hardsigmoid.default +cnt: 1, ((T([128, 256, 1, 1], f16),), {}) +cnt: 1, ((T([128, 512, 1, 1], f16),), {}) +cnt: 1, ((T([128, 768, 1, 1], f16),), {}) +cnt: 1, ((T([128, 1024, 1, 1], f16),), {}) +Operator: aten.hardsigmoid_backward.default +cnt: 1, ((T([128, 1024, 1, 1], f16), T([128, 1024, 1, 1], f16)), {}) +cnt: 1, ((T([128, 768, 1, 1], f16), T([128, 768, 1, 1], f16)), {}) +cnt: 1, ((T([128, 512, 1, 1], f16), T([128, 512, 1, 1], f16)), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: 
aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 256, 56, 56], f16), [3, 3], [2, 2], [0, 0], [1, 1], True), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), [3, 3], [2, 2], [0, 0], [1, 1], True), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), [3, 3], [2, 2], [0, 0], [1, 1], True), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 768, 7, 7], f16), T([128, 768, 14, 14], f16), [3, 3], [2, 2], [0, 0], [1, 1], True, T([128, 768, 7, 7], i64)), {}) +cnt: 1, ((T([128, 512, 14, 14], f16), T([128, 512, 28, 28], f16), [3, 3], [2, 2], [0, 0], [1, 1], True, T([128, 512, 14, 14], i64)), {}) +cnt: 1, ((T([128, 256, 28, 28], f16), T([128, 256, 56, 56], f16), [3, 3], [2, 2], [0, 0], [1, 1], True, T([128, 256, 28, 28], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 256, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1024], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1024], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([128, 256, 56, 56], f16), T([128, 256, 1, 1], f16)), {}) +cnt: 2, ((T([128, 512, 28, 28], f16), T([128, 512, 1, 1], f16)), {}) +cnt: 2, ((T([128, 768, 14, 14], f16), T([128, 768, 1, 1], f16)), {}) +cnt: 2, ((T([128, 1024, 7, 7], f16), T([128, 1024, 1, 1], f16)), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16)), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([128, 768, 14, 14], f16)), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16)), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 
64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 160, 28, 28], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 192, 14, 14], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 224, 7, 7], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 224, 7, 7], f16), T([128, 224, 7, 7], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([128, 768, 14, 14], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 192, 14, 14], f16), T([128, 192, 14, 14], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, 
[True, True, True]), {}) +cnt: 4, ((T([128, 160, 28, 28], f16), T([128, 160, 28, 28], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 2, ((T([128, 64, 112, 112], f16),), {}) +cnt: 1, ((T([128, 64, 56, 56], f16),), {}) +cnt: 4, ((T([128, 128, 56, 56], f16),), {}) +cnt: 1, ((T([128, 256, 56, 56], f16),), {}) +cnt: 4, ((T([128, 160, 28, 28], f16),), {}) +cnt: 1, ((T([128, 512, 28, 28], f16),), {}) +cnt: 4, ((T([128, 192, 14, 14], f16),), {}) +cnt: 1, ((T([128, 768, 14, 14], f16),), {}) +cnt: 4, ((T([128, 224, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, 
((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16), 0), {}) +cnt: 1, ((T([128, 224, 7, 7], f16, stride=(70560, 49, 7, 1)), T([128, 224, 7, 7], f16), 0), {}) +cnt: 3, ((T([128, 224, 7, 7], f16), T([128, 224, 7, 7], f16), 0), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([128, 768, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 192, 14, 14], f16, stride=(213248, 196, 14, 1)), T([128, 192, 14, 14], f16), 0), {}) +cnt: 3, ((T([128, 192, 14, 14], f16), T([128, 192, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 160, 28, 28], f16, stride=(577024, 784, 28, 1)), T([128, 160, 28, 28], f16), 0), {}) +cnt: 3, ((T([128, 160, 28, 28], f16), T([128, 160, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 128, 56, 56], f16, stride=(1404928, 3136, 56, 1)), T([128, 128, 56, 56], f16), 0), {}) +cnt: 3, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), 0), {}) +cnt: 2, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/fbnetc_100_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/fbnetc_100_training.txt new file mode 100644 index 000000000..4be2a0309 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/fbnetc_100_training.txt @@ -0,0 +1,189 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 65, ((T([], i64), 1), {}) +cnt: 2, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +cnt: 4, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 6, ((T([128, 32, 28, 28], f16), T([128, 32, 28, 28], f16)), {}) +cnt: 6, ((T([128, 64, 14, 
14], f16), T([128, 64, 14, 14], f16)), {}) +cnt: 6, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16)), {}) +cnt: 6, ((T([128, 184, 7, 7], f16), T([128, 184, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1984], f16), T([1984, 1000], f16, stride=(1, 1984))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 16, 112, 112], f16), T([16, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 16), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([96, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([96, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 96), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([24, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 24, 56, 56], f16), T([24, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([24, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 24), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([144, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([144, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 144), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([32, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 28, 28], f16), T([96, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 28, 28], f16), T([96, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 96), {}) +cnt: 1, ((T([128, 96, 28, 28], f16), T([32, 96, 1, 1], f16), None, [1, 
1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 32, 28, 28], f16), T([192, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([192, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 192), {}) +cnt: 2, ((T([128, 192, 28, 28], f16), T([32, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([192, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 192), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([192, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 192), {}) +cnt: 2, ((T([128, 192, 14, 14], f16), T([64, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 14, 14], f16), T([192, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), T([192, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 192), {}) +cnt: 3, ((T([128, 64, 14, 14], f16), T([384, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 384, 14, 14], f16), T([384, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 384), {}) +cnt: 2, ((T([128, 384, 14, 14], f16), T([64, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), T([112, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 112, 14, 14], f16), T([672, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([112, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([336, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 336, 14, 14], f16), T([336, 1, 5, 5], f16), None, [1, 1], 
[2, 2], [1, 1], False, [0, 0], 336), {}) +cnt: 1, ((T([128, 336, 14, 14], f16), T([112, 336, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([184, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 184, 7, 7], f16), T([1104, 184, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 1104, 7, 7], f16), T([1104, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1104), {}) +cnt: 3, ((T([128, 1104, 7, 7], f16), T([184, 1104, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), T([1104, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1104), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), T([352, 1104, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 352, 7, 7], f16), T([1984, 352, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1984, 7, 7], f16), T([128, 352, 7, 7], f16), T([1984, 352, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 352, 7, 7], f16), T([128, 1104, 7, 7], f16), T([352, 1104, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), T([128, 1104, 7, 7], f16), T([1104, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1104, [True, True, False]), {}) +cnt: 4, ((T([128, 1104, 7, 7], f16), T([128, 184, 7, 7], f16), T([1104, 184, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 184, 7, 7], f16), T([128, 1104, 7, 7], f16), T([184, 1104, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 1104, 7, 7], 
f16), T([128, 1104, 7, 7], f16), T([1104, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1104, [True, True, False]), {}) +cnt: 1, ((T([128, 184, 7, 7], f16), T([128, 672, 7, 7], f16), T([184, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 3, ((T([128, 672, 14, 14], f16), T([128, 112, 14, 14], f16), T([672, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([128, 336, 14, 14], f16), T([112, 336, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 336, 14, 14], f16), T([128, 336, 14, 14], f16), T([336, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 336, [True, True, False]), {}) +cnt: 1, ((T([128, 336, 14, 14], f16), T([128, 112, 14, 14], f16), T([336, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([128, 672, 14, 14], f16), T([112, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([128, 384, 14, 14], f16), T([112, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), T([384, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 384, [True, True, False]), {}) +cnt: 3, ((T([128, 384, 14, 14], f16), T([128, 64, 14, 14], f16), T([384, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 64, 14, 14], f16), T([128, 
384, 14, 14], f16), T([64, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 64, 14, 14], f16), T([128, 192, 14, 14], f16), T([64, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), T([128, 192, 14, 14], f16), T([192, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 192, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), T([128, 64, 14, 14], f16), T([192, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), T([128, 192, 28, 28], f16), T([192, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 192, [True, True, False]), {}) +cnt: 3, ((T([128, 192, 28, 28], f16), T([128, 32, 28, 28], f16), T([192, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 32, 28, 28], f16), T([128, 192, 28, 28], f16), T([32, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), T([192, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 192, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), T([192, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 192, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 28, 28], f16), T([128, 96, 28, 28], f16), T([32, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 28, 28], f16), T([128, 96, 28, 28], f16), T([96, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 28, 28], f16), T([128, 32, 28, 28], f16), T([96, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 28, 28], f16), T([128, 144, 28, 28], f16), T([32, 
144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 56, 56], f16), T([144, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([128, 24, 56, 56], f16), T([144, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 24, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 96, 56, 56], f16), T([24, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 112, 112], f16), T([96, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 16, 112, 112], f16), T([96, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 16, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1984, 7, 7], f16, stride=(1984, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], 
i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 1984, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1984], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1984], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 4, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 32, 28, 28], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 96, 28, 28], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 192, 14, 14], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 64, 14, 14], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([128, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 
336, 14, 14], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 184, 7, 7], f16), T([184], f16), T([184], f16), T([184], f16), T([184], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([128, 1104, 7, 7], f16), T([1104], f16), T([1104], f16), T([1104], f16), T([1104], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 352, 7, 7], f16), T([352], f16), T([352], f16), T([352], f16), T([352], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1984, 7, 7], f16), T([1984], f16), T([1984], f16), T([1984], f16), T([1984], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1984, 7, 7], f16), T([128, 1984, 7, 7], f16), T([1984], f16), T([1984], f16), T([1984], f16), T([1984], f32), T([1984], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 352, 7, 7], f16), T([128, 352, 7, 7], f16), T([352], f16), T([352], f16), T([352], f16), T([352], f32), T([352], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([128, 1104, 7, 7], f16), T([128, 1104, 7, 7], f16), T([1104], f16), T([1104], f16), T([1104], f16), T([1104], f32), T([1104], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 184, 7, 7], f16), T([128, 184, 7, 7], f16), T([184], f16), T([184], f16), T([184], f16), T([184], f32), T([184], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), True, 1e-05, [True, 
True, True]), {}) +cnt: 2, ((T([128, 336, 14, 14], f16), T([128, 336, 14, 14], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 64, 14, 14], f16), T([128, 64, 14, 14], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 192, 14, 14], f16), T([128, 192, 14, 14], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 32, 28, 28], f16), T([128, 32, 28, 28], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 96, 28, 28], f16), T([128, 96, 28, 28], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 112, 112], 
f16), T([128, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 3, ((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 96, 112, 112], f16),), {}) +cnt: 1, ((T([128, 96, 56, 56], f16),), {}) +cnt: 4, ((T([128, 24, 56, 56], f16),), {}) +cnt: 1, ((T([128, 144, 56, 56], f16),), {}) +cnt: 1, ((T([128, 144, 28, 28], f16),), {}) +cnt: 2, ((T([128, 96, 28, 28], f16),), {}) +cnt: 5, ((T([128, 192, 28, 28], f16),), {}) +cnt: 3, ((T([128, 192, 14, 14], f16),), {}) +cnt: 6, ((T([128, 384, 14, 14], f16),), {}) +cnt: 5, ((T([128, 672, 14, 14], f16),), {}) +cnt: 2, ((T([128, 336, 14, 14], f16),), {}) +cnt: 1, ((T([128, 672, 7, 7], f16),), {}) +cnt: 8, ((T([128, 1104, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1984, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 1984, 7, 7], f16), T([128, 1984, 7, 7], f16), 0), {}) +cnt: 8, ((T([128, 1104, 7, 7], f16), T([128, 1104, 7, 7], f16), 0), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16), 0), {}) +cnt: 5, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16), 0), {}) +cnt: 2, ((T([128, 336, 14, 14], f16), T([128, 336, 14, 14], f16), 0), {}) +cnt: 6, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), 0), {}) +cnt: 3, ((T([128, 192, 14, 14], f16), T([128, 192, 14, 14], f16), 0), {}) +cnt: 5, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), 0), {}) +cnt: 2, ((T([128, 96, 28, 
28], f16), T([128, 96, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16), 0), {}) +cnt: 4, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 96, 112, 112], f16), 0), {}) +cnt: 3, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/fbnetv3_b_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/fbnetv3_b_training.txt new file mode 100644 index 000000000..85ee90a54 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/fbnetv3_b_training.txt @@ -0,0 +1,287 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 87, ((T([], i64), 1), {}) +cnt: 4, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +cnt: 6, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 8, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16)), {}) +cnt: 8, ((T([128, 72, 14, 14], f16), T([128, 72, 14, 14], f16)), {}) +cnt: 10, ((T([128, 120, 14, 14], f16), T([128, 120, 14, 14], f16)), {}) +cnt: 10, ((T([128, 184, 7, 7], f16), T([128, 184, 7, 7], f16)), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), T([128, 1104, 7, 7], f16)), {}) +cnt: 5, ((T([128, 736, 7, 7], f16), T([128, 736, 7, 7], f16)), {}) +cnt: 1, ((T([128, 720, 7, 7], f16), T([128, 720, 7, 7], f16)), {}) +cnt: 6, ((T([128, 360, 14, 14], f16), T([128, 360, 14, 14], f16)), {}) +cnt: 5, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1984], f16), T([1984, 1000], f16, stride=(1, 
1984))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +cnt: 3, ((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 64, 112, 112], f16),), {}) +cnt: 1, ((T([128, 64, 56, 56], f16),), {}) +cnt: 6, ((T([128, 48, 56, 56], f16),), {}) +cnt: 1, ((T([128, 120, 56, 56], f16),), {}) +cnt: 9, ((T([128, 120, 28, 28], f16),), {}) +cnt: 1, ((T([128, 8, 1, 1], f16),), {}) +cnt: 4, ((T([128, 16, 1, 1], f16),), {}) +cnt: 1, ((T([128, 200, 28, 28], f16),), {}) +cnt: 1, ((T([128, 200, 14, 14], f16),), {}) +cnt: 8, ((T([128, 216, 14, 14], f16),), {}) +cnt: 12, ((T([128, 360, 14, 14], f16),), {}) +cnt: 1, ((T([128, 24, 1, 1], f16),), {}) +cnt: 6, ((T([128, 32, 1, 1], f16),), {}) +cnt: 1, ((T([128, 720, 14, 14], f16),), {}) +cnt: 1, ((T([128, 720, 7, 7], f16),), {}) +cnt: 10, ((T([128, 736, 7, 7], f16),), {}) +cnt: 6, ((T([128, 48, 1, 1], f16),), {}) +cnt: 2, ((T([128, 1104, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1344, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1984, 1, 1], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 16, 112, 112], f16), T([16, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 16), {}) +cnt: 2, ((T([128, 16, 112, 112], f16), T([16, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([64, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([64, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([24, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 24, 56, 56], f16), T([48, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 48, 56, 56], f16), T([48, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 48), {}) +cnt: 3, ((T([128, 48, 
56, 56], f16), T([24, 48, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([120, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 120, 56, 56], f16), T([120, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 120), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([8, 120, 1, 1], f16), T([8], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([120, 8, 1, 1], f16), T([120], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 120, 28, 28], f16), T([40, 120, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 40, 28, 28], f16), T([120, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 120, 28, 28], f16), T([120, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 120), {}) +cnt: 4, ((T([128, 120, 1, 1], f16), T([16, 120, 1, 1], f16), T([16], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 16, 1, 1], f16), T([120, 16, 1, 1], f16), T([120], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([200, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 200, 28, 28], f16), T([200, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 200), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), T([72, 200, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 72, 14, 14], f16), T([216, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 216, 14, 14], f16), T([216, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 216), {}) +cnt: 4, ((T([128, 216, 14, 14], f16), T([72, 216, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 14, 14], f16), T([360, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, 
((T([128, 360, 14, 14], f16), T([360, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 360), {}) +cnt: 1, ((T([128, 360, 1, 1], f16), T([24, 360, 1, 1], f16), T([24], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([360, 24, 1, 1], f16), T([360], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([128, 360, 14, 14], f16), T([120, 360, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 120, 14, 14], f16), T([360, 120, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 360, 14, 14], f16), T([360, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 360), {}) +cnt: 5, ((T([128, 360, 1, 1], f16), T([32, 360, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 32, 1, 1], f16), T([360, 32, 1, 1], f16), T([360], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 120, 14, 14], f16), T([720, 120, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 720, 14, 14], f16), T([720, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 720), {}) +cnt: 1, ((T([128, 720, 1, 1], f16), T([32, 720, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([720, 32, 1, 1], f16), T([720], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 720, 7, 7], f16), T([184, 720, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 184, 7, 7], f16), T([736, 184, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 736, 7, 7], f16), T([736, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 736), {}) +cnt: 5, ((T([128, 736, 1, 1], f16), T([48, 736, 1, 1], f16), T([48], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 48, 1, 1], f16), T([736, 48, 1, 1], f16), T([736], f16), [1, 1], [0, 0], 
[1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 736, 7, 7], f16), T([184, 736, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 184, 7, 7], f16), T([1104, 184, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), T([1104, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1104), {}) +cnt: 1, ((T([128, 1104, 1, 1], f16), T([48, 1104, 1, 1], f16), T([48], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 48, 1, 1], f16), T([1104, 48, 1, 1], f16), T([1104], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), T([224, 1104, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 224, 7, 7], f16), T([1344, 224, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1344, 1, 1], f16), T([1984, 1344, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1984, 1, 1], f16), T([128, 1344, 1, 1], f16), T([1984, 1344, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1344, 7, 7], f16), T([128, 224, 7, 7], f16), T([1344, 224, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 224, 7, 7], f16), T([128, 1104, 7, 7], f16), T([224, 1104, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1104, 1, 1], f16), T([128, 48, 1, 1], f16), T([1104, 48, 1, 1], f16), [1104], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 48, 1, 1], f16), T([128, 1104, 1, 1], f16), T([48, 1104, 1, 1], f16), [48], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), T([128, 1104, 7, 7], f16), T([1104, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1104, [True, 
True, False]), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), T([128, 184, 7, 7], f16), T([1104, 184, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 184, 7, 7], f16), T([128, 736, 7, 7], f16), T([184, 736, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 736, 1, 1], f16), T([128, 48, 1, 1], f16), T([736, 48, 1, 1], f16), [736], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 5, ((T([128, 48, 1, 1], f16), T([128, 736, 1, 1], f16), T([48, 736, 1, 1], f16), [48], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 5, ((T([128, 736, 7, 7], f16), T([128, 736, 7, 7], f16), T([736, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 736, [True, True, False]), {}) +cnt: 5, ((T([128, 736, 7, 7], f16), T([128, 184, 7, 7], f16), T([736, 184, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 184, 7, 7], f16), T([128, 720, 7, 7], f16), T([184, 720, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 720, 1, 1], f16), T([128, 32, 1, 1], f16), T([720, 32, 1, 1], f16), [720], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([128, 720, 1, 1], f16), T([32, 720, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 720, 7, 7], f16), T([128, 720, 14, 14], f16), T([720, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 720, [True, True, False]), {}) +cnt: 1, ((T([128, 720, 14, 14], f16), T([128, 120, 14, 14], f16), T([720, 120, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([128, 120, 14, 14], f16), T([128, 360, 14, 14], f16), T([120, 360, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 360, 1, 1], 
f16), T([128, 32, 1, 1], f16), T([360, 32, 1, 1], f16), [360], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 5, ((T([128, 32, 1, 1], f16), T([128, 360, 1, 1], f16), T([32, 360, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 5, ((T([128, 360, 14, 14], f16), T([128, 360, 14, 14], f16), T([360, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 360, [True, True, False]), {}) +cnt: 5, ((T([128, 360, 14, 14], f16), T([128, 120, 14, 14], f16), T([360, 120, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 360, 1, 1], f16), T([128, 24, 1, 1], f16), T([360, 24, 1, 1], f16), [360], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([128, 360, 1, 1], f16), T([24, 360, 1, 1], f16), [24], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 360, 14, 14], f16), T([128, 360, 14, 14], f16), T([360, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 360, [True, True, False]), {}) +cnt: 1, ((T([128, 360, 14, 14], f16), T([128, 72, 14, 14], f16), T([360, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 72, 14, 14], f16), T([128, 216, 14, 14], f16), T([72, 216, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 216, 14, 14], f16), T([128, 216, 14, 14], f16), T([216, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 216, [True, True, False]), {}) +cnt: 4, ((T([128, 216, 14, 14], f16), T([128, 72, 14, 14], f16), T([216, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 72, 14, 14], f16), T([128, 200, 14, 14], f16), T([72, 200, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), T([128, 200, 28, 28], f16), 
T([200, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 200, [True, True, False]), {}) +cnt: 1, ((T([128, 200, 28, 28], f16), T([128, 40, 28, 28], f16), T([200, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 40, 28, 28], f16), T([128, 120, 28, 28], f16), T([40, 120, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 120, 1, 1], f16), T([128, 16, 1, 1], f16), T([120, 16, 1, 1], f16), [120], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([128, 16, 1, 1], f16), T([128, 120, 1, 1], f16), T([16, 120, 1, 1], f16), [16], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), T([120, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 4, ((T([128, 120, 28, 28], f16), T([128, 40, 28, 28], f16), T([120, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([128, 8, 1, 1], f16), T([120, 8, 1, 1], f16), [120], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([128, 120, 1, 1], f16), T([8, 120, 1, 1], f16), [8], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), T([128, 120, 56, 56], f16), T([120, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 1, ((T([128, 120, 56, 56], f16), T([128, 24, 56, 56], f16), T([120, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 24, 56, 56], f16), T([128, 48, 56, 56], f16), T([24, 48, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 48, 56, 56], f16), T([128, 48, 56, 56], f16), T([48, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 
1], False, [0, 0], 48, [True, True, False]), {}) +cnt: 3, ((T([128, 48, 56, 56], f16), T([128, 24, 56, 56], f16), T([48, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 64, 56, 56], f16), T([24, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 112, 112], f16), T([64, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 16, 112, 112], f16), T([64, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 16, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1344, 7, 7], f16, stride=(1344, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16, stride=(1104, 1, 0, 0)), 49), {}) +cnt: 5, ((T([128, 736, 7, 7], f16, stride=(736, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 720, 7, 7], f16, stride=(720, 1, 0, 0)), 49), {}) +cnt: 6, ((T([128, 360, 14, 14], f16, stride=(360, 1, 0, 0)), 196), {}) +cnt: 5, ((T([128, 120, 28, 28], f16, stride=(120, 1, 0, 0)), 784), {}) +Operator: aten.hardsigmoid.default +cnt: 5, ((T([128, 120, 1, 1], f16),), {}) +cnt: 6, ((T([128, 360, 1, 1], f16),), {}) +cnt: 1, ((T([128, 720, 1, 1], f16),), {}) +cnt: 5, ((T([128, 736, 1, 1], f16),), {}) +cnt: 1, ((T([128, 1104, 1, 1], 
f16),), {}) +Operator: aten.hardsigmoid_backward.default +cnt: 1, ((T([128, 1104, 1, 1], f16), T([128, 1104, 1, 1], f16)), {}) +cnt: 5, ((T([128, 736, 1, 1], f16), T([128, 736, 1, 1], f16)), {}) +cnt: 1, ((T([128, 720, 1, 1], f16), T([128, 720, 1, 1], f16)), {}) +cnt: 6, ((T([128, 360, 1, 1], f16), T([128, 360, 1, 1], f16)), {}) +cnt: 5, ((T([128, 120, 1, 1], f16), T([128, 120, 1, 1], f16)), {}) +Operator: aten.hardswish_.default +cnt: 3, ((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 64, 112, 112], f16),), {}) +cnt: 1, ((T([128, 64, 56, 56], f16),), {}) +cnt: 6, ((T([128, 48, 56, 56], f16),), {}) +cnt: 1, ((T([128, 120, 56, 56], f16),), {}) +cnt: 9, ((T([128, 120, 28, 28], f16),), {}) +cnt: 1, ((T([128, 8, 1, 1], f16),), {}) +cnt: 4, ((T([128, 16, 1, 1], f16),), {}) +cnt: 1, ((T([128, 200, 28, 28], f16),), {}) +cnt: 1, ((T([128, 200, 14, 14], f16),), {}) +cnt: 8, ((T([128, 216, 14, 14], f16),), {}) +cnt: 12, ((T([128, 360, 14, 14], f16),), {}) +cnt: 1, ((T([128, 24, 1, 1], f16),), {}) +cnt: 6, ((T([128, 32, 1, 1], f16),), {}) +cnt: 1, ((T([128, 720, 14, 14], f16),), {}) +cnt: 1, ((T([128, 720, 7, 7], f16),), {}) +cnt: 10, ((T([128, 736, 7, 7], f16),), {}) +cnt: 6, ((T([128, 48, 1, 1], f16),), {}) +cnt: 2, ((T([128, 1104, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1344, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1984, 1, 1], f16),), {}) +Operator: aten.hardswish_backward.default +cnt: 1, ((T([128, 1984, 1, 1], f16), T([128, 1984, 1, 1], f16)), {}) +cnt: 1, ((T([128, 1344, 7, 7], f16), T([128, 1344, 7, 7], f16)), {}) +cnt: 6, ((T([128, 48, 1, 1], f16), T([128, 48, 1, 1], f16)), {}) +cnt: 2, ((T([128, 1104, 7, 7], f16), T([128, 1104, 7, 7], f16)), {}) +cnt: 10, ((T([128, 736, 7, 7], f16), T([128, 736, 7, 7], f16)), {}) +cnt: 6, ((T([128, 32, 1, 1], f16), T([128, 32, 1, 1], f16)), {}) +cnt: 1, ((T([128, 720, 7, 7], f16), T([128, 720, 7, 7], f16)), {}) +cnt: 1, ((T([128, 720, 14, 14], f16), T([128, 720, 14, 14], f16)), {}) +cnt: 12, ((T([128, 360, 14, 14], f16), T([128, 
360, 14, 14], f16)), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([128, 24, 1, 1], f16)), {}) +cnt: 8, ((T([128, 216, 14, 14], f16), T([128, 216, 14, 14], f16)), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), T([128, 200, 14, 14], f16)), {}) +cnt: 1, ((T([128, 200, 28, 28], f16), T([128, 200, 28, 28], f16)), {}) +cnt: 4, ((T([128, 16, 1, 1], f16), T([128, 16, 1, 1], f16)), {}) +cnt: 9, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16)), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([128, 8, 1, 1], f16)), {}) +cnt: 1, ((T([128, 120, 56, 56], f16), T([128, 120, 56, 56], f16)), {}) +cnt: 6, ((T([128, 48, 56, 56], f16), T([128, 48, 56, 56], f16)), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16)), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16)), {}) +cnt: 3, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 5, ((T([128, 120, 28, 28], f16), [2, 3], True), {}) +cnt: 6, ((T([128, 360, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 720, 7, 7], f16), [2, 3], True), {}) +cnt: 5, ((T([128, 736, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 1344, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1984], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1984], f16)), {}) +Operator: aten.mul.Tensor +cnt: 10, ((T([128, 120, 28, 28], f16), T([128, 120, 1, 1], f16)), {}) +cnt: 12, ((T([128, 360, 14, 14], f16), T([128, 360, 1, 1], f16)), {}) +cnt: 2, ((T([128, 720, 7, 7], f16), T([128, 720, 1, 1], f16)), {}) +cnt: 10, ((T([128, 736, 7, 7], f16), T([128, 736, 1, 1], f16)), {}) +cnt: 2, ((T([128, 1104, 7, 7], f16), T([128, 1104, 1, 1], f16)), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), T([128, 1104, 7, 7], f16)), {}) +cnt: 5, ((T([128, 736, 7, 7], f16), T([128, 736, 7, 7], f16)), {}) +cnt: 1, 
((T([128, 720, 7, 7], f16), T([128, 720, 7, 7], f16)), {}) +cnt: 6, ((T([128, 360, 14, 14], f16), T([128, 360, 14, 14], f16)), {}) +cnt: 5, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 5, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([128, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 120, 56, 56], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), True, 0.1, 1e-05), {}) +cnt: 9, ((T([128, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 200, 28, 28], f16), T([200], f16), T([200], f16), T([200], f16), T([200], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), T([200], f16), T([200], f16), T([200], f16), T([200], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 72, 14, 14], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([128, 216, 14, 14], f16), T([216], f16), T([216], f16), T([216], f16), T([216], f16), True, 0.1, 1e-05), {}) +cnt: 12, ((T([128, 360, 14, 14], f16), T([360], f16), T([360], f16), T([360], f16), T([360], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([128, 120, 14, 14], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 720, 14, 14], f16), T([720], f16), T([720], f16), 
T([720], f16), T([720], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 720, 7, 7], f16), T([720], f16), T([720], f16), T([720], f16), T([720], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([128, 184, 7, 7], f16), T([184], f16), T([184], f16), T([184], f16), T([184], f16), True, 0.1, 1e-05), {}) +cnt: 10, ((T([128, 736, 7, 7], f16), T([736], f16), T([736], f16), T([736], f16), T([736], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 1104, 7, 7], f16), T([1104], f16), T([1104], f16), T([1104], f16), T([1104], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 224, 7, 7], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1344, 7, 7], f16), T([1344], f16), T([1344], f16), T([1344], f16), T([1344], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1344, 7, 7], f16), T([128, 1344, 7, 7], f16), T([1344], f16), T([1344], f16), T([1344], f16), T([1344], f32), T([1344], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 224, 7, 7], f16), T([128, 224, 7, 7], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 1104, 7, 7], f16), T([128, 1104, 7, 7], f16), T([1104], f16), T([1104], f16), T([1104], f16), T([1104], f32), T([1104], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([128, 184, 7, 7], f16), T([128, 184, 7, 7], f16), T([184], f16), T([184], f16), T([184], f16), T([184], f32), T([184], f32), True, 1e-05, [True, True, True]), {}) +cnt: 10, ((T([128, 736, 7, 7], f16), T([128, 736, 7, 7], f16), T([736], f16), T([736], f16), T([736], f16), T([736], f32), T([736], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 720, 7, 7], f16), T([128, 720, 7, 7], f16), T([720], f16), T([720], f16), T([720], f16), T([720], f32), T([720], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 720, 14, 14], f16), T([128, 720, 14, 14], f16), T([720], f16), T([720], 
f16), T([720], f16), T([720], f32), T([720], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([128, 120, 14, 14], f16), T([128, 120, 14, 14], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), True, 1e-05, [True, True, True]), {}) +cnt: 12, ((T([128, 360, 14, 14], f16), T([128, 360, 14, 14], f16), T([360], f16), T([360], f16), T([360], f16), T([360], f32), T([360], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 72, 14, 14], f16), T([128, 72, 14, 14], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([128, 216, 14, 14], f16), T([128, 216, 14, 14], f16), T([216], f16), T([216], f16), T([216], f16), T([216], f32), T([216], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), T([128, 200, 14, 14], f16), T([200], f16), T([200], f16), T([200], f16), T([200], f32), T([200], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 200, 28, 28], f16), T([128, 200, 28, 28], f16), T([200], f16), T([200], f16), T([200], f16), T([200], f32), T([200], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 1e-05, [True, True, True]), {}) +cnt: 9, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 120, 56, 56], f16), T([128, 120, 56, 56], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([128, 48, 56, 56], f16), T([128, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], 
f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 1, ((T([128, 1104, 7, 7], f16), [2, 3], True), {}) +cnt: 5, ((T([128, 736, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 720, 7, 7], f16), [2, 3], True), {}) +cnt: 6, ((T([128, 360, 14, 14], f16), [2, 3], True), {}) +cnt: 5, ((T([128, 120, 28, 28], f16), [2, 3], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gernet_l_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gernet_l_training.txt new file mode 100644 index 000000000..1efcbbfec --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gernet_l_training.txt @@ -0,0 +1,118 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 57, ((T([], i64), 1), {}) +cnt: 2, ((T([128, 128, 64, 64], f16), T([128, 128, 64, 64], f16)), {}) +cnt: 4, ((T([128, 192, 32, 32], f16), T([128, 192, 32, 32], f16)), {}) +cnt: 12, ((T([128, 640, 16, 16], f16), T([128, 640, 16, 16], f16)), {}) 
+cnt: 17, ((T([128, 640, 8, 8], f16), T([128, 640, 8, 8], f16)), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 128, 128], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2560], f16), T([2560, 1000], f16, stride=(1, 2560))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 256, 256], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 256, 256], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([192, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 192, 32, 32], f16), T([192, 192, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([192, 128, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 32, 32], f16), T([160, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 160, 32, 32], f16), T([160, 160, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([128, 160, 16, 16], f16), T([640, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 32, 32], f16), T([640, 192, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 640, 16, 16], f16), T([160, 640, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 160, 16, 16], f16), T([160, 160, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 640, 16, 16], f16), T([1920, 640, 1, 1], f16), None, [1, 1], [0, 0], [1, 
1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1920, 16, 16], f16), T([1920, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1920), {}) +cnt: 9, ((T([128, 1920, 8, 8], f16), T([640, 1920, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 640, 16, 16], f16), T([640, 640, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([128, 640, 8, 8], f16), T([1920, 640, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([128, 1920, 8, 8], f16), T([1920, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1920), {}) +cnt: 1, ((T([128, 640, 8, 8], f16), T([2560, 640, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 2560, 8, 8], f16), T([128, 640, 8, 8], f16), T([2560, 640, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 9, ((T([128, 640, 8, 8], f16), T([128, 1920, 8, 8], f16), T([640, 1920, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([128, 1920, 8, 8], f16), T([128, 1920, 8, 8], f16), T([1920, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1920, [True, True, False]), {}) +cnt: 8, ((T([128, 1920, 8, 8], f16), T([128, 640, 8, 8], f16), T([1920, 640, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 640, 8, 8], f16), T([128, 640, 16, 16], f16), T([640, 640, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1920, 8, 8], f16), T([128, 1920, 16, 16], f16), T([1920, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1920, [True, True, False]), {}) +cnt: 1, ((T([128, 1920, 16, 16], f16), T([128, 640, 16, 16], f16), T([1920, 640, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([128, 640, 16, 16], f16), T([128, 160, 16, 
16], f16), T([640, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 160, 16, 16], f16), T([128, 160, 16, 16], f16), T([160, 160, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 160, 16, 16], f16), T([128, 640, 16, 16], f16), T([160, 640, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 640, 16, 16], f16), T([128, 192, 32, 32], f16), T([640, 192, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 160, 16, 16], f16), T([128, 160, 32, 32], f16), T([160, 160, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 160, 32, 32], f16), T([128, 192, 32, 32], f16), T([160, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 192, 32, 32], f16), T([128, 192, 32, 32], f16), T([192, 192, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 32, 32], f16), T([128, 128, 64, 64], f16), T([192, 128, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 32, 32], f16), T([128, 128, 64, 64], f16), T([192, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 32, 128, 128], f16), T([128, 32, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 128, 64, 64], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 64, 64], f16), T([128, 32, 128, 128], f16), T([128, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 3, 256, 256], 
f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 256, 256], f16), T([128, 3, 256, 256], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2560, 8, 8], f16, stride=(2560, 1, 0, 0)), 64), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 2560, 8, 8], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 2560], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 2560], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 192, 32, 32], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 160, 32, 32], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 1e-05), {}) +cnt: 11, ((T([128, 160, 16, 16], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([128, 640, 16, 16], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1920, 16, 16], f16), T([1920], f16), T([1920], f16), T([1920], f16), T([1920], f16), True, 0.1, 1e-05), {}) +cnt: 17, ((T([128, 1920, 8, 8], f16), T([1920], f16), T([1920], f16), T([1920], f16), T([1920], f16), True, 0.1, 1e-05), {}) +cnt: 10, ((T([128, 640, 8, 8], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 2560, 8, 8], f16), T([2560], f16), T([2560], f16), T([2560], f16), T([2560], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 2560, 8, 8], f16), T([128, 2560, 8, 
8], f16), T([2560], f16), T([2560], f16), T([2560], f16), T([2560], f32), T([2560], f32), True, 1e-05, [True, True, True]), {}) +cnt: 10, ((T([128, 640, 8, 8], f16), T([128, 640, 8, 8], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f32), T([640], f32), True, 1e-05, [True, True, True]), {}) +cnt: 17, ((T([128, 1920, 8, 8], f16), T([128, 1920, 8, 8], f16), T([1920], f16), T([1920], f16), T([1920], f16), T([1920], f32), T([1920], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 1920, 16, 16], f16), T([128, 1920, 16, 16], f16), T([1920], f16), T([1920], f16), T([1920], f16), T([1920], f32), T([1920], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([128, 640, 16, 16], f16), T([128, 640, 16, 16], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f32), T([640], f32), True, 1e-05, [True, True, True]), {}) +cnt: 11, ((T([128, 160, 16, 16], f16), T([128, 160, 16, 16], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 160, 32, 32], f16), T([128, 160, 32, 32], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 192, 32, 32], f16), T([128, 192, 32, 32], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 128, 64, 64], f16), T([128, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) 
+Operator: aten.relu_.default +cnt: 1, ((T([128, 32, 128, 128], f16),), {}) +cnt: 2, ((T([128, 128, 64, 64], f16),), {}) +cnt: 4, ((T([128, 192, 32, 32], f16),), {}) +cnt: 1, ((T([128, 160, 32, 32], f16),), {}) +cnt: 11, ((T([128, 160, 16, 16], f16),), {}) +cnt: 6, ((T([128, 640, 16, 16], f16),), {}) +cnt: 1, ((T([128, 1920, 16, 16], f16),), {}) +cnt: 17, ((T([128, 1920, 8, 8], f16),), {}) +cnt: 9, ((T([128, 640, 8, 8], f16),), {}) +cnt: 1, ((T([128, 2560, 8, 8], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 2560, 8, 8], f16), T([128, 2560, 8, 8], f16), 0), {}) +cnt: 9, ((T([128, 640, 8, 8], f16), T([128, 640, 8, 8], f16), 0), {}) +cnt: 17, ((T([128, 1920, 8, 8], f16), T([128, 1920, 8, 8], f16), 0), {}) +cnt: 1, ((T([128, 1920, 16, 16], f16), T([128, 1920, 16, 16], f16), 0), {}) +cnt: 6, ((T([128, 640, 16, 16], f16), T([128, 640, 16, 16], f16), 0), {}) +cnt: 11, ((T([128, 160, 16, 16], f16), T([128, 160, 16, 16], f16), 0), {}) +cnt: 1, ((T([128, 160, 32, 32], f16), T([128, 160, 32, 32], f16), 0), {}) +cnt: 4, ((T([128, 192, 32, 32], f16), T([128, 192, 32, 32], f16), 0), {}) +cnt: 2, ((T([128, 128, 64, 64], f16), T([128, 128, 64, 64], f16), 0), {}) +cnt: 1, ((T([128, 32, 128, 128], f16), T([128, 32, 128, 128], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/ghostnet_100_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/ghostnet_100_training.txt new file mode 100644 index 000000000..15066dcc1 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/ghostnet_100_training.txt @@ -0,0 +1,411 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([], i64), 1), {}) +cnt: 5, ((T([128, 80, 7, 7], f16, 
stride=(7840, 49, 7, 1)), T([128, 80, 7, 7], f16)), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16)), {}) +cnt: 4, ((T([128, 480, 7, 7], f16, stride=(47040, 49, 7, 1)), T([128, 480, 7, 7], f16)), {}) +cnt: 4, ((T([128, 160, 7, 7], f16), T([128, 160, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 2, ((T([128, 336, 14, 14], f16, stride=(131712, 196, 14, 1)), T([128, 336, 14, 14], f16)), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16)), {}) +cnt: 2, ((T([128, 56, 14, 14], f16, stride=(21952, 196, 14, 1)), T([128, 56, 14, 14], f16)), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 14, 14], f16, stride=(94080, 196, 14, 1)), T([128, 240, 14, 14], f16)), {}) +cnt: 4, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16)), {}) +cnt: 4, ((T([128, 40, 14, 14], f16, stride=(15680, 196, 14, 1)), T([128, 40, 14, 14], f16)), {}) +cnt: 2, ((T([128, 92, 14, 14], f16, stride=(36064, 196, 14, 1)), T([128, 92, 14, 14], f16)), {}) +cnt: 1, ((T([128, 100, 14, 14], f16, stride=(39200, 196, 14, 1)), T([128, 100, 14, 14], f16)), {}) +cnt: 1, ((T([128, 120, 28, 28], f16, stride=(188160, 784, 28, 1)), T([128, 120, 28, 28], f16)), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16)), {}) +cnt: 2, ((T([128, 20, 28, 28], f16, stride=(31360, 784, 28, 1)), T([128, 20, 28, 28], f16)), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16)), {}) +cnt: 1, ((T([128, 60, 28, 28], f16, stride=(94080, 784, 28, 1)), T([128, 60, 28, 28], f16)), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16)), {}) +cnt: 2, ((T([128, 36, 56, 56], f16, stride=(225792, 3136, 56, 1)), T([128, 36, 56, 56], f16)), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 2, ((T([128, 12, 56, 56], f16, stride=(75264, 3136, 56, 1)), T([128, 12, 56, 
56], f16)), {}) +cnt: 1, ((T([128, 24, 112, 112], f16, stride=(602112, 12544, 112, 1)), T([128, 24, 112, 112], f16)), {}) +cnt: 2, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +cnt: 2, ((T([128, 8, 112, 112], f16, stride=(200704, 12544, 112, 1)), T([128, 8, 112, 112], f16)), {}) +Operator: aten.add_.Tensor +cnt: 79, ((T([], i64), 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16)), {}) +cnt: 4, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16)), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16)), {}) +cnt: 5, ((T([128, 160, 7, 7], f16), T([128, 160, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.cat.default +cnt: 2, (([T([128, 8, 112, 112], f16), T([128, 8, 112, 112], f16)], 1), {}) +cnt: 1, (([T([128, 24, 112, 112], f16), T([128, 24, 112, 112], f16)], 1), {}) +cnt: 2, (([T([128, 12, 56, 56], f16), T([128, 12, 56, 56], f16)], 1), {}) +cnt: 2, (([T([128, 36, 56, 56], f16), T([128, 36, 56, 56], f16)], 1), {}) +cnt: 2, (([T([128, 20, 28, 28], f16), T([128, 20, 28, 28], f16)], 1), {}) +cnt: 1, (([T([128, 60, 28, 28], f16), T([128, 60, 28, 28], f16)], 1), {}) +cnt: 1, (([T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16)], 1), {}) +cnt: 4, (([T([128, 40, 14, 14], f16), T([128, 40, 14, 14], f16)], 1), {}) +cnt: 1, (([T([128, 100, 14, 14], f16), T([128, 100, 14, 14], f16)], 1), {}) +cnt: 2, (([T([128, 92, 14, 14], f16), T([128, 92, 14, 14], f16)], 1), {}) +cnt: 1, (([T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16)], 1), {}) +cnt: 2, (([T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16)], 1), {}) +cnt: 2, (([T([128, 336, 14, 14], f16), T([128, 336, 14, 14], f16)], 1), {}) +cnt: 5, (([T([128, 80, 7, 7], f16), T([128, 80, 7, 7], f16)], 1), {}) 
+cnt: 4, (([T([128, 480, 7, 7], f16), T([128, 480, 7, 7], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 16, 112, 112], f16), T([8, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 8, 112, 112], f16), T([8, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([24, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([24, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 24), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([48, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 48), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([12, 48, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 12, 56, 56], f16), T([12, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 12), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 16), {}) +cnt: 1, ((T([128, 16, 56, 56], f16), T([24, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([36, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 36, 56, 56], f16), T([36, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 36), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([12, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([72, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 72), {}) +cnt: 1, ((T([128, 72, 1, 1], f16), T([20, 72, 1, 1], f16), T([20], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 20, 1, 1], f16), T([72, 20, 1, 1], f16), T([72], f16), 
[1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([20, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 20, 28, 28], f16), T([20, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 20), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([24, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 24), {}) +cnt: 1, ((T([128, 24, 28, 28], f16), T([40, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([60, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 60, 28, 28], f16), T([60, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 60), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([32, 120, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([120, 32, 1, 1], f16), T([120], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), T([20, 120, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([120, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), T([120, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 120), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([240, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([40, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 40, 14, 14], f16), T([40, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 40), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([40, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 40), {}) +cnt: 1, ((T([128, 40, 14, 14], f16), T([80, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([100, 80, 1, 1], f16), None, [1, 1], [0, 0], 
[1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 100, 14, 14], f16), T([100, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 100), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), T([40, 200, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([92, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 92, 14, 14], f16), T([92, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 92), {}) +cnt: 2, ((T([128, 184, 14, 14], f16), T([40, 184, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([240, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([240, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([128, 480, 1, 1], f16), T([120, 480, 1, 1], f16), T([120], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([480, 120, 1, 1], f16), T([480], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([56, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 56, 14, 14], f16), T([56, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 56), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([80, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 80), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([112, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([336, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 336, 14, 14], f16), T([336, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 336), {}) +cnt: 2, ((T([128, 672, 1, 1], f16), T([168, 672, 1, 1], f16), T([168], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 168, 1, 1], f16), T([672, 168, 1, 1], f16), T([672], f16), 
[1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([56, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([80, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 80, 7, 7], f16), T([80, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 80), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([112, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 112), {}) +cnt: 1, ((T([128, 112, 7, 7], f16), T([160, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 160, 7, 7], f16), T([480, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 480, 7, 7], f16), T([480, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 480), {}) +cnt: 4, ((T([128, 960, 7, 7], f16), T([80, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 960, 1, 1], f16), T([240, 960, 1, 1], f16), T([240], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([960, 240, 1, 1], f16), T([960], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 160, 7, 7], f16), T([960, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 960, 1, 1], f16), T([1280, 960, 1, 1], f16), T([1280], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1280, 1, 1], f16), T([128, 960, 1, 1], f16), T([1280, 960, 1, 1], f16), [1280], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([128, 160, 7, 7], f16), T([960, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 80, 7, 7], f16), 
T([128, 80, 7, 7], f16), T([80, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 80, [True, True, False]), {}) +cnt: 4, ((T([128, 80, 7, 7], f16), T([128, 960, 7, 7], f16), T([80, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 960, 1, 1], f16), T([128, 240, 1, 1], f16), T([960, 240, 1, 1], f16), [960], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([128, 960, 1, 1], f16), T([240, 960, 1, 1], f16), [240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([128, 480, 7, 7], f16), T([128, 480, 7, 7], f16), T([480, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 4, ((T([128, 480, 7, 7], f16), T([128, 160, 7, 7], f16), T([480, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 160, 7, 7], f16), T([128, 112, 7, 7], f16), T([160, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 7, 7], f16), T([128, 112, 14, 14], f16), T([112, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 112, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 7, 7], f16), T([128, 672, 7, 7], f16), T([80, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 672, 1, 1], f16), T([128, 168, 1, 1], f16), T([672, 168, 1, 1], f16), [672], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 168, 1, 1], f16), T([128, 672, 1, 1], f16), T([168, 672, 1, 1], f16), [168], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 2, ((T([128, 336, 14, 14], f16), T([128, 336, 14, 14], f16), T([336, 1, 3, 3], f16), 
[0], [1, 1], [1, 1], [1, 1], False, [0, 0], 336, [True, True, False]), {}) +cnt: 2, ((T([128, 336, 14, 14], f16), T([128, 112, 14, 14], f16), T([336, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([56, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 56, [True, True, False]), {}) +cnt: 1, ((T([128, 56, 14, 14], f16), T([128, 672, 14, 14], f16), T([56, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([128, 80, 14, 14], f16), T([112, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16), T([80, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 80, [True, True, False]), {}) +cnt: 1, ((T([128, 56, 14, 14], f16), T([128, 480, 14, 14], f16), T([56, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 480, 1, 1], f16), T([128, 120, 1, 1], f16), T([480, 120, 1, 1], f16), [480], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([128, 480, 1, 1], f16), T([120, 480, 1, 1], f16), [120], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), T([240, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 80, 14, 14], f16), T([240, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 40, 14, 14], f16), T([128, 40, 14, 14], f16), T([40, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 40, [True, True, False]), {}) +cnt: 2, ((T([128, 40, 14, 14], f16), T([128, 184, 14, 14], f16), T([40, 184, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], 
False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 92, 14, 14], f16), T([128, 92, 14, 14], f16), T([92, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 92, [True, True, False]), {}) +cnt: 2, ((T([128, 92, 14, 14], f16), T([128, 80, 14, 14], f16), T([92, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 14, 14], f16), T([128, 200, 14, 14], f16), T([40, 200, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 100, 14, 14], f16), T([128, 100, 14, 14], f16), T([100, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 100, [True, True, False]), {}) +cnt: 1, ((T([128, 100, 14, 14], f16), T([128, 80, 14, 14], f16), T([100, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([128, 40, 14, 14], f16), T([80, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 14, 14], f16), T([128, 40, 28, 28], f16), T([40, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 40, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 14, 14], f16), T([128, 240, 14, 14], f16), T([40, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 28, 28], f16), T([240, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), T([120, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), T([128, 40, 28, 28], f16), T([120, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 20, 28, 28], f16), T([128, 20, 28, 28], f16), T([20, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 20, [True, True, 
False]), {}) +cnt: 1, ((T([128, 20, 28, 28], f16), T([128, 120, 28, 28], f16), T([20, 120, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([128, 32, 1, 1], f16), T([120, 32, 1, 1], f16), [120], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([128, 120, 1, 1], f16), T([32, 120, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 60, 28, 28], f16), T([128, 60, 28, 28], f16), T([60, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 60, [True, True, False]), {}) +cnt: 1, ((T([128, 60, 28, 28], f16), T([128, 40, 28, 28], f16), T([60, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([128, 24, 28, 28], f16), T([40, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 28, 28], f16), T([128, 24, 56, 56], f16), T([24, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 24, [True, True, False]), {}) +cnt: 1, ((T([128, 20, 28, 28], f16), T([128, 72, 28, 28], f16), T([20, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 72, 1, 1], f16), T([128, 20, 1, 1], f16), T([72, 20, 1, 1], f16), [72], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 20, 1, 1], f16), T([128, 72, 1, 1], f16), T([20, 72, 1, 1], f16), [20], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 56, 56], f16), T([72, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 2, ((T([128, 36, 56, 56], f16), T([128, 36, 56, 56], f16), T([36, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 36, [True, True, False]), {}) +cnt: 2, ((T([128, 36, 56, 56], f16), T([128, 24, 
56, 56], f16), T([36, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 12, 56, 56], f16), T([128, 12, 56, 56], f16), T([12, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 12, [True, True, False]), {}) +cnt: 1, ((T([128, 12, 56, 56], f16), T([128, 72, 56, 56], f16), T([12, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 16, 56, 56], f16), T([24, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 56, 56], f16), T([128, 16, 112, 112], f16), T([16, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 16, [True, True, False]), {}) +cnt: 1, ((T([128, 12, 56, 56], f16), T([128, 48, 56, 56], f16), T([12, 48, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 112, 112], f16), T([48, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 48, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([128, 24, 112, 112], f16), T([24, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 24, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([128, 16, 112, 112], f16), T([24, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 8, 112, 112], f16), T([128, 8, 112, 112], f16), T([8, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 2, ((T([128, 8, 112, 112], f16), T([128, 16, 112, 112], f16), T([8, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 
224], f16)), {}) +cnt: 15, ((T([128, 160, 7, 7], f16), T([128, 160, 7, 7], f16)), {}) +cnt: 6, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16)), {}) +cnt: 12, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16)), {}) +cnt: 6, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16)), {}) +cnt: 6, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 3, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +Operator: aten.div.Scalar +cnt: 3, ((T([128, 960, 7, 7], f16, stride=(960, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 672, 7, 7], f16, stride=(672, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 672, 14, 14], f16, stride=(672, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 480, 14, 14], f16, stride=(480, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 120, 28, 28], f16, stride=(120, 1, 0, 0)), 784), {}) +cnt: 1, ((T([128, 72, 28, 28], f16, stride=(72, 1, 0, 0)), 784), {}) +Operator: aten.hardsigmoid.default +cnt: 1, ((T([128, 72, 1, 1], f16),), {}) +cnt: 1, ((T([128, 120, 1, 1], f16),), {}) +cnt: 1, ((T([128, 480, 1, 1], f16),), {}) +cnt: 2, ((T([128, 672, 1, 1], f16),), {}) +cnt: 2, ((T([128, 960, 1, 1], f16),), {}) +Operator: aten.hardsigmoid_backward.default +cnt: 2, ((T([128, 960, 1, 1], f16), T([128, 960, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 1, 1], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 1, ((T([128, 480, 1, 1], f16), T([128, 480, 1, 1], f16)), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([128, 120, 1, 1], f16)), {}) +cnt: 1, ((T([128, 72, 1, 1], f16), T([128, 72, 1, 1], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 72, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 960, 7, 7], 
f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1280], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([128, 72, 28, 28], f16), T([128, 72, 1, 1], f16)), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([128, 120, 1, 1], f16)), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 7, 7], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 4, ((T([128, 960, 7, 7], f16), T([128, 960, 1, 1], f16)), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16)), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 8, 112, 112], f16), T([8], f16), T([8], f16), T([8], f16), T([8], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 24, 112, 112], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 12, 56, 56], f16), T([12], f16), T([12], f16), T([12], f16), T([12], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 16, 56, 56], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 36, 56, 56], f16), T([36], f16), T([36], f16), T([36], f16), 
T([36], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 20, 28, 28], f16), T([20], f16), T([20], f16), T([20], f16), T([20], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 24, 28, 28], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 60, 28, 28], f16), T([60], f16), T([60], f16), T([60], f16), T([60], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 9, ((T([128, 40, 14, 14], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 100, 14, 14], f16), T([100], f16), T([100], f16), T([100], f16), T([100], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 92, 14, 14], f16), T([92], f16), T([92], f16), T([92], f16), T([92], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 56, 14, 14], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 336, 14, 14], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 10, ((T([128, 80, 7, 7], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 112, 7, 7], f16), T([112], f16), T([112], 
f16), T([112], f16), T([112], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 160, 7, 7], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([128, 480, 7, 7], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 80, 7, 7], f16, stride=(7840, 49, 7, 1)), T([128, 80, 7, 7], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 80, 7, 7], f16), T([128, 80, 7, 7], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([128, 480, 7, 7], f16), T([128, 480, 7, 7], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 160, 7, 7], f16), T([128, 160, 7, 7], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 336, 14, 14], f16), T([128, 336, 14, 14], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 56, 14, 14], f16, stride=(21952, 196, 14, 1)), T([128, 56, 14, 14], f16), T([56], f16), 
T([56], f16), T([56], f16), T([56], f32), T([56], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f32), T([56], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 40, 14, 14], f16, stride=(15680, 196, 14, 1)), T([128, 40, 14, 14], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 40, 14, 14], f16), T([128, 40, 14, 14], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 92, 14, 14], f16), T([128, 92, 14, 14], f16), T([92], f16), T([92], f16), T([92], f16), T([92], f32), T([92], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 100, 14, 14], f16), T([128, 100, 14, 14], f16), T([100], f16), T([100], f16), T([100], f16), T([100], f32), T([100], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 20, 28, 28], f16, stride=(31360, 784, 28, 1)), T([128, 20, 28, 28], f16), T([20], f16), T([20], f16), T([20], f16), T([20], f32), T([20], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 20, 28, 28], f16), T([128, 20, 28, 28], f16), T([20], f16), T([20], 
f16), T([20], f16), T([20], f32), T([20], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 60, 28, 28], f16), T([128, 60, 28, 28], f16), T([60], f16), T([60], f16), T([60], f16), T([60], f32), T([60], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 28, 28], f16), T([128, 24, 28, 28], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 36, 56, 56], f16), T([128, 36, 56, 56], f16), T([36], f16), T([36], f16), T([36], f16), T([36], f32), T([36], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 12, 56, 56], f16, stride=(75264, 3136, 56, 1)), T([128, 12, 56, 56], f16), T([12], f16), T([12], f16), T([12], f16), T([12], f32), T([12], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 12, 56, 56], f16), T([128, 12, 56, 56], f16), T([12], f16), T([12], f16), T([12], f16), T([12], f32), T([12], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 56, 56], f16), T([128, 16, 56, 56], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 24, 112, 112], f16), T([128, 24, 112, 112], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, 
[True, True, True]), {}) +cnt: 1, ((T([128, 8, 112, 112], f16, stride=(200704, 12544, 112, 1)), T([128, 8, 112, 112], f16), T([8], f16), T([8], f16), T([8], f16), T([8], f32), T([8], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 8, 112, 112], f16), T([128, 8, 112, 112], f16), T([8], f16), T([8], f16), T([8], f16), T([8], f32), T([8], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 5, ((T([128, 160, 7, 7], f16), [128, 160, 7, 7], [7840, 49, 7, 1]), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), [128, 112, 14, 14], [21952, 196, 14, 1]), {}) +cnt: 4, ((T([128, 80, 14, 14], f16), [128, 80, 14, 14], [15680, 196, 14, 1]), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), [128, 40, 28, 28], [31360, 784, 28, 1]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), [128, 24, 56, 56], [75264, 3136, 56, 1]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), [128, 16, 112, 112], [200704, 12544, 112, 1]), {}) +Operator: aten.new_zeros.default +cnt: 5, ((T([128, 160, 7, 7], f16), [1003520]), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), [2809856]), {}) +cnt: 4, ((T([128, 80, 14, 14], f16), [2007040]), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), [4014080]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), [9633792]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), [25690112]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 16, 112, 112], f16),), {}) +cnt: 2, ((T([128, 8, 112, 112], f16),), {}) +cnt: 2, ((T([128, 24, 112, 112], f16),), {}) +cnt: 4, ((T([128, 36, 56, 56], f16),), {}) +cnt: 1, ((T([128, 20, 1, 1], f16),), {}) +cnt: 2, ((T([128, 60, 
28, 28], f16),), {}) +cnt: 1, ((T([128, 32, 1, 1], f16),), {}) +cnt: 2, ((T([128, 120, 28, 28], f16),), {}) +cnt: 2, ((T([128, 100, 14, 14], f16),), {}) +cnt: 4, ((T([128, 92, 14, 14], f16),), {}) +cnt: 2, ((T([128, 240, 14, 14], f16),), {}) +cnt: 1, ((T([128, 120, 1, 1], f16),), {}) +cnt: 4, ((T([128, 336, 14, 14], f16),), {}) +cnt: 2, ((T([128, 168, 1, 1], f16),), {}) +cnt: 8, ((T([128, 480, 7, 7], f16),), {}) +cnt: 2, ((T([128, 240, 1, 1], f16),), {}) +cnt: 1, ((T([128, 960, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 1, 1], f16),), {}) +Operator: aten.slice_backward.default +cnt: 4, ((T([128, 960, 7, 7], f16), [128, 960, 7, 7], 3, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 960, 7, 7], f16), [128, 960, 7, 7], 2, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([128, 960, 7, 7], f16), [128, 960, 7, 7], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), [128, 672, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), [128, 672, 14, 14], 2, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), [128, 672, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), [128, 480, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), [128, 480, 14, 14], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), [128, 480, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 184, 14, 14], f16), [128, 184, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 184, 14, 14], f16), [128, 184, 14, 14], 2, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 184, 14, 14], f16), [128, 184, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), [128, 200, 14, 14], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), [128, 200, 14, 14], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), [128, 200, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, 
((T([128, 240, 28, 28], f16), [128, 240, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), [128, 240, 28, 28], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), [128, 240, 28, 28], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), [128, 120, 28, 28], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), [128, 120, 28, 28], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), [128, 120, 28, 28], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 72, 56, 56], f16), [128, 72, 56, 56], 3, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 72, 56, 56], f16), [128, 72, 56, 56], 2, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 72, 56, 56], f16), [128, 72, 56, 56], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), [128, 48, 112, 112], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), [128, 48, 112, 112], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), [128, 48, 112, 112], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), [128, 16, 112, 112], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), [128, 16, 112, 112], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), [128, 16, 112, 112], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 1280, 1, 1], f16), T([128, 1280, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], 
f16), 0), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([128, 240, 1, 1], f16), 0), {}) +cnt: 4, ((T([128, 480, 7, 7], f16, stride=(47040, 49, 7, 1)), T([128, 480, 7, 7], f16), 0), {}) +cnt: 4, ((T([128, 480, 7, 7], f16), T([128, 480, 7, 7], f16), 0), {}) +cnt: 2, ((T([128, 168, 1, 1], f16), T([128, 168, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 336, 14, 14], f16, stride=(131712, 196, 14, 1)), T([128, 336, 14, 14], f16), 0), {}) +cnt: 2, ((T([128, 336, 14, 14], f16), T([128, 336, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([128, 120, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 240, 14, 14], f16, stride=(94080, 196, 14, 1)), T([128, 240, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), 0), {}) +cnt: 2, ((T([128, 92, 14, 14], f16, stride=(36064, 196, 14, 1)), T([128, 92, 14, 14], f16), 0), {}) +cnt: 2, ((T([128, 92, 14, 14], f16), T([128, 92, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 100, 14, 14], f16, stride=(39200, 196, 14, 1)), T([128, 100, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 100, 14, 14], f16), T([128, 100, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 120, 28, 28], f16, stride=(188160, 784, 28, 1)), T([128, 120, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([128, 32, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 60, 28, 28], f16, stride=(94080, 784, 28, 1)), T([128, 60, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 60, 28, 28], f16), T([128, 60, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 20, 1, 1], f16), T([128, 20, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 36, 56, 56], f16, stride=(225792, 3136, 56, 1)), T([128, 36, 56, 56], f16), 0), {}) +cnt: 2, ((T([128, 36, 56, 56], f16), T([128, 36, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 24, 112, 112], f16, stride=(602112, 12544, 112, 1)), T([128, 24, 112, 112], f16), 0), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([128, 24, 112, 112], f16), 0), {}) +cnt: 1, ((T([128, 8, 112, 112], f16, 
stride=(200704, 12544, 112, 1)), T([128, 8, 112, 112], f16), 0), {}) +cnt: 1, ((T([128, 8, 112, 112], f16), T([128, 8, 112, 112], f16), 0), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gluon_inception_v3_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gluon_inception_v3_training.txt new file mode 100644 index 000000000..c11cd6890 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gluon_inception_v3_training.txt @@ -0,0 +1,239 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16)), {}) +cnt: 3, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16)), {}) +cnt: 3, ((T([128, 1280, 8, 8], f16), T([128, 1280, 8, 8], f16)), {}) +cnt: 14, ((T([128, 768, 17, 17], f16), T([128, 768, 17, 17], f16)), {}) +cnt: 5, ((T([128, 288, 35, 35], f16), T([128, 288, 35, 35], f16)), {}) +cnt: 3, ((T([128, 256, 35, 35], f16), T([128, 256, 35, 35], f16)), {}) +cnt: 3, ((T([128, 192, 35, 35], f16), T([128, 192, 35, 35], f16)), {}) +Operator: aten.add_.Tensor +cnt: 94, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([128, 192, 35, 35], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 256, 35, 35], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 4, ((T([128, 768, 17, 17], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), [3, 3], [1, 1], [1, 1]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, 
((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([128, 1280, 8, 8], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 4, ((T([128, 768, 17, 17], f16), T([128, 768, 17, 17], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), T([128, 288, 35, 35], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 256, 35, 35], f16), T([128, 256, 35, 35], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([128, 192, 35, 35], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +Operator: aten.cat.default +cnt: 1, (([T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), T([128, 96, 35, 35], f16), T([128, 32, 35, 35], f16)], 1), {}) +cnt: 2, (([T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), T([128, 96, 35, 35], f16), T([128, 64, 35, 35], f16)], 1), {}) +cnt: 1, (([T([128, 384, 17, 17], f16), T([128, 96, 17, 17], f16), T([128, 288, 17, 17], f16)], 1), {}) +cnt: 4, (([T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16)], 1), {}) +cnt: 1, (([T([128, 320, 8, 8], f16), T([128, 192, 8, 8], f16), T([128, 768, 8, 8], f16)], 1), {}) +cnt: 4, (([T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16)], 1), {}) +cnt: 2, (([T([128, 320, 8, 8], f16), T([128, 768, 8, 8], f16), T([128, 768, 8, 8], f16), T([128, 192, 8, 8], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 299, 299], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 299, 299], f16), T([32, 3, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([32, 32, 3, 3], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 73, 73], f16), 
T([80, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([192, 80, 3, 3], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 192, 35, 35], f16), T([64, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([48, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([64, 48, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 64, 35, 35], f16), T([96, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([32, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 256, 35, 35], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 35, 35], f16), T([48, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 288, 35, 35], f16), T([64, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), T([48, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), T([384, 288, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 12, ((T([128, 768, 17, 17], f16), T([192, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 768, 17, 17], f16), T([128, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 17, 17], f16), T([192, 128, 7, 
1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 17, 17], f16), T([192, 128, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 768, 17, 17], f16), T([160, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([160, 160, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 160, 17, 17], f16), T([192, 160, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([160, 160, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 160, 17, 17], f16), T([192, 160, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([192, 192, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([192, 192, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([320, 192, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([192, 192, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([320, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([384, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([384, 384, 1, 3], f16), None, [1, 1], [0, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([384, 384, 3, 1], f16), None, [1, 1], [1, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([448, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([384, 448, 3, 
3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([192, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([320, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([384, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([448, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([192, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 192, 8, 8], f16), T([128, 2048, 8, 8], f16), T([192, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), T([384, 384, 3, 1], f16), [0], [1, 1], [1, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), T([384, 384, 1, 3], f16), [0], [1, 1], [0, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 384, 8, 8], f16), T([128, 448, 8, 8], f16), T([384, 448, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 448, 8, 8], f16), T([128, 2048, 8, 8], f16), T([448, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 8, 8], f16), T([128, 2048, 8, 8], f16), T([384, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 8, 8], f16), T([128, 2048, 8, 8], f16), T([320, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 8, 8], f16), T([128, 1280, 8, 8], f16), T([192, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), 
{}) +cnt: 1, ((T([128, 448, 8, 8], f16), T([128, 1280, 8, 8], f16), T([448, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 8, 8], f16), T([128, 1280, 8, 8], f16), T([384, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 8, 8], f16), T([128, 1280, 8, 8], f16), T([320, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 8, 8], f16), T([128, 192, 17, 17], f16), T([192, 192, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([192, 192, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([192, 192, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 12, ((T([128, 192, 17, 17], f16), T([128, 768, 17, 17], f16), T([192, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 8, 8], f16), T([128, 192, 17, 17], f16), T([320, 192, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 192, 17, 17], f16), T([128, 160, 17, 17], f16), T([192, 160, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), T([160, 160, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), T([160, 160, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([128, 768, 17, 17], f16), T([160, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, 
((T([128, 192, 17, 17], f16), T([128, 160, 17, 17], f16), T([192, 160, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([128, 128, 17, 17], f16), T([192, 128, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), T([128, 128, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), T([128, 128, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 768, 17, 17], f16), T([128, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([128, 128, 17, 17], f16), T([192, 128, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 17, 17], f16), T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 96, 35, 35], f16), T([128, 64, 35, 35], f16), T([96, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 64, 35, 35], f16), T([128, 288, 35, 35], f16), T([64, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 17, 17], f16), T([128, 288, 35, 35], f16), T([384, 288, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 96, 35, 35], f16), T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 64, 35, 35], f16), T([128, 48, 35, 35], f16), T([64, 48, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 35, 35], 
f16), T([128, 288, 35, 35], f16), T([48, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 64, 35, 35], f16), T([128, 256, 35, 35], f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 35, 35], f16), T([128, 256, 35, 35], f16), T([48, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 35, 35], f16), T([128, 192, 35, 35], f16), T([32, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 64, 35, 35], f16), T([128, 192, 35, 35], f16), T([64, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 35, 35], f16), T([128, 192, 35, 35], f16), T([48, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([128, 80, 73, 73], f16), T([192, 80, 3, 3], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([128, 64, 73, 73], f16), T([80, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([128, 32, 147, 147], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([128, 32, 149, 149], f16), T([32, 32, 3, 3], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([128, 3, 299, 299], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 299, 299], f16), T([128, 3, 299, 299], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2048, 8, 8], f16, stride=(2048, 1, 0, 0)), 64), {}) +Operator: 
aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 64, 147, 147], f16), [3, 3], [2, 2]), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), [3, 3], [2, 2]), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), [3, 3], [2, 2]), {}) +cnt: 1, ((T([128, 768, 17, 17], f16), [3, 3], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 768, 8, 8], f16, stride=(81920, 64, 8, 1)), T([128, 768, 17, 17], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 768, 8, 8], i64)), {}) +cnt: 1, ((T([128, 288, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 288, 35, 35], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 288, 17, 17], i64)), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([128, 192, 71, 71], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 192, 35, 35], i64)), {}) +cnt: 1, ((T([128, 64, 73, 73], f16), T([128, 64, 147, 147], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 64, 73, 73], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 2048, 8, 8], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 32, 149, 149], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 12, ((T([128, 64, 35, 35], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 
0.1, 0.001), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), True, 0.1, 0.001), {}) +cnt: 7, ((T([128, 96, 35, 35], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 32, 35, 35], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 384, 17, 17], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 96, 17, 17], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 0.001), {}) +cnt: 26, ((T([128, 192, 17, 17], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 6, ((T([128, 128, 17, 17], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 0.001), {}) +cnt: 12, ((T([128, 160, 17, 17], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 320, 8, 8], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 192, 8, 8], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 12, ((T([128, 384, 8, 8], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), True, 0.1, 0.001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 3, ((T([128, 192, 8, 8], f16), T([128, 192, 8, 8], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 12, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([128, 448, 8, 8], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), 
True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([128, 320, 8, 8], f16), T([128, 320, 8, 8], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), True, 0.001, [True, True, True]), {}) +cnt: 26, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 12, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 0.001, [True, True, True]), {}) +cnt: 6, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 17, 17], f16), T([128, 96, 17, 17], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 0.001, [True, True, True]), {}) +cnt: 7, ((T([128, 96, 35, 35], f16), T([128, 96, 35, 35], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 0.001, [True, True, True]), {}) +cnt: 12, ((T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 384, 17, 17], f16), T([128, 384, 17, 17], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([128, 48, 35, 35], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 35, 35], f16), T([128, 32, 35, 35], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([128, 192, 71, 71], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, 
((T([128, 80, 73, 73], f16), T([128, 80, 73, 73], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([128, 64, 147, 147], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([128, 32, 147, 147], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([128, 32, 149, 149], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 32, 149, 149], f16),), {}) +cnt: 1, ((T([128, 32, 147, 147], f16),), {}) +cnt: 1, ((T([128, 64, 147, 147], f16),), {}) +cnt: 1, ((T([128, 80, 73, 73], f16),), {}) +cnt: 1, ((T([128, 192, 71, 71], f16),), {}) +cnt: 12, ((T([128, 64, 35, 35], f16),), {}) +cnt: 3, ((T([128, 48, 35, 35], f16),), {}) +cnt: 7, ((T([128, 96, 35, 35], f16),), {}) +cnt: 1, ((T([128, 32, 35, 35], f16),), {}) +cnt: 1, ((T([128, 384, 17, 17], f16),), {}) +cnt: 1, ((T([128, 96, 17, 17], f16),), {}) +cnt: 26, ((T([128, 192, 17, 17], f16),), {}) +cnt: 6, ((T([128, 128, 17, 17], f16),), {}) +cnt: 12, ((T([128, 160, 17, 17], f16),), {}) +cnt: 3, ((T([128, 320, 8, 8], f16),), {}) +cnt: 3, ((T([128, 192, 8, 8], f16),), {}) +cnt: 12, ((T([128, 384, 8, 8], f16),), {}) +cnt: 2, ((T([128, 448, 8, 8], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([128, 192, 8, 8], f16, stride=(131072, 64, 8, 1)), T([128, 192, 8, 8], f16), 0), {}) +cnt: 8, 
((T([128, 384, 8, 8], f16, stride=(131072, 64, 8, 1)), T([128, 384, 8, 8], f16), 0), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), 0), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([128, 448, 8, 8], f16), 0), {}) +cnt: 2, ((T([128, 320, 8, 8], f16, stride=(131072, 64, 8, 1)), T([128, 320, 8, 8], f16), 0), {}) +cnt: 1, ((T([128, 192, 8, 8], f16, stride=(81920, 64, 8, 1)), T([128, 192, 8, 8], f16), 0), {}) +cnt: 10, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), 0), {}) +cnt: 1, ((T([128, 320, 8, 8], f16, stride=(81920, 64, 8, 1)), T([128, 320, 8, 8], f16), 0), {}) +cnt: 16, ((T([128, 192, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 192, 17, 17], f16), 0), {}) +cnt: 12, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), 0), {}) +cnt: 6, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), 0), {}) +cnt: 1, ((T([128, 96, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 96, 17, 17], f16), 0), {}) +cnt: 4, ((T([128, 96, 35, 35], f16), T([128, 96, 35, 35], f16), 0), {}) +cnt: 4, ((T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 384, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 384, 17, 17], f16), 0), {}) +cnt: 6, ((T([128, 64, 35, 35], f16, stride=(352800, 1225, 35, 1)), T([128, 64, 35, 35], f16), 0), {}) +cnt: 2, ((T([128, 96, 35, 35], f16, stride=(352800, 1225, 35, 1)), T([128, 96, 35, 35], f16), 0), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([128, 48, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 32, 35, 35], f16, stride=(313600, 1225, 35, 1)), T([128, 32, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 96, 35, 35], f16, stride=(313600, 1225, 35, 1)), T([128, 96, 35, 35], f16), 0), {}) +cnt: 2, ((T([128, 64, 35, 35], f16, stride=(313600, 1225, 35, 1)), T([128, 64, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([128, 192, 71, 71], f16), 0), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([128, 80, 73, 73], f16), 0), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([128, 64, 147, 
147], f16), 0), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([128, 32, 147, 147], f16), 0), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([128, 32, 149, 149], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gluon_senet154_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gluon_senet154_training.txt new file mode 100644 index 000000000..b766b8a41 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gluon_senet154_training.txt @@ -0,0 +1,187 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([32, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([32, 1000], f16), T([32, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 5, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16)), {}) +cnt: 72, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 16, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)), {}) +cnt: 6, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 157, ((T([], i64), 1), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +cnt: 8, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)), {}) +cnt: 36, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([64, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([128, 64, 3, 3], f16), None, [1, 1], 
[1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 128, 56, 56], f16), T([256, 2, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), T([16, 256, 1, 1], f16), T([16], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 16, 1, 1], f16), T([256, 16, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([256, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 256, 56, 56], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([512, 4, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 9, ((T([32, 512, 28, 28], f16), T([512, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([32, 512, 1, 1], f16), T([32, 512, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([32, 32, 1, 1], f16), T([512, 32, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([512, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([32, 512, 28, 28], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([32, 256, 28, 28], f16), T([512, 4, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([1024, 8, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 37, ((T([32, 1024, 14, 14], f16), T([1024, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 36, ((T([32, 1024, 1, 1], f16), T([64, 1024, 1, 1], f16), 
T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 36, ((T([32, 64, 1, 1], f16), T([1024, 64, 1, 1], f16), T([1024], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([1024, 512, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 35, ((T([32, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 35, ((T([32, 512, 14, 14], f16), T([1024, 8, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([2048, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), T([2048, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 2048, 1, 1], f16), T([128, 2048, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 128, 1, 1], f16), T([2048, 128, 1, 1], f16), T([2048], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([2048, 1024, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 2048, 7, 7], f16), T([1024, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([2048, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([32, 2048, 1, 1], f16), T([32, 128, 1, 1], f16), T([2048, 128, 1, 1], f16), [2048], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 128, 1, 1], f16), T([32, 2048, 1, 1], f16), T([128, 2048, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), T([2048, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 2048, 7, 7], f16), T([32, 1024, 7, 7], f16), T([2048, 16, 
3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([32, 2048, 7, 7], f16), T([1024, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 1024, 14, 14], f16), T([2048, 1024, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 1024, 14, 14], f16), T([2048, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 37, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), T([1024, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 36, ((T([32, 1024, 1, 1], f16), T([32, 64, 1, 1], f16), T([1024, 64, 1, 1], f16), [1024], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 36, ((T([32, 64, 1, 1], f16), T([32, 1024, 1, 1], f16), T([64, 1024, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 35, ((T([32, 1024, 14, 14], f16), T([32, 512, 14, 14], f16), T([1024, 8, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 35, ((T([32, 512, 14, 14], f16), T([32, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 512, 28, 28], f16), T([1024, 512, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 512, 28, 28], f16), T([1024, 8, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 9, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), T([512, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([32, 512, 1, 1], f16), T([32, 32, 1, 1], f16), T([512, 32, 1, 1], f16), [512], [1, 
1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 8, ((T([32, 32, 1, 1], f16), T([32, 512, 1, 1], f16), T([32, 512, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 7, ((T([32, 512, 28, 28], f16), T([32, 256, 28, 28], f16), T([512, 4, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 7, ((T([32, 256, 28, 28], f16), T([32, 512, 28, 28], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 256, 56, 56], f16), T([512, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 256, 56, 56], f16), T([512, 4, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), T([256, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), T([32, 16, 1, 1], f16), T([256, 16, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 16, 1, 1], f16), T([32, 256, 1, 1], f16), T([16, 256, 1, 1], f16), [16], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 128, 56, 56], f16), T([256, 2, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 2, ((T([32, 128, 56, 56], f16), T([32, 256, 56, 56], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 128, 56, 56], f16), T([256, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, 
False]), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([32, 64, 112, 112], f16), T([128, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 3, 224, 224], f16), T([64, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 4, ((T([32, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +cnt: 36, ((T([32, 1024, 14, 14], f16, stride=(1024, 1, 0, 0)), 196), {}) +cnt: 8, ((T([32, 512, 28, 28], f16, stride=(512, 1, 0, 0)), 784), {}) +cnt: 3, ((T([32, 256, 56, 56], f16, stride=(256, 1, 0, 0)), 3136), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([32], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([32, 128, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 128, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([32, 128, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 3, ((T([32, 256, 56, 56], f16), [2, 3], True), {}) +cnt: 8, ((T([32, 512, 28, 28], f16), [2, 3], True), {}) +cnt: 36, ((T([32, 1024, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(1, 1000)), T([32, 2048], f16)), {}) +Operator: aten.mul.Tensor +cnt: 6, ((T([32, 256, 56, 56], f16), T([32, 256, 1, 1], f16)), {}) +cnt: 16, ((T([32, 512, 28, 28], f16), T([32, 512, 1, 1], f16)), {}) +cnt: 72, ((T([32, 1024, 14, 14], f16), T([32, 1024, 1, 1], 
f16)), {}) +cnt: 6, ((T([32, 2048, 7, 7], f16), T([32, 2048, 1, 1], f16)), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16)), {}) +cnt: 36, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 8, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 18, ((T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([32, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 74, ((T([32, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 35, ((T([32, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([32, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 7, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([32, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), 
True, 1e-05, [True, True, True]), {}) +cnt: 74, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 35, ((T([32, 512, 14, 14], f16), T([32, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 18, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([32, 256, 28, 28], f16), T([32, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([32, 128, 112, 112], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([32, 1000], f16), T([32], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([32, 1000], f16), T([32], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 2, ((T([32, 64, 112, 112], f16),), {}) +cnt: 1, ((T([32, 128, 112, 112], f16),), {}) +cnt: 3, ((T([32, 128, 56, 56], f16),), {}) +cnt: 7, ((T([32, 256, 56, 56], f16),), {}) +cnt: 3, ((T([32, 16, 1, 1], f16),), {}) +cnt: 17, ((T([32, 512, 28, 28], 
f16),), {}) +cnt: 8, ((T([32, 32, 1, 1], f16),), {}) +cnt: 7, ((T([32, 256, 28, 28], f16),), {}) +cnt: 73, ((T([32, 1024, 14, 14], f16),), {}) +cnt: 36, ((T([32, 64, 1, 1], f16),), {}) +cnt: 35, ((T([32, 512, 14, 14], f16),), {}) +cnt: 6, ((T([32, 2048, 7, 7], f16),), {}) +cnt: 3, ((T([32, 128, 1, 1], f16),), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16),), {}) +Operator: aten.sigmoid.default +cnt: 3, ((T([32, 256, 1, 1], f16),), {}) +cnt: 8, ((T([32, 512, 1, 1], f16),), {}) +cnt: 36, ((T([32, 1024, 1, 1], f16),), {}) +cnt: 3, ((T([32, 2048, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 3, ((T([32, 2048, 1, 1], f16), T([32, 2048, 1, 1], f16)), {}) +cnt: 36, ((T([32, 1024, 1, 1], f16), T([32, 1024, 1, 1], f16)), {}) +cnt: 8, ((T([32, 512, 1, 1], f16), T([32, 512, 1, 1], f16)), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), T([32, 256, 1, 1], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16), [0], True), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), [2, 3], True), {}) +cnt: 36, ((T([32, 1024, 14, 14], f16), [2, 3], True), {}) +cnt: 8, ((T([32, 512, 28, 28], f16), [2, 3], True), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 6, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), 0), {}) +cnt: 3, ((T([32, 128, 1, 1], f16), T([32, 128, 1, 1], f16), 0), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([32, 1024, 7, 7], f16), 0), {}) +cnt: 73, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), 0), {}) +cnt: 36, ((T([32, 64, 1, 1], f16), T([32, 64, 1, 1], f16), 0), {}) +cnt: 35, ((T([32, 512, 14, 14], f16), T([32, 512, 14, 14], f16), 0), {}) +cnt: 17, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), 0), {}) +cnt: 8, ((T([32, 32, 1, 1], f16), T([32, 32, 1, 1], f16), 0), {}) +cnt: 7, ((T([32, 256, 28, 28], f16), T([32, 256, 28, 28], f16), 0), {}) +cnt: 7, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), 0), {}) +cnt: 3, ((T([32, 16, 1, 1], f16), T([32, 16, 1, 1], f16), 0), {}) 
+cnt: 3, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([32, 128, 112, 112], f16), 0), {}) +cnt: 2, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gluon_xception65_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gluon_xception65_training.txt new file mode 100644 index 000000000..53a6cc214 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gluon_xception65_training.txt @@ -0,0 +1,155 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([32, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([32, 1000], f16), T([32, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([32, 128, 75, 75], f16), T([32, 128, 75, 75], f16)), {}) +cnt: 2, ((T([32, 256, 38, 38], f16), T([32, 256, 38, 38], f16)), {}) +cnt: 34, ((T([32, 728, 19, 19], f16), T([32, 728, 19, 19], f16)), {}) +cnt: 1, ((T([32, 1024, 10, 10], f16), T([32, 1024, 10, 10], f16)), {}) +cnt: 1, ((T([32, 64, 150, 150], f16), T([32, 64, 150, 150], f16)), {}) +Operator: aten.add_.Tensor +cnt: 132, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 299, 299], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 299, 299], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 32, 150, 150], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 150, 150], f16), T([128, 64, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 150, 150], f16), T([64, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([32, 64, 150, 150], f16), T([128, 64, 1, 1], f16), 
None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 150, 150], f16), T([128, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 128), {}) +cnt: 1, ((T([32, 128, 150, 150], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 150, 150], f16), T([128, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 128), {}) +cnt: 1, ((T([32, 128, 75, 75], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 75, 75], f16), T([256, 128, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 75, 75], f16), T([128, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 128), {}) +cnt: 1, ((T([32, 128, 75, 75], f16), T([256, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 75, 75], f16), T([256, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 256), {}) +cnt: 1, ((T([32, 256, 75, 75], f16), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 75, 75], f16), T([256, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 256), {}) +cnt: 1, ((T([32, 256, 38, 38], f16), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 38, 38], f16), T([728, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 38, 38], f16), T([256, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 256), {}) +cnt: 1, ((T([32, 256, 38, 38], f16), T([728, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 728, 38, 38], f16), T([728, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 728), {}) +cnt: 1, ((T([32, 728, 38, 38], f16), T([728, 728, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 728, 38, 38], f16), T([728, 1, 3, 3], f16), None, 
[2, 2], [1, 1], [1, 1], False, [0, 0], 728), {}) +cnt: 50, ((T([32, 728, 19, 19], f16), T([728, 728, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 50, ((T([32, 728, 19, 19], f16), T([728, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 728), {}) +cnt: 1, ((T([32, 728, 19, 19], f16), T([1024, 728, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 728, 19, 19], f16), T([1024, 728, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 19, 19], f16), T([1024, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1024), {}) +cnt: 1, ((T([32, 1024, 10, 10], f16), T([1024, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 10, 10], f16), T([1024, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1024), {}) +cnt: 1, ((T([32, 1024, 10, 10], f16), T([1536, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 1536, 10, 10], f16), T([1536, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1536), {}) +cnt: 1, ((T([32, 1536, 10, 10], f16), T([1536, 1536, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1536, 10, 10], f16), T([2048, 1536, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([32, 2048, 10, 10], f16), T([32, 1536, 10, 10], f16), T([2048, 1536, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 1536, 10, 10], f16), T([32, 1536, 10, 10], f16), T([1536, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1536, [True, True, False]), {}) +cnt: 1, ((T([32, 1536, 10, 10], f16), T([32, 1536, 10, 10], f16), T([1536, 1536, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1536, 10, 10], f16), T([32, 1024, 10, 10], f16), T([1536, 1024, 1, 1], 
f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 10, 10], f16), T([32, 1024, 10, 10], f16), T([1024, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1024, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 10, 10], f16), T([32, 1024, 10, 10], f16), T([1024, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 10, 10], f16), T([32, 1024, 19, 19], f16), T([1024, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1024, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 19, 19], f16), T([32, 728, 19, 19], f16), T([1024, 728, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 50, ((T([32, 728, 19, 19], f16), T([32, 728, 19, 19], f16), T([728, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 728, [True, True, False]), {}) +cnt: 50, ((T([32, 728, 19, 19], f16), T([32, 728, 19, 19], f16), T([728, 728, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 10, 10], f16), T([32, 728, 19, 19], f16), T([1024, 728, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 728, 19, 19], f16), T([32, 728, 38, 38], f16), T([728, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 728, [True, True, False]), {}) +cnt: 1, ((T([32, 728, 38, 38], f16), T([32, 728, 38, 38], f16), T([728, 728, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 728, 38, 38], f16), T([32, 728, 38, 38], f16), T([728, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 728, [True, True, False]), {}) +cnt: 1, ((T([32, 728, 38, 38], f16), T([32, 256, 38, 38], f16), T([728, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 38, 38], f16), T([32, 256, 38, 38], f16), T([256, 1, 3, 3], f16), [0], [1, 
1], [1, 1], [1, 1], False, [0, 0], 256, [True, True, False]), {}) +cnt: 1, ((T([32, 728, 19, 19], f16), T([32, 256, 38, 38], f16), T([728, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 38, 38], f16), T([32, 256, 38, 38], f16), T([256, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 38, 38], f16), T([32, 256, 75, 75], f16), T([256, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 256, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 75, 75], f16), T([32, 256, 75, 75], f16), T([256, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 75, 75], f16), T([32, 256, 75, 75], f16), T([256, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 256, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 75, 75], f16), T([32, 128, 75, 75], f16), T([256, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 75, 75], f16), T([32, 128, 75, 75], f16), T([128, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 128, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 38, 38], f16), T([32, 128, 75, 75], f16), T([256, 128, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 75, 75], f16), T([32, 128, 75, 75], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 75, 75], f16), T([32, 128, 150, 150], f16), T([128, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 128, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 150, 150], f16), T([32, 128, 150, 150], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 150, 150], f16), T([32, 128, 150, 150], f16), T([128, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], 
False, [0, 0], 128, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 150, 150], f16), T([32, 64, 150, 150], f16), T([128, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 150, 150], f16), T([32, 64, 150, 150], f16), T([64, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 75, 75], f16), T([32, 64, 150, 150], f16), T([128, 64, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 150, 150], f16), T([32, 32, 150, 150], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 32, 150, 150], f16), T([32, 3, 299, 299], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 299, 299], f16), T([32, 3, 299, 299], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 2048, 10, 10], f16, stride=(2048, 1, 0, 0)), 100), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([32], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 2048, 10, 10], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(1, 1000)), T([32, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([32, 32, 150, 150], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 64, 150, 150], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 128, 75, 75], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 128, 150, 150], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 256, 38, 38], f16), T([256], f16), T([256], f16), T([256], f16), T([256], 
f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 256, 75, 75], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 102, ((T([32, 728, 19, 19], f16), T([728], f16), T([728], f16), T([728], f16), T([728], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 728, 38, 38], f16), T([728], f16), T([728], f16), T([728], f16), T([728], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 1024, 10, 10], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 1024, 19, 19], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 1536, 10, 10], f16), T([1536], f16), T([1536], f16), T([1536], f16), T([1536], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 2048, 10, 10], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([32, 2048, 10, 10], f16), T([32, 2048, 10, 10], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 1536, 10, 10], f16), T([32, 1536, 10, 10], f16), T([1536], f16), T([1536], f16), T([1536], f16), T([1536], f32), T([1536], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 1024, 10, 10], f16), T([32, 1024, 10, 10], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 1024, 19, 19], f16), T([32, 1024, 19, 19], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 102, ((T([32, 728, 19, 19], f16), T([32, 728, 19, 19], f16), T([728], f16), T([728], f16), T([728], f16), T([728], f32), T([728], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 728, 38, 38], f16), T([32, 728, 38, 38], f16), T([728], f16), T([728], f16), T([728], f16), T([728], f32), 
T([728], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 256, 38, 38], f16), T([32, 256, 38, 38], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 256, 75, 75], f16), T([32, 256, 75, 75], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 128, 75, 75], f16), T([32, 128, 75, 75], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 128, 150, 150], f16), T([32, 128, 150, 150], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 64, 150, 150], f16), T([32, 64, 150, 150], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 32, 150, 150], f16), T([32, 32, 150, 150], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([32, 1000], f16), T([32], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([32, 1000], f16), T([32], i64), None, 1, -100), {}) +Operator: aten.relu.default +cnt: 1, ((T([32, 256, 38, 38], f16),), {}) +cnt: 17, ((T([32, 728, 19, 19], f16),), {}) +Operator: aten.relu_.default +cnt: 1, ((T([32, 32, 150, 150], f16),), {}) +cnt: 1, ((T([32, 64, 150, 150], f16),), {}) +cnt: 2, ((T([32, 128, 150, 150], f16),), {}) +cnt: 1, ((T([32, 128, 75, 75], f16),), {}) +cnt: 2, ((T([32, 256, 75, 75], f16),), {}) +cnt: 2, ((T([32, 728, 38, 38], f16),), {}) +cnt: 33, ((T([32, 728, 19, 19], f16),), {}) +cnt: 1, ((T([32, 1024, 19, 19], f16),), {}) +cnt: 1, ((T([32, 1024, 10, 10], f16),), {}) +cnt: 2, ((T([32, 1536, 10, 10], f16),), {}) +cnt: 1, ((T([32, 2048, 10, 10], f16),), {}) 
+Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([32, 2048, 10, 10], f16), T([32, 2048, 10, 10], f16), 0), {}) +cnt: 2, ((T([32, 1536, 10, 10], f16), T([32, 1536, 10, 10], f16), 0), {}) +cnt: 1, ((T([32, 1024, 10, 10], f16), T([32, 1024, 10, 10], f16), 0), {}) +cnt: 1, ((T([32, 1024, 19, 19], f16), T([32, 1024, 19, 19], f16), 0), {}) +cnt: 50, ((T([32, 728, 19, 19], f16), T([32, 728, 19, 19], f16), 0), {}) +cnt: 2, ((T([32, 728, 38, 38], f16), T([32, 728, 38, 38], f16), 0), {}) +cnt: 1, ((T([32, 256, 38, 38], f16), T([32, 256, 38, 38], f16), 0), {}) +cnt: 2, ((T([32, 256, 75, 75], f16), T([32, 256, 75, 75], f16), 0), {}) +cnt: 1, ((T([32, 128, 75, 75], f16), T([32, 128, 75, 75], f16), 0), {}) +cnt: 2, ((T([32, 128, 150, 150], f16), T([32, 128, 150, 150], f16), 0), {}) +cnt: 1, ((T([32, 64, 150, 150], f16), T([32, 64, 150, 150], f16), 0), {}) +cnt: 1, ((T([32, 32, 150, 150], f16), T([32, 32, 150, 150], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gmixer_24_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gmixer_24_224_training.txt new file mode 100644 index 000000000..3e4deb286 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gmixer_24_224_training.txt @@ -0,0 +1,83 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 24, ((T([64, 384, 384], f16), [64, 384, 384]), {}) +cnt: 24, ((T([64, 384, 196], f16), [24576, 196]), {}) +Operator: aten.add.Tensor +cnt: 24, ((T([64, 384, 384], f16), T([384], f16)), {}) +cnt: 24, ((T([64, 196, 384], f16, stride=(75264, 1, 196)), T([64, 196, 384], f16, stride=(75264, 1, 196))), {}) +cnt: 24, ((T([64, 196, 384], f16, stride=(75264, 1, 196)), T([64, 196, 384], f16)), 
{}) +cnt: 24, ((T([64, 196, 384], f16), T([64, 196, 384], f16)), {}) +cnt: 24, ((T([64, 196, 384], f16), T([64, 196, 384], f16, stride=(75264, 1, 196))), {}) +Operator: aten.addmm.default +cnt: 24, ((T([196], f16), T([24576, 192], f16), T([192, 196], f16, stride=(1, 192))), {}) +cnt: 24, ((T([1536], f16), T([12544, 384], f16), T([384, 1536], f16, stride=(1, 384))), {}) +cnt: 24, ((T([384], f16), T([12544, 768], f16), T([768, 384], f16, stride=(1, 768))), {}) +cnt: 1, ((T([1000], f16), T([64, 384], f16), T([384, 1000], f16, stride=(1, 384))), {}) +Operator: aten.bmm.default +cnt: 24, ((T([64, 384, 196], f16, stride=(75264, 1, 384)), T([64, 196, 384], f16, stride=(0, 1, 196))), {}) +cnt: 24, ((T([64, 196, 384], f16), T([64, 384, 384], f16)), {}) +cnt: 24, ((T([64, 384, 384], f16), T([64, 384, 196], f16, stride=(0, 196, 1))), {}) +Operator: aten.cat.default +cnt: 24, (([T([64, 196, 768], f16), T([64, 196, 768], f16)], 2), {}) +cnt: 24, (([T([64, 384, 192], f16), T([64, 384, 192], f16)], 2), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([384, 3, 16, 16], f16), T([384], f16), [16, 16], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 384, 14, 14], f16, stride=(75264, 1, 5376, 384)), T([64, 3, 224, 224], f16), T([384, 3, 16, 16], f16), [384], [16, 16], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +cnt: 24, ((T([384, 196], f16), T([384, 196], f16, stride=(1, 384))), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 196, 384], f16, stride=(384, 0, 1)), 196), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 196, 384], f16), [1]), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 384], f16)), {}) +cnt: 1, ((T([1000, 
64], f16, stride=(1, 1000)), T([64, 384], f16)), {}) +cnt: 24, ((T([12544, 384], f16), T([384, 768], f16)), {}) +cnt: 24, ((T([384, 12544], f16, stride=(1, 384)), T([12544, 768], f16)), {}) +cnt: 24, ((T([12544, 1536], f16), T([1536, 384], f16)), {}) +cnt: 24, ((T([1536, 12544], f16, stride=(1, 1536)), T([12544, 384], f16)), {}) +cnt: 24, ((T([24576, 196], f16), T([196, 192], f16)), {}) +cnt: 24, ((T([196, 24576], f16, stride=(1, 196)), T([24576, 192], f16)), {}) +Operator: aten.mul.Tensor +cnt: 24, ((T([64, 384, 192], f16, stride=(147456, 384, 1)), T([64, 384, 192], f16)), {}) +cnt: 24, ((T([64, 196, 768], f16, stride=(301056, 1536, 1)), T([64, 196, 768], f16)), {}) +cnt: 24, ((T([64, 196, 768], f16), T([64, 196, 768], f16, stride=(301056, 1536, 1))), {}) +cnt: 24, ((T([64, 196, 768], f16), T([64, 196, 768], f16)), {}) +cnt: 24, ((T([64, 384, 192], f16), T([64, 384, 192], f16, stride=(147456, 384, 1))), {}) +cnt: 24, ((T([64, 384, 192], f16), T([64, 384, 192], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 49, ((T([64, 196, 384], f16, stride=(75264, 1, 196)), [384], T([384], f16), T([384], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([64, 196, 384], f16), T([64, 196, 384], f16, stride=(75264, 1, 196)), [384], T([64, 196, 1], f32), T([64, 196, 1], f32), T([384], f16), T([384], f16), [True, True, True]), {}) +cnt: 24, ((T([64, 196, 384], f16, stride=(75264, 1, 196)), T([64, 196, 384], f16, stride=(75264, 1, 196)), [384], T([64, 196, 1], f32), T([64, 196, 1], f32), T([384], f16), T([384], f16), [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 24, ((T([384, 196], f16, stride=(1, 384)), [384, 196], [196, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), 
{}) +Operator: aten.silu.default +cnt: 24, ((T([64, 384, 192], f16, stride=(147456, 384, 1)),), {}) +cnt: 24, ((T([64, 196, 768], f16, stride=(301056, 1536, 1)),), {}) +Operator: aten.silu_backward.default +cnt: 24, ((T([64, 196, 768], f16), T([64, 196, 768], f16, stride=(301056, 1536, 1))), {}) +cnt: 24, ((T([64, 384, 192], f16), T([64, 384, 192], f16, stride=(147456, 384, 1))), {}) +Operator: aten.split.Tensor +cnt: 24, ((T([64, 384, 384], f16), 192, -1), {}) +cnt: 24, ((T([64, 196, 1536], f16), 768, -1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 24, ((T([12544, 384], f16), [0], True), {}) +cnt: 24, ((T([12544, 1536], f16), [0], True), {}) +cnt: 24, ((T([24576, 196], f16), [0], True), {}) +cnt: 24, ((T([64, 384, 384], f16), [0, 1], True), {}) +cnt: 24, ((T([64, 196, 384], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gmlp_s16_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gmlp_s16_224_training.txt new file mode 100644 index 000000000..81057185f --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/gmlp_s16_224_training.txt @@ -0,0 +1,70 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 30, ((T([64, 768, 196], f16), [64, 768, 196]), {}) +Operator: aten.add.Tensor +cnt: 30, ((T([64, 768, 196], f16), T([196], f16)), {}) +cnt: 30, ((T([64, 196, 256], f16, stride=(50176, 1, 196)), T([64, 196, 256], f16)), {}) +cnt: 30, ((T([64, 196, 256], f16), T([64, 196, 256], f16)), {}) +Operator: aten.addmm.default +cnt: 30, ((T([1536], f16), T([12544, 256], f16), T([256, 1536], f16, stride=(1, 256))), {}) +cnt: 30, ((T([256], f16), T([12544, 768], f16), T([768, 256], f16, stride=(1, 768))), {}) +cnt: 1, ((T([1000], f16), T([64, 256], f16), 
T([256, 1000], f16, stride=(1, 256))), {}) +Operator: aten.bmm.default +cnt: 30, ((T([64, 768, 196], f16, stride=(150528, 1, 768)), T([64, 196, 196], f16, stride=(0, 1, 196))), {}) +cnt: 30, ((T([64, 196, 768], f16), T([64, 768, 196], f16, stride=(150528, 1, 768))), {}) +cnt: 30, ((T([64, 768, 196], f16, stride=(150528, 1, 768)), T([64, 196, 196], f16, stride=(0, 196, 1))), {}) +Operator: aten.cat.default +cnt: 30, (([T([64, 196, 768], f16), T([64, 196, 768], f16, stride=(150528, 1, 196))], 2), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([256, 3, 16, 16], f16), T([256], f16), [16, 16], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 256, 14, 14], f16, stride=(50176, 1, 3584, 256)), T([64, 3, 224, 224], f16), T([256, 3, 16, 16], f16), [256], [16, 16], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +cnt: 30, ((T([196, 196], f16), T([196, 196], f16, stride=(1, 196))), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 196, 256], f16, stride=(256, 0, 1)), 196), {}) +Operator: aten.gelu.default +cnt: 30, ((T([64, 196, 1536], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 30, ((T([64, 196, 1536], f16), T([64, 196, 1536], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 196, 256], f16), [1]), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 256], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 256], f16)), {}) +cnt: 30, ((T([12544, 256], f16), T([256, 768], f16)), {}) +cnt: 30, ((T([256, 12544], f16, stride=(1, 256)), T([12544, 768], f16)), {}) +cnt: 30, ((T([12544, 1536], f16), T([1536, 256], f16)), {}) +cnt: 30, ((T([1536, 12544], f16, stride=(1, 1536)), T([12544, 256], f16)), 
{}) +Operator: aten.mul.Tensor +cnt: 30, ((T([64, 196, 768], f16, stride=(301056, 1536, 1)), T([64, 196, 768], f16, stride=(150528, 1, 196))), {}) +cnt: 30, ((T([64, 196, 768], f16), T([64, 196, 768], f16, stride=(301056, 1536, 1))), {}) +cnt: 30, ((T([64, 196, 768], f16), T([64, 196, 768], f16, stride=(150528, 1, 196))), {}) +Operator: aten.native_layer_norm.default +cnt: 31, ((T([64, 196, 256], f16, stride=(50176, 1, 196)), [256], T([256], f16), T([256], f16), 1e-06), {}) +cnt: 30, ((T([64, 196, 768], f16, stride=(301056, 1536, 1)), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 31, ((T([64, 196, 256], f16), T([64, 196, 256], f16, stride=(50176, 1, 196)), [256], T([64, 196, 1], f32), T([64, 196, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +cnt: 30, ((T([64, 196, 768], f16, stride=(150528, 1, 196)), T([64, 196, 768], f16, stride=(301056, 1536, 1)), [768], T([64, 196, 1], f32), T([64, 196, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 30, ((T([196, 196], f16, stride=(1, 196)), [196, 196], [196, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.split.Tensor +cnt: 30, ((T([64, 196, 1536], f16), 768, -1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 30, ((T([12544, 256], f16), [0], True), {}) +cnt: 30, ((T([64, 768, 196], f16, stride=(150528, 1, 768)), [0, 1], True), {}) +cnt: 30, ((T([64, 196, 196], f16), [0], True), {}) +cnt: 30, ((T([12544, 1536], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/hardcorenas_a_training.txt 
b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/hardcorenas_a_training.txt new file mode 100644 index 000000000..18f12cb61 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/hardcorenas_a_training.txt @@ -0,0 +1,260 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 34, ((T([], i64), 1), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16)), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16)), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16)), {}) +cnt: 2, ((T([128, 192, 7, 7], f16), T([128, 192, 7, 7], f16)), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16)), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +cnt: 1, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 240, 28, 28], f16),), {}) +cnt: 1, ((T([128, 240, 14, 14], f16),), {}) +cnt: 4, ((T([128, 480, 14, 14], f16),), {}) +cnt: 3, ((T([128, 672, 14, 14], f16),), {}) +cnt: 1, ((T([128, 672, 7, 7], f16),), {}) +cnt: 2, ((T([128, 1152, 7, 7], f16),), {}) +cnt: 1, ((T([128, 960, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 1, 1], f16),), {}) +Operator: 
aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([48, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([48, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 48), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([24, 48, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([72, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([72, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 72), {}) +cnt: 1, ((T([128, 72, 1, 1], f16), T([24, 72, 1, 1], f16), T([24], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([72, 24, 1, 1], f16), T([72], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([24, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([72, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 72), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([40, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([240, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 240), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([64, 240, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 64, 1, 1], f16), T([240, 64, 1, 1], f16), T([240], f16), [1, 1], [0, 0], [1, 1], 
False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([40, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([240, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([480, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([480, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 480), {}) +cnt: 2, ((T([128, 480, 1, 1], f16), T([120, 480, 1, 1], f16), T([120], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 120, 1, 1], f16), T([480, 120, 1, 1], f16), T([480], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([80, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([112, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([672, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 2, ((T([128, 672, 1, 1], f16), T([168, 672, 1, 1], f16), T([168], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 168, 1, 1], f16), T([672, 168, 1, 1], f16), T([672], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([112, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([192, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), 
None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1152), {}) +cnt: 1, ((T([128, 1152, 1, 1], f16), T([288, 1152, 1, 1], f16), T([288], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 288, 1, 1], f16), T([1152, 288, 1, 1], f16), T([1152], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 7, 7], f16), T([960, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 960, 1, 1], f16), T([1280, 960, 1, 1], f16), T([1280], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1280, 1, 1], f16), T([128, 960, 1, 1], f16), T([1280, 960, 1, 1], f16), [1280], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([128, 192, 7, 7], f16), T([960, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 7, 7], f16), T([128, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1152, 1, 1], f16), T([128, 288, 1, 1], f16), T([1152, 288, 1, 1], f16), [1152], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 288, 1, 1], f16), T([128, 1152, 1, 1], f16), T([288, 1152, 1, 1], f16), [288], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([128, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, 
((T([128, 192, 7, 7], f16), T([128, 672, 7, 7], f16), T([192, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 672, 1, 1], f16), T([128, 168, 1, 1], f16), T([672, 168, 1, 1], f16), [672], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 168, 1, 1], f16), T([128, 672, 1, 1], f16), T([168, 672, 1, 1], f16), [168], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([128, 112, 14, 14], f16), T([672, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([128, 672, 14, 14], f16), T([112, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([128, 480, 14, 14], f16), T([112, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 480, 1, 1], f16), T([128, 120, 1, 1], f16), T([480, 120, 1, 1], f16), [480], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 120, 1, 1], f16), T([128, 480, 1, 1], f16), T([120, 480, 1, 1], f16), [120], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 80, 14, 14], f16), T([480, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 14, 14], 
f16), T([128, 480, 14, 14], f16), T([80, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([128, 240, 14, 14], f16), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([128, 64, 1, 1], f16), T([240, 64, 1, 1], f16), [240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 64, 1, 1], f16), T([128, 240, 1, 1], f16), T([64, 240, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 28, 28], f16), T([240, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 2, ((T([128, 240, 28, 28], f16), T([128, 40, 28, 28], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([128, 240, 28, 28], f16), T([40, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16), T([240, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([128, 72, 28, 28], f16), T([40, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 56, 56], f16), T([72, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 2, ((T([128, 72, 56, 56], f16), T([128, 24, 56, 56], f16), T([72, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 72, 56, 56], f16), T([24, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 72, 1, 1], f16), T([128, 24, 1, 1], f16), T([72, 24, 1, 
1], f16), [72], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([128, 72, 1, 1], f16), T([24, 72, 1, 1], f16), [24], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), T([72, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 48, 56, 56], f16), T([24, 48, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 112, 112], f16), T([48, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 48, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([128, 16, 112, 112], f16), T([48, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 960, 7, 7], f16, stride=(960, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16, stride=(1152, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 672, 7, 7], f16, stride=(672, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 672, 14, 14], f16, stride=(672, 1, 0, 0)), 196), {}) +cnt: 2, ((T([128, 480, 14, 14], f16, stride=(480, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 240, 14, 14], f16, stride=(240, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 240, 28, 28], f16, stride=(240, 1, 
0, 0)), 784), {}) +cnt: 1, ((T([128, 72, 56, 56], f16, stride=(72, 1, 0, 0)), 3136), {}) +Operator: aten.hardsigmoid.default +cnt: 1, ((T([128, 72, 1, 1], f16),), {}) +cnt: 2, ((T([128, 240, 1, 1], f16),), {}) +cnt: 2, ((T([128, 480, 1, 1], f16),), {}) +cnt: 2, ((T([128, 672, 1, 1], f16),), {}) +cnt: 1, ((T([128, 1152, 1, 1], f16),), {}) +Operator: aten.hardsigmoid_backward.default +cnt: 1, ((T([128, 1152, 1, 1], f16), T([128, 1152, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 1, 1], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 2, ((T([128, 480, 1, 1], f16), T([128, 480, 1, 1], f16)), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([128, 240, 1, 1], f16)), {}) +cnt: 1, ((T([128, 72, 1, 1], f16), T([128, 72, 1, 1], f16)), {}) +Operator: aten.hardswish_.default +cnt: 1, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 240, 28, 28], f16),), {}) +cnt: 1, ((T([128, 240, 14, 14], f16),), {}) +cnt: 4, ((T([128, 480, 14, 14], f16),), {}) +cnt: 3, ((T([128, 672, 14, 14], f16),), {}) +cnt: 1, ((T([128, 672, 7, 7], f16),), {}) +cnt: 2, ((T([128, 1152, 7, 7], f16),), {}) +cnt: 1, ((T([128, 960, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 1, 1], f16),), {}) +Operator: aten.hardswish_backward.default +cnt: 1, ((T([128, 1280, 1, 1], f16), T([128, 1280, 1, 1], f16)), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16)), {}) +cnt: 2, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 3, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 4, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16)), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 72, 56, 56], f16), [2, 3], True), {}) 
+cnt: 1, ((T([128, 240, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1280], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([128, 72, 56, 56], f16), T([128, 72, 1, 1], f16)), {}) +cnt: 2, ((T([128, 240, 28, 28], f16), T([128, 240, 1, 1], f16)), {}) +cnt: 2, ((T([128, 240, 14, 14], f16), T([128, 240, 1, 1], f16)), {}) +cnt: 4, ((T([128, 480, 14, 14], f16), T([128, 480, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 7, 7], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 2, ((T([128, 1152, 7, 7], f16), T([128, 1152, 1, 1], f16)), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16)), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 48, 56, 
56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 1e-05, [True, True, True]), {}) 
+cnt: 2, ((T([128, 192, 7, 7], f16), T([128, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f32), T([1152], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 72, 56, 56], f16), T([128, 
72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([128, 48, 112, 112], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 48, 112, 112], f16),), {}) +cnt: 1, ((T([128, 48, 56, 56], f16),), {}) +cnt: 3, ((T([128, 72, 56, 56], f16),), {}) +cnt: 1, ((T([128, 24, 1, 1], f16),), {}) +cnt: 1, ((T([128, 72, 28, 28], f16),), {}) +cnt: 2, ((T([128, 240, 28, 28], f16),), {}) +cnt: 2, ((T([128, 64, 1, 1], f16),), {}) +cnt: 2, ((T([128, 120, 1, 1], f16),), {}) +cnt: 2, ((T([128, 168, 1, 1], f16),), {}) +cnt: 1, ((T([128, 288, 1, 1], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), [2, 3], True), 
{}) +cnt: 2, ((T([128, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 288, 1, 1], f16), T([128, 288, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 168, 1, 1], f16), T([128, 168, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 120, 1, 1], f16), T([128, 120, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 64, 1, 1], f16), T([128, 64, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16), 0), {}) +cnt: 3, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([128, 24, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([128, 48, 112, 112], f16), 0), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/hrnet_w18_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/hrnet_w18_training.txt new file mode 100644 index 000000000..cf63431ee --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/hrnet_w18_training.txt @@ -0,0 +1,247 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 69, ((T([128, 18, 56, 56], f16), T([128, 18, 56, 56], f16)), {}) +cnt: 70, ((T([128, 36, 28, 28], f16), T([128, 36, 28, 28], f16)), {}) +cnt: 64, ((T([128, 72, 14, 14], f16), T([128, 72, 14, 14], f16)), {}) +cnt: 31, ((T([128, 144, 7, 7], f16), T([128, 144, 7, 7], f16)), {}) +cnt: 1, ((T([128, 256, 28, 28], f16), T([128, 256, 28, 28], 
f16)), {}) +cnt: 1, ((T([128, 512, 14, 14], f16), T([128, 512, 14, 14], f16)), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16)), {}) +cnt: 4, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 325, ((T([], i64), 1), {}) +cnt: 4, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), {}) +cnt: 32, ((T([128, 18, 56, 56], f16), T([128, 18, 56, 56], f16)), {}) +cnt: 32, ((T([128, 36, 28, 28], f16), T([128, 36, 28, 28], f16)), {}) +cnt: 28, ((T([128, 72, 14, 14], f16), T([128, 72, 14, 14], f16)), {}) +cnt: 12, ((T([128, 144, 7, 7], f16), T([128, 144, 7, 7], f16)), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16)), {}) +cnt: 1, ((T([128, 256, 28, 28], f16), T([128, 256, 28, 28], f16)), {}) +cnt: 1, ((T([128, 512, 14, 14], f16), T([128, 512, 14, 14], f16)), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([64, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([64, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 64, 56, 56], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 64, 56, 56], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 256, 56, 56], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([18, 256, 3, 3], f16), None, [1, 
1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([36, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 64, ((T([128, 18, 56, 56], f16), T([18, 18, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 64, ((T([128, 36, 28, 28], f16), T([36, 36, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([128, 36, 28, 28], f16), T([18, 36, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([128, 18, 56, 56], f16), T([36, 18, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([128, 36, 28, 28], f16), T([72, 36, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 56, ((T([128, 72, 14, 14], f16), T([72, 72, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 72, 14, 14], f16), T([18, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 72, 14, 14], f16), T([36, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 10, ((T([128, 18, 56, 56], f16), T([18, 18, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 18, 28, 28], f16), T([72, 18, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 72, 14, 14], f16), T([144, 72, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 24, ((T([128, 144, 7, 7], f16), T([144, 144, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 144, 7, 7], f16), T([18, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 144, 7, 7], f16), T([36, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 144, 7, 7], f16), T([72, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 18, 28, 28], f16), T([18, 18, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 
1), {}) +cnt: 3, ((T([128, 18, 14, 14], f16), T([144, 18, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 36, 28, 28], f16), T([36, 36, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 36, 14, 14], f16), T([144, 36, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 18, 56, 56], f16), T([32, 18, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([32, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 18, 56, 56], f16), T([128, 18, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 36, 28, 28], f16), T([64, 36, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 36, 28, 28], f16), T([256, 36, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([256, 128, 3, 3], f16), T([256], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 14, 14], f16), T([128, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([512, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 14, 14], f16), T([512, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 28, 28], f16), T([512, 256, 3, 3], f16), T([512], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), 
{}) +cnt: 1, ((T([128, 144, 7, 7], f16), T([256, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([1024, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 7, 7], f16), T([1024, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 14, 14], f16), T([1024, 512, 3, 3], f16), T([1024], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([2048, 1024, 1, 1], f16), T([2048], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 2048, 7, 7], f16), T([128, 1024, 7, 7], f16), T([2048, 1024, 1, 1], f16), [2048], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 512, 14, 14], f16), T([1024, 512, 3, 3], f16), [1024], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 144, 7, 7], f16), T([1024, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 256, 7, 7], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([128, 144, 7, 7], f16), T([256, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 14, 14], f16), T([128, 256, 28, 28], f16), T([512, 256, 3, 3], f16), [512], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 512, 14, 14], f16), T([128, 72, 14, 14], f16), 
T([512, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 14, 14], f16), T([128, 128, 14, 14], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([128, 72, 14, 14], f16), T([128, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 28, 28], f16), T([128, 128, 56, 56], f16), T([256, 128, 3, 3], f16), [256], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 28, 28], f16), T([128, 36, 28, 28], f16), T([256, 36, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 28, 28], f16), T([128, 64, 28, 28], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 36, 28, 28], f16), T([64, 36, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 18, 56, 56], f16), T([128, 18, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 32, 56, 56], f16), T([128, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([32, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 18, 56, 56], f16), T([32, 18, 1, 1], f16), [0], 
[1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 144, 7, 7], f16), T([128, 72, 14, 14], f16), T([144, 72, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 144, 7, 7], f16), T([128, 36, 14, 14], f16), T([144, 36, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 36, 14, 14], f16), T([128, 36, 28, 28], f16), T([36, 36, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 144, 7, 7], f16), T([128, 18, 14, 14], f16), T([144, 18, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 18, 14, 14], f16), T([128, 18, 28, 28], f16), T([18, 18, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 10, ((T([128, 18, 28, 28], f16), T([128, 18, 56, 56], f16), T([18, 18, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 72, 7, 7], f16), T([128, 144, 7, 7], f16), T([72, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([128, 72, 14, 14], f16), T([128, 36, 28, 28], f16), T([72, 36, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([128, 72, 14, 14], f16), T([128, 18, 28, 28], f16), T([72, 18, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 36, 7, 7], f16), T([128, 144, 7, 7], f16), T([36, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([128, 36, 14, 14], f16), T([128, 72, 14, 14], f16), T([36, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([128, 36, 28, 28], f16), T([128, 18, 56, 56], f16), T([36, 18, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, 
False]), {}) +cnt: 3, ((T([128, 18, 7, 7], f16), T([128, 144, 7, 7], f16), T([18, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([128, 18, 14, 14], f16), T([128, 72, 14, 14], f16), T([18, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([128, 18, 28, 28], f16), T([128, 36, 28, 28], f16), T([18, 36, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 24, ((T([128, 144, 7, 7], f16), T([128, 144, 7, 7], f16), T([144, 144, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 56, ((T([128, 72, 14, 14], f16), T([128, 72, 14, 14], f16), T([72, 72, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 64, ((T([128, 36, 28, 28], f16), T([128, 36, 28, 28], f16), T([36, 36, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 64, ((T([128, 18, 56, 56], f16), T([128, 18, 56, 56], f16), T([18, 18, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 36, 28, 28], f16), T([128, 256, 56, 56], f16), T([36, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 18, 56, 56], f16), T([128, 256, 56, 56], f16), T([18, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 256, 56, 56], f16), T([128, 64, 56, 56], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 64, 56, 56], f16), T([128, 256, 56, 56], f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 56, 56], 
f16), T([128, 64, 56, 56], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 112, 112], f16), T([64, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 3, 224, 224], f16), T([64, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 9, ((T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 65, ((T([128, 18, 56, 56], f16), T([18], f16), T([18], f16), T([18], f16), T([18], f16), True, 0.1, 1e-05), {}) +cnt: 73, ((T([128, 36, 28, 28], f16), T([36], f16), T([36], f16), T([36], f16), T([36], f16), True, 0.1, 1e-05), {}) +cnt: 18, ((T([128, 18, 28, 28], f16), T([18], f16), T([18], f16), T([18], f16), T([18], f16), True, 0.1, 1e-05), {}) +cnt: 71, ((T([128, 72, 14, 14], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 10, ((T([128, 18, 14, 14], f16), T([18], f16), T([18], f16), T([18], f16), T([18], f16), True, 0.1, 1e-05), {}) +cnt: 10, ((T([128, 36, 14, 14], 
f16), T([36], f16), T([36], f16), T([36], f16), T([36], f16), True, 0.1, 1e-05), {}) +cnt: 34, ((T([128, 144, 7, 7], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 18, 7, 7], f16), T([18], f16), T([18], f16), T([18], f16), T([18], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 36, 7, 7], f16), T([36], f16), T([36], f16), T([36], f16), T([36], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 72, 7, 7], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 64, 28, 28], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 256, 7, 7], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 2048, 7, 7], f16), T([128, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], 
f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 512, 14, 14], f16), T([128, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 256, 28, 28], f16), T([128, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 34, ((T([128, 144, 7, 7], f16), T([128, 144, 7, 7], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 1e-05, [True, True, True]), {}) +cnt: 10, ((T([128, 36, 14, 14], f16), T([128, 36, 14, 14], f16), T([36], f16), T([36], f16), T([36], f16), T([36], f32), T([36], f32), True, 1e-05, [True, True, True]), {}) +cnt: 10, ((T([128, 18, 14, 14], f16), T([128, 18, 14, 14], f16), T([18], f16), T([18], f16), T([18], f16), T([18], f32), T([18], f32), True, 1e-05, [True, True, True]), {}) +cnt: 18, ((T([128, 18, 28, 28], f16), T([128, 18, 28, 28], f16), T([18], f16), T([18], f16), T([18], f16), T([18], f32), T([18], f32), True, 1e-05, [True, True, True]), 
{}) +cnt: 3, ((T([128, 72, 7, 7], f16), T([128, 72, 7, 7], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 71, ((T([128, 72, 14, 14], f16), T([128, 72, 14, 14], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 36, 7, 7], f16), T([128, 36, 7, 7], f16), T([36], f16), T([36], f16), T([36], f16), T([36], f32), T([36], f32), True, 1e-05, [True, True, True]), {}) +cnt: 73, ((T([128, 36, 28, 28], f16), T([128, 36, 28, 28], f16), T([36], f16), T([36], f16), T([36], f16), T([36], f32), T([36], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 18, 7, 7], f16), T([128, 18, 7, 7], f16), T([18], f16), T([18], f16), T([18], f16), T([18], f32), T([18], f32), True, 1e-05, [True, True, True]), {}) +cnt: 65, ((T([128, 18, 56, 56], f16), T([128, 18, 56, 56], f16), T([18], f16), T([18], f16), T([18], f16), T([18], f32), T([18], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 9, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu.default +cnt: 8, ((T([128, 18, 56, 56], f16),), {}) +cnt: 8, ((T([128, 36, 28, 28], f16),), {}) +cnt: 10, ((T([128, 18, 28, 28], f16),), {}) +cnt: 7, ((T([128, 72, 14, 14], 
f16),), {}) +cnt: 3, ((T([128, 18, 14, 14], f16),), {}) +cnt: 3, ((T([128, 36, 14, 14], f16),), {}) +cnt: 3, ((T([128, 144, 7, 7], f16),), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 64, 112, 112], f16),), {}) +cnt: 9, ((T([128, 64, 56, 56], f16),), {}) +cnt: 4, ((T([128, 256, 56, 56], f16),), {}) +cnt: 65, ((T([128, 18, 56, 56], f16),), {}) +cnt: 65, ((T([128, 36, 28, 28], f16),), {}) +cnt: 57, ((T([128, 72, 14, 14], f16),), {}) +cnt: 25, ((T([128, 144, 7, 7], f16),), {}) +cnt: 2, ((T([128, 32, 56, 56], f16),), {}) +cnt: 1, ((T([128, 128, 56, 56], f16),), {}) +cnt: 2, ((T([128, 64, 28, 28], f16),), {}) +cnt: 2, ((T([128, 256, 28, 28], f16),), {}) +cnt: 2, ((T([128, 128, 14, 14], f16),), {}) +cnt: 2, ((T([128, 512, 14, 14], f16),), {}) +cnt: 2, ((T([128, 256, 7, 7], f16),), {}) +cnt: 2, ((T([128, 1024, 7, 7], f16),), {}) +cnt: 1, ((T([128, 2048, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 2048, 7, 7], f16), T([128, 2048, 7, 7], f16), 0), {}) +cnt: 2, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16), 0), {}) +cnt: 2, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), 0), {}) +cnt: 2, ((T([128, 512, 14, 14], f16), T([128, 512, 14, 14], f16), 0), {}) +cnt: 2, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), 0), {}) +cnt: 2, ((T([128, 256, 28, 28], f16), T([128, 256, 28, 28], f16), 0), {}) +cnt: 2, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), 0), {}) +cnt: 2, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), 0), {}) +cnt: 28, ((T([128, 144, 7, 7], f16), T([128, 144, 7, 7], f16), 0), {}) +cnt: 3, ((T([128, 36, 14, 14], f16), T([128, 36, 14, 14], f16), 0), {}) +cnt: 3, ((T([128, 18, 14, 14], f16), T([128, 18, 14, 14], f16), 0), {}) +cnt: 10, ((T([128, 18, 28, 28], f16), T([128, 18, 28, 28], f16), 0), {}) +cnt: 64, ((T([128, 72, 14, 14], f16), 
T([128, 72, 14, 14], f16), 0), {}) +cnt: 73, ((T([128, 36, 28, 28], f16), T([128, 36, 28, 28], f16), 0), {}) +cnt: 73, ((T([128, 18, 56, 56], f16), T([128, 18, 56, 56], f16), 0), {}) +cnt: 4, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16), 0), {}) +cnt: 9, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), 0), {}) +Operator: aten.upsample_nearest2d.vec +cnt: 8, ((T([128, 18, 28, 28], f16), None, [2.0, 2.0]), {}) +cnt: 7, ((T([128, 18, 14, 14], f16), None, [4.0, 4.0]), {}) +cnt: 7, ((T([128, 36, 14, 14], f16), None, [2.0, 2.0]), {}) +cnt: 3, ((T([128, 18, 7, 7], f16), None, [8.0, 8.0]), {}) +cnt: 3, ((T([128, 36, 7, 7], f16), None, [4.0, 4.0]), {}) +cnt: 3, ((T([128, 72, 7, 7], f16), None, [2.0, 2.0]), {}) +Operator: aten.upsample_nearest2d_backward.vec +cnt: 3, ((T([128, 72, 14, 14], f16), None, [128, 72, 7, 7], [2.0, 2.0]), {}) +cnt: 3, ((T([128, 36, 28, 28], f16), None, [128, 36, 7, 7], [4.0, 4.0]), {}) +cnt: 7, ((T([128, 36, 28, 28], f16), None, [128, 36, 14, 14], [2.0, 2.0]), {}) +cnt: 3, ((T([128, 18, 56, 56], f16), None, [128, 18, 7, 7], [8.0, 8.0]), {}) +cnt: 7, ((T([128, 18, 56, 56], f16), None, [128, 18, 14, 14], [4.0, 4.0]), {}) +cnt: 8, ((T([128, 18, 56, 56], f16), None, [128, 18, 28, 28], [2.0, 2.0]), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/inception_v3_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/inception_v3_training.txt new file mode 100644 index 000000000..c11cd6890 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/inception_v3_training.txt @@ -0,0 +1,239 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16)), {}) +cnt: 3, 
((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16)), {}) +cnt: 3, ((T([128, 1280, 8, 8], f16), T([128, 1280, 8, 8], f16)), {}) +cnt: 14, ((T([128, 768, 17, 17], f16), T([128, 768, 17, 17], f16)), {}) +cnt: 5, ((T([128, 288, 35, 35], f16), T([128, 288, 35, 35], f16)), {}) +cnt: 3, ((T([128, 256, 35, 35], f16), T([128, 256, 35, 35], f16)), {}) +cnt: 3, ((T([128, 192, 35, 35], f16), T([128, 192, 35, 35], f16)), {}) +Operator: aten.add_.Tensor +cnt: 94, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([128, 192, 35, 35], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 256, 35, 35], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 4, ((T([128, 768, 17, 17], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), [3, 3], [1, 1], [1, 1]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([128, 2048, 8, 8], f16), T([128, 2048, 8, 8], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([128, 1280, 8, 8], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 4, ((T([128, 768, 17, 17], f16), T([128, 768, 17, 17], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), T([128, 288, 35, 35], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 256, 35, 35], f16), T([128, 256, 35, 35], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([128, 192, 35, 35], f16), [3, 3], [1, 1], [1, 1], False, True, None), {}) +Operator: aten.cat.default +cnt: 1, (([T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), T([128, 96, 35, 35], f16), T([128, 32, 35, 35], f16)], 1), {}) +cnt: 2, (([T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), 
T([128, 96, 35, 35], f16), T([128, 64, 35, 35], f16)], 1), {}) +cnt: 1, (([T([128, 384, 17, 17], f16), T([128, 96, 17, 17], f16), T([128, 288, 17, 17], f16)], 1), {}) +cnt: 4, (([T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16)], 1), {}) +cnt: 1, (([T([128, 320, 8, 8], f16), T([128, 192, 8, 8], f16), T([128, 768, 8, 8], f16)], 1), {}) +cnt: 4, (([T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16)], 1), {}) +cnt: 2, (([T([128, 320, 8, 8], f16), T([128, 768, 8, 8], f16), T([128, 768, 8, 8], f16), T([128, 192, 8, 8], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 299, 299], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 299, 299], f16), T([32, 3, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([32, 32, 3, 3], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 73, 73], f16), T([80, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([192, 80, 3, 3], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 192, 35, 35], f16), T([64, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([48, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([64, 48, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 64, 35, 35], f16), T([96, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([32, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, 
((T([128, 256, 35, 35], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 35, 35], f16), T([48, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 288, 35, 35], f16), T([64, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), T([48, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), T([384, 288, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 12, ((T([128, 768, 17, 17], f16), T([192, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 768, 17, 17], f16), T([128, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 17, 17], f16), T([192, 128, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 17, 17], f16), T([192, 128, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 768, 17, 17], f16), T([160, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([160, 160, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 160, 17, 17], f16), T([192, 160, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([160, 160, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 160, 17, 17], f16), T([192, 160, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, 
((T([128, 192, 17, 17], f16), T([192, 192, 1, 7], f16), None, [1, 1], [0, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([192, 192, 7, 1], f16), None, [1, 1], [3, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([320, 192, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([192, 192, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([320, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([384, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([384, 384, 1, 3], f16), None, [1, 1], [0, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([384, 384, 3, 1], f16), None, [1, 1], [1, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([448, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([384, 448, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 8, 8], f16), T([192, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([320, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([384, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([448, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 2048, 8, 8], f16), T([192, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 192, 8, 8], f16), T([128, 2048, 8, 8], f16), T([192, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 384, 8, 8], 
f16), T([128, 384, 8, 8], f16), T([384, 384, 3, 1], f16), [0], [1, 1], [1, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), T([384, 384, 1, 3], f16), [0], [1, 1], [0, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 384, 8, 8], f16), T([128, 448, 8, 8], f16), T([384, 448, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 448, 8, 8], f16), T([128, 2048, 8, 8], f16), T([448, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 8, 8], f16), T([128, 2048, 8, 8], f16), T([384, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 8, 8], f16), T([128, 2048, 8, 8], f16), T([320, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 8, 8], f16), T([128, 1280, 8, 8], f16), T([192, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 448, 8, 8], f16), T([128, 1280, 8, 8], f16), T([448, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 8, 8], f16), T([128, 1280, 8, 8], f16), T([384, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 8, 8], f16), T([128, 1280, 8, 8], f16), T([320, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 8, 8], f16), T([128, 192, 17, 17], f16), T([192, 192, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([192, 192, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), 
T([192, 192, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 12, ((T([128, 192, 17, 17], f16), T([128, 768, 17, 17], f16), T([192, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 8, 8], f16), T([128, 192, 17, 17], f16), T([320, 192, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 192, 17, 17], f16), T([128, 160, 17, 17], f16), T([192, 160, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), T([160, 160, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), T([160, 160, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 160, 17, 17], f16), T([128, 768, 17, 17], f16), T([160, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 192, 17, 17], f16), T([128, 160, 17, 17], f16), T([192, 160, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([128, 128, 17, 17], f16), T([192, 128, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), T([128, 128, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), T([128, 128, 1, 7], f16), [0], [1, 1], [0, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 128, 17, 17], f16), T([128, 768, 17, 17], f16), T([128, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 17, 17], f16), T([128, 128, 17, 17], f16), T([192, 
128, 7, 1], f16), [0], [1, 1], [3, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 17, 17], f16), T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 96, 35, 35], f16), T([128, 64, 35, 35], f16), T([96, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 64, 35, 35], f16), T([128, 288, 35, 35], f16), T([64, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 17, 17], f16), T([128, 288, 35, 35], f16), T([384, 288, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 96, 35, 35], f16), T([128, 96, 35, 35], f16), T([96, 96, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 64, 35, 35], f16), T([128, 48, 35, 35], f16), T([64, 48, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 35, 35], f16), T([128, 288, 35, 35], f16), T([48, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 64, 35, 35], f16), T([128, 256, 35, 35], f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 35, 35], f16), T([128, 256, 35, 35], f16), T([48, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 35, 35], f16), T([128, 192, 35, 35], f16), T([32, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 64, 35, 35], f16), T([128, 192, 35, 35], f16), T([64, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 35, 35], f16), T([128, 192, 35, 35], f16), T([48, 192, 1, 1], f16), [0], [1, 1], [0, 
0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([128, 80, 73, 73], f16), T([192, 80, 3, 3], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([128, 64, 73, 73], f16), T([80, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([128, 32, 147, 147], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([128, 32, 149, 149], f16), T([32, 32, 3, 3], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([128, 3, 299, 299], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 299, 299], f16), T([128, 3, 299, 299], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2048, 8, 8], f16, stride=(2048, 1, 0, 0)), 64), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 64, 147, 147], f16), [3, 3], [2, 2]), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), [3, 3], [2, 2]), {}) +cnt: 1, ((T([128, 288, 35, 35], f16), [3, 3], [2, 2]), {}) +cnt: 1, ((T([128, 768, 17, 17], f16), [3, 3], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 768, 8, 8], f16, stride=(81920, 64, 8, 1)), T([128, 768, 17, 17], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 768, 8, 8], i64)), {}) +cnt: 1, ((T([128, 288, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 288, 35, 35], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 288, 17, 17], i64)), {}) +cnt: 1, ((T([128, 192, 35, 35], f16), T([128, 192, 71, 71], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 192, 35, 35], i64)), {}) +cnt: 1, ((T([128, 64, 73, 73], 
f16), T([128, 64, 147, 147], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 64, 73, 73], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 2048, 8, 8], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 32, 149, 149], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 12, ((T([128, 64, 35, 35], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), True, 0.1, 0.001), {}) +cnt: 7, ((T([128, 96, 35, 35], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 32, 35, 35], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 384, 17, 17], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 96, 17, 17], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 0.001), {}) +cnt: 26, ((T([128, 192, 17, 17], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 6, ((T([128, 128, 17, 17], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 0.001), {}) +cnt: 12, ((T([128, 160, 17, 17], f16), T([160], f16), 
T([160], f16), T([160], f16), T([160], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 320, 8, 8], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 192, 8, 8], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 12, ((T([128, 384, 8, 8], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), True, 0.1, 0.001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 3, ((T([128, 192, 8, 8], f16), T([128, 192, 8, 8], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 12, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([128, 448, 8, 8], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([128, 320, 8, 8], f16), T([128, 320, 8, 8], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), True, 0.001, [True, True, True]), {}) +cnt: 26, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 12, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 0.001, [True, True, True]), {}) +cnt: 6, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 17, 17], f16), T([128, 96, 17, 17], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 0.001, 
[True, True, True]), {}) +cnt: 7, ((T([128, 96, 35, 35], f16), T([128, 96, 35, 35], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 0.001, [True, True, True]), {}) +cnt: 12, ((T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 384, 17, 17], f16), T([128, 384, 17, 17], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([128, 48, 35, 35], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 35, 35], f16), T([128, 32, 35, 35], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([128, 192, 71, 71], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([128, 80, 73, 73], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([128, 64, 147, 147], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([128, 32, 147, 147], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([128, 32, 149, 149], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], 
i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 32, 149, 149], f16),), {}) +cnt: 1, ((T([128, 32, 147, 147], f16),), {}) +cnt: 1, ((T([128, 64, 147, 147], f16),), {}) +cnt: 1, ((T([128, 80, 73, 73], f16),), {}) +cnt: 1, ((T([128, 192, 71, 71], f16),), {}) +cnt: 12, ((T([128, 64, 35, 35], f16),), {}) +cnt: 3, ((T([128, 48, 35, 35], f16),), {}) +cnt: 7, ((T([128, 96, 35, 35], f16),), {}) +cnt: 1, ((T([128, 32, 35, 35], f16),), {}) +cnt: 1, ((T([128, 384, 17, 17], f16),), {}) +cnt: 1, ((T([128, 96, 17, 17], f16),), {}) +cnt: 26, ((T([128, 192, 17, 17], f16),), {}) +cnt: 6, ((T([128, 128, 17, 17], f16),), {}) +cnt: 12, ((T([128, 160, 17, 17], f16),), {}) +cnt: 3, ((T([128, 320, 8, 8], f16),), {}) +cnt: 3, ((T([128, 192, 8, 8], f16),), {}) +cnt: 12, ((T([128, 384, 8, 8], f16),), {}) +cnt: 2, ((T([128, 448, 8, 8], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([128, 192, 8, 8], f16, stride=(131072, 64, 8, 1)), T([128, 192, 8, 8], f16), 0), {}) +cnt: 8, ((T([128, 384, 8, 8], f16, stride=(131072, 64, 8, 1)), T([128, 384, 8, 8], f16), 0), {}) +cnt: 4, ((T([128, 384, 8, 8], f16), T([128, 384, 8, 8], f16), 0), {}) +cnt: 2, ((T([128, 448, 8, 8], f16), T([128, 448, 8, 8], f16), 0), {}) +cnt: 2, ((T([128, 320, 8, 8], f16, stride=(131072, 64, 8, 1)), T([128, 320, 8, 8], f16), 0), {}) +cnt: 1, ((T([128, 192, 8, 8], f16, stride=(81920, 64, 8, 1)), T([128, 192, 8, 8], f16), 0), {}) +cnt: 10, ((T([128, 192, 17, 17], f16), T([128, 192, 17, 17], f16), 0), {}) +cnt: 1, ((T([128, 320, 8, 8], f16, stride=(81920, 64, 8, 1)), T([128, 320, 8, 8], f16), 0), {}) +cnt: 16, ((T([128, 192, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 192, 17, 17], f16), 0), {}) +cnt: 12, ((T([128, 160, 17, 17], f16), T([128, 160, 17, 17], f16), 0), {}) +cnt: 6, ((T([128, 128, 17, 17], f16), T([128, 128, 17, 17], f16), 0), {}) +cnt: 1, ((T([128, 96, 17, 17], f16, stride=(221952, 289, 
17, 1)), T([128, 96, 17, 17], f16), 0), {}) +cnt: 4, ((T([128, 96, 35, 35], f16), T([128, 96, 35, 35], f16), 0), {}) +cnt: 4, ((T([128, 64, 35, 35], f16), T([128, 64, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 384, 17, 17], f16, stride=(221952, 289, 17, 1)), T([128, 384, 17, 17], f16), 0), {}) +cnt: 6, ((T([128, 64, 35, 35], f16, stride=(352800, 1225, 35, 1)), T([128, 64, 35, 35], f16), 0), {}) +cnt: 2, ((T([128, 96, 35, 35], f16, stride=(352800, 1225, 35, 1)), T([128, 96, 35, 35], f16), 0), {}) +cnt: 3, ((T([128, 48, 35, 35], f16), T([128, 48, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 32, 35, 35], f16, stride=(313600, 1225, 35, 1)), T([128, 32, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 96, 35, 35], f16, stride=(313600, 1225, 35, 1)), T([128, 96, 35, 35], f16), 0), {}) +cnt: 2, ((T([128, 64, 35, 35], f16, stride=(313600, 1225, 35, 1)), T([128, 64, 35, 35], f16), 0), {}) +cnt: 1, ((T([128, 192, 71, 71], f16), T([128, 192, 71, 71], f16), 0), {}) +cnt: 1, ((T([128, 80, 73, 73], f16), T([128, 80, 73, 73], f16), 0), {}) +cnt: 1, ((T([128, 64, 147, 147], f16), T([128, 64, 147, 147], f16), 0), {}) +cnt: 1, ((T([128, 32, 147, 147], f16), T([128, 32, 147, 147], f16), 0), {}) +cnt: 1, ((T([128, 32, 149, 149], f16), T([128, 32, 149, 149], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/jx_nest_base_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/jx_nest_base_training.txt new file mode 100644 index 000000000..ddb7593f5 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/jx_nest_base_training.txt @@ -0,0 +1,269 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 2, ((T([64, 4, 16, 196, 196], f16), -1, False), {}) +cnt: 2, ((T([64, 8, 4, 196, 196], f16), -1, False), {}) +cnt: 20, ((T([64, 16, 1, 196, 
196], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 20, ((T([64, 16, 1, 196, 196], f16), T([64, 16, 1, 196, 196], f16), -1, f16), {}) +cnt: 2, ((T([64, 8, 4, 196, 196], f16), T([64, 8, 4, 196, 196], f16), -1, f16), {}) +cnt: 2, ((T([64, 4, 16, 196, 196], f16), T([64, 4, 16, 196, 196], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 2, ((T([64, 4, 4, 14, 14, 128], f16), [64, 16, 196, 128]), {}) +cnt: 2, ((T([200704, 384], f16), [64, 16, 196, 384]), {}) +cnt: 6, ((T([64, 4, 16, 196, 32], f16), [4096, 196, 32]), {}) +cnt: 2, ((T([64, 4, 16, 32, 196], f16), [4096, 32, 196]), {}) +cnt: 2, ((T([4096, 196, 196], f16), [64, 4, 16, 196, 196]), {}) +cnt: 2, ((T([4096, 196, 32], f16), [64, 4, 16, 196, 32]), {}) +cnt: 2, ((T([64, 16, 196, 32, 4], f16), [64, 16, 196, 128]), {}) +cnt: 4, ((T([200704, 128], f16), [64, 16, 196, 128]), {}) +cnt: 2, ((T([200704, 512], f16), [64, 16, 196, 512]), {}) +cnt: 2, ((T([64, 4, 14, 4, 14, 128], f16), [64, 56, 56, 128]), {}) +cnt: 2, ((T([64, 2, 2, 14, 14, 256], f16), [64, 4, 196, 256]), {}) +cnt: 2, ((T([50176, 768], f16), [64, 4, 196, 768]), {}) +cnt: 6, ((T([64, 8, 4, 196, 32], f16), [2048, 196, 32]), {}) +cnt: 2, ((T([64, 8, 4, 32, 196], f16), [2048, 32, 196]), {}) +cnt: 2, ((T([2048, 196, 196], f16), [64, 8, 4, 196, 196]), {}) +cnt: 2, ((T([2048, 196, 32], f16), [64, 8, 4, 196, 32]), {}) +cnt: 2, ((T([64, 4, 196, 32, 8], f16), [64, 4, 196, 256]), {}) +cnt: 4, ((T([50176, 256], f16), [64, 4, 196, 256]), {}) +cnt: 2, ((T([50176, 1024], f16), [64, 4, 196, 1024]), {}) +cnt: 2, ((T([64, 2, 14, 2, 14, 256], f16), [64, 28, 28, 256]), {}) +cnt: 20, ((T([12544, 1536], f16), [64, 1, 196, 1536]), {}) +cnt: 60, ((T([64, 16, 1, 196, 32], f16), [1024, 196, 32]), {}) +cnt: 20, ((T([64, 16, 1, 32, 196], f16), [1024, 32, 196]), {}) +cnt: 20, ((T([1024, 196, 196], f16), [64, 16, 1, 196, 196]), {}) +cnt: 20, ((T([1024, 196, 32], f16), [64, 16, 1, 196, 32]), {}) +cnt: 20, ((T([64, 1, 196, 32, 16], f16), [64, 1, 
196, 512]), {}) +cnt: 40, ((T([12544, 512], f16), [64, 1, 196, 512]), {}) +cnt: 20, ((T([12544, 2048], f16), [64, 1, 196, 2048]), {}) +cnt: 40, ((T([64, 1, 196, 512], f16), [12544, 512]), {}) +cnt: 20, ((T([64, 1, 196, 3, 16, 32], f16), [64, 1, 196, 1536]), {}) +cnt: 2, ((T([64, 4, 196, 3, 8, 32], f16), [64, 4, 196, 768]), {}) +cnt: 2, ((T([64, 16, 196, 3, 4, 32], f16), [64, 16, 196, 384]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([64, 16, 196, 128], f16), T([1, 16, 196, 128], f16)), {}) +cnt: 2, ((T([64, 16, 196, 384], f16), T([384], f16)), {}) +cnt: 4, ((T([64, 16, 196, 128], f16), T([128], f16)), {}) +cnt: 8, ((T([64, 16, 196, 128], f16), T([64, 16, 196, 128], f16)), {}) +cnt: 2, ((T([64, 16, 196, 512], f16), T([512], f16)), {}) +cnt: 1, ((T([64, 4, 196, 256], f16), T([1, 4, 196, 256], f16)), {}) +cnt: 2, ((T([64, 4, 196, 768], f16), T([768], f16)), {}) +cnt: 4, ((T([64, 4, 196, 256], f16), T([256], f16)), {}) +cnt: 8, ((T([64, 4, 196, 256], f16), T([64, 4, 196, 256], f16)), {}) +cnt: 2, ((T([64, 4, 196, 1024], f16), T([1024], f16)), {}) +cnt: 1, ((T([64, 1, 196, 512], f16), T([1, 1, 196, 512], f16)), {}) +cnt: 20, ((T([64, 1, 196, 1536], f16), T([1536], f16)), {}) +cnt: 40, ((T([64, 1, 196, 512], f16), T([512], f16)), {}) +cnt: 40, ((T([64, 1, 196, 512], f16), T([64, 1, 196, 512], f16)), {}) +cnt: 20, ((T([64, 1, 196, 2048], f16), T([2048], f16)), {}) +cnt: 40, ((T([64, 1, 196, 512], f16, stride=(100352, 196, 1, 196)), T([64, 1, 196, 512], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([64, 512], f16), T([512, 1000], f16, stride=(1, 512))), {}) +Operator: aten.as_strided_.default +cnt: 1, ((T([64, 512, 1, 1], f16), [64, 512, 1, 1], [512, 1, 512, 512]), {}) +Operator: aten.bernoulli_.float +cnt: 2, ((T([64, 1, 1, 1], f16), 0.9782608691602945), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.9565217383205891), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.9347826093435287), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.9130434766411781), {}) +cnt: 2, 
((T([64, 1, 1, 1], f16), 0.8913043439388275), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.8695652186870575), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.8478260785341263), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.8260869532823563), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.8043478280305862), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.782608687877655), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.760869562625885), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.739130437374115), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.717391312122345), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.695652186870575), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.6739130318164825), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.6521739065647125), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.6304347813129425), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.6086956560611725), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.5869565308094025), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.5652174055576324), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.54347825050354), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.52173912525177), {}) +cnt: 2, ((T([64, 1, 1, 1], f16),), {}) +Operator: aten.bmm.default +cnt: 2, ((T([4096, 196, 32], f16), T([4096, 32, 196], f16)), {}) +cnt: 2, ((T([4096, 196, 196], f16), T([4096, 196, 32], f16)), {}) +cnt: 2, ((T([2048, 196, 32], f16), T([2048, 32, 196], f16)), {}) +cnt: 2, ((T([2048, 196, 196], f16), T([2048, 196, 32], f16)), {}) +cnt: 20, ((T([1024, 196, 32], f16), T([1024, 32, 196], f16)), {}) +cnt: 20, ((T([1024, 196, 196], f16), T([1024, 196, 32], f16)), {}) +cnt: 20, ((T([1024, 196, 196], f16, stride=(38416, 1, 196)), T([1024, 196, 32], f16)), {}) +cnt: 20, ((T([1024, 196, 32], f16), T([1024, 32, 196], f16, stride=(6272, 1, 32))), {}) +cnt: 20, ((T([1024, 32, 196], f16, stride=(6272, 1, 32)), T([1024, 196, 196], f16)), {}) +cnt: 20, ((T([1024, 196, 196], f16), T([1024, 196, 32], f16, stride=(6272, 1, 196))), {}) +cnt: 2, ((T([2048, 196, 196], f16, stride=(38416, 1, 196)), T([2048, 196, 32], f16)), {}) +cnt: 2, ((T([2048, 196, 32], 
f16), T([2048, 32, 196], f16, stride=(6272, 1, 32))), {}) +cnt: 2, ((T([2048, 32, 196], f16, stride=(6272, 1, 32)), T([2048, 196, 196], f16)), {}) +cnt: 2, ((T([2048, 196, 196], f16), T([2048, 196, 32], f16, stride=(6272, 1, 196))), {}) +cnt: 2, ((T([4096, 196, 196], f16, stride=(38416, 1, 196)), T([4096, 196, 32], f16)), {}) +cnt: 2, ((T([4096, 196, 32], f16), T([4096, 32, 196], f16, stride=(6272, 1, 32))), {}) +cnt: 2, ((T([4096, 32, 196], f16, stride=(6272, 1, 32)), T([4096, 196, 196], f16)), {}) +cnt: 2, ((T([4096, 196, 196], f16), T([4096, 196, 32], f16, stride=(6272, 1, 196))), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([64, 256, 56, 56], f16, stride=(802816, 1, 14336, 256)), [0, 1, 0, 1], -inf), {}) +cnt: 1, ((T([64, 512, 28, 28], f16, stride=(401408, 1, 14336, 512)), [0, 1, 0, 1], -inf), {}) +cnt: 1, ((T([64, 512, 29, 29], f16, stride=(430592, 1, 14848, 512)), [0, -1, 0, -1]), {}) +cnt: 1, ((T([64, 256, 57, 57], f16, stride=(831744, 1, 14592, 256)), [0, -1, 0, -1]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([128, 3, 4, 4], f16), T([128], f16), [4, 4], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([256, 128, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([512, 256, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 512, 28, 28], f16, stride=(401408, 1, 14336, 512)), T([64, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([512, 256, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 256, 56, 56], f16, stride=(802816, 1, 14336, 256)), T([64, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([256, 128, 3, 3], f16), [256], [1, 1], 
[1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([64, 3, 224, 224], f16), T([128, 3, 4, 4], f16), [128], [4, 4], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +cnt: 1, ((T([64, 512], f16), T([64, 512], f16)), {}) +cnt: 1, ((T([512, 256, 3, 3], f16), T([512, 256, 3, 3], f16, stride=(2304, 1, 768, 256))), {}) +cnt: 1, ((T([256, 128, 3, 3], f16), T([256, 128, 3, 3], f16, stride=(1152, 1, 384, 128))), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 512, 14, 14], f16, stride=(512, 1, 0, 0)), 196), {}) +Operator: aten.div_.Tensor +cnt: 2, ((T([64, 1, 1, 1], f16), 0.9782608691602945), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.9565217383205891), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.9347826093435287), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.9130434766411781), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.8913043439388275), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.8695652186870575), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.8478260785341263), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.8260869532823563), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.8043478280305862), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.782608687877655), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.760869562625885), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.739130437374115), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.717391312122345), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.695652186870575), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.6739130318164825), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.6521739065647125), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.6304347813129425), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.6086956560611725), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.5869565308094025), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.5652174055576324), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.54347825050354), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 
0.52173912525177), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.5), {}) +Operator: aten.gelu.default +cnt: 2, ((T([64, 16, 196, 512], f16),), {}) +cnt: 2, ((T([64, 4, 196, 1024], f16),), {}) +cnt: 20, ((T([64, 1, 196, 2048], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 20, ((T([64, 1, 196, 2048], f16), T([64, 1, 196, 2048], f16)), {}) +cnt: 2, ((T([64, 4, 196, 1024], f16), T([64, 4, 196, 1024], f16)), {}) +cnt: 2, ((T([64, 16, 196, 512], f16), T([64, 16, 196, 512], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([64, 256, 57, 57], f16, stride=(831744, 1, 14592, 256)), [3, 3], [2, 2]), {}) +cnt: 1, ((T([64, 512, 29, 29], f16, stride=(430592, 1, 14848, 512)), [3, 3], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 29, 29], f16, stride=(430592, 1, 14848, 512)), [3, 3], [2, 2], [0, 0], [1, 1], False, T([64, 512, 14, 14], i64, stride=(100352, 1, 7168, 512))), {}) +cnt: 1, ((T([64, 256, 28, 28], f16, stride=(200704, 1, 7168, 256)), T([64, 256, 57, 57], f16, stride=(831744, 1, 14592, 256)), [3, 3], [2, 2], [0, 0], [1, 1], False, T([64, 256, 28, 28], i64, stride=(200704, 1, 7168, 256))), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 512, 14, 14], f16, stride=(100352, 1, 7168, 512)), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 2, ((T([200704, 128], f16), T([128, 384], f16, stride=(1, 128))), {}) +cnt: 2, ((T([200704, 128], f16), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 2, ((T([200704, 128], f16), T([128, 512], f16, stride=(1, 128))), {}) +cnt: 2, ((T([200704, 512], f16), T([512, 128], f16, stride=(1, 512))), {}) +cnt: 2, ((T([50176, 256], f16), T([256, 768], f16, stride=(1, 256))), {}) +cnt: 2, ((T([50176, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 2, ((T([50176, 256], f16), T([256, 1024], f16, stride=(1, 256))), {}) +cnt: 2, ((T([50176, 1024], f16), T([1024, 256], f16, 
stride=(1, 1024))), {}) +cnt: 20, ((T([12544, 512], f16), T([512, 1536], f16, stride=(1, 512))), {}) +cnt: 20, ((T([12544, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 20, ((T([12544, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 20, ((T([12544, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +cnt: 1, ((T([64, 1000], f16), T([1000, 512], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 512], f16)), {}) +cnt: 20, ((T([512, 12544], f16, stride=(1, 512)), T([12544, 2048], f16)), {}) +cnt: 20, ((T([12544, 512], f16), T([512, 2048], f16)), {}) +cnt: 20, ((T([2048, 12544], f16, stride=(1, 2048)), T([12544, 512], f16)), {}) +cnt: 20, ((T([12544, 2048], f16), T([2048, 512], f16)), {}) +cnt: 20, ((T([512, 12544], f16, stride=(1, 512)), T([12544, 512], f16)), {}) +cnt: 20, ((T([12544, 512], f16), T([512, 512], f16)), {}) +cnt: 20, ((T([1536, 12544], f16, stride=(1, 1536)), T([12544, 512], f16)), {}) +cnt: 20, ((T([12544, 1536], f16), T([1536, 512], f16)), {}) +cnt: 2, ((T([256, 50176], f16, stride=(1, 256)), T([50176, 1024], f16)), {}) +cnt: 2, ((T([50176, 256], f16), T([256, 1024], f16)), {}) +cnt: 2, ((T([1024, 50176], f16, stride=(1, 1024)), T([50176, 256], f16)), {}) +cnt: 2, ((T([50176, 1024], f16), T([1024, 256], f16)), {}) +cnt: 2, ((T([256, 50176], f16, stride=(1, 256)), T([50176, 256], f16)), {}) +cnt: 2, ((T([50176, 256], f16), T([256, 256], f16)), {}) +cnt: 2, ((T([768, 50176], f16, stride=(1, 768)), T([50176, 256], f16)), {}) +cnt: 2, ((T([50176, 768], f16), T([768, 256], f16)), {}) +cnt: 2, ((T([128, 200704], f16, stride=(1, 128)), T([200704, 512], f16)), {}) +cnt: 2, ((T([200704, 128], f16), T([128, 512], f16)), {}) +cnt: 2, ((T([512, 200704], f16, stride=(1, 512)), T([200704, 128], f16)), {}) +cnt: 2, ((T([200704, 512], f16), T([512, 128], f16)), {}) +cnt: 2, ((T([128, 200704], f16, stride=(1, 128)), T([200704, 128], f16)), {}) +cnt: 2, ((T([200704, 128], f16), T([128, 128], f16)), {}) +cnt: 2, ((T([384, 
200704], f16, stride=(1, 384)), T([200704, 128], f16)), {}) +cnt: 2, ((T([200704, 384], f16), T([384, 128], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([64, 4, 16, 196, 196], f16), 0.1767766952966369), {}) +cnt: 4, ((T([64, 16, 196, 128], f16), T([64, 1, 1, 1], f16)), {}) +cnt: 4, ((T([64, 8, 4, 196, 196], f16), 0.1767766952966369), {}) +cnt: 8, ((T([64, 4, 196, 256], f16), T([64, 1, 1, 1], f16)), {}) +cnt: 40, ((T([64, 16, 1, 196, 196], f16), 0.1767766952966369), {}) +cnt: 40, ((T([64, 1, 196, 512], f16), T([64, 1, 1, 1], f16)), {}) +cnt: 40, ((T([64, 1, 196, 512], f16, stride=(100352, 196, 1, 196)), T([64, 1, 1, 1], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 4, ((T([64, 16, 196, 128], f16), [128], T([128], f16), T([128], f16), 1e-06), {}) +cnt: 1, ((T([64, 56, 56, 256], f16), [256], T([256], f16), T([256], f16), 1e-06), {}) +cnt: 4, ((T([64, 4, 196, 256], f16), [256], T([256], f16), T([256], f16), 1e-06), {}) +cnt: 1, ((T([64, 28, 28, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {}) +cnt: 40, ((T([64, 1, 196, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {}) +cnt: 1, ((T([64, 14, 14, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 1, ((T([64, 14, 14, 512], f16, stride=(100352, 14, 1, 196)), T([64, 14, 14, 512], f16), [512], T([64, 14, 14, 1], f32), T([64, 14, 14, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 40, ((T([64, 1, 196, 512], f16), T([64, 1, 196, 512], f16), [512], T([64, 1, 196, 1], f32), T([64, 1, 196, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 1, ((T([64, 28, 28, 512], f16), T([64, 28, 28, 512], f16), [512], T([64, 28, 28, 1], f32), T([64, 28, 28, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 4, ((T([64, 4, 196, 256], f16), T([64, 4, 196, 256], f16), [256], T([64, 4, 196, 1], f32), T([64, 4, 196, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) 
+cnt: 1, ((T([64, 56, 56, 256], f16), T([64, 56, 56, 256], f16), [256], T([64, 56, 56, 1], f32), T([64, 56, 56, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +cnt: 4, ((T([64, 16, 196, 128], f16), T([64, 16, 196, 128], f16), [128], T([64, 16, 196, 1], f32), T([64, 16, 196, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +Operator: aten.new_empty.default +cnt: 2, ((T([64, 16, 196, 128], f16), [64, 1, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 4, ((T([64, 4, 196, 256], f16), [64, 1, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 40, ((T([64, 1, 196, 512], f16), [64, 1, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_empty_strided.default +cnt: 1, ((T([512, 256, 3, 3], f16, stride=(2304, 1, 768, 256)), [512, 256, 3, 3], [2304, 9, 3, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([256, 128, 3, 3], f16, stride=(1152, 1, 384, 128)), [256, 128, 3, 3], [1152, 9, 3, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.new_zeros.default +cnt: 1, ((T([64, 512], f16), [32768]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.stack.default +cnt: 20, (([T([64, 16, 1, 196, 32], f16), T([64, 16, 1, 196, 32], f16, stride=(100352, 6272, 6272, 1, 196)), T([64, 16, 1, 196, 32], f16)],), {}) +cnt: 2, (([T([64, 8, 4, 196, 32], f16), T([64, 8, 4, 196, 32], f16, stride=(200704, 25088, 6272, 1, 196)), T([64, 8, 4, 196, 32], f16)],), {}) +cnt: 2, (([T([64, 4, 16, 196, 32], f16), T([64, 4, 16, 196, 32], f16, stride=(401408, 100352, 6272, 1, 196)), T([64, 4, 16, 196, 32], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), 
[0], True), {}) +cnt: 40, ((T([64, 1, 196, 512], f16, stride=(100352, 196, 1, 196)), [0, 1, 2], True), {}) +cnt: 20, ((T([64, 1, 196, 2048], f16), [0, 1, 2], True), {}) +cnt: 20, ((T([64, 1, 196, 1536], f16), [0, 1, 2], True), {}) +cnt: 1, ((T([64, 1, 196, 512], f16, stride=(100352, 196, 1, 196)), [0], True), {}) +cnt: 4, ((T([64, 4, 196, 256], f16), [0, 1, 2], True), {}) +cnt: 2, ((T([64, 4, 196, 1024], f16), [0, 1, 2], True), {}) +cnt: 2, ((T([64, 4, 196, 768], f16), [0, 1, 2], True), {}) +cnt: 1, ((T([64, 4, 196, 256], f16), [0], True), {}) +cnt: 4, ((T([64, 16, 196, 128], f16), [0, 1, 2], True), {}) +cnt: 2, ((T([64, 16, 196, 512], f16), [0, 1, 2], True), {}) +cnt: 2, ((T([64, 16, 196, 384], f16), [0, 1, 2], True), {}) +cnt: 1, ((T([64, 16, 196, 128], f16), [0], True), {}) +Operator: aten.unbind.int +cnt: 2, ((T([3, 64, 4, 16, 196, 32], f16, stride=(128, 1204224, 32, 75264, 384, 1)),), {}) +cnt: 2, ((T([3, 64, 8, 4, 196, 32], f16, stride=(256, 602112, 32, 150528, 768, 1)),), {}) +cnt: 20, ((T([3, 64, 16, 1, 196, 32], f16, stride=(512, 301056, 32, 301056, 1536, 1)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/lcnet_050_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/lcnet_050_training.txt new file mode 100644 index 000000000..48f28c23f --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/lcnet_050_training.txt @@ -0,0 +1,158 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 27, ((T([], i64), 1), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16)), {}) +cnt: 1, ((T([128, 128, 7, 7], f16), T([128, 128, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, 
((T([128, 3, 224, 224], f16),), {}) +cnt: 2, ((T([128, 8, 112, 112], f16),), {}) +cnt: 1, ((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 16, 56, 56], f16),), {}) +cnt: 3, ((T([128, 32, 56, 56], f16),), {}) +cnt: 1, ((T([128, 32, 28, 28], f16),), {}) +cnt: 3, ((T([128, 64, 28, 28], f16),), {}) +cnt: 1, ((T([128, 64, 14, 14], f16),), {}) +cnt: 11, ((T([128, 128, 14, 14], f16),), {}) +cnt: 1, ((T([128, 128, 7, 7], f16),), {}) +cnt: 3, ((T([128, 256, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 1, 1], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([8, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 8, 112, 112], f16), T([8, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 1, ((T([128, 8, 112, 112], f16), T([16, 8, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 16), {}) +cnt: 1, ((T([128, 16, 56, 56], f16), T([32, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([32, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([32, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([128, 32, 28, 28], f16), T([64, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([64, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([64, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([128, 64, 14, 14], f16), T([128, 64, 1, 1], 
f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 128, 14, 14], f16), T([128, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 128), {}) +cnt: 5, ((T([128, 128, 14, 14], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([128, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 128), {}) +cnt: 1, ((T([128, 128, 1, 1], f16), T([32, 128, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([128, 32, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 7, 7], f16), T([256, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([256, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 256), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([64, 256, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 1, 1], f16), T([256, 64, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([1280, 256, 1, 1], f16), T([1280], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1280, 1, 1], f16), T([128, 256, 1, 1], f16), T([1280, 256, 1, 1], f16), [1280], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([256, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 64, 1, 1], f16), T([256, 64, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 1, 1], f16), T([128, 256, 1, 1], f16), T([64, 256, 1, 1], 
f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([256, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 256, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([128, 128, 7, 7], f16), T([256, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 1, 1], f16), T([128, 32, 1, 1], f16), T([128, 32, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([128, 128, 1, 1], f16), T([32, 128, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 7, 7], f16), T([128, 128, 14, 14], f16), T([128, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 128, [True, True, False]), {}) +cnt: 5, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 128, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([128, 64, 14, 14], f16), T([128, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 14, 14], f16), T([128, 64, 28, 28], f16), T([64, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([64, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 32, 28, 28], f16), T([64, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 
0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 28, 28], f16), T([128, 32, 56, 56], f16), T([32, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([32, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 16, 56, 56], f16), T([32, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 56, 56], f16), T([128, 16, 112, 112], f16), T([16, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 16, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 8, 112, 112], f16), T([16, 8, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 8, 112, 112], f16), T([128, 8, 112, 112], f16), T([8, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 1, ((T([128, 8, 112, 112], f16), T([128, 3, 224, 224], f16), T([8, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 2, ((T([128, 256, 7, 7], f16, stride=(256, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 128, 7, 7], f16, stride=(128, 1, 0, 0)), 49), {}) +Operator: aten.hardsigmoid.default +cnt: 1, ((T([128, 128, 1, 1], f16),), {}) +cnt: 1, ((T([128, 256, 1, 1], f16),), {}) +Operator: aten.hardsigmoid_backward.default +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16)), {}) +cnt: 1, ((T([128, 128, 1, 1], f16), T([128, 128, 1, 1], f16)), {}) +Operator: aten.hardswish_.default +cnt: 2, ((T([128, 8, 112, 112], f16),), {}) +cnt: 1, 
((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 16, 56, 56], f16),), {}) +cnt: 3, ((T([128, 32, 56, 56], f16),), {}) +cnt: 1, ((T([128, 32, 28, 28], f16),), {}) +cnt: 3, ((T([128, 64, 28, 28], f16),), {}) +cnt: 1, ((T([128, 64, 14, 14], f16),), {}) +cnt: 11, ((T([128, 128, 14, 14], f16),), {}) +cnt: 1, ((T([128, 128, 7, 7], f16),), {}) +cnt: 3, ((T([128, 256, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 1, 1], f16),), {}) +Operator: aten.hardswish_backward.default +cnt: 1, ((T([128, 1280, 1, 1], f16), T([128, 1280, 1, 1], f16)), {}) +cnt: 3, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16)), {}) +cnt: 1, ((T([128, 128, 7, 7], f16), T([128, 128, 7, 7], f16)), {}) +cnt: 11, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16)), {}) +cnt: 1, ((T([128, 64, 14, 14], f16), T([128, 64, 14, 14], f16)), {}) +cnt: 3, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16)), {}) +cnt: 1, ((T([128, 32, 28, 28], f16), T([128, 32, 28, 28], f16)), {}) +cnt: 3, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16)), {}) +cnt: 1, ((T([128, 16, 56, 56], f16), T([128, 16, 56, 56], f16)), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +cnt: 2, ((T([128, 8, 112, 112], f16), T([128, 8, 112, 112], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 128, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1280], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([128, 128, 7, 7], f16), T([128, 128, 1, 1], f16)), {}) +cnt: 2, ((T([128, 256, 7, 7], f16), T([128, 256, 1, 1], f16)), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16)), {}) +cnt: 1, ((T([128, 128, 7, 7], f16), T([128, 128, 7, 7], f16)), {}) +Operator: 
aten.native_batch_norm.default +cnt: 2, ((T([128, 8, 112, 112], f16), T([8], f16), T([8], f16), T([8], f16), T([8], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 16, 56, 56], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 32, 28, 28], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 64, 28, 28], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 64, 14, 14], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 11, ((T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 128, 7, 7], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 256, 7, 7], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 3, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 7, 7], f16), T([128, 128, 7, 7], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 11, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 14, 14], f16), T([128, 64, 14, 14], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 64, 28, 
28], f16), T([128, 64, 28, 28], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 28, 28], f16), T([128, 32, 28, 28], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 56, 56], f16), T([128, 16, 56, 56], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 8, 112, 112], f16), T([128, 8, 112, 112], f16), T([8], f16), T([8], f16), T([8], f16), T([8], f32), T([8], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 32, 1, 1], f16),), {}) +cnt: 1, ((T([128, 64, 1, 1], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 1, ((T([128, 256, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 128, 7, 7], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 64, 1, 1], f16), T([128, 64, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([128, 32, 1, 1], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/legacy_senet154_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/legacy_senet154_training.txt new file mode 100644 index 000000000..c4895fad4 --- /dev/null +++ 
b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/legacy_senet154_training.txt @@ -0,0 +1,183 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([32, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([32, 1000], f16), T([32, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 9, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +cnt: 24, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)), {}) +cnt: 108, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 8, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16)), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 157, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([64, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([128, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 128, 56, 56], f16), T([256, 2, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([256, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), T([16, 256, 1, 1], f16), T([16], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 16, 1, 1], f16), T([256, 16, 1, 1], f16), T([256], f16), [1, 1], 
[0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 256, 56, 56], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([512, 4, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 9, ((T([32, 512, 28, 28], f16), T([512, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([512, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([32, 512, 1, 1], f16), T([32, 512, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([32, 32, 1, 1], f16), T([512, 32, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([32, 512, 28, 28], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([32, 256, 28, 28], f16), T([512, 4, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([1024, 8, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 37, ((T([32, 1024, 14, 14], f16), T([1024, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([1024, 512, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 36, ((T([32, 1024, 1, 1], f16), T([64, 1024, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 36, ((T([32, 64, 1, 1], f16), T([1024, 64, 1, 1], f16), T([1024], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 35, ((T([32, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 35, ((T([32, 512, 14, 14], f16), T([1024, 8, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([2048, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), T([2048, 2048, 
1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([2048, 1024, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 2048, 1, 1], f16), T([128, 2048, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 128, 1, 1], f16), T([2048, 128, 1, 1], f16), T([2048], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 2048, 7, 7], f16), T([1024, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([2048, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([32, 2048, 1, 1], f16), T([32, 128, 1, 1], f16), T([2048, 128, 1, 1], f16), [2048], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 128, 1, 1], f16), T([32, 2048, 1, 1], f16), T([128, 2048, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), T([2048, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 2048, 7, 7], f16), T([32, 1024, 7, 7], f16), T([2048, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([32, 2048, 7, 7], f16), T([1024, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 1024, 14, 14], f16), T([2048, 1024, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 1024, 14, 14], f16), T([2048, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 37, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), T([1024, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, 
[0, 0], 1, [True, True, False]), {}) +cnt: 36, ((T([32, 1024, 1, 1], f16), T([32, 64, 1, 1], f16), T([1024, 64, 1, 1], f16), [1024], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 36, ((T([32, 64, 1, 1], f16), T([32, 1024, 1, 1], f16), T([64, 1024, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 35, ((T([32, 1024, 14, 14], f16), T([32, 512, 14, 14], f16), T([1024, 8, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 35, ((T([32, 512, 14, 14], f16), T([32, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 512, 28, 28], f16), T([1024, 512, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 512, 28, 28], f16), T([1024, 8, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 9, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), T([512, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([32, 512, 1, 1], f16), T([32, 32, 1, 1], f16), T([512, 32, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 8, ((T([32, 32, 1, 1], f16), T([32, 512, 1, 1], f16), T([32, 512, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 7, ((T([32, 512, 28, 28], f16), T([32, 256, 28, 28], f16), T([512, 4, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 7, ((T([32, 256, 28, 28], f16), T([32, 512, 28, 28], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 256, 56, 56], f16), T([512, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) 
+cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 256, 56, 56], f16), T([512, 4, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), T([256, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), T([32, 16, 1, 1], f16), T([256, 16, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 16, 1, 1], f16), T([32, 256, 1, 1], f16), T([16, 256, 1, 1], f16), [16], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 128, 56, 56], f16), T([256, 2, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 2, ((T([32, 128, 56, 56], f16), T([32, 256, 56, 56], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 128, 56, 56], f16), T([256, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([32, 64, 112, 112], f16), T([128, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 3, 224, 224], f16), T([64, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 4, ((T([32, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +cnt: 36, 
((T([32, 1024, 14, 14], f16, stride=(1024, 1, 0, 0)), 196), {}) +cnt: 8, ((T([32, 512, 28, 28], f16, stride=(512, 1, 0, 0)), 784), {}) +cnt: 3, ((T([32, 256, 56, 56], f16, stride=(256, 1, 0, 0)), 3136), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([32], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([32, 128, 112, 112], f16), [3, 3], [2, 2], [0, 0], [1, 1], True), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 128, 112, 112], f16), [3, 3], [2, 2], [0, 0], [1, 1], True, T([32, 128, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 3, ((T([32, 256, 56, 56], f16), [2, 3], True), {}) +cnt: 8, ((T([32, 512, 28, 28], f16), [2, 3], True), {}) +cnt: 36, ((T([32, 1024, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(1, 1000)), T([32, 2048], f16)), {}) +Operator: aten.mul.Tensor +cnt: 6, ((T([32, 256, 56, 56], f16), T([32, 256, 1, 1], f16)), {}) +cnt: 16, ((T([32, 512, 28, 28], f16), T([32, 512, 1, 1], f16)), {}) +cnt: 72, ((T([32, 1024, 14, 14], f16), T([32, 1024, 1, 1], f16)), {}) +cnt: 6, ((T([32, 2048, 7, 7], f16), T([32, 2048, 1, 1], f16)), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16)), {}) +cnt: 36, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 8, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 128, 56, 56], f16), T([128], f16), 
T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 18, ((T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([32, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 74, ((T([32, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 35, ((T([32, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([32, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 7, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([32, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 74, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 35, ((T([32, 512, 14, 14], f16), T([32, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 18, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([32, 256, 28, 28], f16), T([32, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), 
T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([32, 128, 112, 112], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([32, 1000], f16), T([32], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([32, 1000], f16), T([32], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 2, ((T([32, 64, 112, 112], f16),), {}) +cnt: 1, ((T([32, 128, 112, 112], f16),), {}) +cnt: 3, ((T([32, 128, 56, 56], f16),), {}) +cnt: 7, ((T([32, 256, 56, 56], f16),), {}) +cnt: 3, ((T([32, 16, 1, 1], f16),), {}) +cnt: 17, ((T([32, 512, 28, 28], f16),), {}) +cnt: 8, ((T([32, 32, 1, 1], f16),), {}) +cnt: 7, ((T([32, 256, 28, 28], f16),), {}) +cnt: 73, ((T([32, 1024, 14, 14], f16),), {}) +cnt: 36, ((T([32, 64, 1, 1], f16),), {}) +cnt: 35, ((T([32, 512, 14, 14], f16),), {}) +cnt: 6, ((T([32, 2048, 7, 7], f16),), {}) +cnt: 3, ((T([32, 128, 1, 1], f16),), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16),), {}) +Operator: aten.sigmoid.default +cnt: 3, ((T([32, 256, 1, 1], f16),), {}) +cnt: 8, ((T([32, 512, 1, 1], f16),), {}) +cnt: 36, ((T([32, 1024, 1, 1], f16),), {}) +cnt: 3, ((T([32, 2048, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 3, ((T([32, 2048, 1, 1], f16), T([32, 2048, 1, 1], f16)), {}) +cnt: 36, ((T([32, 1024, 1, 
1], f16), T([32, 1024, 1, 1], f16)), {}) +cnt: 8, ((T([32, 512, 1, 1], f16), T([32, 512, 1, 1], f16)), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), T([32, 256, 1, 1], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16), [0], True), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), [2, 3], True), {}) +cnt: 36, ((T([32, 1024, 14, 14], f16), [2, 3], True), {}) +cnt: 8, ((T([32, 512, 28, 28], f16), [2, 3], True), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 6, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), 0), {}) +cnt: 3, ((T([32, 128, 1, 1], f16), T([32, 128, 1, 1], f16), 0), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([32, 1024, 7, 7], f16), 0), {}) +cnt: 73, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), 0), {}) +cnt: 36, ((T([32, 64, 1, 1], f16), T([32, 64, 1, 1], f16), 0), {}) +cnt: 35, ((T([32, 512, 14, 14], f16), T([32, 512, 14, 14], f16), 0), {}) +cnt: 17, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), 0), {}) +cnt: 8, ((T([32, 32, 1, 1], f16), T([32, 32, 1, 1], f16), 0), {}) +cnt: 7, ((T([32, 256, 28, 28], f16), T([32, 256, 28, 28], f16), 0), {}) +cnt: 7, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), 0), {}) +cnt: 3, ((T([32, 16, 1, 1], f16), T([32, 16, 1, 1], f16), 0), {}) +cnt: 3, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 128, 112, 112], f16), T([32, 128, 112, 112], f16), 0), {}) +cnt: 2, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/levit_128_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/levit_128_training.txt new file mode 100644 index 000000000..e24ac0ec6 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/levit_128_training.txt @@ -0,0 +1,295 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: 
aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 4, ((T([128, 4, 196, 196], f16), -1, False), {}) +cnt: 1, ((T([128, 8, 49, 196], f16), -1, False), {}) +cnt: 4, ((T([128, 8, 49, 49], f16), -1, False), {}) +cnt: 1, ((T([128, 16, 16, 49], f16), -1, False), {}) +cnt: 4, ((T([128, 12, 16, 16], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 4, ((T([128, 12, 16, 16], f16), T([128, 12, 16, 16], f16), -1, f16), {}) +cnt: 1, ((T([128, 16, 16, 49], f16), T([128, 16, 16, 49], f16), -1, f16), {}) +cnt: 4, ((T([128, 8, 49, 49], f16), T([128, 8, 49, 49], f16), -1, f16), {}) +cnt: 1, ((T([128, 8, 49, 196], f16), T([128, 8, 49, 196], f16), -1, f16), {}) +cnt: 4, ((T([128, 4, 196, 196], f16), T([128, 4, 196, 196], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 8, ((T([128, 196, 256], f16), [128, 196, 256]), {}) +cnt: 4, ((T([128, 4, 196, 16], f16), [512, 196, 16]), {}) +cnt: 4, ((T([128, 4, 16, 196], f16), [512, 16, 196]), {}) +cnt: 4, ((T([512, 196, 196], f16), [128, 4, 196, 196]), {}) +cnt: 8, ((T([128, 4, 196, 32], f16), [512, 196, 32]), {}) +cnt: 4, ((T([512, 196, 32], f16), [128, 4, 196, 32]), {}) +cnt: 4, ((T([128, 196, 4, 32], f16), [128, 196, 128]), {}) +cnt: 8, ((T([25088, 128], f16), [128, 196, 128]), {}) +cnt: 1, ((T([128, 196, 640], f16), [128, 196, 640]), {}) +cnt: 1, ((T([128, 7, 7, 128], f16), [128, 49, 128]), {}) +cnt: 1, ((T([6272, 128], f16), [128, 49, 128]), {}) +cnt: 5, ((T([128, 8, 49, 16], f16), [1024, 49, 16]), {}) +cnt: 1, ((T([128, 8, 16, 196], f16), [1024, 16, 196]), {}) +cnt: 1, ((T([1024, 49, 196], f16), [128, 8, 49, 196]), {}) +cnt: 1, ((T([128, 8, 196, 64], f16), [1024, 196, 64]), {}) +cnt: 1, ((T([1024, 49, 64], f16), [128, 8, 49, 64]), {}) +cnt: 1, ((T([128, 49, 8, 64], f16), [128, 49, 512]), {}) +cnt: 10, ((T([6272, 256], f16), [128, 49, 256]), {}) +cnt: 9, ((T([6272, 512], f16), [128, 49, 512]), {}) +cnt: 4, 
((T([128, 8, 16, 49], f16), [1024, 16, 49]), {}) +cnt: 4, ((T([1024, 49, 49], f16), [128, 8, 49, 49]), {}) +cnt: 8, ((T([128, 8, 49, 32], f16), [1024, 49, 32]), {}) +cnt: 4, ((T([1024, 49, 32], f16), [128, 8, 49, 32]), {}) +cnt: 4, ((T([128, 49, 8, 32], f16), [128, 49, 256]), {}) +cnt: 1, ((T([6272, 1280], f16), [128, 49, 1280]), {}) +cnt: 1, ((T([128, 4, 4, 256], f16), [128, 16, 256]), {}) +cnt: 1, ((T([2048, 256], f16), [128, 16, 256]), {}) +cnt: 1, ((T([128, 16, 16, 16], f16), [2048, 16, 16]), {}) +cnt: 1, ((T([128, 16, 16, 49], f16), [2048, 16, 49]), {}) +cnt: 1, ((T([2048, 16, 49], f16), [128, 16, 16, 49]), {}) +cnt: 1, ((T([128, 16, 49, 64], f16), [2048, 49, 64]), {}) +cnt: 1, ((T([2048, 16, 64], f16), [128, 16, 16, 64]), {}) +cnt: 1, ((T([128, 16, 16, 64], f16), [128, 16, 1024]), {}) +cnt: 10, ((T([2048, 384], f16), [128, 16, 384]), {}) +cnt: 9, ((T([2048, 768], f16), [128, 16, 768]), {}) +cnt: 8, ((T([128, 12, 16, 16], f16), [1536, 16, 16]), {}) +cnt: 4, ((T([1536, 16, 16], f16), [128, 12, 16, 16]), {}) +cnt: 8, ((T([128, 12, 16, 32], f16), [1536, 16, 32]), {}) +cnt: 4, ((T([1536, 16, 32], f16), [128, 12, 16, 32]), {}) +cnt: 4, ((T([128, 16, 12, 32], f16), [128, 16, 384]), {}) +cnt: 1, ((T([128, 16, 16, 64], f16), [2048, 16, 64]), {}) +cnt: 1, ((T([128, 16, 16, 16], f16), [128, 16, 256]), {}) +cnt: 1, ((T([128, 8, 49, 64], f16), [1024, 49, 64]), {}) +cnt: 1, ((T([128, 49, 8, 16], f16), [128, 49, 128]), {}) +Operator: aten.add.Tensor +cnt: 4, ((T([128, 4, 196, 196], f16), T([4, 196, 196], f16)), {}) +cnt: 8, ((T([128, 196, 128], f16, stride=(25088, 1, 196)), T([128, 196, 128], f16)), {}) +cnt: 1, ((T([128, 8, 49, 196], f16), T([8, 49, 196], f16)), {}) +cnt: 19, ((T([128, 49, 256], f16), T([128, 49, 256], f16)), {}) +cnt: 4, ((T([128, 8, 49, 49], f16), T([8, 49, 49], f16)), {}) +cnt: 1, ((T([128, 16, 16, 49], f16), T([16, 16, 49], f16)), {}) +cnt: 18, ((T([128, 16, 384], f16), T([128, 16, 384], f16)), {}) +cnt: 4, ((T([128, 12, 16, 16], f16), T([12, 16, 16], 
f16)), {}) +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16)), {}) +cnt: 1, ((T([128, 384], f16), T([128, 384], f16)), {}) +cnt: 9, ((T([128, 196, 128], f16), T([128, 196, 128], f16)), {}) +Operator: aten.add_.Tensor +cnt: 64, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 2, ((T([1000], f16), T([128, 384], f16), T([384, 1000], f16, stride=(1, 384))), {}) +Operator: aten.bmm.default +cnt: 8, ((T([128, 196, 128], f16, stride=(25088, 1, 196)), T([128, 128, 256], f16, stride=(0, 1, 128))), {}) +cnt: 4, ((T([512, 196, 16], f16), T([512, 16, 196], f16)), {}) +cnt: 4, ((T([512, 196, 196], f16), T([512, 196, 32], f16)), {}) +cnt: 1, ((T([128, 196, 128], f16, stride=(25088, 1, 196)), T([128, 128, 640], f16, stride=(0, 1, 128))), {}) +cnt: 1, ((T([1024, 49, 16], f16), T([1024, 16, 196], f16)), {}) +cnt: 1, ((T([1024, 49, 196], f16), T([1024, 196, 64], f16)), {}) +cnt: 4, ((T([1024, 49, 16], f16), T([1024, 16, 49], f16)), {}) +cnt: 4, ((T([1024, 49, 49], f16), T([1024, 49, 32], f16)), {}) +cnt: 1, ((T([2048, 16, 16], f16), T([2048, 16, 49], f16)), {}) +cnt: 1, ((T([2048, 16, 49], f16), T([2048, 49, 64], f16)), {}) +cnt: 4, ((T([1536, 16, 16], f16), T([1536, 16, 16], f16)), {}) +cnt: 4, ((T([1536, 16, 16], f16), T([1536, 16, 32], f16)), {}) +cnt: 4, ((T([1536, 16, 16], f16, stride=(256, 1, 16)), T([1536, 16, 32], f16)), {}) +cnt: 4, ((T([1536, 16, 32], f16), T([1536, 32, 16], f16, stride=(512, 1, 32))), {}) +cnt: 4, ((T([1536, 16, 16], f16, stride=(256, 1, 16)), T([1536, 16, 16], f16)), {}) +cnt: 4, ((T([1536, 16, 16], f16), T([1536, 16, 16], f16, stride=(256, 1, 16))), {}) +cnt: 1, ((T([2048, 49, 16], f16, stride=(784, 1, 49)), T([2048, 16, 64], f16)), {}) +cnt: 1, ((T([2048, 16, 64], f16), T([2048, 64, 49], f16, stride=(3136, 1, 64))), {}) +cnt: 1, ((T([2048, 16, 16], f16, stride=(256, 1, 16)), T([2048, 16, 49], f16)), {}) +cnt: 1, ((T([2048, 16, 49], f16), T([2048, 49, 16], f16, stride=(784, 1, 49))), {}) +cnt: 4, ((T([1024, 49, 49], f16, stride=(2401, 1, 
49)), T([1024, 49, 32], f16)), {}) +cnt: 4, ((T([1024, 49, 32], f16), T([1024, 32, 49], f16, stride=(1568, 1, 32))), {}) +cnt: 4, ((T([1024, 16, 49], f16, stride=(784, 1, 16)), T([1024, 49, 49], f16)), {}) +cnt: 4, ((T([1024, 49, 49], f16), T([1024, 49, 16], f16, stride=(784, 1, 49))), {}) +cnt: 1, ((T([1024, 196, 49], f16, stride=(9604, 1, 196)), T([1024, 49, 64], f16)), {}) +cnt: 1, ((T([1024, 49, 64], f16), T([1024, 64, 196], f16, stride=(12544, 1, 64))), {}) +cnt: 1, ((T([1024, 16, 49], f16, stride=(784, 1, 16)), T([1024, 49, 196], f16)), {}) +cnt: 1, ((T([1024, 49, 196], f16), T([1024, 196, 16], f16, stride=(3136, 1, 196))), {}) +cnt: 1, ((T([128, 128, 196], f16), T([128, 196, 640], f16)), {}) +cnt: 1, ((T([128, 196, 640], f16), T([128, 640, 128], f16, stride=(0, 128, 1))), {}) +cnt: 8, ((T([128, 128, 196], f16), T([128, 196, 256], f16)), {}) +cnt: 8, ((T([128, 196, 256], f16), T([128, 256, 128], f16, stride=(0, 128, 1))), {}) +cnt: 4, ((T([512, 196, 196], f16, stride=(38416, 1, 196)), T([512, 196, 32], f16)), {}) +cnt: 4, ((T([512, 196, 32], f16), T([512, 32, 196], f16, stride=(6272, 1, 32))), {}) +cnt: 4, ((T([512, 16, 196], f16, stride=(3136, 1, 16)), T([512, 196, 196], f16)), {}) +cnt: 4, ((T([512, 196, 196], f16), T([512, 196, 16], f16, stride=(3136, 1, 196))), {}) +Operator: aten.cat.default +cnt: 4, (([T([128, 16, 12, 16], f16, stride=(3072, 16, 256, 1)), T([128, 16, 12, 16], f16, stride=(3072, 1, 256, 16)), T([128, 16, 12, 32], f16, stride=(6144, 32, 512, 1))], 3), {}) +cnt: 1, (([T([128, 49, 16, 16], f16, stride=(12544, 1, 784, 49)), T([128, 49, 16, 64], f16, stride=(50176, 64, 3136, 1))], 3), {}) +cnt: 4, (([T([128, 49, 8, 16], f16, stride=(6272, 16, 784, 1)), T([128, 49, 8, 16], f16, stride=(6272, 1, 784, 49)), T([128, 49, 8, 32], f16, stride=(12544, 32, 1568, 1))], 3), {}) +cnt: 1, (([T([128, 196, 8, 16], f16, stride=(25088, 1, 3136, 196)), T([128, 196, 8, 64], f16, stride=(100352, 64, 12544, 1))], 3), {}) +cnt: 4, (([T([128, 196, 4, 16], f16, 
stride=(12544, 16, 3136, 1)), T([128, 196, 4, 16], f16, stride=(12544, 1, 3136, 196)), T([128, 196, 4, 32], f16, stride=(25088, 32, 6272, 1))], 3), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([32, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([64, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 128, 14, 14], f16, stride=(25088, 1, 1792, 128)), T([128, 64, 28, 28], f16), T([128, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 32, 56, 56], f16), T([64, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 16, 112, 112], f16), T([32, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +cnt: 1, ((T([640, 128], f16), T([640, 128], f16, stride=(1, 640))), {}) +cnt: 8, ((T([256, 128], f16), T([256, 128], f16, stride=(1, 256))), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 16, 384], f16, stride=(384, 0, 1)), 16), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([128, 1000], f16), 2), {}) +Operator: aten.hardswish.default +cnt: 1, ((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 32, 56, 56], f16),), {}) +cnt: 1, 
((T([128, 64, 28, 28], f16),), {}) +cnt: 4, ((T([128, 196, 128], f16),), {}) +cnt: 4, ((T([128, 196, 256], f16),), {}) +cnt: 6, ((T([128, 49, 512], f16),), {}) +cnt: 4, ((T([128, 49, 256], f16),), {}) +cnt: 1, ((T([128, 16, 1024], f16),), {}) +cnt: 5, ((T([128, 16, 768], f16),), {}) +cnt: 4, ((T([128, 16, 384], f16),), {}) +Operator: aten.hardswish_backward.default +cnt: 5, ((T([128, 16, 768], f16), T([128, 16, 768], f16)), {}) +cnt: 4, ((T([128, 16, 384], f16), T([128, 16, 384], f16)), {}) +cnt: 1, ((T([128, 16, 1024], f16), T([128, 16, 1024], f16)), {}) +cnt: 6, ((T([128, 49, 512], f16), T([128, 49, 512], f16)), {}) +cnt: 4, ((T([128, 49, 256], f16), T([128, 49, 256], f16)), {}) +cnt: 4, ((T([128, 196, 256], f16), T([128, 196, 256], f16)), {}) +cnt: 4, ((T([128, 196, 128], f16), T([128, 196, 128], f16)), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16)), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16)), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +Operator: aten.index.Tensor +cnt: 4, ((T([4, 196], f16), [None, T([196, 196], i64)]), {}) +cnt: 1, ((T([8, 196], f16), [None, T([49, 196], i64)]), {}) +cnt: 4, ((T([8, 49], f16), [None, T([49, 49], i64)]), {}) +cnt: 1, ((T([16, 49], f16), [None, T([16, 49], i64)]), {}) +cnt: 4, ((T([12, 16], f16), [None, T([16, 16], i64)]), {}) +Operator: aten.index_put.default +cnt: 4, ((T([12, 16], f16), [None, T([16, 16], i64)], T([12, 16, 16], f16), True), {}) +cnt: 1, ((T([16, 49], f16), [None, T([16, 49], i64)], T([16, 16, 49], f16), True), {}) +cnt: 4, ((T([8, 49], f16), [None, T([49, 49], i64)], T([8, 49, 49], f16), True), {}) +cnt: 1, ((T([8, 196], f16), [None, T([49, 196], i64)], T([8, 49, 196], f16), True), {}) +cnt: 4, ((T([4, 196], f16), [None, T([196, 196], i64)], T([4, 196, 196], f16), True), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 16, 384], f16), [1]), {}) +Operator: 
aten.mm.default +cnt: 4, ((T([25088, 128], f16), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 4, ((T([25088, 256], f16), T([256, 128], f16, stride=(1, 256))), {}) +cnt: 1, ((T([6272, 128], f16), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 6, ((T([6272, 512], f16), T([512, 256], f16, stride=(1, 512))), {}) +cnt: 9, ((T([6272, 256], f16), T([256, 512], f16, stride=(1, 256))), {}) +cnt: 4, ((T([6272, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 1, ((T([6272, 256], f16), T([256, 1280], f16, stride=(1, 256))), {}) +cnt: 1, ((T([2048, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 1, ((T([2048, 1024], f16), T([1024, 384], f16, stride=(1, 1024))), {}) +cnt: 9, ((T([2048, 384], f16), T([384, 768], f16, stride=(1, 384))), {}) +cnt: 5, ((T([2048, 768], f16), T([768, 384], f16, stride=(1, 768))), {}) +cnt: 4, ((T([2048, 384], f16), T([384, 384], f16, stride=(1, 384))), {}) +cnt: 2, ((T([128, 1000], f16), T([1000, 384], f16)), {}) +cnt: 2, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 384], f16)), {}) +cnt: 5, ((T([384, 2048], f16, stride=(1, 384)), T([2048, 768], f16)), {}) +cnt: 5, ((T([2048, 384], f16), T([384, 768], f16)), {}) +cnt: 9, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 384], f16)), {}) +cnt: 9, ((T([2048, 768], f16), T([768, 384], f16)), {}) +cnt: 4, ((T([384, 2048], f16, stride=(1, 384)), T([2048, 384], f16)), {}) +cnt: 4, ((T([2048, 384], f16), T([384, 384], f16)), {}) +cnt: 1, ((T([384, 2048], f16, stride=(1, 384)), T([2048, 1024], f16)), {}) +cnt: 1, ((T([2048, 384], f16), T([384, 1024], f16)), {}) +cnt: 1, ((T([256, 2048], f16, stride=(1, 256)), T([2048, 256], f16)), {}) +cnt: 1, ((T([2048, 256], f16), T([256, 256], f16)), {}) +cnt: 1, ((T([1280, 6272], f16, stride=(1, 1280)), T([6272, 256], f16)), {}) +cnt: 1, ((T([6272, 1280], f16), T([1280, 256], f16)), {}) +cnt: 6, ((T([256, 6272], f16, stride=(1, 256)), T([6272, 512], f16)), {}) +cnt: 6, ((T([6272, 256], f16), T([256, 512], f16)), {}) +cnt: 9, ((T([512, 6272], 
f16, stride=(1, 512)), T([6272, 256], f16)), {}) +cnt: 9, ((T([6272, 512], f16), T([512, 256], f16)), {}) +cnt: 4, ((T([256, 6272], f16, stride=(1, 256)), T([6272, 256], f16)), {}) +cnt: 4, ((T([6272, 256], f16), T([256, 256], f16)), {}) +cnt: 1, ((T([128, 6272], f16, stride=(1, 128)), T([6272, 128], f16)), {}) +cnt: 1, ((T([6272, 128], f16), T([128, 128], f16)), {}) +cnt: 4, ((T([128, 25088], f16, stride=(1, 128)), T([25088, 256], f16)), {}) +cnt: 4, ((T([25088, 128], f16), T([128, 256], f16)), {}) +cnt: 4, ((T([128, 25088], f16, stride=(1, 128)), T([25088, 128], f16)), {}) +cnt: 4, ((T([25088, 128], f16), T([128, 128], f16)), {}) +Operator: aten.mul.Tensor +cnt: 8, ((T([128, 4, 196, 196], f16), 0.25), {}) +cnt: 2, ((T([128, 8, 49, 196], f16), 0.25), {}) +cnt: 8, ((T([128, 8, 49, 49], f16), 0.25), {}) +cnt: 2, ((T([128, 16, 16, 49], f16), 0.25), {}) +cnt: 8, ((T([128, 12, 16, 16], f16), 0.25), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([25088, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([25088, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([25088, 640], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([6272, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 10, ((T([6272, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 
1e-05), {}) +cnt: 9, ((T([6272, 512], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([6272, 1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([2048, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 10, ((T([2048, 384], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {}) +cnt: 9, ((T([2048, 768], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 384], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 2, ((T([128, 384], f16), T([128, 384], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {}) +cnt: 10, ((T([2048, 384], f16), T([2048, 384], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {}) +cnt: 9, ((T([2048, 768], f16), T([2048, 768], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([2048, 256], f16), T([2048, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([6272, 1280], f16), T([6272, 1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 1e-05, [True, True, True]), {}) +cnt: 10, ((T([6272, 256], f16), T([6272, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 9, ((T([6272, 512], f16), T([6272, 512], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([6272, 128], f16), T([6272, 128], f16), T([128], f16), 
T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([25088, 640], f16), T([25088, 640], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f32), T([640], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([25088, 128], f16), T([25088, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([25088, 256], f16), T([25088, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 14, 14], f16, stride=(25088, 1, 1792, 128)), T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 1, ((T([640, 128], f16, stride=(1, 640)), [640, 128], [128, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 8, ((T([256, 128], f16, stride=(1, 256)), [256, 128], [128, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.new_zeros.default +cnt: 4, ((T([12, 16, 16], f16), [12, 16]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([16, 16, 49], f16), [16, 49]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 4, ((T([8, 49, 49], f16), [8, 49]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([8, 49, 
196], f16), [8, 196]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 4, ((T([4, 196, 196], f16), [4, 196]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.slice_backward.default +cnt: 4, ((T([12, 16], f16), [12, 16], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([16, 49], f16), [16, 49], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 4, 4, 256], f16), [128, 4, 7, 256], 2, 0, 9223372036854775807, 2), {}) +cnt: 1, ((T([128, 4, 7, 256], f16), [128, 7, 7, 256], 1, 0, 9223372036854775807, 2), {}) +cnt: 1, ((T([128, 7, 7, 256], f16), [128, 7, 7, 256], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([8, 49], f16), [8, 49], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([8, 196], f16), [8, 196], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 7, 7, 128], f16), [128, 7, 14, 128], 2, 0, 9223372036854775807, 2), {}) +cnt: 1, ((T([128, 7, 14, 128], f16), [128, 14, 14, 128], 1, 0, 9223372036854775807, 2), {}) +cnt: 1, ((T([128, 14, 14, 128], f16), [128, 14, 14, 128], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([4, 196], f16), [4, 196], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.split_with_sizes.default +cnt: 4, ((T([128, 196, 4, 64], f16), [16, 16, 32], 3), {}) +cnt: 1, ((T([128, 196, 8, 80], f16), [16, 64], 3), {}) +cnt: 4, ((T([128, 49, 8, 64], f16), [16, 16, 32], 3), {}) +cnt: 1, ((T([128, 49, 16, 80], f16), [16, 64], 3), {}) +cnt: 4, ((T([128, 16, 12, 64], f16), [16, 16, 32], 3), {}) +Operator: aten.sum.SymInt +cnt: 2, ((T([128, 1000], f16), [0], True), {}) +cnt: 4, ((T([128, 12, 16, 16], f16), [0], True), {}) +cnt: 1, ((T([128, 16, 16, 49], f16), [0], True), {}) +cnt: 4, ((T([128, 8, 49, 49], f16), [0], True), {}) +cnt: 1, ((T([128, 8, 49, 196], f16), [0], True), 
{}) +cnt: 1, ((T([128, 128, 640], f16), [0], True), {}) +cnt: 8, ((T([128, 128, 256], f16), [0], True), {}) +cnt: 4, ((T([128, 4, 196, 196], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mixer_b16_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mixer_b16_224_training.txt new file mode 100644 index 000000000..483b2dad3 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mixer_b16_224_training.txt @@ -0,0 +1,70 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 12, ((T([64, 768, 384], f16), [64, 768, 384]), {}) +cnt: 12, ((T([64, 768, 196], f16), [49152, 196]), {}) +Operator: aten.add.Tensor +cnt: 12, ((T([64, 768, 384], f16), T([384], f16)), {}) +cnt: 12, ((T([64, 196, 768], f16, stride=(150528, 1, 196)), T([64, 196, 768], f16, stride=(150528, 1, 196))), {}) +cnt: 12, ((T([64, 196, 768], f16, stride=(150528, 1, 196)), T([64, 196, 768], f16)), {}) +cnt: 12, ((T([64, 196, 768], f16), T([64, 196, 768], f16)), {}) +cnt: 12, ((T([64, 196, 768], f16), T([64, 196, 768], f16, stride=(150528, 1, 196))), {}) +Operator: aten.addmm.default +cnt: 12, ((T([196], f16), T([49152, 384], f16), T([384, 196], f16, stride=(1, 384))), {}) +cnt: 12, ((T([3072], f16), T([12544, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([12544, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([1000], f16), T([64, 768], f16), T([768, 1000], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([64, 768, 196], f16, stride=(150528, 1, 768)), T([64, 196, 384], f16, stride=(0, 1, 196))), {}) +cnt: 12, ((T([64, 196, 768], f16), T([64, 768, 384], f16)), {}) +cnt: 12, ((T([64, 768, 384], f16), T([64, 384, 196], f16, stride=(0, 196, 1))), {}) 
+Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([768, 3, 16, 16], f16), T([768], f16), [16, 16], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 768, 14, 14], f16, stride=(150528, 1, 10752, 768)), T([64, 3, 224, 224], f16), T([768, 3, 16, 16], f16), [768], [16, 16], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +cnt: 12, ((T([384, 196], f16), T([384, 196], f16, stride=(1, 384))), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 196, 768], f16, stride=(768, 0, 1)), 196), {}) +Operator: aten.gelu.default +cnt: 12, ((T([64, 768, 384], f16),), {}) +cnt: 12, ((T([64, 196, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([64, 196, 3072], f16), T([64, 196, 3072], f16)), {}) +cnt: 12, ((T([64, 768, 384], f16), T([64, 768, 384], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 196, 768], f16), [1]), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 768], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 768], f16)), {}) +cnt: 12, ((T([12544, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 12544], f16, stride=(1, 768)), T([12544, 3072], f16)), {}) +cnt: 12, ((T([12544, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 12544], f16, stride=(1, 3072)), T([12544, 768], f16)), {}) +cnt: 12, ((T([49152, 196], f16), T([196, 384], f16)), {}) +cnt: 12, ((T([196, 49152], f16, stride=(1, 196)), T([49152, 384], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([64, 196, 768], f16, stride=(150528, 1, 196)), [768], T([768], f16), T([768], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 13, ((T([64, 196, 768], f16), T([64, 196, 
768], f16, stride=(150528, 1, 196)), [768], T([64, 196, 1], f32), T([64, 196, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +cnt: 12, ((T([64, 196, 768], f16, stride=(150528, 1, 196)), T([64, 196, 768], f16, stride=(150528, 1, 196)), [768], T([64, 196, 1], f32), T([64, 196, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 12, ((T([384, 196], f16, stride=(1, 384)), [384, 196], [196, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 12, ((T([12544, 768], f16), [0], True), {}) +cnt: 12, ((T([12544, 3072], f16), [0], True), {}) +cnt: 12, ((T([49152, 196], f16), [0], True), {}) +cnt: 12, ((T([64, 768, 384], f16), [0, 1], True), {}) +cnt: 12, ((T([64, 196, 384], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mixnet_l_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mixnet_l_training.txt new file mode 100644 index 000000000..74b315457 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mixnet_l_training.txt @@ -0,0 +1,378 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 58, ((T([], i64), 1), {}) +cnt: 2, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16)), {}) +cnt: 2, ((T([64, 40, 56, 56], f16), T([64, 40, 56, 56], f16)), {}) +cnt: 6, ((T([64, 56, 28, 28], f16), T([64, 56, 28, 28], f16)), {}) +cnt: 6, ((T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16)), {}) +cnt: 6, 
((T([64, 160, 14, 14], f16), T([64, 160, 14, 14], f16)), {}) +cnt: 6, ((T([64, 264, 7, 7], f16), T([64, 264, 7, 7], f16)), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), T([64, 1584, 7, 7], f16)), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([64, 960, 7, 7], f16)), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), T([64, 480, 14, 14], f16)), {}) +cnt: 4, ((T([64, 624, 14, 14], f16), T([64, 624, 14, 14], f16)), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([64, 336, 14, 14], f16)), {}) +cnt: 3, ((T([64, 336, 28, 28], f16), T([64, 336, 28, 28], f16)), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([64, 240, 28, 28], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([64, 1536], f16), T([1536, 1000], f16, stride=(1, 1536))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 96, 112, 112], f16), T([64, 96, 112, 112], f16)], 1), {}) +cnt: 1, (([T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16)], 1), {}) +cnt: 3, (([T([64, 20, 56, 56], f16), T([64, 20, 56, 56], f16)], 1), {}) +cnt: 2, (([T([64, 60, 56, 56], f16), T([64, 60, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 60, 28, 28], f16), T([64, 60, 28, 28], f16), T([64, 60, 28, 28], f16), T([64, 60, 28, 28], f16)], 1), {}) +cnt: 12, (([T([64, 168, 28, 28], f16), T([64, 168, 28, 28], f16)], 1), {}) +cnt: 6, (([T([64, 28, 28, 28], f16), T([64, 28, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 112, 14, 14], f16), T([64, 112, 14, 14], f16), T([64, 112, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 312, 14, 14], f16), T([64, 312, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 156, 14, 14], f16), T([64, 156, 14, 14], f16), T([64, 156, 14, 14], f16), T([64, 156, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 52, 14, 14], f16), T([64, 52, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 240, 14, 14], f16), T([64, 240, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 120, 14, 14], f16), T([64, 120, 14, 14], f16), T([64, 120, 14, 14], f16), T([64, 120, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 80, 14, 14], f16), T([64, 80, 14, 14], 
f16)], 1), {}) +cnt: 1, (([T([64, 240, 7, 7], f16), T([64, 240, 7, 7], f16), T([64, 240, 7, 7], f16), T([64, 240, 7, 7], f16)], 1), {}) +cnt: 6, (([T([64, 396, 7, 7], f16), T([64, 396, 7, 7], f16), T([64, 396, 7, 7], f16), T([64, 396, 7, 7], f16)], 1), {}) +cnt: 3, (([T([64, 132, 7, 7], f16), T([64, 132, 7, 7], f16)], 1), {}) +cnt: 3, (([T([64, 792, 7, 7], f16), T([64, 792, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 240, 14, 14], f16), T([64, 240, 14, 14], f16), T([64, 240, 14, 14], f16), T([64, 240, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 112, 28, 28], f16), T([64, 112, 28, 28], f16), T([64, 112, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 60, 56, 56], f16), T([64, 60, 56, 56], f16), T([64, 60, 56, 56], f16), T([64, 60, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 96, 56, 56], f16), T([64, 96, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16)], 1), {}) +cnt: 1, (([T([64, 16, 112, 112], f16), T([64, 16, 112, 112], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +cnt: 1, ((T([64, 240, 56, 56], f16),), {}) +cnt: 1, ((T([64, 240, 28, 28], f16),), {}) +cnt: 1, ((T([64, 20, 1, 1], f16),), {}) +cnt: 7, ((T([64, 336, 28, 28], f16),), {}) +cnt: 3, ((T([64, 28, 1, 1], f16),), {}) +cnt: 1, ((T([64, 336, 14, 14], f16),), {}) +cnt: 1, ((T([64, 14, 1, 1], f16),), {}) +cnt: 8, ((T([64, 624, 14, 14], f16),), {}) +cnt: 3, ((T([64, 26, 1, 1], f16),), {}) +cnt: 1, ((T([64, 52, 1, 1], f16),), {}) +cnt: 6, ((T([64, 480, 14, 14], f16),), {}) +cnt: 4, ((T([64, 80, 1, 1], f16),), {}) +cnt: 1, ((T([64, 960, 14, 14], f16),), {}) +cnt: 1, ((T([64, 960, 7, 7], f16),), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16),), {}) +cnt: 3, ((T([64, 132, 1, 1], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], 
[1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([32, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 16, 112, 112], f16, stride=(401408, 12544, 112, 1)), T([96, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 112, 112], f16, stride=(2408448, 12544, 112, 1)), T([64, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([64, 64, 112, 112], f16, stride=(2408448, 12544, 112, 1)), T([64, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([64, 64, 112, 112], f16, stride=(2408448, 12544, 112, 1)), T([64, 1, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 64), {}) +cnt: 2, ((T([64, 96, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([20, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 20, 56, 56], f16, stride=(125440, 3136, 56, 1)), T([60, 20, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 120, 56, 56], f16), T([120, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 120), {}) +cnt: 2, ((T([64, 60, 56, 56], f16, stride=(376320, 3136, 56, 1)), T([20, 60, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 40, 56, 56], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), T([60, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 60), {}) +cnt: 1, ((T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), T([60, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 60), {}) +cnt: 1, ((T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), T([60, 1, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 60), {}) +cnt: 1, ((T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), T([60, 1, 9, 9], f16), None, [2, 2], [4, 4], [1, 1], False, [0, 0], 60), {}) +cnt: 1, 
((T([64, 240, 1, 1], f16), T([20, 240, 1, 1], f16), T([20], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 20, 1, 1], f16), T([240, 20, 1, 1], f16), T([240], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([56, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 28, 28, 28], f16, stride=(43904, 784, 28, 1)), T([168, 28, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([168, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 168), {}) +cnt: 3, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([168, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 168), {}) +cnt: 3, ((T([64, 336, 1, 1], f16), T([28, 336, 1, 1], f16), T([28], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 28, 1, 1], f16), T([336, 28, 1, 1], f16), T([336], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([28, 168, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 56, 28, 28], f16), T([336, 56, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 112, 28, 28], f16, stride=(263424, 784, 28, 1)), T([112, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 112), {}) +cnt: 1, ((T([64, 112, 28, 28], f16, stride=(263424, 784, 28, 1)), T([112, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 112), {}) +cnt: 1, ((T([64, 112, 28, 28], f16, stride=(263424, 784, 28, 1)), T([112, 1, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 112), {}) +cnt: 1, ((T([64, 336, 1, 1], f16), T([14, 336, 1, 1], f16), T([14], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 14, 1, 1], f16), T([336, 14, 1, 1], f16), T([336], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 336, 14, 
14], f16), T([104, 336, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 52, 14, 14], f16, stride=(20384, 196, 14, 1)), T([312, 52, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 156), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 156), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 156), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 9, 9], f16), None, [1, 1], [4, 4], [1, 1], False, [0, 0], 156), {}) +cnt: 3, ((T([64, 624, 1, 1], f16), T([26, 624, 1, 1], f16), T([26], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 26, 1, 1], f16), T([624, 26, 1, 1], f16), T([624], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 312, 14, 14], f16, stride=(122304, 196, 14, 1)), T([52, 312, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 104, 14, 14], f16), T([624, 104, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 624, 14, 14], f16), T([624, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 624), {}) +cnt: 1, ((T([64, 624, 1, 1], f16), T([52, 624, 1, 1], f16), T([52], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 52, 1, 1], f16), T([624, 52, 1, 1], f16), T([624], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 624, 14, 14], f16), T([160, 624, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 80, 14, 14], f16, stride=(31360, 196, 14, 1)), T([240, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 
196, 14, 1)), T([120, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 120), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 120), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 120), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 9, 9], f16), None, [1, 1], [4, 4], [1, 1], False, [0, 0], 120), {}) +cnt: 3, ((T([64, 480, 1, 1], f16), T([80, 480, 1, 1], f16), T([80], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 80, 1, 1], f16), T([480, 80, 1, 1], f16), T([480], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 240, 14, 14], f16, stride=(94080, 196, 14, 1)), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 160, 14, 14], f16), T([960, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), T([240, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), T([240, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), T([240, 1, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), T([240, 1, 9, 9], f16), None, [2, 2], [4, 4], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([64, 960, 1, 1], f16), T([80, 960, 1, 1], f16), T([80], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 80, 1, 1], f16), T([960, 80, 1, 1], f16), T([960], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([264, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 264, 
7, 7], f16), T([1584, 264, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 396), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 396), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 396), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 9, 9], f16), None, [1, 1], [4, 4], [1, 1], False, [0, 0], 396), {}) +cnt: 3, ((T([64, 1584, 1, 1], f16), T([132, 1584, 1, 1], f16), T([132], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 132, 1, 1], f16), T([1584, 132, 1, 1], f16), T([1584], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 792, 7, 7], f16, stride=(77616, 49, 7, 1)), T([132, 792, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 264, 7, 7], f16), T([1536, 264, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 1536, 7, 7], f16), T([64, 264, 7, 7], f16), T([1536, 264, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([64, 132, 7, 7], f16, stride=(12936, 49, 7, 1)), T([64, 792, 7, 7], f16, stride=(77616, 49, 7, 1)), T([132, 792, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 1584, 1, 1], f16), T([64, 132, 1, 1], f16), T([1584, 132, 1, 1], f16), [1584], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 132, 1, 1], f16), T([64, 1584, 1, 1], f16), T([132, 1584, 1, 1], f16), [132], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([64, 396, 7, 7], f16, 
stride=(77616, 49, 7, 1)), T([396, 1, 9, 9], f16), [0], [1, 1], [4, 4], [1, 1], False, [0, 0], 396, [True, True, False]), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 396, [True, True, False]), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 396, [True, True, False]), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 396, [True, True, False]), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), T([64, 264, 7, 7], f16), T([1584, 264, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 264, 7, 7], f16), T([64, 960, 7, 7], f16), T([264, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 960, 1, 1], f16), T([64, 80, 1, 1], f16), T([960, 80, 1, 1], f16), [960], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 80, 1, 1], f16), T([64, 960, 1, 1], f16), T([80, 960, 1, 1], f16), [80], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 240, 7, 7], f16, stride=(47040, 49, 7, 1)), T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), T([240, 1, 9, 9], f16), [0], [2, 2], [4, 4], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([64, 240, 7, 7], f16, stride=(47040, 49, 7, 1)), T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), T([240, 1, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([64, 240, 7, 7], f16, stride=(47040, 49, 7, 1)), T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), T([240, 1, 5, 5], f16), [0], [2, 2], [2, 2], 
[1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([64, 240, 7, 7], f16, stride=(47040, 49, 7, 1)), T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), T([240, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([64, 160, 14, 14], f16), T([960, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([64, 80, 14, 14], f16, stride=(31360, 196, 14, 1)), T([64, 240, 14, 14], f16, stride=(94080, 196, 14, 1)), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 480, 1, 1], f16), T([64, 80, 1, 1], f16), T([480, 80, 1, 1], f16), [480], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 80, 1, 1], f16), T([64, 480, 1, 1], f16), T([80, 480, 1, 1], f16), [80], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 9, 9], f16), [0], [1, 1], [4, 4], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 6, ((T([64, 240, 14, 14], f16, stride=(94080, 196, 14, 1)), T([64, 80, 14, 14], f16, stride=(31360, 196, 14, 1)), T([240, 80, 1, 1], f16), [0], 
[1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 160, 14, 14], f16), T([64, 624, 14, 14], f16), T([160, 624, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 624, 1, 1], f16), T([64, 52, 1, 1], f16), T([624, 52, 1, 1], f16), [624], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 52, 1, 1], f16), T([64, 624, 1, 1], f16), T([52, 624, 1, 1], f16), [52], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 624, 14, 14], f16), T([64, 624, 14, 14], f16), T([624, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 624, [True, True, False]), {}) +cnt: 1, ((T([64, 624, 14, 14], f16), T([64, 104, 14, 14], f16), T([624, 104, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([64, 52, 14, 14], f16, stride=(20384, 196, 14, 1)), T([64, 312, 14, 14], f16, stride=(122304, 196, 14, 1)), T([52, 312, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 624, 1, 1], f16), T([64, 26, 1, 1], f16), T([624, 26, 1, 1], f16), [624], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 26, 1, 1], f16), T([64, 624, 1, 1], f16), T([26, 624, 1, 1], f16), [26], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 9, 9], f16), [0], [1, 1], [4, 4], [1, 1], False, [0, 0], 156, [True, True, False]), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 156, [True, True, False]), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 5, 5], 
f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 156, [True, True, False]), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 156, [True, True, False]), {}) +cnt: 6, ((T([64, 312, 14, 14], f16, stride=(122304, 196, 14, 1)), T([64, 52, 14, 14], f16, stride=(20384, 196, 14, 1)), T([312, 52, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 104, 14, 14], f16), T([64, 336, 14, 14], f16), T([104, 336, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 336, 1, 1], f16), T([64, 14, 1, 1], f16), T([336, 14, 1, 1], f16), [336], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 14, 1, 1], f16), T([64, 336, 1, 1], f16), T([14, 336, 1, 1], f16), [14], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 112, 14, 14], f16, stride=(65856, 196, 14, 1)), T([64, 112, 28, 28], f16, stride=(263424, 784, 28, 1)), T([112, 1, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 112, [True, True, False]), {}) +cnt: 1, ((T([64, 112, 14, 14], f16, stride=(65856, 196, 14, 1)), T([64, 112, 28, 28], f16, stride=(263424, 784, 28, 1)), T([112, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 112, [True, True, False]), {}) +cnt: 1, ((T([64, 112, 14, 14], f16, stride=(65856, 196, 14, 1)), T([64, 112, 28, 28], f16, stride=(263424, 784, 28, 1)), T([112, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 112, [True, True, False]), {}) +cnt: 1, ((T([64, 336, 28, 28], f16), T([64, 56, 28, 28], f16), T([336, 56, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([64, 28, 28, 28], f16, stride=(43904, 784, 28, 1)), T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([28, 168, 1, 1], f16), [0], [1, 1], [0, 0], [1, 
1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 336, 1, 1], f16), T([64, 28, 1, 1], f16), T([336, 28, 1, 1], f16), [336], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 28, 1, 1], f16), T([64, 336, 1, 1], f16), T([28, 336, 1, 1], f16), [28], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([168, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 168, [True, True, False]), {}) +cnt: 3, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([168, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 168, [True, True, False]), {}) +cnt: 6, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([64, 28, 28, 28], f16, stride=(43904, 784, 28, 1)), T([168, 28, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 56, 28, 28], f16), T([64, 240, 28, 28], f16), T([56, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 240, 1, 1], f16), T([64, 20, 1, 1], f16), T([240, 20, 1, 1], f16), [240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 20, 1, 1], f16), T([64, 240, 1, 1], f16), T([20, 240, 1, 1], f16), [20], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 60, 28, 28], f16, stride=(188160, 784, 28, 1)), T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), T([60, 1, 9, 9], f16), [0], [2, 2], [4, 4], [1, 1], False, [0, 0], 60, [True, True, False]), {}) +cnt: 1, ((T([64, 60, 28, 28], f16, stride=(188160, 784, 28, 1)), T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), T([60, 1, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 60, [True, True, False]), {}) +cnt: 1, ((T([64, 60, 28, 28], f16, stride=(188160, 
784, 28, 1)), T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), T([60, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 60, [True, True, False]), {}) +cnt: 1, ((T([64, 60, 28, 28], f16, stride=(188160, 784, 28, 1)), T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), T([60, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 60, [True, True, False]), {}) +cnt: 1, ((T([64, 240, 56, 56], f16), T([64, 40, 56, 56], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 20, 56, 56], f16, stride=(125440, 3136, 56, 1)), T([64, 60, 56, 56], f16, stride=(376320, 3136, 56, 1)), T([20, 60, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 120, 56, 56], f16), T([64, 120, 56, 56], f16), T([120, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 2, ((T([64, 60, 56, 56], f16, stride=(376320, 3136, 56, 1)), T([64, 20, 56, 56], f16, stride=(125440, 3136, 56, 1)), T([60, 20, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 20, 56, 56], f16, stride=(125440, 3136, 56, 1)), T([64, 96, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([20, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([64, 64, 112, 112], f16, stride=(2408448, 12544, 112, 1)), T([64, 1, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([64, 64, 112, 112], f16, stride=(2408448, 12544, 112, 1)), T([64, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([64, 64, 112, 112], f16, stride=(2408448, 12544, 112, 1)), T([64, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 
1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 2, ((T([64, 96, 112, 112], f16, stride=(2408448, 12544, 112, 1)), T([64, 16, 112, 112], f16, stride=(401408, 12544, 112, 1)), T([96, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), T([32, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 1536, 7, 7], f16, stride=(1536, 1, 0, 0)), 49), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16, stride=(1584, 1, 0, 0)), 49), {}) +cnt: 1, ((T([64, 960, 7, 7], f16, stride=(960, 1, 0, 0)), 49), {}) +cnt: 3, ((T([64, 480, 14, 14], f16, stride=(480, 1, 0, 0)), 196), {}) +cnt: 4, ((T([64, 624, 14, 14], f16, stride=(624, 1, 0, 0)), 196), {}) +cnt: 1, ((T([64, 336, 14, 14], f16, stride=(336, 1, 0, 0)), 196), {}) +cnt: 3, ((T([64, 336, 28, 28], f16, stride=(336, 1, 0, 0)), 784), {}) +cnt: 1, ((T([64, 240, 28, 28], f16, stride=(240, 1, 0, 0)), 784), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 240, 28, 28], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 336, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), [2, 3], True), {}) +cnt: 4, ((T([64, 624, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 1536, 7, 7], f16), [-1, -2], 
True), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 1536], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 1536], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([64, 240, 28, 28], f16), T([64, 240, 1, 1], f16)), {}) +cnt: 6, ((T([64, 336, 28, 28], f16), T([64, 336, 1, 1], f16)), {}) +cnt: 2, ((T([64, 336, 14, 14], f16), T([64, 336, 1, 1], f16)), {}) +cnt: 8, ((T([64, 624, 14, 14], f16), T([64, 624, 1, 1], f16)), {}) +cnt: 6, ((T([64, 480, 14, 14], f16), T([64, 480, 1, 1], f16)), {}) +cnt: 2, ((T([64, 960, 7, 7], f16), T([64, 960, 1, 1], f16)), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16), T([64, 1584, 1, 1], f16)), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), T([64, 1584, 7, 7], f16)), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([64, 960, 7, 7], f16)), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), T([64, 480, 14, 14], f16)), {}) +cnt: 4, ((T([64, 624, 14, 14], f16), T([64, 624, 14, 14], f16)), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([64, 336, 14, 14], f16)), {}) +cnt: 3, ((T([64, 336, 28, 28], f16), T([64, 336, 28, 28], f16)), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([64, 240, 28, 28], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 3, ((T([64, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 192, 112, 112], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([64, 40, 56, 56], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([64, 120, 56, 56], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 240, 56, 56], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), 
T([240], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 56, 28, 28], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([64, 336, 28, 28], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 104, 14, 14], f16), T([104], f16), T([104], f16), T([104], f16), T([104], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([64, 624, 14, 14], f16), T([624], f16), T([624], f16), T([624], f16), T([624], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 160, 14, 14], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([64, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 264, 7, 7], f16), T([264], f16), T([264], f16), T([264], f16), T([264], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16), T([1584], f16), T([1584], f16), T([1584], f16), T([1584], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 1536, 7, 7], f16), T([1536], f16), T([1536], f16), T([1536], f16), T([1536], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([64, 1536, 7, 7], f16), T([64, 1536, 7, 7], f16), T([1536], f16), T([1536], f16), T([1536], f16), T([1536], f32), T([1536], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 264, 7, 7], f16), T([64, 264, 7, 7], f16), T([264], f16), T([264], f16), T([264], f16), T([264], f32), T([264], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16), T([64, 1584, 7, 7], f16), T([1584], f16), T([1584], f16), T([1584], 
f16), T([1584], f32), T([1584], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([64, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([64, 960, 14, 14], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 160, 14, 14], f16), T([64, 160, 14, 14], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([64, 480, 14, 14], f16), T([64, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([64, 624, 14, 14], f16), T([64, 624, 14, 14], f16), T([624], f16), T([624], f16), T([624], f16), T([624], f32), T([624], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16), T([104], f16), T([104], f16), T([104], f16), T([104], f32), T([104], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([64, 336, 14, 14], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([64, 336, 28, 28], f16), T([64, 336, 28, 28], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 56, 28, 28], f16), T([64, 56, 28, 28], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f32), T([56], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([64, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 240, 56, 56], f16), T([64, 240, 56, 56], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, 
[True, True, True]), {}) +cnt: 2, ((T([64, 40, 56, 56], f16), T([64, 40, 56, 56], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([64, 120, 56, 56], f16), T([64, 120, 56, 56], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), T([64, 192, 56, 56], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 192, 112, 112], f16), T([64, 192, 112, 112], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 2, ((T([64, 32, 112, 112], f16),), {}) +cnt: 1, ((T([64, 192, 112, 112], f16),), {}) +cnt: 1, ((T([64, 192, 56, 56], f16),), {}) +cnt: 2, ((T([64, 120, 56, 56], f16),), {}) +cnt: 1, ((T([64, 1536, 7, 7], f16),), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([64, 240, 1, 1], f16),), {}) +cnt: 4, ((T([64, 336, 1, 1], f16),), {}) +cnt: 4, ((T([64, 624, 1, 1], f16),), {}) +cnt: 3, ((T([64, 480, 1, 1], f16),), {}) +cnt: 1, ((T([64, 960, 1, 1], f16),), {}) +cnt: 3, ((T([64, 1584, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 3, ((T([64, 1584, 1, 1], f16), T([64, 1584, 1, 1], f16)), {}) +cnt: 1, ((T([64, 960, 1, 1], f16), T([64, 960, 1, 1], f16)), {}) +cnt: 3, ((T([64, 480, 1, 1], f16), T([64, 480, 1, 1], f16)), {}) +cnt: 4, ((T([64, 624, 1, 1], f16), T([64, 624, 1, 1], f16)), {}) +cnt: 4, 
((T([64, 336, 1, 1], f16), T([64, 336, 1, 1], f16)), {}) +cnt: 1, ((T([64, 240, 1, 1], f16), T([64, 240, 1, 1], f16)), {}) +Operator: aten.silu_.default +cnt: 1, ((T([64, 240, 56, 56], f16),), {}) +cnt: 1, ((T([64, 240, 28, 28], f16),), {}) +cnt: 1, ((T([64, 20, 1, 1], f16),), {}) +cnt: 7, ((T([64, 336, 28, 28], f16),), {}) +cnt: 3, ((T([64, 28, 1, 1], f16),), {}) +cnt: 1, ((T([64, 336, 14, 14], f16),), {}) +cnt: 1, ((T([64, 14, 1, 1], f16),), {}) +cnt: 8, ((T([64, 624, 14, 14], f16),), {}) +cnt: 3, ((T([64, 26, 1, 1], f16),), {}) +cnt: 1, ((T([64, 52, 1, 1], f16),), {}) +cnt: 6, ((T([64, 480, 14, 14], f16),), {}) +cnt: 4, ((T([64, 80, 1, 1], f16),), {}) +cnt: 1, ((T([64, 960, 14, 14], f16),), {}) +cnt: 1, ((T([64, 960, 7, 7], f16),), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16),), {}) +cnt: 3, ((T([64, 132, 1, 1], f16),), {}) +Operator: aten.silu_backward.default +cnt: 3, ((T([64, 132, 1, 1], f16), T([64, 132, 1, 1], f16)), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16), T([64, 1584, 7, 7], f16)), {}) +cnt: 4, ((T([64, 80, 1, 1], f16), T([64, 80, 1, 1], f16)), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([64, 960, 7, 7], f16)), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([64, 960, 14, 14], f16)), {}) +cnt: 6, ((T([64, 480, 14, 14], f16), T([64, 480, 14, 14], f16)), {}) +cnt: 1, ((T([64, 52, 1, 1], f16), T([64, 52, 1, 1], f16)), {}) +cnt: 8, ((T([64, 624, 14, 14], f16), T([64, 624, 14, 14], f16)), {}) +cnt: 3, ((T([64, 26, 1, 1], f16), T([64, 26, 1, 1], f16)), {}) +cnt: 1, ((T([64, 14, 1, 1], f16), T([64, 14, 1, 1], f16)), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([64, 336, 14, 14], f16)), {}) +cnt: 7, ((T([64, 336, 28, 28], f16), T([64, 336, 28, 28], f16)), {}) +cnt: 3, ((T([64, 28, 1, 1], f16), T([64, 28, 1, 1], f16)), {}) +cnt: 1, ((T([64, 20, 1, 1], f16), T([64, 20, 1, 1], f16)), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([64, 240, 28, 28], f16)), {}) +cnt: 1, ((T([64, 240, 56, 56], f16), T([64, 240, 56, 56], f16)), {}) +Operator: aten.split_with_sizes.default +cnt: 1, 
((T([64, 32, 112, 112], f16), [16, 16], 1), {}) +cnt: 1, ((T([64, 192, 112, 112], f16), [64, 64, 64], 1), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), [96, 96], 1), {}) +cnt: 1, ((T([64, 40, 56, 56], f16), [20, 20], 1), {}) +cnt: 1, ((T([64, 120, 56, 56], f16), [60, 60], 1), {}) +cnt: 1, ((T([64, 240, 56, 56], f16), [60, 60, 60, 60], 1), {}) +cnt: 3, ((T([64, 56, 28, 28], f16), [28, 28], 1), {}) +cnt: 6, ((T([64, 336, 28, 28], f16), [168, 168], 1), {}) +cnt: 1, ((T([64, 336, 28, 28], f16), [112, 112, 112], 1), {}) +cnt: 3, ((T([64, 104, 14, 14], f16), [52, 52], 1), {}) +cnt: 3, ((T([64, 624, 14, 14], f16), [156, 156, 156, 156], 1), {}) +cnt: 3, ((T([64, 624, 14, 14], f16), [312, 312], 1), {}) +cnt: 3, ((T([64, 160, 14, 14], f16), [80, 80], 1), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), [120, 120, 120, 120], 1), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), [240, 240], 1), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), [240, 240, 240, 240], 1), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), [396, 396, 396, 396], 1), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), [792, 792], 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 4, ((T([64, 624, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 336, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([64, 1536, 7, 7], f16), T([64, 1536, 7, 7], f16), 0), {}) +cnt: 2, ((T([64, 120, 56, 56], f16), T([64, 120, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), T([64, 192, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 192, 112, 112], f16), T([64, 192, 112, 112], f16), 0), {}) +cnt: 2, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), 0), {}) diff --git 
a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mnasnet_100_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mnasnet_100_training.txt new file mode 100644 index 000000000..6524a78aa --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mnasnet_100_training.txt @@ -0,0 +1,170 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 52, ((T([], i64), 1), {}) +cnt: 4, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 4, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16)), {}) +cnt: 4, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16)), {}) +cnt: 2, ((T([128, 96, 14, 14], f16), T([128, 96, 14, 14], f16)), {}) +cnt: 6, ((T([128, 192, 7, 7], f16), T([128, 192, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([48, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([48, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 48), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([24, 48, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 24, 56, 56], f16), T([72, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) 
+cnt: 2, ((T([128, 72, 56, 56], f16), T([72, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 72), {}) +cnt: 2, ((T([128, 72, 56, 56], f16), T([24, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([72, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 72), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([40, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([120, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([120, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 120), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([40, 120, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([240, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 80, 14, 14], f16), T([480, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([480, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 480), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([80, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([480, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 480), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([96, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 96, 14, 14], f16), T([576, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 576, 14, 14], f16), T([576, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 576), {}) +cnt: 1, 
((T([128, 576, 14, 14], f16), T([96, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 576, 14, 14], f16), T([576, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 576), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([192, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1152), {}) +cnt: 3, ((T([128, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([1152, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1152), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([320, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([128, 1152, 7, 7], f16), T([320, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 4, ((T([128, 1152, 7, 7], f16), T([128, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 192, 7, 7], f16), T([128, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152, 1, 5, 
5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 7, 7], f16), T([128, 576, 7, 7], f16), T([192, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([128, 576, 14, 14], f16), T([576, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 576, [True, True, False]), {}) +cnt: 2, ((T([128, 576, 14, 14], f16), T([128, 96, 14, 14], f16), T([576, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 14, 14], f16), T([128, 576, 14, 14], f16), T([96, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 576, 14, 14], f16), T([128, 576, 14, 14], f16), T([576, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 576, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 14, 14], f16), T([128, 480, 14, 14], f16), T([96, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 3, ((T([128, 480, 14, 14], f16), T([128, 80, 14, 14], f16), T([480, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([128, 480, 14, 14], f16), T([80, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([128, 240, 14, 14], f16), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 28, 28], f16), T([240, 1, 5, 5], f16), [0], [2, 2], 
[2, 2], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 40, 28, 28], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([128, 120, 28, 28], f16), T([40, 120, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), T([120, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([128, 40, 28, 28], f16), T([120, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([128, 72, 28, 28], f16), T([40, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 56, 56], f16), T([72, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 3, ((T([128, 72, 56, 56], f16), T([128, 24, 56, 56], f16), T([72, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 72, 56, 56], f16), T([24, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), T([72, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 48, 56, 56], f16), T([24, 48, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 112, 112], f16), T([48, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 48, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([128, 16, 112, 112], f16), T([48, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, 
[True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1280, 7, 7], f16, stride=(1280, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 1280, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1280], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], 
f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 96, 14, 14], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([128, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], 
f16), T([1152], f32), T([1152], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 192, 7, 7], f16), T([128, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([128, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 576, 14, 14], f16), T([128, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 96, 14, 14], f16), T([128, 96, 14, 14], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, 
[True, True, True]), {}) +cnt: 5, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([128, 48, 112, 112], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 2, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 48, 112, 112], f16),), {}) +cnt: 1, ((T([128, 48, 56, 56], f16),), {}) +cnt: 5, ((T([128, 72, 56, 56], f16),), {}) +cnt: 1, ((T([128, 72, 28, 28], f16),), {}) +cnt: 4, ((T([128, 120, 28, 28], f16),), {}) +cnt: 1, ((T([128, 240, 28, 28], f16),), {}) +cnt: 1, ((T([128, 240, 14, 14], f16),), {}) +cnt: 6, ((T([128, 480, 14, 14], f16),), {}) +cnt: 3, ((T([128, 576, 14, 14], f16),), {}) +cnt: 1, ((T([128, 576, 7, 7], f16),), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), 
{}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 1280, 7, 7], f16), 0), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), 0), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([128, 576, 7, 7], f16), 0), {}) +cnt: 3, ((T([128, 576, 14, 14], f16), T([128, 576, 14, 14], f16), 0), {}) +cnt: 6, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16), 0), {}) +cnt: 4, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16), 0), {}) +cnt: 5, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([128, 48, 112, 112], f16), 0), {}) +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mobilenetv2_100_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mobilenetv2_100_training.txt new file mode 100644 index 000000000..4c6b5706f --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mobilenetv2_100_training.txt @@ -0,0 +1,172 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 52, ((T([], i64), 1), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 4, ((T([128, 32, 28, 28], f16), T([128, 32, 28, 28], f16)), {}) +cnt: 6, ((T([128, 64, 14, 14], f16), T([128, 64, 14, 14], f16)), {}) +cnt: 4, ((T([128, 96, 14, 14], f16), T([128, 96, 14, 14], f16)), {}) +cnt: 4, ((T([128, 160, 7, 7], f16), T([128, 160, 7, 7], f16)), {}) +Operator: 
aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +cnt: 2, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 96, 112, 112], f16),), {}) +cnt: 1, ((T([128, 96, 56, 56], f16),), {}) +cnt: 3, ((T([128, 144, 56, 56], f16),), {}) +cnt: 1, ((T([128, 144, 28, 28], f16),), {}) +cnt: 5, ((T([128, 192, 28, 28], f16),), {}) +cnt: 1, ((T([128, 192, 14, 14], f16),), {}) +cnt: 8, ((T([128, 384, 14, 14], f16),), {}) +cnt: 5, ((T([128, 576, 14, 14], f16),), {}) +cnt: 1, ((T([128, 576, 7, 7], f16),), {}) +cnt: 6, ((T([128, 960, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([96, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([96, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 96), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([24, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([144, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([144, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 144), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([24, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([144, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 144), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([32, 144, 1, 1], f16), None, [1, 
1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 32, 28, 28], f16), T([192, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 192, 28, 28], f16), T([192, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 192), {}) +cnt: 2, ((T([128, 192, 28, 28], f16), T([32, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([192, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 192), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), T([64, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 64, 14, 14], f16), T([384, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 384, 14, 14], f16), T([384, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 384), {}) +cnt: 3, ((T([128, 384, 14, 14], f16), T([64, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), T([96, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 96, 14, 14], f16), T([576, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 576, 14, 14], f16), T([576, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 576), {}) +cnt: 2, ((T([128, 576, 14, 14], f16), T([96, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 576, 14, 14], f16), T([576, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 576), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([160, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 160, 7, 7], f16), T([960, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 960, 7, 7], f16), T([960, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 960), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), T([160, 960, 1, 1], f16), None, [1, 1], [0, 0], 
[1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([320, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([128, 960, 7, 7], f16), T([320, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16), T([960, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 960, [True, True, False]), {}) +cnt: 3, ((T([128, 960, 7, 7], f16), T([128, 160, 7, 7], f16), T([960, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 160, 7, 7], f16), T([128, 960, 7, 7], f16), T([160, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 160, 7, 7], f16), T([128, 576, 7, 7], f16), T([160, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([128, 576, 14, 14], f16), T([576, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 576, [True, True, False]), {}) +cnt: 3, ((T([128, 576, 14, 14], f16), T([128, 96, 14, 14], f16), T([576, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 96, 14, 14], f16), T([128, 576, 14, 14], f16), T([96, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 576, 14, 14], f16), T([128, 576, 14, 14], f16), T([576, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 576, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 14, 14], f16), T([128, 384, 14, 
14], f16), T([96, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), T([384, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 384, [True, True, False]), {}) +cnt: 4, ((T([128, 384, 14, 14], f16), T([128, 64, 14, 14], f16), T([384, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 64, 14, 14], f16), T([128, 384, 14, 14], f16), T([64, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 14, 14], f16), T([128, 192, 14, 14], f16), T([64, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), T([128, 192, 28, 28], f16), T([192, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 192, [True, True, False]), {}) +cnt: 3, ((T([128, 192, 28, 28], f16), T([128, 32, 28, 28], f16), T([192, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 32, 28, 28], f16), T([128, 192, 28, 28], f16), T([32, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), T([192, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 192, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 28, 28], f16), T([128, 144, 28, 28], f16), T([32, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 56, 56], f16), T([144, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 2, ((T([128, 144, 56, 56], f16), T([128, 24, 56, 56], f16), T([144, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 144, 56, 56], f16), T([24, 144, 
1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16), T([144, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 96, 56, 56], f16), T([24, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 112, 112], f16), T([96, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 16, 112, 112], f16), T([96, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1280, 7, 7], f16, stride=(1280, 1, 0, 0)), 49), {}) +Operator: aten.hardtanh_.default +cnt: 2, ((T([128, 32, 112, 112], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), 0.0, 6.0), {}) +cnt: 3, ((T([128, 144, 56, 56], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), 0.0, 6.0), {}) +cnt: 5, ((T([128, 192, 28, 28], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), 0.0, 6.0), {}) +cnt: 8, ((T([128, 384, 14, 14], f16), 0.0, 6.0), {}) +cnt: 5, ((T([128, 576, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), 
0.0, 6.0), {}) +cnt: 6, ((T([128, 960, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16), 0.0, 6.0), {}) +Operator: aten.hardtanh_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 1280, 7, 7], f16), 0.0, 6.0), {}) +cnt: 6, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([128, 576, 7, 7], f16), 0.0, 6.0), {}) +cnt: 5, ((T([128, 576, 14, 14], f16), T([128, 576, 14, 14], f16), 0.0, 6.0), {}) +cnt: 8, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), T([128, 192, 14, 14], f16), 0.0, 6.0), {}) +cnt: 5, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16), 0.0, 6.0), {}) +cnt: 3, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 96, 112, 112], f16), 0.0, 6.0), {}) +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), 0.0, 6.0), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 1280, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1280], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 24, 56, 
56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 32, 28, 28], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 64, 14, 14], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([128, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 96, 14, 14], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 160, 7, 7], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([128, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 1e-05, [True, True, 
True]), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([128, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 160, 7, 7], f16), T([128, 160, 7, 7], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([128, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 576, 14, 14], f16), T([128, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 96, 14, 14], f16), T([128, 96, 14, 14], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 64, 14, 14], f16), T([128, 64, 14, 14], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 192, 14, 14], f16), T([128, 192, 14, 14], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 32, 28, 28], f16), T([128, 32, 28, 28], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), 
T([128, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mobilenetv3_large_100_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mobilenetv3_large_100_training.txt new file mode 100644 index 000000000..df2ab44bf --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mobilenetv3_large_100_training.txt @@ -0,0 +1,269 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: 
aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 46, ((T([], i64), 1), {}) +cnt: 2, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 4, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16)), {}) +cnt: 6, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16)), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16)), {}) +cnt: 4, ((T([128, 160, 7, 7], f16), T([128, 160, 7, 7], f16)), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16)), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +cnt: 1, ((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 240, 28, 28], f16),), {}) +cnt: 1, ((T([128, 240, 14, 14], f16),), {}) +cnt: 2, ((T([128, 200, 14, 14], f16),), {}) +cnt: 4, ((T([128, 184, 14, 14], f16),), {}) +cnt: 2, ((T([128, 480, 14, 14], f16),), {}) +cnt: 3, ((T([128, 672, 14, 14], f16),), {}) +cnt: 1, ((T([128, 672, 7, 7], f16),), {}) +cnt: 5, ((T([128, 960, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 1, 1], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 16), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16, 16, 1, 1], f16), None, [1, 
1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([64, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([64, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([24, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([72, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([72, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 72), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([24, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([72, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 72), {}) +cnt: 1, ((T([128, 72, 1, 1], f16), T([24, 72, 1, 1], f16), T([24], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([72, 24, 1, 1], f16), T([72], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([40, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([120, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([120, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 120), {}) +cnt: 2, ((T([128, 120, 1, 1], f16), T([32, 120, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 32, 1, 1], f16), T([120, 32, 1, 1], f16), T([120], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([40, 120, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([240, 1, 3, 3], f16), None, [2, 2], 
[1, 1], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([200, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), T([200, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 200), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), T([80, 200, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([184, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 184, 14, 14], f16), T([184, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 184), {}) +cnt: 2, ((T([128, 184, 14, 14], f16), T([80, 184, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([480, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([480, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 480), {}) +cnt: 1, ((T([128, 480, 1, 1], f16), T([120, 480, 1, 1], f16), T([120], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([480, 120, 1, 1], f16), T([480], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([112, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([672, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([672, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 672), {}) +cnt: 2, ((T([128, 672, 1, 1], f16), T([168, 672, 1, 1], f16), T([168], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 168, 1, 1], f16), T([672, 168, 1, 1], f16), T([672], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([112, 672, 
1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([160, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 160, 7, 7], f16), T([960, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), T([960, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 960), {}) +cnt: 2, ((T([128, 960, 1, 1], f16), T([240, 960, 1, 1], f16), T([240], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([960, 240, 1, 1], f16), T([960], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), T([160, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 960, 1, 1], f16), T([1280, 960, 1, 1], f16), T([1280], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1280, 1, 1], f16), T([128, 960, 1, 1], f16), T([1280, 960, 1, 1], f16), [1280], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 960, 7, 7], f16), T([128, 160, 7, 7], f16), T([960, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 160, 7, 7], f16), T([128, 960, 7, 7], f16), T([160, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 960, 1, 1], f16), T([128, 240, 1, 1], f16), T([960, 240, 1, 1], f16), [960], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([128, 960, 1, 1], f16), T([240, 960, 1, 1], f16), [240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16), T([960, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 
1], False, [0, 0], 960, [True, True, False]), {}) +cnt: 1, ((T([128, 160, 7, 7], f16), T([128, 672, 7, 7], f16), T([160, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 672, 1, 1], f16), T([128, 168, 1, 1], f16), T([672, 168, 1, 1], f16), [672], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 168, 1, 1], f16), T([128, 672, 1, 1], f16), T([168, 672, 1, 1], f16), [168], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([128, 112, 14, 14], f16), T([672, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([128, 672, 14, 14], f16), T([112, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16), T([672, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([128, 480, 14, 14], f16), T([112, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 480, 1, 1], f16), T([128, 120, 1, 1], f16), T([480, 120, 1, 1], f16), [480], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([128, 480, 1, 1], f16), T([120, 480, 1, 1], f16), [120], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 80, 14, 14], f16), T([480, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, 
[True, True, False]), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([128, 184, 14, 14], f16), T([80, 184, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 184, 14, 14], f16), T([128, 184, 14, 14], f16), T([184, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 184, [True, True, False]), {}) +cnt: 2, ((T([128, 184, 14, 14], f16), T([128, 80, 14, 14], f16), T([184, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([128, 200, 14, 14], f16), T([80, 200, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), T([128, 200, 14, 14], f16), T([200, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 200, [True, True, False]), {}) +cnt: 1, ((T([128, 200, 14, 14], f16), T([128, 80, 14, 14], f16), T([200, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([128, 240, 14, 14], f16), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 28, 28], f16), T([240, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 40, 28, 28], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([128, 120, 28, 28], f16), T([40, 120, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 120, 1, 1], f16), T([128, 32, 1, 1], f16), T([120, 32, 1, 1], f16), [120], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 32, 1, 1], f16), T([128, 120, 1, 1], f16), T([32, 120, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 
2, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), T([120, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([128, 40, 28, 28], f16), T([120, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([128, 72, 28, 28], f16), T([40, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 72, 1, 1], f16), T([128, 24, 1, 1], f16), T([72, 24, 1, 1], f16), [72], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([128, 72, 1, 1], f16), T([24, 72, 1, 1], f16), [24], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 56, 56], f16), T([72, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 2, ((T([128, 72, 56, 56], f16), T([128, 24, 56, 56], f16), T([72, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 72, 56, 56], f16), T([24, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), T([72, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 64, 56, 56], f16), T([24, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 112, 112], f16), T([64, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 16, 112, 112], f16), T([64, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 
112], f16), T([16, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 16, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 3, ((T([128, 960, 7, 7], f16, stride=(960, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 672, 7, 7], f16, stride=(672, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 672, 14, 14], f16, stride=(672, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 480, 14, 14], f16, stride=(480, 1, 0, 0)), 196), {}) +cnt: 2, ((T([128, 120, 28, 28], f16, stride=(120, 1, 0, 0)), 784), {}) +cnt: 1, ((T([128, 72, 28, 28], f16, stride=(72, 1, 0, 0)), 784), {}) +Operator: aten.hardsigmoid.default +cnt: 1, ((T([128, 72, 1, 1], f16),), {}) +cnt: 2, ((T([128, 120, 1, 1], f16),), {}) +cnt: 1, ((T([128, 480, 1, 1], f16),), {}) +cnt: 2, ((T([128, 672, 1, 1], f16),), {}) +cnt: 2, ((T([128, 960, 1, 1], f16),), {}) +Operator: aten.hardsigmoid_backward.default +cnt: 2, ((T([128, 960, 1, 1], f16), T([128, 960, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 1, 1], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 1, ((T([128, 480, 1, 1], f16), T([128, 480, 1, 1], f16)), {}) +cnt: 2, ((T([128, 120, 1, 1], f16), T([128, 120, 1, 1], f16)), {}) +cnt: 1, ((T([128, 72, 1, 1], f16), T([128, 72, 1, 1], f16)), {}) +Operator: aten.hardswish_.default +cnt: 1, ((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 240, 28, 28], f16),), {}) +cnt: 1, ((T([128, 240, 14, 14], f16),), {}) +cnt: 2, ((T([128, 200, 14, 14], f16),), {}) +cnt: 4, ((T([128, 184, 14, 14], f16),), {}) +cnt: 2, ((T([128, 480, 14, 14], f16),), {}) +cnt: 3, ((T([128, 672, 14, 14], f16),), {}) +cnt: 1, ((T([128, 
672, 7, 7], f16),), {}) +cnt: 5, ((T([128, 960, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 1, 1], f16),), {}) +Operator: aten.hardswish_backward.default +cnt: 1, ((T([128, 1280, 1, 1], f16), T([128, 1280, 1, 1], f16)), {}) +cnt: 5, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 3, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 4, ((T([128, 184, 14, 14], f16), T([128, 184, 14, 14], f16)), {}) +cnt: 2, ((T([128, 200, 14, 14], f16), T([128, 200, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16)), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 72, 28, 28], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1280], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([128, 72, 28, 28], f16), T([128, 72, 1, 1], f16)), {}) +cnt: 4, ((T([128, 120, 28, 28], f16), T([128, 120, 1, 1], f16)), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 7, 7], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 4, ((T([128, 960, 7, 7], f16), T([128, 960, 1, 1], f16)), {}) +cnt: 2, ((T([128, 960, 7, 7], 
f16), T([128, 960, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16)), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 3, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 200, 14, 14], f16), T([200], f16), T([200], f16), T([200], f16), T([200], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 184, 14, 14], f16), T([184], f16), T([184], f16), T([184], f16), T([184], f16), True, 0.1, 1e-05), {}) +cnt: 2, 
((T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 160, 7, 7], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 5, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 160, 7, 7], f16), T([128, 160, 7, 7], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 1e-05, [True, True, 
True]), {}) +cnt: 4, ((T([128, 184, 14, 14], f16), T([128, 184, 14, 14], f16), T([184], f16), T([184], f16), T([184], f16), T([184], f32), T([184], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 200, 14, 14], f16), T([128, 200, 14, 14], f16), T([200], f16), T([200], f16), T([200], f16), T([200], f32), T([200], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 16, 112, 112], f16), 
T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 64, 112, 112], f16),), {}) +cnt: 1, ((T([128, 64, 56, 56], f16),), {}) +cnt: 3, ((T([128, 72, 56, 56], f16),), {}) +cnt: 1, ((T([128, 72, 28, 28], f16),), {}) +cnt: 1, ((T([128, 24, 1, 1], f16),), {}) +cnt: 4, ((T([128, 120, 28, 28], f16),), {}) +cnt: 2, ((T([128, 32, 1, 1], f16),), {}) +cnt: 1, ((T([128, 120, 1, 1], f16),), {}) +cnt: 2, ((T([128, 168, 1, 1], f16),), {}) +cnt: 2, ((T([128, 240, 1, 1], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 2, ((T([128, 960, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 120, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([128, 240, 1, 1], f16), T([128, 240, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 168, 1, 1], f16), T([128, 168, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 120, 1, 1], f16), T([128, 120, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 32, 1, 1], f16), T([128, 32, 1, 1], f16), 0), {}) +cnt: 4, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([128, 24, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16), 0), {}) +cnt: 3, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), 0), {}) 
+cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), 0), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mobilevit_s_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mobilevit_s_training.txt new file mode 100644 index 000000000..ce3dba3ad --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/mobilevit_s_training.txt @@ -0,0 +1,313 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 2, ((T([256, 4, 256, 256], f16), -1, False), {}) +cnt: 4, ((T([256, 4, 64, 64], f16), -1, False), {}) +cnt: 3, ((T([256, 4, 16, 16], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 3, ((T([256, 4, 16, 16], f16), T([256, 4, 16, 16], f16), -1, f16), {}) +cnt: 4, ((T([256, 4, 64, 64], f16), T([256, 4, 64, 64], f16), -1, f16), {}) +cnt: 2, ((T([256, 4, 256, 256], f16), T([256, 4, 256, 256], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 2, ((T([147456, 16, 2, 2], f16), [64, 144, 256, 4]), {}) +cnt: 2, ((T([64, 4, 256, 144], f16), [256, 256, 144]), {}) +cnt: 6, ((T([256, 4, 256, 36], f16), [1024, 256, 36]), {}) +cnt: 2, ((T([256, 4, 36, 256], f16), [1024, 36, 256]), {}) +cnt: 2, ((T([1024, 256, 256], f16), [256, 4, 256, 256]), {}) +cnt: 2, ((T([1024, 256, 36], f16), [256, 4, 256, 36]), {}) +cnt: 2, ((T([256, 256, 4, 36], f16), [256, 256, 144]), {}) +cnt: 2, ((T([64, 144, 256, 4], f16), [147456, 16, 2, 2]), {}) +cnt: 2, ((T([147456, 2, 16, 2], f16), [64, 144, 32, 32]), {}) +cnt: 2, ((T([98304, 8, 2, 2], f16), [64, 192, 64, 4]), {}) +cnt: 2, ((T([64, 4, 64, 192], f16), [256, 64, 192]), {}) +cnt: 12, ((T([256, 4, 64, 48], f16), [1024, 64, 48]), {}) +cnt: 4, ((T([256, 4, 48, 64], f16), [1024, 48, 
64]), {}) +cnt: 4, ((T([1024, 64, 64], f16), [256, 4, 64, 64]), {}) +cnt: 4, ((T([1024, 64, 48], f16), [256, 4, 64, 48]), {}) +cnt: 4, ((T([256, 64, 4, 48], f16), [256, 64, 192]), {}) +cnt: 2, ((T([64, 192, 64, 4], f16), [98304, 8, 2, 2]), {}) +cnt: 2, ((T([98304, 2, 8, 2], f16), [64, 192, 16, 16]), {}) +cnt: 2, ((T([61440, 4, 2, 2], f16), [64, 240, 16, 4]), {}) +cnt: 2, ((T([64, 4, 16, 240], f16), [256, 16, 240]), {}) +cnt: 9, ((T([256, 4, 16, 60], f16), [1024, 16, 60]), {}) +cnt: 3, ((T([256, 4, 60, 16], f16), [1024, 60, 16]), {}) +cnt: 3, ((T([1024, 16, 16], f16), [256, 4, 16, 16]), {}) +cnt: 3, ((T([1024, 16, 60], f16), [256, 4, 16, 60]), {}) +cnt: 3, ((T([256, 16, 4, 60], f16), [256, 16, 240]), {}) +cnt: 2, ((T([64, 240, 16, 4], f16), [61440, 4, 2, 2]), {}) +cnt: 2, ((T([61440, 2, 4, 2], f16), [64, 240, 8, 8]), {}) +cnt: 3, ((T([256, 16, 3, 4, 60], f16), [256, 16, 720]), {}) +cnt: 4, ((T([256, 64, 3, 4, 48], f16), [256, 64, 576]), {}) +cnt: 2, ((T([256, 256, 3, 4, 36], f16), [256, 256, 432]), {}) +Operator: aten.add.Tensor +cnt: 32, ((T([], i64), 1), {}) +cnt: 4, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16)), {}) +cnt: 8, ((T([256, 256, 144], f16), T([256, 256, 144], f16)), {}) +cnt: 16, ((T([256, 64, 192], f16), T([256, 64, 192], f16)), {}) +cnt: 12, ((T([256, 16, 240], f16), T([256, 16, 240], f16)), {}) +cnt: 1, ((T([64, 160, 8, 8], f16, stride=(20480, 64, 8, 1)), T([64, 160, 8, 8], f16)), {}) +cnt: 1, ((T([64, 128, 16, 16], f16, stride=(65536, 256, 16, 1)), T([64, 128, 16, 16], f16)), {}) +cnt: 1, ((T([64, 96, 32, 32], f16, stride=(196608, 1024, 32, 1)), T([64, 96, 32, 32], f16)), {}) +Operator: aten.addmm.default +cnt: 2, ((T([432], f16), T([65536, 144], f16), T([144, 432], f16, stride=(1, 144))), {}) +cnt: 2, ((T([144], f16), T([65536, 144], f16), T([144, 144], f16, stride=(1, 144))), {}) +cnt: 2, ((T([288], f16), T([65536, 144], f16), T([144, 288], f16, stride=(1, 144))), {}) +cnt: 2, ((T([144], f16), T([65536, 288], f16), T([288, 144], f16, 
stride=(1, 288))), {}) +cnt: 4, ((T([576], f16), T([16384, 192], f16), T([192, 576], f16, stride=(1, 192))), {}) +cnt: 4, ((T([192], f16), T([16384, 192], f16), T([192, 192], f16, stride=(1, 192))), {}) +cnt: 4, ((T([384], f16), T([16384, 192], f16), T([192, 384], f16, stride=(1, 192))), {}) +cnt: 4, ((T([192], f16), T([16384, 384], f16), T([384, 192], f16, stride=(1, 384))), {}) +cnt: 3, ((T([720], f16), T([4096, 240], f16), T([240, 720], f16, stride=(1, 240))), {}) +cnt: 3, ((T([240], f16), T([4096, 240], f16), T([240, 240], f16, stride=(1, 240))), {}) +cnt: 3, ((T([480], f16), T([4096, 240], f16), T([240, 480], f16, stride=(1, 240))), {}) +cnt: 3, ((T([240], f16), T([4096, 480], f16), T([480, 240], f16, stride=(1, 480))), {}) +cnt: 1, ((T([1000], f16), T([64, 640], f16), T([640, 1000], f16, stride=(1, 640))), {}) +Operator: aten.bmm.default +cnt: 2, ((T([1024, 256, 36], f16), T([1024, 36, 256], f16)), {}) +cnt: 2, ((T([1024, 256, 256], f16), T([1024, 256, 36], f16)), {}) +cnt: 4, ((T([1024, 64, 48], f16), T([1024, 48, 64], f16)), {}) +cnt: 4, ((T([1024, 64, 64], f16), T([1024, 64, 48], f16)), {}) +cnt: 3, ((T([1024, 16, 60], f16), T([1024, 60, 16], f16)), {}) +cnt: 3, ((T([1024, 16, 16], f16), T([1024, 16, 60], f16)), {}) +cnt: 3, ((T([1024, 16, 16], f16, stride=(256, 1, 16)), T([1024, 16, 60], f16)), {}) +cnt: 3, ((T([1024, 16, 60], f16), T([1024, 60, 16], f16, stride=(960, 1, 60))), {}) +cnt: 3, ((T([1024, 60, 16], f16, stride=(960, 1, 60)), T([1024, 16, 16], f16)), {}) +cnt: 3, ((T([1024, 16, 16], f16), T([1024, 16, 60], f16, stride=(960, 1, 16))), {}) +cnt: 4, ((T([1024, 64, 64], f16, stride=(4096, 1, 64)), T([1024, 64, 48], f16)), {}) +cnt: 4, ((T([1024, 64, 48], f16), T([1024, 48, 64], f16, stride=(3072, 1, 48))), {}) +cnt: 4, ((T([1024, 48, 64], f16, stride=(3072, 1, 48)), T([1024, 64, 64], f16)), {}) +cnt: 4, ((T([1024, 64, 64], f16), T([1024, 64, 48], f16, stride=(3072, 1, 64))), {}) +cnt: 2, ((T([1024, 256, 256], f16, stride=(65536, 1, 256)), T([1024, 
256, 36], f16)), {}) +cnt: 2, ((T([1024, 256, 36], f16), T([1024, 36, 256], f16, stride=(9216, 1, 36))), {}) +cnt: 2, ((T([1024, 36, 256], f16, stride=(9216, 1, 36)), T([1024, 256, 256], f16)), {}) +cnt: 2, ((T([1024, 256, 256], f16), T([1024, 256, 36], f16, stride=(9216, 1, 256))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 96, 32, 32], f16), T([64, 96, 32, 32], f16)], 1), {}) +cnt: 1, (([T([64, 128, 16, 16], f16), T([64, 128, 16, 16], f16)], 1), {}) +cnt: 1, (([T([64, 160, 8, 8], f16), T([64, 160, 8, 8], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 256, 256], f16),), {}) +cnt: 1, ((T([64, 16, 128, 128], f16),), {}) +cnt: 2, ((T([64, 64, 128, 128], f16),), {}) +cnt: 1, ((T([64, 128, 128, 128], f16),), {}) +cnt: 1, ((T([64, 128, 64, 64], f16),), {}) +cnt: 5, ((T([64, 256, 64, 64], f16),), {}) +cnt: 1, ((T([64, 256, 32, 32], f16),), {}) +cnt: 3, ((T([64, 96, 32, 32], f16),), {}) +cnt: 1, ((T([64, 384, 32, 32], f16),), {}) +cnt: 1, ((T([64, 384, 16, 16], f16),), {}) +cnt: 3, ((T([64, 128, 16, 16], f16),), {}) +cnt: 1, ((T([64, 512, 16, 16], f16),), {}) +cnt: 1, ((T([64, 512, 8, 8], f16),), {}) +cnt: 3, ((T([64, 160, 8, 8], f16),), {}) +cnt: 1, ((T([64, 640, 8, 8], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 256, 256], f16), T([16, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 16, 128, 128], f16), T([64, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([64, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([32, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([128, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([128, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 128), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), 
T([64, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 64, 64, 64], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 256, 64, 64], f16), T([256, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 256), {}) +cnt: 2, ((T([64, 256, 64, 64], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 64, 64], f16), T([256, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 256), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([96, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 96, 32, 32], f16), T([96, 96, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 96, 32, 32], f16), T([144, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 144, 32, 32], f16), T([96, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 192, 32, 32], f16), T([96, 192, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 96, 32, 32], f16), T([384, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 384, 32, 32], f16), T([384, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 384), {}) +cnt: 1, ((T([64, 384, 16, 16], f16), T([128, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 16, 16], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 16, 16], f16), T([192, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 192, 16, 16], f16), T([128, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 16, 16], f16), T([128, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 16, 16], f16), T([512, 128, 1, 1], 
f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([512, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 512), {}) +cnt: 1, ((T([64, 512, 8, 8], f16), T([160, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 160, 8, 8], f16), T([160, 160, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 160, 8, 8], f16), T([240, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 240, 8, 8], f16), T([160, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 320, 8, 8], f16), T([160, 320, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 160, 8, 8], f16), T([640, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 640, 8, 8], f16), T([64, 160, 8, 8], f16), T([640, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 160, 8, 8], f16), T([64, 320, 8, 8], f16), T([160, 320, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 160, 8, 8], f16), T([64, 240, 8, 8], f16), T([160, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 240, 8, 8], f16), T([64, 160, 8, 8], f16), T([240, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 160, 8, 8], f16), T([64, 160, 8, 8], f16), T([160, 160, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 160, 8, 8], f16), T([64, 512, 8, 8], f16), T([160, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 8, 8], f16), T([64, 512, 16, 16], f16), T([512, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 
0], 512, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([64, 128, 16, 16], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 16, 16], f16), T([64, 256, 16, 16], f16), T([128, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 16, 16], f16), T([64, 192, 16, 16], f16), T([128, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 192, 16, 16], f16), T([64, 128, 16, 16], f16), T([192, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 16, 16], f16), T([64, 128, 16, 16], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 16, 16], f16), T([64, 384, 16, 16], f16), T([128, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 384, 16, 16], f16), T([64, 384, 32, 32], f16), T([384, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 384, [True, True, False]), {}) +cnt: 1, ((T([64, 384, 32, 32], f16), T([64, 96, 32, 32], f16), T([384, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 96, 32, 32], f16), T([64, 192, 32, 32], f16), T([96, 192, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 96, 32, 32], f16), T([64, 144, 32, 32], f16), T([96, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 144, 32, 32], f16), T([64, 96, 32, 32], f16), T([144, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 96, 32, 32], f16), T([64, 96, 32, 32], f16), T([96, 96, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, 
((T([64, 96, 32, 32], f16), T([64, 256, 32, 32], f16), T([96, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([64, 256, 64, 64], f16), T([256, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 256, [True, True, False]), {}) +cnt: 3, ((T([64, 256, 64, 64], f16), T([64, 64, 64, 64], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 64, 64, 64], f16), T([64, 256, 64, 64], f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 256, 64, 64], f16), T([64, 256, 64, 64], f16), T([256, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 256, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 64, 64], f16), T([64, 128, 64, 64], f16), T([64, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([64, 128, 128, 128], f16), T([128, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 128, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([64, 32, 128, 128], f16), T([128, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([64, 64, 128, 128], f16), T([32, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([64, 64, 128, 128], f16), T([64, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 128, 128], f16), T([64, 16, 128, 128], f16), T([64, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 16, 128, 128], f16), T([64, 3, 256, 256], f16), T([16, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, 
((T([64, 3, 256, 256], f16), T([64, 3, 256, 256], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 640, 8, 8], f16, stride=(640, 1, 0, 0)), 64), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 640, 8, 8], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 640], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 640], f16)), {}) +cnt: 3, ((T([4096, 240], f16), T([240, 480], f16)), {}) +cnt: 3, ((T([240, 4096], f16, stride=(1, 240)), T([4096, 480], f16)), {}) +cnt: 3, ((T([4096, 480], f16), T([480, 240], f16)), {}) +cnt: 3, ((T([480, 4096], f16, stride=(1, 480)), T([4096, 240], f16)), {}) +cnt: 3, ((T([4096, 240], f16), T([240, 240], f16)), {}) +cnt: 3, ((T([240, 4096], f16, stride=(1, 240)), T([4096, 240], f16)), {}) +cnt: 3, ((T([4096, 720], f16), T([720, 240], f16)), {}) +cnt: 3, ((T([720, 4096], f16, stride=(1, 720)), T([4096, 240], f16)), {}) +cnt: 4, ((T([16384, 192], f16), T([192, 384], f16)), {}) +cnt: 4, ((T([192, 16384], f16, stride=(1, 192)), T([16384, 384], f16)), {}) +cnt: 4, ((T([16384, 384], f16), T([384, 192], f16)), {}) +cnt: 4, ((T([384, 16384], f16, stride=(1, 384)), T([16384, 192], f16)), {}) +cnt: 4, ((T([16384, 192], f16), T([192, 192], f16)), {}) +cnt: 4, ((T([192, 16384], f16, stride=(1, 192)), T([16384, 192], f16)), {}) +cnt: 4, ((T([16384, 576], f16), T([576, 192], f16)), {}) +cnt: 4, ((T([576, 16384], f16, stride=(1, 576)), T([16384, 192], f16)), {}) +cnt: 2, ((T([65536, 144], f16), T([144, 288], f16)), {}) +cnt: 2, ((T([144, 65536], f16, stride=(1, 144)), T([65536, 288], f16)), {}) +cnt: 2, ((T([65536, 288], f16), T([288, 144], f16)), {}) +cnt: 2, ((T([288, 65536], f16, stride=(1, 288)), T([65536, 144], f16)), {}) +cnt: 2, ((T([65536, 144], f16), T([144, 144], f16)), {}) +cnt: 2, ((T([144, 65536], f16, stride=(1, 144)), T([65536, 144], f16)), {}) +cnt: 2, ((T([65536, 432], f16), T([432, 144], f16)), {}) 
+cnt: 2, ((T([432, 65536], f16, stride=(1, 432)), T([65536, 144], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([256, 4, 256, 256], f16), 0.16666666666666666), {}) +cnt: 8, ((T([256, 4, 64, 64], f16), 0.14433756729740643), {}) +cnt: 6, ((T([256, 4, 16, 16], f16), 0.12909944487358058), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([64, 16, 128, 128], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([64, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([64, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 96, 32, 32], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 384, 32, 32], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 384, 16, 16], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 128, 16, 16], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 
1e-05), {}) +cnt: 4, ((T([64, 160, 8, 8], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 640, 8, 8], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([64, 640, 8, 8], f16), T([64, 640, 8, 8], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f32), T([640], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 160, 8, 8], f16), T([64, 160, 8, 8], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([64, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 128, 16, 16], f16), T([64, 128, 16, 16], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 384, 16, 16], f16), T([64, 384, 16, 16], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 384, 32, 32], f16), T([64, 384, 32, 32], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 96, 32, 32], f16), T([64, 96, 32, 32], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([64, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([64, 256, 64, 64], f16), T([64, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), 
T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([64, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([64, 128, 128, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([64, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([64, 64, 128, 128], f16), T([64, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 16, 128, 128], f16), T([64, 16, 128, 128], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.native_layer_norm.default +cnt: 5, ((T([256, 256, 144], f16), [144], T([144], f16), T([144], f16), 1e-05), {}) +cnt: 9, ((T([256, 64, 192], f16), [192], T([192], f16), T([192], f16), 1e-05), {}) +cnt: 7, ((T([256, 16, 240], f16), [240], T([240], f16), T([240], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 7, ((T([256, 16, 240], f16), T([256, 16, 240], f16), [240], T([256, 16, 1], f32), T([256, 16, 1], f32), T([240], f16), T([240], f16), [True, True, True]), {}) +cnt: 9, ((T([256, 64, 192], f16), T([256, 64, 192], f16), [192], T([256, 64, 1], f32), T([256, 64, 1], f32), T([192], f16), T([192], f16), [True, True, True]), {}) +cnt: 5, ((T([256, 256, 144], f16), T([256, 256, 144], f16), [144], T([256, 256, 1], f32), T([256, 256, 1], f32), T([144], f16), T([144], f16), [True, True, True]), {}) +Operator: 
aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.silu.default +cnt: 2, ((T([256, 256, 288], f16),), {}) +cnt: 4, ((T([256, 64, 384], f16),), {}) +cnt: 3, ((T([256, 16, 480], f16),), {}) +Operator: aten.silu_.default +cnt: 1, ((T([64, 16, 128, 128], f16),), {}) +cnt: 2, ((T([64, 64, 128, 128], f16),), {}) +cnt: 1, ((T([64, 128, 128, 128], f16),), {}) +cnt: 1, ((T([64, 128, 64, 64], f16),), {}) +cnt: 5, ((T([64, 256, 64, 64], f16),), {}) +cnt: 1, ((T([64, 256, 32, 32], f16),), {}) +cnt: 3, ((T([64, 96, 32, 32], f16),), {}) +cnt: 1, ((T([64, 384, 32, 32], f16),), {}) +cnt: 1, ((T([64, 384, 16, 16], f16),), {}) +cnt: 3, ((T([64, 128, 16, 16], f16),), {}) +cnt: 1, ((T([64, 512, 16, 16], f16),), {}) +cnt: 1, ((T([64, 512, 8, 8], f16),), {}) +cnt: 3, ((T([64, 160, 8, 8], f16),), {}) +cnt: 1, ((T([64, 640, 8, 8], f16),), {}) +Operator: aten.silu_backward.default +cnt: 1, ((T([64, 640, 8, 8], f16), T([64, 640, 8, 8], f16)), {}) +cnt: 2, ((T([64, 160, 8, 8], f16), T([64, 160, 8, 8], f16)), {}) +cnt: 1, ((T([64, 160, 8, 8], f16, stride=(20480, 64, 8, 1)), T([64, 160, 8, 8], f16)), {}) +cnt: 3, ((T([256, 16, 480], f16), T([256, 16, 480], f16)), {}) +cnt: 1, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16)), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([64, 512, 16, 16], f16)), {}) +cnt: 2, ((T([64, 128, 16, 16], f16), T([64, 128, 16, 16], f16)), {}) +cnt: 1, ((T([64, 128, 16, 16], f16, stride=(65536, 256, 16, 1)), T([64, 128, 16, 16], f16)), {}) +cnt: 4, ((T([256, 64, 384], f16), T([256, 64, 384], f16)), {}) +cnt: 1, ((T([64, 384, 16, 16], f16), T([64, 384, 16, 16], f16)), {}) +cnt: 1, ((T([64, 384, 32, 32], f16), T([64, 384, 32, 32], f16)), {}) +cnt: 2, ((T([64, 96, 32, 32], f16), T([64, 96, 32, 32], f16)), {}) +cnt: 1, ((T([64, 96, 32, 32], f16, stride=(196608, 1024, 32, 1)), T([64, 
96, 32, 32], f16)), {}) +cnt: 2, ((T([256, 256, 288], f16), T([256, 256, 288], f16)), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([64, 256, 32, 32], f16)), {}) +cnt: 5, ((T([64, 256, 64, 64], f16), T([64, 256, 64, 64], f16)), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([64, 128, 64, 64], f16)), {}) +cnt: 1, ((T([64, 128, 128, 128], f16), T([64, 128, 128, 128], f16)), {}) +cnt: 2, ((T([64, 64, 128, 128], f16), T([64, 64, 128, 128], f16)), {}) +cnt: 1, ((T([64, 16, 128, 128], f16), T([64, 16, 128, 128], f16)), {}) +Operator: aten.stack.default +cnt: 3, (([T([256, 4, 16, 60], f16), T([256, 4, 16, 60], f16, stride=(3840, 960, 1, 16)), T([256, 4, 16, 60], f16)],), {}) +cnt: 4, (([T([256, 4, 64, 48], f16), T([256, 4, 64, 48], f16, stride=(12288, 3072, 1, 64)), T([256, 4, 64, 48], f16)],), {}) +cnt: 2, (([T([256, 4, 256, 36], f16), T([256, 4, 256, 36], f16, stride=(36864, 9216, 1, 256)), T([256, 4, 256, 36], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 6, ((T([4096, 240], f16), [0], True), {}) +cnt: 3, ((T([4096, 480], f16), [0], True), {}) +cnt: 3, ((T([4096, 720], f16), [0], True), {}) +cnt: 8, ((T([16384, 192], f16), [0], True), {}) +cnt: 4, ((T([16384, 384], f16), [0], True), {}) +cnt: 4, ((T([16384, 576], f16), [0], True), {}) +cnt: 4, ((T([65536, 144], f16), [0], True), {}) +cnt: 2, ((T([65536, 288], f16), [0], True), {}) +cnt: 2, ((T([65536, 432], f16), [0], True), {}) +Operator: aten.unbind.int +cnt: 2, ((T([3, 256, 4, 256, 36], f16, stride=(144, 110592, 36, 432, 1)),), {}) +cnt: 4, ((T([3, 256, 4, 64, 48], f16, stride=(192, 36864, 48, 576, 1)),), {}) +cnt: 3, ((T([3, 256, 4, 16, 60], f16, stride=(240, 11520, 60, 720, 1)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/nasnetalarge_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/nasnetalarge_training.txt new file mode 100644 index 000000000..908397ba8 --- /dev/null +++ 
b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/nasnetalarge_training.txt @@ -0,0 +1,309 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([16, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([16, 1000], f16), T([16, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([], i64), 1), {}) +cnt: 6, ((T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16)), {}) +cnt: 6, ((T([16, 84, 42, 42], f16), T([16, 84, 42, 42], f16)), {}) +cnt: 66, ((T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16)), {}) +cnt: 72, ((T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16)), {}) +cnt: 72, ((T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16)), {}) +cnt: 12, ((T([16, 672, 11, 11], f16, stride=(487872, 121, 11, 1)), T([16, 672, 11, 11], f16)), {}) +cnt: 6, ((T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16, stride=(487872, 121, 11, 1))), {}) +cnt: 4, ((T([16, 4032, 11, 11], f16), T([16, 4032, 11, 11], f16)), {}) +cnt: 1, ((T([16, 2688, 11, 11], f16), T([16, 2688, 11, 11], f16)), {}) +cnt: 7, ((T([16, 2016, 21, 21], f16), T([16, 2016, 21, 21], f16)), {}) +cnt: 1, ((T([16, 672, 11, 11], f16, stride=(325248, 121, 11, 1)), T([16, 672, 11, 11], f16, stride=(325248, 121, 11, 1))), {}) +cnt: 5, ((T([16, 672, 21, 21], f16), T([16, 672, 21, 21], f16)), {}) +cnt: 12, ((T([16, 336, 21, 21], f16, stride=(889056, 441, 21, 1)), T([16, 336, 21, 21], f16)), {}) +cnt: 6, ((T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16, stride=(889056, 441, 21, 1))), {}) +cnt: 1, ((T([16, 1344, 21, 21], f16), T([16, 1344, 21, 21], f16)), {}) +cnt: 7, ((T([16, 1008, 42, 42], f16), T([16, 1008, 42, 42], f16)), {}) +cnt: 1, ((T([16, 336, 21, 21], f16, stride=(592704, 441, 21, 1)), T([16, 336, 21, 21], f16, stride=(592704, 441, 21, 1))), {}) +cnt: 6, ((T([16, 336, 42, 42], f16), T([16, 336, 42, 42], f16)), {}) +cnt: 12, ((T([16, 168, 42, 42], f16, stride=(1778112, 1764, 42, 1)), T([16, 168, 42, 42], f16)), {}) +cnt: 6, ((T([16, 168, 42, 42], 
f16), T([16, 168, 42, 42], f16, stride=(1778112, 1764, 42, 1))), {}) +cnt: 2, ((T([16, 168, 83, 83], f16), T([16, 168, 83, 83], f16)), {}) +cnt: 1, ((T([16, 84, 42, 42], f16, stride=(592704, 1764, 42, 1)), T([16, 84, 42, 42], f16, stride=(592704, 1764, 42, 1))), {}) +cnt: 5, ((T([16, 84, 83, 83], f16), T([16, 84, 83, 83], f16)), {}) +cnt: 5, ((T([16, 96, 165, 165], f16), T([16, 96, 165, 165], f16)), {}) +cnt: 1, ((T([16, 42, 83, 83], f16, stride=(1157352, 6889, 83, 1)), T([16, 42, 83, 83], f16, stride=(1157352, 6889, 83, 1))), {}) +cnt: 3, ((T([16, 42, 165, 165], f16), T([16, 42, 165, 165], f16)), {}) +Operator: aten.add_.Tensor +cnt: 263, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([16, 4032], f16), T([4032, 1000], f16, stride=(1, 4032))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([16, 42, 167, 167], f16), [3, 3], [2, 2], [0, 0], False, False), {}) +cnt: 1, ((T([16, 42, 83, 83], f16), [3, 3], [1, 1], [1, 1], False, False), {}) +cnt: 2, ((T([16, 96, 165, 165], f16), [1, 1], [2, 2], [0, 0], False, False), {}) +cnt: 1, ((T([16, 84, 85, 85], f16), [3, 3], [2, 2], [0, 0], False, False), {}) +cnt: 1, ((T([16, 84, 42, 42], f16), [3, 3], [1, 1], [1, 1], False, False), {}) +cnt: 2, ((T([16, 168, 83, 83], f16), [1, 1], [2, 2], [0, 0], False, False), {}) +cnt: 18, ((T([16, 168, 42, 42], f16), [3, 3], [1, 1], [1, 1], False, False), {}) +cnt: 1, ((T([16, 336, 43, 43], f16), [3, 3], [2, 2], [0, 0], False, False), {}) +cnt: 19, ((T([16, 336, 21, 21], f16), [3, 3], [1, 1], [1, 1], False, False), {}) +cnt: 2, ((T([16, 1008, 42, 42], f16), [1, 1], [2, 2], [0, 0], False, False), {}) +cnt: 1, ((T([16, 672, 23, 23], f16), [3, 3], [2, 2], [0, 0], False, False), {}) +cnt: 19, ((T([16, 672, 11, 11], f16), [3, 3], [1, 1], [1, 1], False, False), {}) +cnt: 2, ((T([16, 2016, 21, 21], f16), [1, 1], [2, 2], [0, 0], False, False), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 18, ((T([16, 672, 11, 11], f16, stride=(487872, 121, 11, 1)), 
T([16, 672, 11, 11], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +cnt: 2, ((T([16, 2016, 11, 11], f16), T([16, 2016, 21, 21], f16), [1, 1], [2, 2], [0, 0], False, False, None), {}) +cnt: 1, ((T([16, 672, 11, 11], f16, stride=(325248, 121, 11, 1)), T([16, 672, 11, 11], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +cnt: 1, ((T([16, 672, 11, 11], f16, stride=(325248, 121, 11, 1)), T([16, 672, 23, 23], f16), [3, 3], [2, 2], [0, 0], False, False, None), {}) +cnt: 18, ((T([16, 336, 21, 21], f16, stride=(889056, 441, 21, 1)), T([16, 336, 21, 21], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +cnt: 2, ((T([16, 1008, 21, 21], f16), T([16, 1008, 42, 42], f16), [1, 1], [2, 2], [0, 0], False, False, None), {}) +cnt: 1, ((T([16, 336, 21, 21], f16, stride=(592704, 441, 21, 1)), T([16, 336, 21, 21], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +cnt: 1, ((T([16, 336, 21, 21], f16, stride=(592704, 441, 21, 1)), T([16, 336, 43, 43], f16), [3, 3], [2, 2], [0, 0], False, False, None), {}) +cnt: 18, ((T([16, 168, 42, 42], f16, stride=(1778112, 1764, 42, 1)), T([16, 168, 42, 42], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +cnt: 2, ((T([16, 168, 42, 42], f16), T([16, 168, 83, 83], f16), [1, 1], [2, 2], [0, 0], False, False, None), {}) +cnt: 1, ((T([16, 84, 42, 42], f16, stride=(592704, 1764, 42, 1)), T([16, 84, 42, 42], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +cnt: 1, ((T([16, 84, 42, 42], f16, stride=(592704, 1764, 42, 1)), T([16, 84, 85, 85], f16), [3, 3], [2, 2], [0, 0], False, False, None), {}) +cnt: 2, ((T([16, 96, 83, 83], f16), T([16, 96, 165, 165], f16), [1, 1], [2, 2], [0, 0], False, False, None), {}) +cnt: 1, ((T([16, 42, 83, 83], f16, stride=(1157352, 6889, 83, 1)), T([16, 42, 83, 83], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +cnt: 1, ((T([16, 42, 83, 83], f16, stride=(1157352, 6889, 83, 1)), T([16, 42, 167, 167], f16), [3, 3], [2, 2], [0, 0], False, False, None), {}) +Operator: aten.cat.default 
+cnt: 1, (([T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16)], 1), {}) +cnt: 1, (([T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16)], 1), {}) +cnt: 1, (([T([16, 84, 42, 42], f16), T([16, 84, 42, 42], f16), T([16, 84, 42, 42], f16), T([16, 84, 42, 42], f16)], 1), {}) +cnt: 1, (([T([16, 84, 42, 42], f16), T([16, 84, 42, 42], f16)], 1), {}) +cnt: 6, (([T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16)], 1), {}) +cnt: 1, (([T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16)], 1), {}) +cnt: 1, (([T([16, 168, 21, 21], f16), T([16, 168, 21, 21], f16)], 1), {}) +cnt: 6, (([T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16)], 1), {}) +cnt: 1, (([T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16)], 1), {}) +cnt: 1, (([T([16, 336, 11, 11], f16), T([16, 336, 11, 11], f16)], 1), {}) +cnt: 6, (([T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([16, 3, 331, 331], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([16, 42, 165, 165], f16), [2, 2, 2, 2], 0.0), {}) +cnt: 2, ((T([16, 96, 165, 165], f16), [3, 3, 3, 3], 0.0), {}) +cnt: 2, ((T([16, 42, 165, 165], f16), [1, 1, 1, 1], -inf), {}) +cnt: 1, ((T([16, 42, 165, 165], f16), [1, 1, 1, 1], 0.0), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), [2, 2, 2, 2], 0.0), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), [-1, 1, -1, 1], 0.0), {}) +cnt: 2, ((T([16, 84, 83, 83], f16), [2, 2, 2, 2], 0.0), {}) +cnt: 2, ((T([16, 84, 83, 83], f16), [3, 3, 3, 3], 0.0), {}) +cnt: 2, ((T([16, 84, 83, 
83], f16), [1, 1, 1, 1], -inf), {}) +cnt: 1, ((T([16, 84, 83, 83], f16), [1, 1, 1, 1], 0.0), {}) +cnt: 1, ((T([16, 168, 83, 83], f16), [-1, 1, -1, 1], 0.0), {}) +cnt: 2, ((T([16, 336, 42, 42], f16), [1, 2, 1, 2], 0.0), {}) +cnt: 2, ((T([16, 336, 42, 42], f16), [2, 3, 2, 3], 0.0), {}) +cnt: 2, ((T([16, 336, 42, 42], f16), [0, 1, 0, 1], -inf), {}) +cnt: 1, ((T([16, 336, 42, 42], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([16, 1008, 42, 42], f16), [-1, 1, -1, 1], 0.0), {}) +cnt: 2, ((T([16, 672, 21, 21], f16), [2, 2, 2, 2], 0.0), {}) +cnt: 2, ((T([16, 672, 21, 21], f16), [3, 3, 3, 3], 0.0), {}) +cnt: 2, ((T([16, 672, 21, 21], f16), [1, 1, 1, 1], -inf), {}) +cnt: 1, ((T([16, 672, 21, 21], f16), [1, 1, 1, 1], 0.0), {}) +cnt: 1, ((T([16, 2016, 21, 21], f16), [-1, 1, -1, 1], 0.0), {}) +cnt: 1, ((T([16, 2016, 21, 21], f16), [1, -1, 1, -1]), {}) +cnt: 3, ((T([16, 672, 23, 23], f16), [-1, -1, -1, -1]), {}) +cnt: 2, ((T([16, 672, 25, 25], f16), [-2, -2, -2, -2]), {}) +cnt: 2, ((T([16, 672, 27, 27], f16), [-3, -3, -3, -3]), {}) +cnt: 1, ((T([16, 1008, 42, 42], f16), [1, -1, 1, -1]), {}) +cnt: 3, ((T([16, 336, 43, 43], f16), [0, -1, 0, -1]), {}) +cnt: 2, ((T([16, 336, 45, 45], f16), [-1, -2, -1, -2]), {}) +cnt: 2, ((T([16, 336, 47, 47], f16), [-2, -3, -2, -3]), {}) +cnt: 1, ((T([16, 168, 83, 83], f16), [1, -1, 1, -1]), {}) +cnt: 3, ((T([16, 84, 85, 85], f16), [-1, -1, -1, -1]), {}) +cnt: 2, ((T([16, 84, 87, 87], f16), [-2, -2, -2, -2]), {}) +cnt: 2, ((T([16, 84, 89, 89], f16), [-3, -3, -3, -3]), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), [1, -1, 1, -1]), {}) +cnt: 3, ((T([16, 42, 167, 167], f16), [-1, -1, -1, -1]), {}) +cnt: 1, ((T([16, 96, 169, 169], f16), [-2, -2, -2, -2]), {}) +cnt: 2, ((T([16, 96, 171, 171], f16), [-3, -3, -3, -3]), {}) +cnt: 1, ((T([16, 42, 169, 169], f16), [-2, -2, -2, -2]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([16, 3, 331, 331], f16), T([96, 3, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 96, 165, 
165], f16), T([42, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 42, 169, 169], f16), T([42, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 42), {}) +cnt: 7, ((T([16, 42, 83, 83], f16), T([42, 42, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 42, 83, 83], f16), T([42, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 42), {}) +cnt: 2, ((T([16, 96, 171, 171], f16), T([96, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 96), {}) +cnt: 5, ((T([16, 96, 83, 83], f16), T([42, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 42, 83, 83], f16), T([42, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 42), {}) +cnt: 1, ((T([16, 96, 169, 169], f16), T([96, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 96), {}) +cnt: 2, ((T([16, 42, 83, 83], f16), T([42, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 42), {}) +cnt: 1, ((T([16, 168, 83, 83], f16), T([84, 168, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 84, 87, 87], f16), T([84, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 84), {}) +cnt: 10, ((T([16, 84, 42, 42], f16), T([84, 84, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 84, 42, 42], f16), T([84, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 84), {}) +cnt: 2, ((T([16, 84, 89, 89], f16), T([84, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 84), {}) +cnt: 2, ((T([16, 84, 42, 42], f16), T([84, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 84), {}) +cnt: 2, ((T([16, 84, 42, 42], f16), T([84, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 84), {}) +cnt: 2, ((T([16, 168, 42, 42], f16), T([84, 168, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 336, 42, 42], f16), T([168, 336, 1, 1], f16), None, [1, 
1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 24, ((T([16, 168, 42, 42], f16), T([168, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 168), {}) +cnt: 60, ((T([16, 168, 42, 42], f16), T([168, 168, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 36, ((T([16, 168, 42, 42], f16), T([168, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 168), {}) +cnt: 9, ((T([16, 1008, 42, 42], f16), T([168, 1008, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 1008, 42, 42], f16), T([336, 1008, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 336, 45, 45], f16), T([336, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 336), {}) +cnt: 70, ((T([16, 336, 21, 21], f16), T([336, 336, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 26, ((T([16, 336, 21, 21], f16), T([336, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 336), {}) +cnt: 2, ((T([16, 336, 47, 47], f16), T([336, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 336), {}) +cnt: 2, ((T([16, 336, 21, 21], f16), T([336, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 336), {}) +cnt: 38, ((T([16, 336, 21, 21], f16), T([336, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 336), {}) +cnt: 2, ((T([16, 1008, 21, 21], f16), T([168, 1008, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 1344, 21, 21], f16), T([336, 1344, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([16, 2016, 21, 21], f16), T([336, 2016, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 2016, 21, 21], f16), T([672, 2016, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 672, 25, 25], f16), T([672, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 672), {}) +cnt: 70, ((T([16, 672, 11, 11], f16), T([672, 672, 1, 1], f16), 
None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 26, ((T([16, 672, 11, 11], f16), T([672, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 2, ((T([16, 672, 27, 27], f16), T([672, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 672), {}) +cnt: 2, ((T([16, 672, 11, 11], f16), T([672, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 672), {}) +cnt: 38, ((T([16, 672, 11, 11], f16), T([672, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 672), {}) +cnt: 2, ((T([16, 2016, 11, 11], f16), T([336, 2016, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 2688, 11, 11], f16), T([672, 2688, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([16, 4032, 11, 11], f16), T([672, 4032, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 70, ((T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([672, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 38, ((T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([672, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 26, ((T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([672, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 9, ((T([16, 672, 11, 11], f16), T([16, 4032, 11, 11], f16), T([672, 4032, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 672, 11, 11], f16), T([16, 2688, 11, 11], f16), T([672, 2688, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 336, 11, 11], f16, stride=(81312, 121, 11, 1)), T([16, 2016, 11, 11], f16), T([336, 2016, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 672, 11, 11], f16), T([16, 672, 25, 
25], f16), T([672, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 2, ((T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([672, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 2, ((T([16, 672, 11, 11], f16), T([16, 672, 27, 27], f16), T([672, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 2, ((T([16, 672, 21, 21], f16), T([16, 2016, 21, 21], f16), T([672, 2016, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 70, ((T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([336, 336, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 38, ((T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([336, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 336, [True, True, False]), {}) +cnt: 26, ((T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([336, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 336, [True, True, False]), {}) +cnt: 9, ((T([16, 336, 21, 21], f16), T([16, 2016, 21, 21], f16), T([336, 2016, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 336, 21, 21], f16), T([16, 1344, 21, 21], f16), T([336, 1344, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 168, 21, 21], f16, stride=(148176, 441, 21, 1)), T([16, 1008, 21, 21], f16), T([168, 1008, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 336, 21, 21], f16), T([16, 336, 45, 45], f16), T([336, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 336, [True, True, False]), {}) +cnt: 2, ((T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([336, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 336, [True, True, False]), {}) +cnt: 2, ((T([16, 336, 21, 21], f16), T([16, 
336, 47, 47], f16), T([336, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 336, [True, True, False]), {}) +cnt: 2, ((T([16, 336, 42, 42], f16), T([16, 1008, 42, 42], f16), T([336, 1008, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 60, ((T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16), T([168, 168, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 36, ((T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16), T([168, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 168, [True, True, False]), {}) +cnt: 24, ((T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16), T([168, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 168, [True, True, False]), {}) +cnt: 9, ((T([16, 168, 42, 42], f16), T([16, 1008, 42, 42], f16), T([168, 1008, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 168, 42, 42], f16), T([16, 336, 42, 42], f16), T([168, 336, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 84, 42, 42], f16, stride=(296352, 1764, 42, 1)), T([16, 168, 42, 42], f16), T([84, 168, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 10, ((T([16, 84, 42, 42], f16), T([16, 84, 42, 42], f16), T([84, 84, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 84, 42, 42], f16), T([16, 84, 42, 42], f16), T([84, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 84, [True, True, False]), {}) +cnt: 2, ((T([16, 84, 42, 42], f16), T([16, 84, 42, 42], f16), T([84, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 84, [True, True, False]), {}) +cnt: 2, ((T([16, 84, 42, 42], f16), T([16, 84, 87, 87], f16), T([84, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 84, [True, True, False]), {}) +cnt: 2, ((T([16, 84, 42, 42], f16), T([16, 84, 42, 42], 
f16), T([84, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 84, [True, True, False]), {}) +cnt: 2, ((T([16, 84, 42, 42], f16), T([16, 84, 89, 89], f16), T([84, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 84, [True, True, False]), {}) +cnt: 2, ((T([16, 42, 83, 83], f16, stride=(578676, 6889, 83, 1)), T([16, 96, 83, 83], f16), T([42, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 84, 83, 83], f16), T([16, 168, 83, 83], f16), T([84, 168, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16), T([42, 42, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16), T([42, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 42, [True, True, False]), {}) +cnt: 2, ((T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16), T([42, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 42, [True, True, False]), {}) +cnt: 3, ((T([16, 42, 83, 83], f16), T([16, 96, 83, 83], f16), T([42, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 96, 83, 83], f16), T([16, 96, 169, 169], f16), T([96, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 2, ((T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16), T([42, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 42, [True, True, False]), {}) +cnt: 2, ((T([16, 96, 83, 83], f16), T([16, 96, 171, 171], f16), T([96, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([16, 42, 83, 83], f16), T([16, 42, 169, 169], f16), T([42, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 42, [True, True, False]), {}) +cnt: 1, ((T([16, 42, 165, 165], f16), T([16, 96, 165, 165], f16), T([42, 96, 1, 1], f16), [0], 
[1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), T([16, 3, 331, 331], f16), T([96, 3, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([16, 3, 331, 331], f16), T([16, 3, 331, 331], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([16, 4032, 11, 11], f16, stride=(4032, 1, 0, 0)), 121), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([16], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 2, ((T([16, 42, 167, 167], f16), [3, 3], [2, 2]), {}) +cnt: 2, ((T([16, 84, 85, 85], f16), [3, 3], [2, 2]), {}) +cnt: 2, ((T([16, 336, 43, 43], f16), [3, 3], [2, 2]), {}) +cnt: 2, ((T([16, 672, 23, 23], f16), [3, 3], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([16, 672, 11, 11], f16, stride=(325248, 121, 11, 1)), T([16, 672, 23, 23], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 672, 11, 11], i64)), {}) +cnt: 1, ((T([16, 672, 11, 11], f16), T([16, 672, 23, 23], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 672, 11, 11], i64)), {}) +cnt: 1, ((T([16, 336, 21, 21], f16, stride=(592704, 441, 21, 1)), T([16, 336, 43, 43], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 336, 21, 21], i64)), {}) +cnt: 1, ((T([16, 336, 21, 21], f16), T([16, 336, 43, 43], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 336, 21, 21], i64)), {}) +cnt: 1, ((T([16, 84, 42, 42], f16, stride=(592704, 1764, 42, 1)), T([16, 84, 85, 85], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 84, 42, 42], i64)), {}) +cnt: 1, ((T([16, 84, 42, 42], f16), T([16, 84, 85, 85], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 84, 42, 42], i64)), {}) +cnt: 1, ((T([16, 42, 83, 83], f16, stride=(1157352, 6889, 83, 1)), T([16, 42, 167, 167], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 42, 83, 83], i64)), {}) +cnt: 1, ((T([16, 42, 83, 83], f16), T([16, 42, 167, 167], f16), [3, 3], [2, 2], [0, 0], [1, 
1], False, T([16, 42, 83, 83], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([16, 4032, 11, 11], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([16, 1000], f16), T([1000, 4032], f16)), {}) +cnt: 1, ((T([1000, 16], f16, stride=(1, 1000)), T([16, 4032], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([16, 96, 165, 165], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([16, 42, 165, 165], f16), T([42], f16), T([42], f16), T([42], f16), T([42], f16), True, 0.1, 0.001), {}) +cnt: 10, ((T([16, 42, 83, 83], f16), T([42], f16), T([42], f16), T([42], f16), T([42], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([16, 84, 83, 83], f16), T([84], f16), T([84], f16), T([84], f16), T([84], f16), True, 0.1, 0.001), {}) +cnt: 10, ((T([16, 84, 42, 42], f16), T([84], f16), T([84], f16), T([84], f16), T([84], f16), True, 0.1, 0.001), {}) +cnt: 72, ((T([16, 168, 42, 42], f16), T([168], f16), T([168], f16), T([168], f16), T([168], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([16, 336, 42, 42], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f16), True, 0.1, 0.001), {}) +cnt: 82, ((T([16, 336, 21, 21], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([16, 672, 21, 21], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 0.001), {}) +cnt: 82, ((T([16, 672, 11, 11], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 0.001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 30, ((T([16, 672, 11, 11], f16, stride=(487872, 121, 11, 1)), T([16, 672, 11, 11], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 0.001, [True, True, True]), {}) +cnt: 50, ((T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([16, 672, 11, 11], f16, 
stride=(325248, 121, 11, 1)), T([16, 672, 11, 11], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([16, 672, 21, 21], f16), T([16, 672, 21, 21], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 0.001, [True, True, True]), {}) +cnt: 30, ((T([16, 336, 21, 21], f16, stride=(889056, 441, 21, 1)), T([16, 336, 21, 21], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 0.001, [True, True, True]), {}) +cnt: 50, ((T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([16, 336, 21, 21], f16, stride=(592704, 441, 21, 1)), T([16, 336, 21, 21], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([16, 336, 42, 42], f16), T([16, 336, 42, 42], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 0.001, [True, True, True]), {}) +cnt: 30, ((T([16, 168, 42, 42], f16, stride=(1778112, 1764, 42, 1)), T([16, 168, 42, 42], f16), T([168], f16), T([168], f16), T([168], f16), T([168], f32), T([168], f32), True, 0.001, [True, True, True]), {}) +cnt: 42, ((T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16), T([168], f16), T([168], f16), T([168], f16), T([168], f32), T([168], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([16, 84, 42, 42], f16, stride=(592704, 1764, 42, 1)), T([16, 84, 42, 42], f16), T([84], f16), T([84], f16), T([84], f16), T([84], f32), T([84], f32), True, 0.001, [True, True, True]), {}) +cnt: 8, ((T([16, 84, 42, 42], f16), T([16, 84, 42, 42], f16), T([84], f16), T([84], f16), T([84], f16), T([84], f32), T([84], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([16, 84, 83, 83], f16), T([16, 84, 83, 83], f16), T([84], f16), T([84], f16), T([84], f16), T([84], f32), 
T([84], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([16, 42, 83, 83], f16, stride=(1157352, 6889, 83, 1)), T([16, 42, 83, 83], f16), T([42], f16), T([42], f16), T([42], f16), T([42], f32), T([42], f32), True, 0.001, [True, True, True]), {}) +cnt: 8, ((T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16), T([42], f16), T([42], f16), T([42], f16), T([42], f32), T([42], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([16, 42, 165, 165], f16), T([16, 42, 165, 165], f16), T([42], f16), T([42], f16), T([42], f16), T([42], f32), T([42], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), T([16, 96, 165, 165], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 0.001, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([16, 1000], f16), T([16], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([16, 1000], f16), T([16], i64), None, 1, -100), {}) +Operator: aten.relu.default +cnt: 5, ((T([16, 96, 165, 165], f16),), {}) +cnt: 1, ((T([16, 42, 165, 165], f16),), {}) +cnt: 1, ((T([16, 42, 83, 83], f16),), {}) +cnt: 2, ((T([16, 168, 83, 83], f16),), {}) +cnt: 4, ((T([16, 84, 83, 83], f16),), {}) +cnt: 1, ((T([16, 84, 42, 42], f16),), {}) +cnt: 6, ((T([16, 336, 42, 42], f16),), {}) +cnt: 30, ((T([16, 168, 42, 42], f16),), {}) +cnt: 12, ((T([16, 1008, 42, 42], f16),), {}) +cnt: 31, ((T([16, 336, 21, 21], f16),), {}) +cnt: 2, ((T([16, 1344, 21, 21], f16),), {}) +cnt: 12, ((T([16, 2016, 21, 21], f16),), {}) +cnt: 4, ((T([16, 672, 21, 21], f16),), {}) +cnt: 31, ((T([16, 672, 11, 11], f16),), {}) +cnt: 2, ((T([16, 2688, 11, 11], f16),), {}) +cnt: 9, ((T([16, 4032, 11, 11], f16),), {}) +Operator: aten.relu_.default +cnt: 5, ((T([16, 42, 83, 83], f16),), {}) +cnt: 5, ((T([16, 84, 42, 42], f16),), {}) +cnt: 30, ((T([16, 168, 42, 42], f16),), {}) +cnt: 35, ((T([16, 336, 21, 21], f16),), {}) +cnt: 35, ((T([16, 672, 11, 11], 
f16),), {}) +cnt: 1, ((T([16, 4032, 11, 11], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([16, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 10, ((T([16, 4032, 11, 11], f16), T([16, 4032, 11, 11], f16), 0), {}) +cnt: 66, ((T([16, 672, 11, 11], f16), T([16, 672, 11, 11], f16), 0), {}) +cnt: 2, ((T([16, 2688, 11, 11], f16), T([16, 2688, 11, 11], f16), 0), {}) +cnt: 12, ((T([16, 2016, 21, 21], f16), T([16, 2016, 21, 21], f16), 0), {}) +cnt: 4, ((T([16, 672, 21, 21], f16), T([16, 672, 21, 21], f16), 0), {}) +cnt: 66, ((T([16, 336, 21, 21], f16), T([16, 336, 21, 21], f16), 0), {}) +cnt: 2, ((T([16, 1344, 21, 21], f16), T([16, 1344, 21, 21], f16), 0), {}) +cnt: 12, ((T([16, 1008, 42, 42], f16), T([16, 1008, 42, 42], f16), 0), {}) +cnt: 6, ((T([16, 336, 42, 42], f16), T([16, 336, 42, 42], f16), 0), {}) +cnt: 60, ((T([16, 168, 42, 42], f16), T([16, 168, 42, 42], f16), 0), {}) +cnt: 2, ((T([16, 168, 83, 83], f16), T([16, 168, 83, 83], f16), 0), {}) +cnt: 6, ((T([16, 84, 42, 42], f16), T([16, 84, 42, 42], f16), 0), {}) +cnt: 4, ((T([16, 84, 83, 83], f16), T([16, 84, 83, 83], f16), 0), {}) +cnt: 5, ((T([16, 96, 165, 165], f16), T([16, 96, 165, 165], f16), 0), {}) +cnt: 6, ((T([16, 42, 83, 83], f16), T([16, 42, 83, 83], f16), 0), {}) +cnt: 1, ((T([16, 42, 165, 165], f16), T([16, 42, 165, 165], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/nfnet_l0_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/nfnet_l0_training.txt new file mode 100644 index 000000000..ae315ada2 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/nfnet_l0_training.txt @@ -0,0 +1,267 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 3, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), 
{}) +cnt: 6, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16)), {}) +cnt: 18, ((T([128, 1536, 14, 14], f16), T([128, 1536, 14, 14], f16)), {}) +cnt: 8, ((T([128, 1536, 7, 7], f16), T([128, 1536, 7, 7], f16)), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2304], f16), T([2304, 1000], f16, stride=(1, 2304))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([128, 256, 56, 56], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([128, 1536, 14, 14], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([128, 1536, 7, 7], f16), T([128, 1536, 14, 14], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([128, 512, 14, 14], f16), T([128, 512, 28, 28], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([128, 256, 28, 28], f16), T([128, 256, 56, 56], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +cnt: 1, ((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 64, 112, 112], f16),), {}) +cnt: 2, ((T([128, 64, 56, 56], f16),), {}) +cnt: 1, ((T([128, 128, 56, 56], f16),), {}) +cnt: 3, ((T([128, 128, 28, 28], f16),), {}) +cnt: 1, ((T([128, 384, 28, 28], f16),), {}) +cnt: 12, ((T([128, 384, 14, 14], f16),), {}) +cnt: 5, ((T([128, 384, 7, 7], f16),), {}) +cnt: 1, ((T([128, 2304, 7, 7], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), T([16], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([32, 16, 3, 3], f16), T([32], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([64, 32, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], 
False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 3, 3], f16), T([128], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([256, 128, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([64, 128, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 64, 56, 56], f16), T([64, 64, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([256, 64, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([64, 256, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 1, 1], f16), T([256, 64, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 28, 28], f16), T([512, 256, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 256, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 64, 3, 3], f16), T([128], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 3, ((T([128, 128, 28, 28], f16), T([128, 64, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 2, ((T([128, 128, 28, 28], f16), T([512, 128, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 512, 1, 1], f16), T([128, 512, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 1, 1], f16), T([512, 128, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([128, 512, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 14, 14], f16), T([1536, 512, 1, 1], f16), T([1536], f16), [1, 1], [0, 
0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([384, 512, 1, 1], f16), T([384], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 384, 28, 28], f16), T([384, 64, 3, 3], f16), T([384], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 6), {}) +cnt: 11, ((T([128, 384, 14, 14], f16), T([384, 64, 3, 3], f16), T([384], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 6), {}) +cnt: 6, ((T([128, 384, 14, 14], f16), T([1536, 384, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([128, 1536, 1, 1], f16), T([384, 1536, 1, 1], f16), T([384], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([128, 384, 1, 1], f16), T([1536, 384, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([128, 1536, 14, 14], f16), T([384, 1536, 1, 1], f16), T([384], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1536, 7, 7], f16), T([1536, 1536, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), T([384, 64, 3, 3], f16), T([384], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 6), {}) +cnt: 5, ((T([128, 384, 7, 7], f16), T([384, 64, 3, 3], f16), T([384], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 6), {}) +cnt: 3, ((T([128, 384, 7, 7], f16), T([1536, 384, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 1536, 7, 7], f16), T([384, 1536, 1, 1], f16), T([384], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1536, 7, 7], f16), T([2304, 1536, 1, 1], f16), T([2304], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 2304, 7, 7], f16), T([128, 1536, 7, 7], f16), T([2304, 1536, 1, 1], f16), [2304], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 9, ((T([128, 1536, 1, 1], f16), T([128, 384, 1, 1], f16), T([1536, 384, 1, 1], f16), 
[1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 9, ((T([128, 384, 1, 1], f16), T([128, 1536, 1, 1], f16), T([384, 1536, 1, 1], f16), [384], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 1536, 7, 7], f16), T([128, 384, 7, 7], f16), T([1536, 384, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 5, ((T([128, 384, 7, 7], f16), T([128, 384, 7, 7], f16), T([384, 64, 3, 3], f16), [384], [1, 1], [1, 1], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 2, ((T([128, 384, 7, 7], f16), T([128, 1536, 7, 7], f16), T([384, 1536, 1, 1], f16), [384], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 384, 7, 7], f16), T([128, 384, 14, 14], f16), T([384, 64, 3, 3], f16), [384], [2, 2], [1, 1], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 6, ((T([128, 384, 14, 14], f16), T([128, 1536, 14, 14], f16), T([384, 1536, 1, 1], f16), [384], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1536, 7, 7], f16), T([128, 1536, 7, 7], f16), T([1536, 1536, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 6, ((T([128, 1536, 14, 14], f16), T([128, 384, 14, 14], f16), T([1536, 384, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 11, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), T([384, 64, 3, 3], f16), [384], [1, 1], [1, 1], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), T([128, 384, 28, 28], f16), T([384, 64, 3, 3], f16), [384], [2, 2], [1, 1], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 1, ((T([128, 384, 28, 28], f16), T([128, 512, 28, 28], f16), T([384, 512, 1, 1], f16), [384], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1536, 14, 14], f16), T([128, 512, 14, 14], f16), T([1536, 512, 1, 1], f16), 
[1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 512, 1, 1], f16), T([128, 128, 1, 1], f16), T([512, 128, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 128, 1, 1], f16), T([128, 512, 1, 1], f16), T([128, 512, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 512, 28, 28], f16), T([128, 128, 28, 28], f16), T([512, 128, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16), T([128, 64, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 28, 28], f16), T([128, 512, 28, 28], f16), T([128, 512, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 28, 28], f16), T([128, 128, 56, 56], f16), T([128, 64, 3, 3], f16), [128], [2, 2], [1, 1], [1, 1], False, [0, 0], 2, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 256, 56, 56], f16), T([128, 256, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([128, 256, 28, 28], f16), T([512, 256, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 64, 1, 1], f16), T([256, 64, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 1, 1], f16), T([128, 256, 1, 1], f16), T([64, 256, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 64, 56, 56], f16), T([256, 64, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64, 64, 3, 3], f16), [64], [1, 1], [1, 1], 
[1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 128, 56, 56], f16), T([64, 128, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 128, 56, 56], f16), T([256, 128, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 64, 112, 112], f16), T([128, 64, 3, 3], f16), [128], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 32, 112, 112], f16), T([64, 32, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 16, 112, 112], f16), T([32, 16, 3, 3], f16), [32], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), [16], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2304, 7, 7], f16, stride=(2304, 1, 0, 0)), 49), {}) +cnt: 3, ((T([128, 1536, 7, 7], f16, stride=(1536, 1, 0, 0)), 49), {}) +cnt: 6, ((T([128, 1536, 14, 14], f16, stride=(1536, 1, 0, 0)), 196), {}) +cnt: 2, ((T([128, 512, 28, 28], f16, stride=(512, 1, 0, 0)), 784), {}) +cnt: 1, ((T([128, 256, 56, 56], f16, stride=(256, 1, 0, 0)), 3136), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 256, 56, 56], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 512, 28, 28], f16), [2, 3], True), {}) +cnt: 6, ((T([128, 1536, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([128, 1536, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 2304, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 2304], 
f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 2304], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([16, 1, 1, 1], f16), 0.34412564994580647), {}) +cnt: 2, ((T([32, 1, 1, 1], f16), 0.1490107774734497), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.10536653122135592), {}) +cnt: 10, ((T([128, 1, 1, 1], f16), 0.07450538873672485), {}) +cnt: 2, ((T([128, 128, 56, 56], f16), 1.0), {}) +cnt: 2, ((T([256, 1, 1, 1], f16), 0.1580497968320339), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.1580497968320339), {}) +cnt: 4, ((T([64, 1, 1, 1], f16), 0.07450538873672485), {}) +cnt: 2, ((T([256, 1, 1, 1], f16), 0.22351616621017456), {}) +cnt: 2, ((T([128, 256, 56, 56], f16), T([128, 256, 1, 1], f16)), {}) +cnt: 2, ((T([128, 256, 56, 56], f16), 2.0), {}) +cnt: 2, ((T([128, 256, 56, 56], f16), 0.2), {}) +cnt: 2, ((T([128, 256, 56, 56], f16), 0.9805806756909201), {}) +cnt: 2, ((T([512, 1, 1, 1], f16), 0.11175808310508728), {}) +cnt: 2, ((T([128, 1, 1, 1], f16), 0.11175808310508728), {}) +cnt: 4, ((T([512, 1, 1, 1], f16), 0.1580497968320339), {}) +cnt: 4, ((T([128, 512, 28, 28], f16), T([128, 512, 1, 1], f16)), {}) +cnt: 4, ((T([128, 512, 28, 28], f16), 2.0), {}) +cnt: 4, ((T([128, 512, 28, 28], f16), 0.2), {}) +cnt: 2, ((T([128, 512, 28, 28], f16), 0.9805806756909201), {}) +cnt: 2, ((T([128, 1, 1, 1], f16), 0.07902489841601695), {}) +cnt: 2, ((T([128, 512, 28, 28], f16), 0.9622504486493761), {}) +cnt: 2, ((T([1536, 1, 1, 1], f16), 0.07902489841601695), {}) +cnt: 2, ((T([384, 1, 1, 1], f16), 0.07902489841601695), {}) +cnt: 36, ((T([384, 1, 1, 1], f16), 0.07450538873672485), {}) +cnt: 18, ((T([1536, 1, 1, 1], f16), 0.09125009274634042), {}) +cnt: 12, ((T([128, 1536, 14, 14], f16), T([128, 1536, 1, 1], f16)), {}) +cnt: 12, ((T([128, 1536, 14, 14], f16), 2.0), {}) +cnt: 12, ((T([128, 1536, 14, 14], f16), 0.2), {}) +cnt: 2, ((T([128, 1536, 14, 14], f16), 0.9805806756909201), {}) +cnt: 16, ((T([384, 1, 1, 1], f16), 0.04562504637317021), {}) +cnt: 2, ((T([128, 1536, 14, 14], 
f16), 0.9622504486493761), {}) +cnt: 2, ((T([128, 1536, 14, 14], f16), 0.9449111825230679), {}) +cnt: 2, ((T([128, 1536, 14, 14], f16), 0.9284766908852592), {}) +cnt: 2, ((T([128, 1536, 14, 14], f16), 0.9128709291752768), {}) +cnt: 2, ((T([128, 1536, 14, 14], f16), 0.8980265101338745), {}) +cnt: 2, ((T([1536, 1, 1, 1], f16), 0.04562504637317021), {}) +cnt: 6, ((T([128, 1536, 7, 7], f16), T([128, 1536, 1, 1], f16)), {}) +cnt: 6, ((T([128, 1536, 7, 7], f16), 2.0), {}) +cnt: 6, ((T([128, 1536, 7, 7], f16), 0.2), {}) +cnt: 2, ((T([128, 1536, 7, 7], f16), 0.9805806756909201), {}) +cnt: 2, ((T([128, 1536, 7, 7], f16), 0.9622504486493761), {}) +cnt: 2, ((T([2304, 1, 1, 1], f16), 0.04562504637317021), {}) +cnt: 3, ((T([128, 1536, 7, 7], f16), T([128, 1536, 7, 7], f16)), {}) +cnt: 6, ((T([128, 1536, 14, 14], f16), T([128, 1536, 14, 14], f16)), {}) +cnt: 2, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16)), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([1, 16, 27], f16), T([16], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 32, 144], f16), T([32], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 64, 288], f16), T([64], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 5, ((T([1, 128, 576], f16), T([128], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 256, 128], f16), T([256], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 64, 128], f16), T([64], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 2, ((T([1, 64, 576], f16), T([64], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 256, 64], f16), T([256], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 512, 256], f16), T([512], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 128, 256], f16), T([128], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 2, ((T([1, 512, 128], f16), T([512], f16), None, None, None, True, 
0.0, 1e-05), {}) +cnt: 1, ((T([1, 128, 512], f16), T([128], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 1536, 512], f16), T([1536], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 384, 512], f16), T([384], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 18, ((T([1, 384, 576], f16), T([384], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 9, ((T([1, 1536, 384], f16), T([1536], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 8, ((T([1, 384, 1536], f16), T([384], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 1536, 1536], f16), T([1536], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 2304, 1536], f16), T([2304], f16), None, None, None, True, 0.0, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([1, 2304, 1536], f16), T([1, 2304, 1536], f16), T([2304], f16), None, None, T([2304], f32), T([2304], f32), True, 1e-05, [True, True, False]), {}) +cnt: 9, ((T([1, 1536, 384], f16), T([1, 1536, 384], f16), T([1536], f16), None, None, T([1536], f32), T([1536], f32), True, 1e-05, [True, True, False]), {}) +cnt: 18, ((T([1, 384, 576], f16), T([1, 384, 576], f16), T([384], f16), None, None, T([384], f32), T([384], f32), True, 1e-05, [True, True, False]), {}) +cnt: 8, ((T([1, 384, 1536], f16), T([1, 384, 1536], f16), T([384], f16), None, None, T([384], f32), T([384], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 1536, 1536], f16), T([1, 1536, 1536], f16), T([1536], f16), None, None, T([1536], f32), T([1536], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 384, 512], f16), T([1, 384, 512], f16), T([384], f16), None, None, T([384], f32), T([384], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 1536, 512], f16), T([1, 1536, 512], f16), T([1536], f16), None, None, T([1536], f32), T([1536], f32), True, 1e-05, [True, True, False]), {}) +cnt: 2, ((T([1, 512, 128], f16), T([1, 512, 128], f16), T([512], f16), None, None, T([512], f32), 
T([512], f32), True, 1e-05, [True, True, False]), {}) +cnt: 5, ((T([1, 128, 576], f16), T([1, 128, 576], f16), T([128], f16), None, None, T([128], f32), T([128], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 128, 512], f16), T([1, 128, 512], f16), T([128], f16), None, None, T([128], f32), T([128], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 128, 256], f16), T([1, 128, 256], f16), T([128], f16), None, None, T([128], f32), T([128], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 512, 256], f16), T([1, 512, 256], f16), T([512], f16), None, None, T([512], f32), T([512], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 256, 64], f16), T([1, 256, 64], f16), T([256], f16), None, None, T([256], f32), T([256], f32), True, 1e-05, [True, True, False]), {}) +cnt: 2, ((T([1, 64, 576], f16), T([1, 64, 576], f16), T([64], f16), None, None, T([64], f32), T([64], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 64, 128], f16), T([1, 64, 128], f16), T([64], f16), None, None, T([64], f32), T([64], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 256, 128], f16), T([1, 256, 128], f16), T([256], f16), None, None, T([256], f32), T([256], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 64, 288], f16), T([1, 64, 288], f16), T([64], f16), None, None, T([64], f32), T([64], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 32, 144], f16), T([1, 32, 144], f16), T([32], f16), None, None, T([32], f32), T([32], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 16, 27], f16), T([1, 16, 27], f16), T([16], f16), None, None, T([16], f32), T([16], f32), True, 1e-05, [True, True, False]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 
64, 1, 1], f16),), {}) +cnt: 2, ((T([128, 128, 1, 1], f16),), {}) +cnt: 9, ((T([128, 384, 1, 1], f16),), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([128, 256, 1, 1], f16),), {}) +cnt: 2, ((T([128, 512, 1, 1], f16),), {}) +cnt: 9, ((T([128, 1536, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 9, ((T([128, 1536, 1, 1], f16), T([128, 1536, 1, 1], f16)), {}) +cnt: 2, ((T([128, 512, 1, 1], f16), T([128, 512, 1, 1], f16)), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16)), {}) +Operator: aten.silu.default +cnt: 1, ((T([128, 128, 56, 56], f16),), {}) +cnt: 1, ((T([128, 64, 56, 56], f16),), {}) +cnt: 1, ((T([128, 256, 56, 56], f16),), {}) +cnt: 2, ((T([128, 128, 28, 28], f16),), {}) +cnt: 2, ((T([128, 512, 28, 28], f16),), {}) +cnt: 6, ((T([128, 384, 14, 14], f16),), {}) +cnt: 6, ((T([128, 1536, 14, 14], f16),), {}) +cnt: 3, ((T([128, 384, 7, 7], f16),), {}) +cnt: 2, ((T([128, 1536, 7, 7], f16),), {}) +Operator: aten.silu_.default +cnt: 1, ((T([128, 16, 112, 112], f16),), {}) +cnt: 1, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 64, 112, 112], f16),), {}) +cnt: 2, ((T([128, 64, 56, 56], f16),), {}) +cnt: 1, ((T([128, 128, 56, 56], f16),), {}) +cnt: 3, ((T([128, 128, 28, 28], f16),), {}) +cnt: 1, ((T([128, 384, 28, 28], f16),), {}) +cnt: 12, ((T([128, 384, 14, 14], f16),), {}) +cnt: 5, ((T([128, 384, 7, 7], f16),), {}) +cnt: 1, ((T([128, 2304, 7, 7], f16),), {}) +Operator: aten.silu_backward.default +cnt: 1, ((T([128, 2304, 7, 7], f16), T([128, 2304, 7, 7], f16)), {}) +cnt: 8, ((T([128, 384, 7, 7], f16), T([128, 384, 7, 7], f16)), {}) +cnt: 2, ((T([128, 1536, 7, 7], f16), T([128, 1536, 7, 7], f16)), {}) +cnt: 18, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16)), {}) +cnt: 6, ((T([128, 1536, 14, 14], f16), T([128, 1536, 14, 14], f16)), {}) +cnt: 1, ((T([128, 384, 28, 28], f16), T([128, 384, 28, 28], f16)), {}) +cnt: 2, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16)), {}) +cnt: 5, ((T([128, 128, 28, 28], 
f16), T([128, 128, 28, 28], f16)), {}) +cnt: 2, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16)), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), {}) +cnt: 3, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16)), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16)), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16)), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 3, ((T([128, 1536, 7, 7], f16), [2, 3], True), {}) +cnt: 6, ((T([128, 1536, 14, 14], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 512, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 9, ((T([128, 384, 1, 1], f16), T([128, 384, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 128, 1, 1], f16), T([128, 128, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 64, 1, 1], f16), T([128, 64, 1, 1], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/pit_b_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/pit_b_224_training.txt new file mode 100644 index 000000000..d26a9ef24 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/pit_b_224_training.txt @@ -0,0 +1,185 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 3, ((T([64, 4, 962, 962], f16), -1, False), {}) +cnt: 6, ((T([64, 8, 257, 257], f16), -1, False), {}) +cnt: 4, ((T([64, 16, 65, 65], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 4, ((T([64, 16, 65, 65], f16), T([64, 16, 65, 65], f16), -1, f16), {}) +cnt: 6, ((T([64, 8, 257, 257], f16), T([64, 8, 257, 257], f16), -1, f16), {}) +cnt: 3, ((T([64, 
4, 962, 962], f16), T([64, 4, 962, 962], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 9, ((T([64, 4, 962, 64], f16), [256, 962, 64]), {}) +cnt: 3, ((T([64, 4, 64, 962], f16), [256, 64, 962]), {}) +cnt: 3, ((T([256, 962, 962], f16), [64, 4, 962, 962]), {}) +cnt: 3, ((T([256, 962, 64], f16), [64, 4, 962, 64]), {}) +cnt: 3, ((T([64, 962, 4, 64], f16), [64, 962, 256]), {}) +cnt: 1, ((T([64, 512], f16), [64, 1, 512]), {}) +cnt: 18, ((T([64, 8, 257, 64], f16), [512, 257, 64]), {}) +cnt: 6, ((T([64, 8, 64, 257], f16), [512, 64, 257]), {}) +cnt: 6, ((T([512, 257, 257], f16), [64, 8, 257, 257]), {}) +cnt: 6, ((T([512, 257, 64], f16), [64, 8, 257, 64]), {}) +cnt: 6, ((T([64, 257, 8, 64], f16), [64, 257, 512]), {}) +cnt: 1, ((T([64, 1024], f16), [64, 1, 1024]), {}) +cnt: 12, ((T([64, 16, 65, 64], f16), [1024, 65, 64]), {}) +cnt: 4, ((T([64, 16, 64, 65], f16), [1024, 64, 65]), {}) +cnt: 4, ((T([1024, 65, 65], f16), [64, 16, 65, 65]), {}) +cnt: 4, ((T([1024, 65, 64], f16), [64, 16, 65, 64]), {}) +cnt: 4, ((T([64, 65, 16, 64], f16), [64, 65, 1024]), {}) +cnt: 4, ((T([64, 65, 3, 16, 64], f16), [64, 65, 3072]), {}) +cnt: 6, ((T([64, 257, 3, 8, 64], f16), [64, 257, 1536]), {}) +cnt: 3, ((T([64, 962, 3, 4, 64], f16), [64, 962, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([64, 256, 31, 31], f16), T([1, 256, 31, 31], f16)), {}) +cnt: 13, ((T([64, 962, 256], f16), T([64, 962, 256], f16)), {}) +cnt: 1, ((T([64, 1, 512], f16), T([512], f16)), {}) +cnt: 25, ((T([64, 257, 512], f16), T([64, 257, 512], f16)), {}) +cnt: 1, ((T([64, 1, 1024], f16), T([1024], f16)), {}) +cnt: 16, ((T([64, 65, 1024], f16), T([64, 65, 1024], f16)), {}) +Operator: aten.addmm.default +cnt: 3, ((T([768], f16), T([61568, 256], f16), T([256, 768], f16, stride=(1, 256))), {}) +cnt: 3, ((T([256], f16), T([61568, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 3, ((T([1024], f16), T([61568, 256], f16), T([256, 1024], f16, stride=(1, 256))), {}) +cnt: 3, ((T([256], f16), T([61568, 
1024], f16), T([1024, 256], f16, stride=(1, 1024))), {}) +cnt: 6, ((T([1536], f16), T([16448, 512], f16), T([512, 1536], f16, stride=(1, 512))), {}) +cnt: 6, ((T([512], f16), T([16448, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 6, ((T([2048], f16), T([16448, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 6, ((T([512], f16), T([16448, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +cnt: 4, ((T([3072], f16), T([4160, 1024], f16), T([1024, 3072], f16, stride=(1, 1024))), {}) +cnt: 4, ((T([1024], f16), T([4160, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 4, ((T([4096], f16), T([4160, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 4, ((T([1024], f16), T([4160, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +cnt: 1, ((T([1000], f16), T([64, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {}) +Operator: aten.bmm.default +cnt: 3, ((T([256, 962, 64], f16), T([256, 64, 962], f16)), {}) +cnt: 3, ((T([256, 962, 962], f16), T([256, 962, 64], f16)), {}) +cnt: 6, ((T([512, 257, 64], f16), T([512, 64, 257], f16)), {}) +cnt: 6, ((T([512, 257, 257], f16), T([512, 257, 64], f16)), {}) +cnt: 4, ((T([1024, 65, 64], f16), T([1024, 64, 65], f16)), {}) +cnt: 4, ((T([1024, 65, 65], f16), T([1024, 65, 64], f16)), {}) +cnt: 4, ((T([1024, 65, 65], f16, stride=(4225, 1, 65)), T([1024, 65, 64], f16)), {}) +cnt: 4, ((T([1024, 65, 64], f16), T([1024, 64, 65], f16, stride=(4160, 1, 64))), {}) +cnt: 4, ((T([1024, 64, 65], f16, stride=(4160, 1, 64)), T([1024, 65, 65], f16)), {}) +cnt: 4, ((T([1024, 65, 65], f16), T([1024, 65, 64], f16, stride=(4160, 1, 65))), {}) +cnt: 6, ((T([512, 257, 257], f16, stride=(66049, 1, 257)), T([512, 257, 64], f16)), {}) +cnt: 6, ((T([512, 257, 64], f16), T([512, 64, 257], f16, stride=(16448, 1, 64))), {}) +cnt: 6, ((T([512, 64, 257], f16, stride=(16448, 1, 64)), T([512, 257, 257], f16)), {}) +cnt: 6, ((T([512, 257, 257], f16), T([512, 257, 64], f16, stride=(16448, 1, 257))), 
{}) +cnt: 3, ((T([256, 962, 962], f16, stride=(925444, 1, 962)), T([256, 962, 64], f16)), {}) +cnt: 3, ((T([256, 962, 64], f16), T([256, 64, 962], f16, stride=(61568, 1, 64))), {}) +cnt: 3, ((T([256, 64, 962], f16, stride=(61568, 1, 64)), T([256, 962, 962], f16)), {}) +cnt: 3, ((T([256, 962, 962], f16), T([256, 962, 64], f16, stride=(61568, 1, 962))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 1, 256], f16, stride=(0, 256, 1)), T([64, 961, 256], f16, stride=(246016, 1, 961))], 1), {}) +cnt: 1, (([T([64, 1, 512], f16), T([64, 256, 512], f16, stride=(131072, 1, 256))], 1), {}) +cnt: 1, (([T([64, 1, 1024], f16), T([64, 64, 1024], f16, stride=(65536, 1, 64))], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([256, 3, 14, 14], f16), T([256], f16), [7, 7], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 31, 31], f16, stride=(246272, 1, 7936, 256)), T([512, 1, 3, 3], f16), T([512], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 256), {}) +cnt: 1, ((T([64, 512, 16, 16], f16, stride=(131584, 1, 8192, 512)), T([1024, 1, 3, 3], f16), T([1024], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 512), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 1024, 8, 8], f16, stride=(66560, 1, 8192, 1024)), T([64, 512, 16, 16], f16, stride=(131584, 1, 8192, 512)), T([1024, 1, 3, 3], f16), [1024], [2, 2], [1, 1], [1, 1], False, [0, 0], 512, [True, True, True]), {}) +cnt: 1, ((T([64, 512, 16, 16], f16, stride=(131584, 1, 8192, 512)), T([64, 256, 31, 31], f16, stride=(246272, 1, 7936, 256)), T([512, 1, 3, 3], f16), [512], [2, 2], [1, 1], [1, 1], False, [0, 0], 256, [True, True, True]), {}) +cnt: 1, ((T([64, 256, 31, 31], f16, stride=(246272, 1, 7936, 256)), T([64, 3, 224, 224], f16), T([256, 3, 14, 14], f16), [256], [7, 7], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), 
T([64, 3, 224, 224], f16)), {}) +Operator: aten.gelu.default +cnt: 3, ((T([64, 962, 1024], f16),), {}) +cnt: 6, ((T([64, 257, 2048], f16),), {}) +cnt: 4, ((T([64, 65, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 4, ((T([64, 65, 4096], f16), T([64, 65, 4096], f16)), {}) +cnt: 6, ((T([64, 257, 2048], f16), T([64, 257, 2048], f16)), {}) +cnt: 3, ((T([64, 962, 1024], f16), T([64, 962, 1024], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 256], f16, stride=(246272, 1)), T([256, 512], f16, stride=(1, 256))), {}) +cnt: 1, ((T([64, 512], f16, stride=(131584, 1)), T([512, 1024], f16, stride=(1, 512))), {}) +cnt: 1, ((T([64, 1000], f16), T([1000, 1024], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 1024], f16)), {}) +cnt: 4, ((T([4160, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 4, ((T([1024, 4160], f16, stride=(1, 1024)), T([4160, 4096], f16)), {}) +cnt: 4, ((T([4160, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 4, ((T([4096, 4160], f16, stride=(1, 4096)), T([4160, 1024], f16)), {}) +cnt: 4, ((T([4160, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 4, ((T([1024, 4160], f16, stride=(1, 1024)), T([4160, 1024], f16)), {}) +cnt: 4, ((T([4160, 3072], f16), T([3072, 1024], f16)), {}) +cnt: 4, ((T([3072, 4160], f16, stride=(1, 3072)), T([4160, 1024], f16)), {}) +cnt: 1, ((T([1024, 64], f16, stride=(1, 66560)), T([64, 512], f16, stride=(131584, 1))), {}) +cnt: 1, ((T([64, 1024], f16, stride=(66560, 1)), T([1024, 512], f16)), {}) +cnt: 6, ((T([16448, 512], f16), T([512, 2048], f16)), {}) +cnt: 6, ((T([512, 16448], f16, stride=(1, 512)), T([16448, 2048], f16)), {}) +cnt: 6, ((T([16448, 2048], f16), T([2048, 512], f16)), {}) +cnt: 6, ((T([2048, 16448], f16, stride=(1, 2048)), T([16448, 512], f16)), {}) +cnt: 6, ((T([16448, 512], f16), T([512, 512], f16)), {}) +cnt: 6, ((T([512, 16448], f16, stride=(1, 512)), T([16448, 512], f16)), {}) +cnt: 6, ((T([16448, 1536], 
f16), T([1536, 512], f16)), {}) +cnt: 6, ((T([1536, 16448], f16, stride=(1, 1536)), T([16448, 512], f16)), {}) +cnt: 1, ((T([512, 64], f16, stride=(1, 131584)), T([64, 256], f16, stride=(246272, 1))), {}) +cnt: 1, ((T([64, 512], f16, stride=(131584, 1)), T([512, 256], f16)), {}) +cnt: 3, ((T([61568, 256], f16), T([256, 1024], f16)), {}) +cnt: 3, ((T([256, 61568], f16, stride=(1, 256)), T([61568, 1024], f16)), {}) +cnt: 3, ((T([61568, 1024], f16), T([1024, 256], f16)), {}) +cnt: 3, ((T([1024, 61568], f16, stride=(1, 1024)), T([61568, 256], f16)), {}) +cnt: 3, ((T([61568, 256], f16), T([256, 256], f16)), {}) +cnt: 3, ((T([256, 61568], f16, stride=(1, 256)), T([61568, 256], f16)), {}) +cnt: 3, ((T([61568, 768], f16), T([768, 256], f16)), {}) +cnt: 3, ((T([768, 61568], f16, stride=(1, 768)), T([61568, 256], f16)), {}) +Operator: aten.mul.Tensor +cnt: 6, ((T([64, 4, 962, 962], f16), 0.125), {}) +cnt: 12, ((T([64, 8, 257, 257], f16), 0.125), {}) +cnt: 8, ((T([64, 16, 65, 65], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 6, ((T([64, 962, 256], f16), [256], T([256], f16), T([256], f16), 1e-06), {}) +cnt: 12, ((T([64, 257, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {}) +cnt: 8, ((T([64, 65, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-06), {}) +cnt: 1, ((T([64, 1, 1024], f16, stride=(66560, 1024, 1)), [1024], T([1024], f16), T([1024], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 1, ((T([64, 1, 1024], f16), T([64, 1, 1024], f16, stride=(66560, 1024, 1)), [1024], T([64, 1, 1], f32), T([64, 1, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +cnt: 8, ((T([64, 65, 1024], f16), T([64, 65, 1024], f16), [1024], T([64, 65, 1], f32), T([64, 65, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +cnt: 12, ((T([64, 257, 512], f16), T([64, 257, 512], f16), [512], T([64, 257, 1], f32), T([64, 257, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 6, ((T([64, 
962, 256], f16), T([64, 962, 256], f16), [256], T([64, 962, 1], f32), T([64, 962, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([64, 1024], f16), [64, 1, 1024], 1, 0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([64, 1, 1024], f16), [64, 1, 1024], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([64, 1, 1024], f16), [64, 65, 1024], 1, 0, 1, 1), {}) +cnt: 1, ((T([64, 65, 1024], f16), [64, 65, 1024], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([64, 256, 512], f16), [64, 257, 512], 1, 1, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 257, 512], f16), [64, 257, 512], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([64, 1, 512], f16), [64, 257, 512], 1, 0, 1, 1), {}) +cnt: 1, ((T([64, 961, 256], f16), [64, 962, 256], 1, 1, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 962, 256], f16), [64, 962, 256], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([64, 1, 256], f16), [64, 962, 256], 1, 0, 1, 1), {}) +Operator: aten.stack.default +cnt: 4, (([T([64, 16, 65, 64], f16), T([64, 16, 65, 64], f16, stride=(66560, 4160, 1, 65)), T([64, 16, 65, 64], f16)],), {}) +cnt: 6, (([T([64, 8, 257, 64], f16), T([64, 8, 257, 64], f16, stride=(131584, 16448, 1, 257)), T([64, 8, 257, 64], f16)],), {}) +cnt: 3, (([T([64, 4, 962, 64], f16), T([64, 4, 962, 64], f16, stride=(246272, 61568, 1, 962)), T([64, 4, 962, 64], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 8, ((T([4160, 1024], f16), [0], True), {}) +cnt: 4, ((T([4160, 4096], f16), [0], True), {}) +cnt: 4, ((T([4160, 3072], f16), [0], True), {}) +cnt: 1, ((T([64, 1, 1024], f16, stride=(66560, 1024, 1)), [0, 1], True), {}) +cnt: 12, ((T([16448, 512], f16), [0], True), {}) 
+cnt: 6, ((T([16448, 2048], f16), [0], True), {}) +cnt: 6, ((T([16448, 1536], f16), [0], True), {}) +cnt: 1, ((T([64, 1, 512], f16, stride=(131584, 512, 1)), [0, 1], True), {}) +cnt: 6, ((T([61568, 256], f16), [0], True), {}) +cnt: 3, ((T([61568, 1024], f16), [0], True), {}) +cnt: 3, ((T([61568, 768], f16), [0], True), {}) +cnt: 1, ((T([64, 1, 256], f16, stride=(246272, 256, 1)), [0], True), {}) +cnt: 1, ((T([64, 256, 31, 31], f16, stride=(246272, 1, 7936, 256)), [0], True), {}) +Operator: aten.unbind.int +cnt: 3, ((T([3, 64, 4, 962, 64], f16, stride=(256, 738816, 64, 768, 1)),), {}) +cnt: 6, ((T([3, 64, 8, 257, 64], f16, stride=(512, 394752, 64, 1536, 1)),), {}) +cnt: 4, ((T([3, 64, 16, 65, 64], f16, stride=(1024, 199680, 64, 3072, 1)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/pnasnet5large_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/pnasnet5large_training.txt new file mode 100644 index 000000000..c6d164aa5 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/pnasnet5large_training.txt @@ -0,0 +1,293 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([16, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([16, 1000], f16), T([16, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([], i64), 1), {}) +cnt: 5, ((T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16)), {}) +cnt: 5, ((T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16)), {}) +cnt: 44, ((T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16)), {}) +cnt: 38, ((T([16, 432, 21, 21], f16), T([16, 432, 21, 21], f16)), {}) +cnt: 38, ((T([16, 864, 11, 11], f16), T([16, 864, 11, 11], f16)), {}) +cnt: 7, ((T([16, 864, 11, 11], f16, stride=(522720, 121, 11, 1)), T([16, 864, 11, 11], f16)), {}) +cnt: 2, ((T([16, 4320, 11, 11], f16), T([16, 4320, 11, 11], f16)), {}) +cnt: 5, ((T([16, 2160, 21, 21], f16), T([16, 2160, 21, 21], f16)), {}) +cnt: 7, ((T([16, 864, 21, 21], 
f16), T([16, 864, 21, 21], f16)), {}) +cnt: 7, ((T([16, 432, 21, 21], f16, stride=(952560, 441, 21, 1)), T([16, 432, 21, 21], f16)), {}) +cnt: 5, ((T([16, 1080, 42, 42], f16), T([16, 1080, 42, 42], f16)), {}) +cnt: 7, ((T([16, 432, 42, 42], f16), T([16, 432, 42, 42], f16)), {}) +cnt: 8, ((T([16, 216, 42, 42], f16, stride=(1905120, 1764, 42, 1)), T([16, 216, 42, 42], f16)), {}) +cnt: 1, ((T([16, 540, 42, 42], f16), T([16, 540, 42, 42], f16)), {}) +cnt: 2, ((T([16, 270, 83, 83], f16), T([16, 270, 83, 83], f16)), {}) +cnt: 7, ((T([16, 108, 83, 83], f16), T([16, 108, 83, 83], f16)), {}) +cnt: 1, ((T([16, 108, 42, 42], f16, stride=(952560, 1764, 42, 1)), T([16, 108, 42, 42], f16)), {}) +cnt: 5, ((T([16, 96, 165, 165], f16), T([16, 96, 165, 165], f16)), {}) +cnt: 5, ((T([16, 54, 165, 165], f16), T([16, 54, 165, 165], f16)), {}) +cnt: 1, ((T([16, 54, 83, 83], f16, stride=(1860030, 6889, 83, 1)), T([16, 54, 83, 83], f16)), {}) +Operator: aten.add_.Tensor +cnt: 200, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([16, 4320], f16), T([4320, 1000], f16, stride=(1, 4320))), {}) +Operator: aten.avg_pool2d.default +cnt: 2, ((T([16, 96, 165, 165], f16), [1, 1], [2, 2], [0, 0], False, False), {}) +cnt: 2, ((T([16, 270, 83, 83], f16), [1, 1], [2, 2], [0, 0], False, False), {}) +cnt: 2, ((T([16, 1080, 42, 42], f16), [1, 1], [2, 2], [0, 0], False, False), {}) +cnt: 2, ((T([16, 2160, 21, 21], f16), [1, 1], [2, 2], [0, 0], False, False), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 2, ((T([16, 2160, 11, 11], f16), T([16, 2160, 21, 21], f16), [1, 1], [2, 2], [0, 0], False, False, None), {}) +cnt: 2, ((T([16, 1080, 21, 21], f16), T([16, 1080, 42, 42], f16), [1, 1], [2, 2], [0, 0], False, False, None), {}) +cnt: 2, ((T([16, 270, 42, 42], f16), T([16, 270, 83, 83], f16), [1, 1], [2, 2], [0, 0], False, False, None), {}) +cnt: 2, ((T([16, 96, 83, 83], f16), T([16, 96, 165, 165], f16), [1, 1], [2, 2], [0, 0], False, False, None), {}) +Operator: 
aten.cat.default +cnt: 1, (([T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16)], 1), {}) +cnt: 1, (([T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16)], 1), {}) +cnt: 1, (([T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16)], 1), {}) +cnt: 1, (([T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16)], 1), {}) +cnt: 4, (([T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16)], 1), {}) +cnt: 4, (([T([16, 432, 21, 21], f16), T([16, 432, 21, 21], f16), T([16, 432, 21, 21], f16), T([16, 432, 21, 21], f16), T([16, 432, 21, 21], f16)], 1), {}) +cnt: 1, (([T([16, 216, 21, 21], f16), T([16, 216, 21, 21], f16)], 1), {}) +cnt: 4, (([T([16, 864, 11, 11], f16), T([16, 864, 11, 11], f16), T([16, 864, 11, 11], f16), T([16, 864, 11, 11], f16), T([16, 864, 11, 11], f16)], 1), {}) +cnt: 1, (([T([16, 432, 11, 11], f16), T([16, 432, 11, 11], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([16, 3, 331, 331], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([16, 96, 165, 165], f16), [2, 2, 2, 2], 0.0), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), [1, 1, 1, 1], -inf), {}) +cnt: 1, ((T([16, 54, 165, 165], f16), [3, 3, 3, 3], 0.0), {}) +cnt: 2, ((T([16, 54, 165, 165], f16), [1, 1, 1, 1], -inf), {}) +cnt: 1, ((T([16, 54, 165, 165], f16), [2, 2, 2, 2], 0.0), {}) +cnt: 1, ((T([16, 54, 165, 165], f16), [1, 1, 1, 1], 0.0), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), [1, 1, 1, 1], 0.0), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), [-1, 1, -1, 1], 0.0), {}) +cnt: 2, ((T([16, 108, 83, 83], f16), [2, 2, 2, 2], 0.0), {}) +cnt: 3, ((T([16, 108, 83, 83], f16), [1, 1, 1, 1], -inf), {}) +cnt: 1, ((T([16, 108, 83, 83], f16), [3, 3, 3, 3], 0.0), {}) +cnt: 2, ((T([16, 108, 83, 83], f16), [1, 1, 1, 1], 0.0), {}) +cnt: 1, ((T([16, 270, 83, 83], f16), 
[-1, 1, -1, 1], 0.0), {}) +cnt: 2, ((T([16, 432, 42, 42], f16), [1, 2, 1, 2], 0.0), {}) +cnt: 3, ((T([16, 432, 42, 42], f16), [0, 1, 0, 1], -inf), {}) +cnt: 1, ((T([16, 432, 42, 42], f16), [2, 3, 2, 3], 0.0), {}) +cnt: 2, ((T([16, 432, 42, 42], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([16, 1080, 42, 42], f16), [-1, 1, -1, 1], 0.0), {}) +cnt: 2, ((T([16, 864, 21, 21], f16), [2, 2, 2, 2], 0.0), {}) +cnt: 3, ((T([16, 864, 21, 21], f16), [1, 1, 1, 1], -inf), {}) +cnt: 1, ((T([16, 864, 21, 21], f16), [3, 3, 3, 3], 0.0), {}) +cnt: 2, ((T([16, 864, 21, 21], f16), [1, 1, 1, 1], 0.0), {}) +cnt: 1, ((T([16, 2160, 21, 21], f16), [-1, 1, -1, 1], 0.0), {}) +cnt: 1, ((T([16, 2160, 21, 21], f16), [1, -1, 1, -1]), {}) +cnt: 5, ((T([16, 864, 23, 23], f16), [-1, -1, -1, -1]), {}) +cnt: 2, ((T([16, 864, 25, 25], f16), [-2, -2, -2, -2]), {}) +cnt: 1, ((T([16, 864, 27, 27], f16), [-3, -3, -3, -3]), {}) +cnt: 1, ((T([16, 1080, 42, 42], f16), [1, -1, 1, -1]), {}) +cnt: 5, ((T([16, 432, 43, 43], f16), [0, -1, 0, -1]), {}) +cnt: 2, ((T([16, 432, 45, 45], f16), [-1, -2, -1, -2]), {}) +cnt: 1, ((T([16, 432, 47, 47], f16), [-2, -3, -2, -3]), {}) +cnt: 1, ((T([16, 270, 83, 83], f16), [1, -1, 1, -1]), {}) +cnt: 5, ((T([16, 108, 85, 85], f16), [-1, -1, -1, -1]), {}) +cnt: 2, ((T([16, 108, 87, 87], f16), [-2, -2, -2, -2]), {}) +cnt: 1, ((T([16, 108, 89, 89], f16), [-3, -3, -3, -3]), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), [1, -1, 1, -1]), {}) +cnt: 2, ((T([16, 96, 167, 167], f16), [-1, -1, -1, -1]), {}) +cnt: 3, ((T([16, 54, 167, 167], f16), [-1, -1, -1, -1]), {}) +cnt: 1, ((T([16, 54, 169, 169], f16), [-2, -2, -2, -2]), {}) +cnt: 1, ((T([16, 54, 171, 171], f16), [-3, -3, -3, -3]), {}) +cnt: 1, ((T([16, 96, 169, 169], f16), [-2, -2, -2, -2]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([16, 3, 331, 331], f16), T([96, 3, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), T([54, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, 
[0, 0], 1), {}) +cnt: 1, ((T([16, 96, 169, 169], f16), T([96, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 96), {}) +cnt: 5, ((T([16, 96, 83, 83], f16), T([54, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 54, 83, 83], f16), T([54, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 54), {}) +cnt: 10, ((T([16, 54, 83, 83], f16), T([54, 54, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 54, 171, 171], f16), T([54, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 54), {}) +cnt: 1, ((T([16, 54, 83, 83], f16), T([54, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 54), {}) +cnt: 1, ((T([16, 54, 169, 169], f16), T([54, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 54), {}) +cnt: 1, ((T([16, 54, 167, 167], f16), T([54, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 54), {}) +cnt: 4, ((T([16, 54, 83, 83], f16), T([54, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 54), {}) +cnt: 1, ((T([16, 96, 167, 167], f16), T([96, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 96), {}) +cnt: 1, ((T([16, 54, 165, 165], f16), T([54, 54, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 270, 83, 83], f16), T([108, 270, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 108, 87, 87], f16), T([108, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 108), {}) +cnt: 12, ((T([16, 108, 42, 42], f16), T([108, 108, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 108, 42, 42], f16), T([108, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 108), {}) +cnt: 1, ((T([16, 108, 89, 89], f16), T([108, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 108), {}) +cnt: 1, ((T([16, 108, 42, 42], f16), T([108, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 108), {}) +cnt: 2, 
((T([16, 108, 85, 85], f16), T([108, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 108), {}) +cnt: 4, ((T([16, 108, 42, 42], f16), T([108, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 108), {}) +cnt: 1, ((T([16, 108, 83, 83], f16), T([108, 108, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 270, 42, 42], f16), T([108, 270, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 540, 42, 42], f16), T([216, 540, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 16, ((T([16, 216, 42, 42], f16), T([216, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 216), {}) +cnt: 48, ((T([16, 216, 42, 42], f16), T([216, 216, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([16, 216, 42, 42], f16), T([216, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 216), {}) +cnt: 24, ((T([16, 216, 42, 42], f16), T([216, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 216), {}) +cnt: 5, ((T([16, 1080, 42, 42], f16), T([216, 1080, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 1080, 42, 42], f16), T([432, 1080, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 432, 45, 45], f16), T([432, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 432), {}) +cnt: 48, ((T([16, 432, 21, 21], f16), T([432, 432, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 14, ((T([16, 432, 21, 21], f16), T([432, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 432), {}) +cnt: 1, ((T([16, 432, 47, 47], f16), T([432, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 432), {}) +cnt: 7, ((T([16, 432, 21, 21], f16), T([432, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 432), {}) +cnt: 2, ((T([16, 432, 43, 43], f16), T([432, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 432), {}) +cnt: 22, 
((T([16, 432, 21, 21], f16), T([432, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 432), {}) +cnt: 1, ((T([16, 432, 42, 42], f16), T([432, 432, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 1080, 21, 21], f16), T([216, 1080, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([16, 2160, 21, 21], f16), T([432, 2160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 2160, 21, 21], f16), T([864, 2160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 864, 25, 25], f16), T([864, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 864), {}) +cnt: 48, ((T([16, 864, 11, 11], f16), T([864, 864, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 14, ((T([16, 864, 11, 11], f16), T([864, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 864), {}) +cnt: 1, ((T([16, 864, 27, 27], f16), T([864, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 864), {}) +cnt: 7, ((T([16, 864, 11, 11], f16), T([864, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 864), {}) +cnt: 2, ((T([16, 864, 23, 23], f16), T([864, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 864), {}) +cnt: 22, ((T([16, 864, 11, 11], f16), T([864, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 864), {}) +cnt: 1, ((T([16, 864, 21, 21], f16), T([864, 864, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([16, 2160, 11, 11], f16), T([432, 2160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([16, 4320, 11, 11], f16), T([864, 4320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 48, ((T([16, 864, 11, 11], f16), T([16, 864, 11, 11], f16), T([864, 864, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 22, ((T([16, 864, 11, 
11], f16), T([16, 864, 11, 11], f16), T([864, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 864, [True, True, False]), {}) +cnt: 14, ((T([16, 864, 11, 11], f16), T([16, 864, 11, 11], f16), T([864, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 864, [True, True, False]), {}) +cnt: 7, ((T([16, 864, 11, 11], f16), T([16, 864, 11, 11], f16), T([864, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 864, [True, True, False]), {}) +cnt: 5, ((T([16, 864, 11, 11], f16), T([16, 4320, 11, 11], f16), T([864, 4320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 432, 11, 11], f16, stride=(104544, 121, 11, 1)), T([16, 2160, 11, 11], f16), T([432, 2160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 864, 11, 11], f16), T([16, 864, 21, 21], f16), T([864, 864, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 864, 11, 11], f16), T([16, 864, 23, 23], f16), T([864, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 864, [True, True, False]), {}) +cnt: 2, ((T([16, 864, 11, 11], f16), T([16, 864, 25, 25], f16), T([864, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 864, [True, True, False]), {}) +cnt: 1, ((T([16, 864, 11, 11], f16), T([16, 864, 27, 27], f16), T([864, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 864, [True, True, False]), {}) +cnt: 2, ((T([16, 864, 21, 21], f16), T([16, 2160, 21, 21], f16), T([864, 2160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 48, ((T([16, 432, 21, 21], f16), T([16, 432, 21, 21], f16), T([432, 432, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 22, ((T([16, 432, 21, 21], f16), T([16, 432, 21, 21], f16), T([432, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 432, [True, True, False]), {}) +cnt: 14, ((T([16, 
432, 21, 21], f16), T([16, 432, 21, 21], f16), T([432, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 432, [True, True, False]), {}) +cnt: 7, ((T([16, 432, 21, 21], f16), T([16, 432, 21, 21], f16), T([432, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 432, [True, True, False]), {}) +cnt: 5, ((T([16, 432, 21, 21], f16), T([16, 2160, 21, 21], f16), T([432, 2160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 216, 21, 21], f16, stride=(190512, 441, 21, 1)), T([16, 1080, 21, 21], f16), T([216, 1080, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 432, 21, 21], f16), T([16, 432, 42, 42], f16), T([432, 432, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 432, 21, 21], f16), T([16, 432, 43, 43], f16), T([432, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 432, [True, True, False]), {}) +cnt: 2, ((T([16, 432, 21, 21], f16), T([16, 432, 45, 45], f16), T([432, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 432, [True, True, False]), {}) +cnt: 1, ((T([16, 432, 21, 21], f16), T([16, 432, 47, 47], f16), T([432, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 432, [True, True, False]), {}) +cnt: 2, ((T([16, 432, 42, 42], f16), T([16, 1080, 42, 42], f16), T([432, 1080, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 48, ((T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16), T([216, 216, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 24, ((T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16), T([216, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 216, [True, True, False]), {}) +cnt: 16, ((T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16), T([216, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 216, [True, True, False]), {}) +cnt: 8, 
((T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16), T([216, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 216, [True, True, False]), {}) +cnt: 5, ((T([16, 216, 42, 42], f16), T([16, 1080, 42, 42], f16), T([216, 1080, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 216, 42, 42], f16), T([16, 540, 42, 42], f16), T([216, 540, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 108, 42, 42], f16, stride=(381024, 1764, 42, 1)), T([16, 270, 42, 42], f16), T([108, 270, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 108, 42, 42], f16), T([16, 108, 83, 83], f16), T([108, 108, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 12, ((T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16), T([108, 108, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16), T([108, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 108, [True, True, False]), {}) +cnt: 2, ((T([16, 108, 42, 42], f16), T([16, 108, 85, 85], f16), T([108, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 108, [True, True, False]), {}) +cnt: 2, ((T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16), T([108, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 108, [True, True, False]), {}) +cnt: 2, ((T([16, 108, 42, 42], f16), T([16, 108, 87, 87], f16), T([108, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 108, [True, True, False]), {}) +cnt: 1, ((T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16), T([108, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 108, [True, True, False]), {}) +cnt: 1, ((T([16, 108, 42, 42], f16), T([16, 108, 89, 89], f16), T([108, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 108, [True, True, False]), {}) +cnt: 
1, ((T([16, 108, 83, 83], f16), T([16, 270, 83, 83], f16), T([108, 270, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([16, 54, 83, 83], f16, stride=(744012, 6889, 83, 1)), T([16, 96, 83, 83], f16), T([54, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 54, 83, 83], f16), T([16, 54, 165, 165], f16), T([54, 54, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 10, ((T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16), T([54, 54, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16), T([54, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 54, [True, True, False]), {}) +cnt: 3, ((T([16, 54, 83, 83], f16), T([16, 96, 83, 83], f16), T([54, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 96, 83, 83], f16), T([16, 96, 167, 167], f16), T([96, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([16, 54, 83, 83], f16), T([16, 54, 167, 167], f16), T([54, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 54, [True, True, False]), {}) +cnt: 2, ((T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16), T([54, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 54, [True, True, False]), {}) +cnt: 1, ((T([16, 54, 83, 83], f16), T([16, 54, 169, 169], f16), T([54, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 54, [True, True, False]), {}) +cnt: 1, ((T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16), T([54, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 54, [True, True, False]), {}) +cnt: 1, ((T([16, 54, 83, 83], f16), T([16, 54, 171, 171], f16), T([54, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 54, [True, True, False]), {}) +cnt: 1, ((T([16, 96, 83, 83], f16), 
T([16, 96, 169, 169], f16), T([96, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([16, 54, 165, 165], f16), T([16, 96, 165, 165], f16), T([54, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), T([16, 3, 331, 331], f16), T([96, 3, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([16, 3, 331, 331], f16), T([16, 3, 331, 331], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([16, 4320, 11, 11], f16, stride=(4320, 1, 0, 0)), 121), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([16], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([16, 96, 167, 167], f16), [3, 3], [2, 2]), {}) +cnt: 2, ((T([16, 54, 167, 167], f16), [3, 3], [2, 2]), {}) +cnt: 3, ((T([16, 108, 85, 85], f16), [3, 3], [2, 2]), {}) +cnt: 12, ((T([16, 216, 42, 42], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 3, ((T([16, 432, 43, 43], f16), [3, 3], [2, 2]), {}) +cnt: 9, ((T([16, 432, 21, 21], f16), [3, 3], [1, 1], [1, 1]), {}) +cnt: 3, ((T([16, 864, 23, 23], f16), [3, 3], [2, 2]), {}) +cnt: 9, ((T([16, 864, 11, 11], f16), [3, 3], [1, 1], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 9, ((T([16, 864, 11, 11], f16, stride=(522720, 121, 11, 1)), T([16, 864, 11, 11], f16), [3, 3], [1, 1], [1, 1], [1, 1], False, T([16, 864, 11, 11], i64)), {}) +cnt: 3, ((T([16, 864, 11, 11], f16, stride=(522720, 121, 11, 1)), T([16, 864, 23, 23], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 864, 11, 11], i64)), {}) +cnt: 9, ((T([16, 432, 21, 21], f16, stride=(952560, 441, 21, 1)), T([16, 432, 21, 21], f16), [3, 3], [1, 1], [1, 1], [1, 1], False, T([16, 432, 21, 21], i64)), {}) +cnt: 3, ((T([16, 432, 21, 21], f16, stride=(952560, 441, 21, 1)), T([16, 432, 43, 43], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 432, 21, 21], i64)), {}) +cnt: 12, 
((T([16, 216, 42, 42], f16, stride=(1905120, 1764, 42, 1)), T([16, 216, 42, 42], f16), [3, 3], [1, 1], [1, 1], [1, 1], False, T([16, 216, 42, 42], i64)), {}) +cnt: 3, ((T([16, 108, 42, 42], f16, stride=(952560, 1764, 42, 1)), T([16, 108, 85, 85], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 108, 42, 42], i64)), {}) +cnt: 2, ((T([16, 54, 83, 83], f16, stride=(1860030, 6889, 83, 1)), T([16, 54, 167, 167], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 54, 83, 83], i64)), {}) +cnt: 1, ((T([16, 96, 83, 83], f16), T([16, 96, 167, 167], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([16, 96, 83, 83], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([16, 4320, 11, 11], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([16, 1000], f16), T([1000, 4320], f16)), {}) +cnt: 1, ((T([1000, 16], f16, stride=(1, 1000)), T([16, 4320], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([16, 96, 165, 165], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([16, 54, 165, 165], f16), T([54], f16), T([54], f16), T([54], f16), T([54], f16), True, 0.1, 0.001), {}) +cnt: 14, ((T([16, 54, 83, 83], f16), T([54], f16), T([54], f16), T([54], f16), T([54], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([16, 108, 83, 83], f16), T([108], f16), T([108], f16), T([108], f16), T([108], f16), True, 0.1, 0.001), {}) +cnt: 13, ((T([16, 108, 42, 42], f16), T([108], f16), T([108], f16), T([108], f16), T([108], f16), True, 0.1, 0.001), {}) +cnt: 56, ((T([16, 216, 42, 42], f16), T([216], f16), T([216], f16), T([216], f16), T([216], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([16, 432, 42, 42], f16), T([432], f16), T([432], f16), T([432], f16), T([432], f16), True, 0.1, 0.001), {}) +cnt: 55, ((T([16, 432, 21, 21], f16), T([432], f16), T([432], f16), T([432], f16), T([432], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([16, 864, 21, 21], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f16), True, 0.1, 0.001), {}) +cnt: 
55, ((T([16, 864, 11, 11], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f16), True, 0.1, 0.001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 17, ((T([16, 864, 11, 11], f16, stride=(522720, 121, 11, 1)), T([16, 864, 11, 11], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f32), T([864], f32), True, 0.001, [True, True, True]), {}) +cnt: 38, ((T([16, 864, 11, 11], f16), T([16, 864, 11, 11], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f32), T([864], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([16, 864, 21, 21], f16), T([16, 864, 21, 21], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f32), T([864], f32), True, 0.001, [True, True, True]), {}) +cnt: 17, ((T([16, 432, 21, 21], f16, stride=(952560, 441, 21, 1)), T([16, 432, 21, 21], f16), T([432], f16), T([432], f16), T([432], f16), T([432], f32), T([432], f32), True, 0.001, [True, True, True]), {}) +cnt: 38, ((T([16, 432, 21, 21], f16), T([16, 432, 21, 21], f16), T([432], f16), T([432], f16), T([432], f16), T([432], f32), T([432], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([16, 432, 42, 42], f16), T([16, 432, 42, 42], f16), T([432], f16), T([432], f16), T([432], f16), T([432], f32), T([432], f32), True, 0.001, [True, True, True]), {}) +cnt: 16, ((T([16, 216, 42, 42], f16, stride=(1905120, 1764, 42, 1)), T([16, 216, 42, 42], f16), T([216], f16), T([216], f16), T([216], f16), T([216], f32), T([216], f32), True, 0.001, [True, True, True]), {}) +cnt: 40, ((T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16), T([216], f16), T([216], f16), T([216], f16), T([216], f32), T([216], f32), True, 0.001, [True, True, True]), {}) +cnt: 5, ((T([16, 108, 42, 42], f16, stride=(952560, 1764, 42, 1)), T([16, 108, 42, 42], f16), T([108], f16), T([108], f16), T([108], f16), T([108], f32), T([108], f32), True, 0.001, [True, True, True]), {}) +cnt: 8, ((T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16), T([108], f16), T([108], f16), T([108], 
f16), T([108], f32), T([108], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([16, 108, 83, 83], f16), T([16, 108, 83, 83], f16), T([108], f16), T([108], f16), T([108], f16), T([108], f32), T([108], f32), True, 0.001, [True, True, True]), {}) +cnt: 6, ((T([16, 54, 83, 83], f16, stride=(1860030, 6889, 83, 1)), T([16, 54, 83, 83], f16), T([54], f16), T([54], f16), T([54], f16), T([54], f32), T([54], f32), True, 0.001, [True, True, True]), {}) +cnt: 8, ((T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16), T([54], f16), T([54], f16), T([54], f16), T([54], f32), T([54], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([16, 54, 165, 165], f16), T([16, 54, 165, 165], f16), T([54], f16), T([54], f16), T([54], f16), T([54], f32), T([54], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([16, 96, 165, 165], f16), T([16, 96, 165, 165], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 0.001, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([16, 1000], f16), T([16], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([16, 1000], f16), T([16], i64), None, 1, -100), {}) +Operator: aten.relu.default +cnt: 4, ((T([16, 96, 165, 165], f16),), {}) +cnt: 7, ((T([16, 54, 83, 83], f16),), {}) +cnt: 4, ((T([16, 54, 165, 165], f16),), {}) +cnt: 2, ((T([16, 270, 83, 83], f16),), {}) +cnt: 6, ((T([16, 108, 83, 83], f16),), {}) +cnt: 7, ((T([16, 108, 42, 42], f16),), {}) +cnt: 2, ((T([16, 540, 42, 42], f16),), {}) +cnt: 48, ((T([16, 216, 42, 42], f16),), {}) +cnt: 8, ((T([16, 1080, 42, 42], f16),), {}) +cnt: 6, ((T([16, 432, 42, 42], f16),), {}) +cnt: 43, ((T([16, 432, 21, 21], f16),), {}) +cnt: 8, ((T([16, 2160, 21, 21], f16),), {}) +cnt: 6, ((T([16, 864, 21, 21], f16),), {}) +cnt: 43, ((T([16, 864, 11, 11], f16),), {}) +cnt: 6, ((T([16, 4320, 11, 11], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([16, 1000], f16), [0], True), {}) +Operator: 
aten.threshold_backward.default +cnt: 6, ((T([16, 4320, 11, 11], f16), T([16, 4320, 11, 11], f16), 0), {}) +cnt: 43, ((T([16, 864, 11, 11], f16), T([16, 864, 11, 11], f16), 0), {}) +cnt: 8, ((T([16, 2160, 21, 21], f16), T([16, 2160, 21, 21], f16), 0), {}) +cnt: 6, ((T([16, 864, 21, 21], f16), T([16, 864, 21, 21], f16), 0), {}) +cnt: 43, ((T([16, 432, 21, 21], f16), T([16, 432, 21, 21], f16), 0), {}) +cnt: 8, ((T([16, 1080, 42, 42], f16), T([16, 1080, 42, 42], f16), 0), {}) +cnt: 6, ((T([16, 432, 42, 42], f16), T([16, 432, 42, 42], f16), 0), {}) +cnt: 48, ((T([16, 216, 42, 42], f16), T([16, 216, 42, 42], f16), 0), {}) +cnt: 2, ((T([16, 540, 42, 42], f16), T([16, 540, 42, 42], f16), 0), {}) +cnt: 2, ((T([16, 270, 83, 83], f16), T([16, 270, 83, 83], f16), 0), {}) +cnt: 6, ((T([16, 108, 83, 83], f16), T([16, 108, 83, 83], f16), 0), {}) +cnt: 7, ((T([16, 108, 42, 42], f16), T([16, 108, 42, 42], f16), 0), {}) +cnt: 4, ((T([16, 96, 165, 165], f16), T([16, 96, 165, 165], f16), 0), {}) +cnt: 4, ((T([16, 54, 165, 165], f16), T([16, 54, 165, 165], f16), 0), {}) +cnt: 7, ((T([16, 54, 83, 83], f16), T([16, 54, 83, 83], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/poolformer_m36_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/poolformer_m36_training.txt new file mode 100644 index 000000000..2cbc4a779 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/poolformer_m36_training.txt @@ -0,0 +1,111 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 30, ((T([64, 96, 56, 56], f16), T([64, 96, 56, 56], f16)), {}) +cnt: 30, ((T([64, 192, 28, 28], f16), T([64, 192, 28, 28], f16)), {}) +cnt: 90, ((T([64, 384, 14, 14], f16), T([64, 384, 14, 14], f16)), {}) +cnt: 30, ((T([64, 768, 7, 7], f16), T([64, 768, 7, 7], 
f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([64, 768], f16), T([768, 1000], f16, stride=(1, 768))), {}) +Operator: aten.avg_pool2d.default +cnt: 6, ((T([64, 96, 56, 56], f16), [3, 3], [1, 1], [1, 1], False, False), {}) +cnt: 6, ((T([64, 192, 28, 28], f16), [3, 3], [1, 1], [1, 1], False, False), {}) +cnt: 18, ((T([64, 384, 14, 14], f16), [3, 3], [1, 1], [1, 1], False, False), {}) +cnt: 6, ((T([64, 768, 7, 7], f16), [3, 3], [1, 1], [1, 1], False, False), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 6, ((T([64, 768, 7, 7], f16), T([64, 768, 7, 7], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +cnt: 18, ((T([64, 384, 14, 14], f16), T([64, 384, 14, 14], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +cnt: 6, ((T([64, 192, 28, 28], f16), T([64, 192, 28, 28], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +cnt: 6, ((T([64, 96, 56, 56], f16), T([64, 96, 56, 56], f16), [3, 3], [1, 1], [1, 1], False, False, None), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([96, 3, 7, 7], f16), T([96], f16), [4, 4], [2, 2], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 96, 56, 56], f16), T([384, 96, 1, 1], f16), T([384], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 384, 56, 56], f16), T([96, 384, 1, 1], f16), T([96], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 96, 56, 56], f16), T([192, 96, 3, 3], f16), T([192], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 192, 28, 28], f16), T([768, 192, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 768, 28, 28], f16), T([192, 768, 1, 1], f16), T([192], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 192, 28, 28], f16), T([384, 192, 3, 3], f16), T([384], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 18, ((T([64, 384, 14, 14], 
f16), T([1536, 384, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 18, ((T([64, 1536, 14, 14], f16), T([384, 1536, 1, 1], f16), T([384], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 384, 14, 14], f16), T([768, 384, 3, 3], f16), T([768], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 768, 7, 7], f16), T([3072, 768, 1, 1], f16), T([3072], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 3072, 7, 7], f16), T([768, 3072, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 6, ((T([64, 768, 7, 7], f16), T([64, 3072, 7, 7], f16), T([768, 3072, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 6, ((T([64, 3072, 7, 7], f16), T([64, 768, 7, 7], f16), T([3072, 768, 1, 1], f16), [3072], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 768, 7, 7], f16), T([64, 384, 14, 14], f16), T([768, 384, 3, 3], f16), [768], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 18, ((T([64, 384, 14, 14], f16), T([64, 1536, 14, 14], f16), T([384, 1536, 1, 1], f16), [384], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 18, ((T([64, 1536, 14, 14], f16), T([64, 384, 14, 14], f16), T([1536, 384, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 384, 14, 14], f16), T([64, 192, 28, 28], f16), T([384, 192, 3, 3], f16), [384], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 6, ((T([64, 192, 28, 28], f16), T([64, 768, 28, 28], f16), T([192, 768, 1, 1], f16), [192], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 6, ((T([64, 768, 28, 28], f16), T([64, 192, 28, 28], f16), T([768, 192, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 192, 28, 
28], f16), T([64, 96, 56, 56], f16), T([192, 96, 3, 3], f16), [192], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 6, ((T([64, 96, 56, 56], f16), T([64, 384, 56, 56], f16), T([96, 384, 1, 1], f16), [96], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 6, ((T([64, 384, 56, 56], f16), T([64, 96, 56, 56], f16), T([384, 96, 1, 1], f16), [384], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 96, 56, 56], f16), T([64, 3, 224, 224], f16), T([96, 3, 7, 7], f16), [96], [4, 4], [2, 2], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 768, 7, 7], f16, stride=(768, 1, 0, 0)), 49), {}) +Operator: aten.gelu.default +cnt: 6, ((T([64, 384, 56, 56], f16),), {}) +cnt: 6, ((T([64, 768, 28, 28], f16),), {}) +cnt: 18, ((T([64, 1536, 14, 14], f16),), {}) +cnt: 6, ((T([64, 3072, 7, 7], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 6, ((T([64, 3072, 7, 7], f16), T([64, 3072, 7, 7], f16)), {}) +cnt: 18, ((T([64, 1536, 14, 14], f16), T([64, 1536, 14, 14], f16)), {}) +cnt: 6, ((T([64, 768, 28, 28], f16), T([64, 768, 28, 28], f16)), {}) +cnt: 6, ((T([64, 384, 56, 56], f16), T([64, 384, 56, 56], f16)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 768, 7, 7], f16), [-2, -1]), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 768], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 12, ((T([96, 1, 1], f16), T([64, 96, 56, 56], f16)), {}) +cnt: 12, ((T([192, 1, 1], f16), T([64, 192, 28, 28], f16)), {}) +cnt: 36, ((T([384, 1, 1], f16), T([64, 384, 14, 14], f16)), {}) +cnt: 12, ((T([768, 1, 1], f16), T([64, 768, 7, 7], f16)), {}) +cnt: 12, ((T([64, 768, 7, 7], f16), T([768, 1, 1], f16)), {}) +cnt: 12, ((T([64, 768, 7, 7], f16), T([64, 768, 7, 7], f16)), {}) +cnt: 36, 
((T([64, 384, 14, 14], f16), T([384, 1, 1], f16)), {}) +cnt: 36, ((T([64, 384, 14, 14], f16), T([64, 384, 14, 14], f16)), {}) +cnt: 12, ((T([64, 192, 28, 28], f16), T([192, 1, 1], f16)), {}) +cnt: 12, ((T([64, 192, 28, 28], f16), T([64, 192, 28, 28], f16)), {}) +cnt: 12, ((T([64, 96, 56, 56], f16), T([96, 1, 1], f16)), {}) +cnt: 12, ((T([64, 96, 56, 56], f16), T([64, 96, 56, 56], f16)), {}) +Operator: aten.native_group_norm.default +cnt: 12, ((T([64, 96, 56, 56], f16), T([96], f16), T([96], f16), 64, 96, 3136, 1, 1e-05), {}) +cnt: 12, ((T([64, 192, 28, 28], f16), T([192], f16), T([192], f16), 64, 192, 784, 1, 1e-05), {}) +cnt: 36, ((T([64, 384, 14, 14], f16), T([384], f16), T([384], f16), 64, 384, 196, 1, 1e-05), {}) +cnt: 13, ((T([64, 768, 7, 7], f16), T([768], f16), T([768], f16), 64, 768, 49, 1, 1e-05), {}) +Operator: aten.native_group_norm_backward.default +cnt: 13, ((T([64, 768, 7, 7], f16), T([64, 768, 7, 7], f16), T([64, 1], f16), T([64, 1], f16), T([768], f16), 64, 768, 49, 1, [True, True, True]), {}) +cnt: 36, ((T([64, 384, 14, 14], f16), T([64, 384, 14, 14], f16), T([64, 1], f16), T([64, 1], f16), T([384], f16), 64, 384, 196, 1, [True, True, True]), {}) +cnt: 12, ((T([64, 192, 28, 28], f16), T([64, 192, 28, 28], f16), T([64, 1], f16), T([64, 1], f16), T([192], f16), 64, 192, 784, 1, [True, True, True]), {}) +cnt: 12, ((T([64, 96, 56, 56], f16), T([64, 96, 56, 56], f16), T([64, 1], f16), T([64, 1], f16), T([96], f16), 64, 96, 3136, 1, [True, True, True]), {}) +Operator: aten.neg.default +cnt: 6, ((T([64, 768, 7, 7], f16),), {}) +cnt: 18, ((T([64, 384, 14, 14], f16),), {}) +cnt: 6, ((T([64, 192, 28, 28], f16),), {}) +cnt: 6, ((T([64, 96, 56, 56], f16),), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.sub.Tensor +cnt: 6, ((T([64, 96, 56, 56], f16), 
T([64, 96, 56, 56], f16)), {}) +cnt: 6, ((T([64, 192, 28, 28], f16), T([64, 192, 28, 28], f16)), {}) +cnt: 18, ((T([64, 384, 14, 14], f16), T([64, 384, 14, 14], f16)), {}) +cnt: 6, ((T([64, 768, 7, 7], f16), T([64, 768, 7, 7], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 12, ((T([64, 768, 7, 7], f16), [0, 2, 3], True), {}) +cnt: 36, ((T([64, 384, 14, 14], f16), [0, 2, 3], True), {}) +cnt: 12, ((T([64, 192, 28, 28], f16), [0, 2, 3], True), {}) +cnt: 12, ((T([64, 96, 56, 56], f16), [0, 2, 3], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/regnety_002_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/regnety_002_training.txt new file mode 100644 index 000000000..99d7f8ac9 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/regnety_002_training.txt @@ -0,0 +1,181 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 44, ((T([], i64), 1), {}) +cnt: 3, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 3, ((T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16)), {}) +cnt: 12, ((T([128, 152, 14, 14], f16), T([128, 152, 14, 14], f16)), {}) +cnt: 20, ((T([128, 368, 7, 7], f16), T([128, 368, 7, 7], f16)), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 368], f16), T([368, 1000], f16, stride=(1, 368))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([24, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, 
((T([128, 24, 112, 112], f16), T([24, 8, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 3), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([8, 24, 1, 1], f16), T([8], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([24, 8, 1, 1], f16), T([24], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([24, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([24, 32, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([56, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 56, 56, 56], f16), T([56, 8, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 7), {}) +cnt: 1, ((T([128, 56, 1, 1], f16), T([6, 56, 1, 1], f16), T([6], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 6, 1, 1], f16), T([56, 6, 1, 1], f16), T([56], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 56, 28, 28], f16), T([56, 56, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([56, 24, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 56, 28, 28], f16), T([152, 56, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 152, 28, 28], f16), T([152, 8, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 19), {}) +cnt: 1, ((T([128, 152, 1, 1], f16), T([14, 152, 1, 1], f16), T([14], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 14, 1, 1], f16), T([152, 14, 1, 1], f16), T([152], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 152, 14, 14], f16), T([152, 152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 56, 28, 28], f16), T([152, 56, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, 
((T([128, 152, 14, 14], f16), T([152, 8, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 19), {}) +cnt: 3, ((T([128, 152, 1, 1], f16), T([38, 152, 1, 1], f16), T([38], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 38, 1, 1], f16), T([152, 38, 1, 1], f16), T([152], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 152, 14, 14], f16), T([368, 152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 368, 14, 14], f16), T([368, 8, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 46), {}) +cnt: 1, ((T([128, 368, 1, 1], f16), T([38, 368, 1, 1], f16), T([38], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 38, 1, 1], f16), T([368, 38, 1, 1], f16), T([368], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 13, ((T([128, 368, 7, 7], f16), T([368, 368, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 152, 14, 14], f16), T([368, 152, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([128, 368, 7, 7], f16), T([368, 8, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 46), {}) +cnt: 6, ((T([128, 368, 1, 1], f16), T([92, 368, 1, 1], f16), T([92], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([128, 92, 1, 1], f16), T([368, 92, 1, 1], f16), T([368], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 13, ((T([128, 368, 7, 7], f16), T([128, 368, 7, 7], f16), T([368, 368, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([128, 368, 1, 1], f16), T([128, 92, 1, 1], f16), T([368, 92, 1, 1], f16), [368], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 6, ((T([128, 92, 1, 1], f16), T([128, 368, 1, 1], f16), T([92, 368, 1, 1], f16), [92], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 6, ((T([128, 368, 7, 7], f16), 
T([128, 368, 7, 7], f16), T([368, 8, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 46, [True, True, False]), {}) +cnt: 1, ((T([128, 368, 7, 7], f16), T([128, 152, 14, 14], f16), T([368, 152, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 368, 1, 1], f16), T([128, 38, 1, 1], f16), T([368, 38, 1, 1], f16), [368], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 38, 1, 1], f16), T([128, 368, 1, 1], f16), T([38, 368, 1, 1], f16), [38], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 368, 7, 7], f16), T([128, 368, 14, 14], f16), T([368, 8, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 46, [True, True, False]), {}) +cnt: 1, ((T([128, 368, 14, 14], f16), T([128, 152, 14, 14], f16), T([368, 152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([128, 152, 14, 14], f16), T([128, 152, 14, 14], f16), T([152, 152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 152, 1, 1], f16), T([128, 38, 1, 1], f16), T([152, 38, 1, 1], f16), [152], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 38, 1, 1], f16), T([128, 152, 1, 1], f16), T([38, 152, 1, 1], f16), [38], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 152, 14, 14], f16), T([128, 152, 14, 14], f16), T([152, 8, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 19, [True, True, False]), {}) +cnt: 1, ((T([128, 152, 14, 14], f16), T([128, 56, 28, 28], f16), T([152, 56, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 152, 1, 1], f16), T([128, 14, 1, 1], f16), T([152, 14, 1, 1], f16), [152], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 14, 1, 1], f16), T([128, 152, 1, 1], f16), T([14, 152, 1, 1], 
f16), [14], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 152, 14, 14], f16), T([128, 152, 28, 28], f16), T([152, 8, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 19, [True, True, False]), {}) +cnt: 1, ((T([128, 152, 28, 28], f16), T([128, 56, 28, 28], f16), T([152, 56, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 56, 28, 28], f16), T([128, 24, 56, 56], f16), T([56, 24, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16), T([56, 56, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 56, 1, 1], f16), T([128, 6, 1, 1], f16), T([56, 6, 1, 1], f16), [56], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 6, 1, 1], f16), T([128, 56, 1, 1], f16), T([6, 56, 1, 1], f16), [6], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 56, 28, 28], f16), T([128, 56, 56, 56], f16), T([56, 8, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 7, [True, True, False]), {}) +cnt: 1, ((T([128, 56, 56, 56], f16), T([128, 24, 56, 56], f16), T([56, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 32, 112, 112], f16), T([24, 32, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([128, 8, 1, 1], f16), T([24, 8, 1, 1], f16), [24], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([128, 24, 1, 1], f16), T([8, 24, 1, 1], f16), [8], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), 
{}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 24, 112, 112], f16), T([24, 8, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 3, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([128, 32, 112, 112], f16), T([24, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 8, ((T([128, 368, 7, 7], f16, stride=(368, 1, 0, 0)), 49), {}) +cnt: 4, ((T([128, 152, 14, 14], f16, stride=(152, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 56, 28, 28], f16, stride=(56, 1, 0, 0)), 784), {}) +cnt: 1, ((T([128, 24, 56, 56], f16, stride=(24, 1, 0, 0)), 3136), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 24, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 56, 28, 28], f16), [2, 3], True), {}) +cnt: 4, ((T([128, 152, 14, 14], f16), [2, 3], True), {}) +cnt: 7, ((T([128, 368, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 368, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 368], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 368], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 1, 1], f16)), {}) +cnt: 2, ((T([128, 56, 28, 28], f16), T([128, 56, 1, 1], f16)), {}) +cnt: 8, ((T([128, 152, 14, 14], f16), T([128, 152, 1, 1], f16)), {}) +cnt: 14, ((T([128, 368, 7, 7], f16), T([128, 368, 1, 1], f16)), {}) +cnt: 7, ((T([128, 368, 7, 7], f16), T([128, 368, 7, 7], f16)), {}) +cnt: 4, ((T([128, 152, 14, 14], f16), T([128, 152, 14, 14], f16)), {}) +cnt: 1, ((T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16)), {}) +cnt: 1, ((T([128, 24, 56, 56], 
f16), T([128, 24, 56, 56], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 56, 56, 56], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 56, 28, 28], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 152, 28, 28], f16), T([152], f16), T([152], f16), T([152], f16), T([152], f16), True, 0.1, 1e-05), {}) +cnt: 12, ((T([128, 152, 14, 14], f16), T([152], f16), T([152], f16), T([152], f16), T([152], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 368, 14, 14], f16), T([368], f16), T([368], f16), T([368], f16), T([368], f16), True, 0.1, 1e-05), {}) +cnt: 21, ((T([128, 368, 7, 7], f16), T([368], f16), T([368], f16), T([368], f16), T([368], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 21, ((T([128, 368, 7, 7], f16), T([128, 368, 7, 7], f16), T([368], f16), T([368], f16), T([368], f16), T([368], f32), T([368], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 368, 14, 14], f16), T([128, 368, 14, 14], f16), T([368], f16), T([368], f16), T([368], f16), T([368], f32), T([368], f32), True, 1e-05, [True, True, True]), {}) +cnt: 12, ((T([128, 152, 14, 14], f16), T([128, 152, 14, 14], f16), T([152], f16), T([152], f16), T([152], f16), T([152], f32), T([152], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 152, 28, 28], f16), T([128, 152, 28, 28], f16), T([152], f16), T([152], f16), T([152], f16), T([152], f32), T([152], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16), T([56], 
f16), T([56], f16), T([56], f16), T([56], f32), T([56], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 56, 56, 56], f16), T([128, 56, 56, 56], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f32), T([56], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([128, 24, 112, 112], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu.default +cnt: 1, ((T([128, 24, 56, 56], f16),), {}) +cnt: 1, ((T([128, 56, 28, 28], f16),), {}) +cnt: 4, ((T([128, 152, 14, 14], f16),), {}) +cnt: 7, ((T([128, 368, 7, 7], f16),), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 24, 112, 112], f16),), {}) +cnt: 1, ((T([128, 24, 56, 56], f16),), {}) +cnt: 1, ((T([128, 8, 1, 1], f16),), {}) +cnt: 1, ((T([128, 56, 56, 56], f16),), {}) +cnt: 1, ((T([128, 56, 28, 28], f16),), {}) +cnt: 1, ((T([128, 6, 1, 1], f16),), {}) +cnt: 1, ((T([128, 152, 28, 28], f16),), {}) +cnt: 7, ((T([128, 152, 14, 14], f16),), {}) +cnt: 1, ((T([128, 14, 1, 1], f16),), {}) +cnt: 4, ((T([128, 38, 1, 1], f16),), {}) +cnt: 1, ((T([128, 368, 14, 14], f16),), {}) +cnt: 13, ((T([128, 368, 7, 7], f16),), {}) +cnt: 6, ((T([128, 92, 1, 1], f16),), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([128, 24, 1, 1], f16),), {}) +cnt: 1, ((T([128, 56, 1, 1], f16),), 
{}) +cnt: 4, ((T([128, 152, 1, 1], f16),), {}) +cnt: 7, ((T([128, 368, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 7, ((T([128, 368, 1, 1], f16), T([128, 368, 1, 1], f16)), {}) +cnt: 4, ((T([128, 152, 1, 1], f16), T([128, 152, 1, 1], f16)), {}) +cnt: 1, ((T([128, 56, 1, 1], f16), T([128, 56, 1, 1], f16)), {}) +cnt: 1, ((T([128, 24, 1, 1], f16), T([128, 24, 1, 1], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 7, ((T([128, 368, 7, 7], f16), [2, 3], True), {}) +cnt: 4, ((T([128, 152, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 56, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 20, ((T([128, 368, 7, 7], f16), T([128, 368, 7, 7], f16), 0), {}) +cnt: 6, ((T([128, 92, 1, 1], f16), T([128, 92, 1, 1], f16), 0), {}) +cnt: 4, ((T([128, 38, 1, 1], f16), T([128, 38, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 368, 14, 14], f16), T([128, 368, 14, 14], f16), 0), {}) +cnt: 11, ((T([128, 152, 14, 14], f16), T([128, 152, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 14, 1, 1], f16), T([128, 14, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 152, 28, 28], f16), T([128, 152, 28, 28], f16), 0), {}) +cnt: 2, ((T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 6, 1, 1], f16), T([128, 6, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 56, 56, 56], f16), T([128, 56, 56, 56], f16), 0), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([128, 8, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([128, 24, 112, 112], f16), 0), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/repvgg_a2_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/repvgg_a2_training.txt new file mode 100644 index 000000000..ff6a44e15 --- /dev/null 
+++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/repvgg_a2_training.txt @@ -0,0 +1,90 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 61, ((T([], i64), 1), {}) +cnt: 2, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16)), {}) +cnt: 6, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16)), {}) +cnt: 14, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16)), {}) +cnt: 54, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16)), {}) +cnt: 1, ((T([128, 1408, 7, 7], f16), T([128, 1408, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1408], f16), T([1408, 1000], f16, stride=(1, 1408))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([64, 3, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 3, 224, 224], f16), T([64, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([96, 64, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([96, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([96, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([96, 96, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([192, 96, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([192, 96, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 192, 28, 28], f16), T([192, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), 
{}) +cnt: 3, ((T([128, 192, 28, 28], f16), T([192, 192, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([384, 192, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([384, 192, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 13, ((T([128, 384, 14, 14], f16), T([384, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 13, ((T([128, 384, 14, 14], f16), T([384, 384, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), T([1408, 384, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), T([1408, 384, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1408, 7, 7], f16), T([128, 384, 14, 14], f16), T([1408, 384, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1408, 7, 7], f16), T([128, 384, 14, 14], f16), T([1408, 384, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 13, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), T([384, 384, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 13, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), T([384, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), T([128, 192, 28, 28], f16), T([384, 192, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), T([128, 192, 28, 28], f16), T([384, 192, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), T([192, 192, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], 
False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), T([192, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([128, 96, 56, 56], f16), T([192, 96, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([128, 96, 56, 56], f16), T([192, 96, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), T([96, 96, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), T([96, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 64, 112, 112], f16), T([96, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 64, 112, 112], f16), T([96, 64, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 3, 224, 224], f16), T([64, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 3, 224, 224], f16), T([64, 3, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1408, 7, 7], f16, stride=(1408, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 1408, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1408], f16)), {}) +cnt: 1, ((T([1000, 128], f16, 
stride=(1, 1000)), T([128, 1408], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 11, ((T([128, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 41, ((T([128, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 1408, 7, 7], f16), T([1408], f16), T([1408], f16), T([1408], f16), T([1408], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 2, ((T([128, 1408, 7, 7], f16), T([128, 1408, 7, 7], f16), T([1408], f16), T([1408], f16), T([1408], f16), T([1408], f32), T([1408], f32), True, 1e-05, [True, True, True]), {}) +cnt: 41, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {}) +cnt: 11, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 64, 112, 112], f16),), {}) +cnt: 2, ((T([128, 96, 56, 56], f16),), 
{}) +cnt: 4, ((T([128, 192, 28, 28], f16),), {}) +cnt: 14, ((T([128, 384, 14, 14], f16),), {}) +cnt: 1, ((T([128, 1408, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 1408, 7, 7], f16), T([128, 1408, 7, 7], f16), 0), {}) +cnt: 14, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), 0), {}) +cnt: 4, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), 0), {}) +cnt: 2, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/res2net101_26w_4s_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/res2net101_26w_4s_training.txt new file mode 100644 index 000000000..c669ec356 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/res2net101_26w_4s_training.txt @@ -0,0 +1,209 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 4, ((T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16, stride=(326144, 3136, 56, 1))), {}) +cnt: 6, ((T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16, stride=(163072, 784, 28, 1))), {}) +cnt: 44, ((T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16, stride=(81536, 196, 14, 1))), {}) +cnt: 4, ((T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16, stride=(40768, 49, 7, 1))), {}) +cnt: 4, ((T([64, 208, 7, 7], f16, stride=(40768, 49, 7, 1)), T([64, 208, 7, 7], f16)), {}) +cnt: 2, ((T([64, 2048, 7, 7], f16), T([64, 2048, 7, 7], f16)), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16)), {}) +cnt: 44, ((T([64, 104, 14, 14], f16, stride=(81536, 196, 14, 1)), T([64, 104, 14, 14], f16)), {}) +cnt: 4, ((T([64, 512, 28, 28], f16), T([64, 512, 
28, 28], f16)), {}) +cnt: 6, ((T([64, 52, 28, 28], f16, stride=(163072, 784, 28, 1)), T([64, 52, 28, 28], f16)), {}) +cnt: 3, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16)), {}) +cnt: 4, ((T([64, 26, 56, 56], f16, stride=(326144, 3136, 56, 1)), T([64, 26, 56, 56], f16)), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 170, ((T([], i64), 1), {}) +cnt: 3, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16)), {}) +cnt: 4, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16)), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16)), {}) +cnt: 3, ((T([64, 2048, 7, 7], f16), T([64, 2048, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([64, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([64, 26, 56, 56], f16, stride=(326144, 3136, 56, 1)), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([64, 52, 56, 56], f16, stride=(652288, 3136, 56, 1)), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([64, 104, 28, 28], f16, stride=(326144, 784, 28, 1)), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([64, 208, 14, 14], f16, stride=(163072, 196, 14, 1)), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([64, 208, 7, 7], f16, stride=(40768, 49, 7, 1)), T([64, 208, 14, 14], f16, stride=(163072, 196, 14, 1)), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([64, 104, 14, 14], f16, stride=(81536, 196, 14, 1)), T([64, 104, 28, 28], f16, stride=(326144, 784, 28, 1)), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([64, 52, 28, 28], f16, stride=(163072, 784, 28, 1)), T([64, 52, 56, 56], f16, stride=(652288, 3136, 56, 1)), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([64, 26, 56, 56], f16, stride=(326144, 3136, 56, 1)), T([64, 26, 56, 56], f16, stride=(326144, 3136, 56, 1)), [3, 3], [1, 1], [1, 1], False, True, None), {}) +Operator: aten.cat.default +cnt: 2, 
(([T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16)], 1), {}) +cnt: 4, (([T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16, stride=(326144, 3136, 56, 1))], 1), {}) +cnt: 1, (([T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16)], 1), {}) +cnt: 6, (([T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16, stride=(163072, 784, 28, 1))], 1), {}) +cnt: 1, (([T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16)], 1), {}) +cnt: 44, (([T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16, stride=(81536, 196, 14, 1))], 1), {}) +cnt: 1, (([T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16)], 1), {}) +cnt: 4, (([T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16, stride=(40768, 49, 7, 1))], 1), {}) +cnt: 1, (([T([64, 208, 14, 14], f16), T([64, 208, 14, 14], f16), T([64, 208, 14, 14], f16), T([64, 208, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 104, 28, 28], f16), T([64, 104, 28, 28], f16), T([64, 104, 28, 28], f16), T([64, 104, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 52, 56, 56], f16), T([64, 52, 56, 56], f16), T([64, 52, 56, 56], f16), T([64, 52, 56, 56], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([104, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([64, 26, 56, 56], f16, stride=(326144, 3136, 56, 1)), T([26, 26, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 104, 56, 
56], f16), T([256, 104, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 56, 56], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 256, 56, 56], f16), T([104, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([64, 26, 56, 56], f16), T([26, 26, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), T([208, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 52, 56, 56], f16, stride=(652288, 3136, 56, 1)), T([52, 52, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([64, 208, 28, 28], f16), T([512, 208, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 512, 28, 28], f16), T([208, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 52, 28, 28], f16, stride=(163072, 784, 28, 1)), T([52, 52, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 52, 28, 28], f16), T([52, 52, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), T([416, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 104, 28, 28], f16, stride=(326144, 784, 28, 1)), T([104, 104, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 23, ((T([64, 416, 14, 14], f16), T([1024, 416, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 22, ((T([64, 1024, 14, 14], f16), T([416, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 22, ((T([64, 104, 14, 14], f16, stride=(81536, 196, 14, 1)), T([104, 
104, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 44, ((T([64, 104, 14, 14], f16), T([104, 104, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16), T([832, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 208, 14, 14], f16, stride=(163072, 196, 14, 1)), T([208, 208, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 832, 7, 7], f16), T([2048, 832, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 2048, 7, 7], f16), T([832, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 208, 7, 7], f16, stride=(40768, 49, 7, 1)), T([208, 208, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([64, 208, 7, 7], f16), T([208, 208, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([64, 2048, 7, 7], f16), T([64, 832, 7, 7], f16), T([2048, 832, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16), T([208, 208, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16, stride=(40768, 49, 7, 1)), T([208, 208, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 832, 7, 7], f16), T([64, 2048, 7, 7], f16), T([832, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 2048, 7, 7], f16), T([64, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 208, 7, 7], f16), T([64, 208, 14, 
14], f16, stride=(163072, 196, 14, 1)), T([208, 208, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 832, 14, 14], f16), T([64, 1024, 14, 14], f16), T([832, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16), T([64, 416, 14, 14], f16), T([1024, 416, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 44, ((T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16), T([104, 104, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 22, ((T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16, stride=(81536, 196, 14, 1)), T([104, 104, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 22, ((T([64, 416, 14, 14], f16), T([64, 1024, 14, 14], f16), T([416, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 1024, 14, 14], f16), T([64, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 104, 14, 14], f16), T([64, 104, 28, 28], f16, stride=(326144, 784, 28, 1)), T([104, 104, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 416, 28, 28], f16), T([64, 512, 28, 28], f16), T([416, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([64, 512, 28, 28], f16), T([64, 208, 28, 28], f16), T([512, 208, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16), T([52, 52, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16, stride=(163072, 784, 28, 1)), T([52, 52, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 
0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 208, 28, 28], f16), T([64, 512, 28, 28], f16), T([208, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), T([64, 256, 56, 56], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 52, 28, 28], f16), T([64, 52, 56, 56], f16, stride=(652288, 3136, 56, 1)), T([52, 52, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 208, 56, 56], f16), T([64, 256, 56, 56], f16), T([208, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 256, 56, 56], f16), T([64, 104, 56, 56], f16), T([256, 104, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16), T([26, 26, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16, stride=(326144, 3136, 56, 1)), T([26, 26, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 104, 56, 56], f16), T([64, 256, 56, 56], f16), T([104, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), T([64, 64, 56, 56], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 104, 56, 56], f16), T([64, 64, 56, 56], f16), T([104, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: 
aten.div.Scalar +cnt: 1, ((T([64, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([64, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([64, 64, 56, 56], f16), T([64, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([64, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([64, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 104, 56, 56], f16), T([104], f16), T([104], f16), T([104], f16), T([104], f16), True, 0.1, 1e-05), {}) +cnt: 9, ((T([64, 26, 56, 56], f16), T([26], f16), T([26], f16), T([26], f16), T([26], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 208, 56, 56], f16), T([208], f16), T([208], f16), T([208], f16), T([208], f16), True, 0.1, 1e-05), {}) +cnt: 12, ((T([64, 52, 28, 28], f16), T([52], f16), T([52], f16), T([52], f16), T([52], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([64, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 208, 28, 28], f16), T([208], f16), T([208], f16), T([208], f16), T([208], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 416, 28, 28], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f16), True, 0.1, 1e-05), {}) +cnt: 69, ((T([64, 104, 14, 14], f16), T([104], f16), T([104], f16), T([104], f16), T([104], f16), True, 0.1, 1e-05), {}) +cnt: 24, ((T([64, 1024, 14, 14], f16), T([1024], 
f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 22, ((T([64, 416, 14, 14], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 832, 14, 14], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f16), True, 0.1, 1e-05), {}) +cnt: 9, ((T([64, 208, 7, 7], f16), T([208], f16), T([208], f16), T([208], f16), T([208], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([64, 832, 7, 7], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([64, 2048, 7, 7], f16), T([64, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 9, ((T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16), T([208], f16), T([208], f16), T([208], f16), T([208], f32), T([208], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([64, 832, 7, 7], f16), T([64, 832, 7, 7], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f32), T([832], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 832, 14, 14], f16), T([64, 832, 14, 14], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f32), T([832], f32), True, 1e-05, [True, True, True]), {}) +cnt: 24, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 69, ((T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16), T([104], f16), T([104], f16), T([104], f16), T([104], f32), T([104], f32), True, 1e-05, [True, True, True]), {}) +cnt: 22, ((T([64, 416, 14, 14], f16), T([64, 416, 14, 14], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f32), T([416], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, 
((T([64, 416, 28, 28], f16), T([64, 416, 28, 28], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f32), T([416], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 12, ((T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16), T([52], f16), T([52], f16), T([52], f16), T([52], f32), T([52], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([64, 208, 28, 28], f16), T([64, 208, 28, 28], f16), T([208], f16), T([208], f16), T([208], f16), T([208], f32), T([208], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 208, 56, 56], f16), T([64, 208, 56, 56], f16), T([208], f16), T([208], f16), T([208], f16), T([208], f32), T([208], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 9, ((T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16), T([26], f16), T([26], f16), T([26], f16), T([26], f32), T([26], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([64, 104, 56, 56], f16), T([64, 104, 56, 56], f16), T([104], f16), T([104], f16), T([104], f16), T([104], f32), T([104], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([64, 64, 112, 112], f16),), {}) +cnt: 3, ((T([64, 104, 56, 56], f16),), {}) +cnt: 9, ((T([64, 26, 56, 56], f16),), {}) +cnt: 3, ((T([64, 256, 
56, 56], f16),), {}) +cnt: 1, ((T([64, 208, 56, 56], f16),), {}) +cnt: 12, ((T([64, 52, 28, 28], f16),), {}) +cnt: 4, ((T([64, 512, 28, 28], f16),), {}) +cnt: 3, ((T([64, 208, 28, 28], f16),), {}) +cnt: 1, ((T([64, 416, 28, 28], f16),), {}) +cnt: 69, ((T([64, 104, 14, 14], f16),), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16),), {}) +cnt: 22, ((T([64, 416, 14, 14], f16),), {}) +cnt: 1, ((T([64, 832, 14, 14], f16),), {}) +cnt: 9, ((T([64, 208, 7, 7], f16),), {}) +cnt: 3, ((T([64, 2048, 7, 7], f16),), {}) +cnt: 2, ((T([64, 832, 7, 7], f16),), {}) +Operator: aten.split.Tensor +cnt: 3, ((T([64, 104, 56, 56], f16), 26, 1), {}) +cnt: 1, ((T([64, 208, 56, 56], f16), 52, 1), {}) +cnt: 3, ((T([64, 208, 28, 28], f16), 52, 1), {}) +cnt: 1, ((T([64, 416, 28, 28], f16), 104, 1), {}) +cnt: 22, ((T([64, 416, 14, 14], f16), 104, 1), {}) +cnt: 1, ((T([64, 832, 14, 14], f16), 208, 1), {}) +cnt: 2, ((T([64, 832, 7, 7], f16), 208, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 3, ((T([64, 2048, 7, 7], f16), T([64, 2048, 7, 7], f16), 0), {}) +cnt: 5, ((T([64, 208, 7, 7], f16, stride=(40768, 49, 7, 1)), T([64, 208, 7, 7], f16), 0), {}) +cnt: 4, ((T([64, 208, 7, 7], f16), T([64, 208, 7, 7], f16), 0), {}) +cnt: 2, ((T([64, 832, 7, 7], f16), T([64, 832, 7, 7], f16), 0), {}) +cnt: 1, ((T([64, 832, 14, 14], f16), T([64, 832, 14, 14], f16), 0), {}) +cnt: 23, ((T([64, 1024, 14, 14], f16), T([64, 1024, 14, 14], f16), 0), {}) +cnt: 25, ((T([64, 104, 14, 14], f16, stride=(81536, 196, 14, 1)), T([64, 104, 14, 14], f16), 0), {}) +cnt: 44, ((T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16), 0), {}) +cnt: 22, ((T([64, 416, 14, 14], f16), T([64, 416, 14, 14], f16), 0), {}) +cnt: 1, ((T([64, 416, 28, 28], f16), T([64, 416, 28, 28], f16), 0), {}) +cnt: 4, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16), 0), {}) +cnt: 6, ((T([64, 52, 28, 28], f16, stride=(163072, 784, 28, 1)), T([64, 52, 28, 28], f16), 0), {}) 
+cnt: 6, ((T([64, 52, 28, 28], f16), T([64, 52, 28, 28], f16), 0), {}) +cnt: 3, ((T([64, 208, 28, 28], f16), T([64, 208, 28, 28], f16), 0), {}) +cnt: 1, ((T([64, 208, 56, 56], f16), T([64, 208, 56, 56], f16), 0), {}) +cnt: 3, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16), 0), {}) +cnt: 5, ((T([64, 26, 56, 56], f16, stride=(326144, 3136, 56, 1)), T([64, 26, 56, 56], f16), 0), {}) +cnt: 4, ((T([64, 26, 56, 56], f16), T([64, 26, 56, 56], f16), 0), {}) +cnt: 3, ((T([64, 104, 56, 56], f16), T([64, 104, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/res2net50_14w_8s_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/res2net50_14w_8s_training.txt new file mode 100644 index 000000000..88b8cd464 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/res2net50_14w_8s_training.txt @@ -0,0 +1,209 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 12, ((T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16, stride=(351232, 3136, 56, 1))), {}) +cnt: 18, ((T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16, stride=(175616, 784, 28, 1))), {}) +cnt: 30, ((T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16, stride=(87808, 196, 14, 1))), {}) +cnt: 12, ((T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16, stride=(43904, 49, 7, 1))), {}) +cnt: 12, ((T([128, 112, 7, 7], f16, stride=(43904, 49, 7, 1)), T([128, 112, 7, 7], f16)), {}) +cnt: 2, ((T([128, 2048, 7, 7], f16), T([128, 2048, 7, 7], f16)), {}) +cnt: 6, ((T([128, 1024, 14, 14], f16), T([128, 1024, 14, 14], f16)), {}) +cnt: 30, ((T([128, 56, 14, 14], f16, stride=(87808, 196, 14, 1)), T([128, 56, 14, 14], f16)), {}) +cnt: 4, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 
28], f16)), {}) +cnt: 18, ((T([128, 28, 28, 28], f16, stride=(175616, 784, 28, 1)), T([128, 28, 28, 28], f16)), {}) +cnt: 3, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), {}) +cnt: 12, ((T([128, 14, 56, 56], f16, stride=(351232, 3136, 56, 1)), T([128, 14, 56, 56], f16)), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 149, ((T([], i64), 1), {}) +cnt: 3, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), {}) +cnt: 4, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16)), {}) +cnt: 6, ((T([128, 1024, 14, 14], f16), T([128, 1024, 14, 14], f16)), {}) +cnt: 3, ((T([128, 2048, 7, 7], f16), T([128, 2048, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([128, 14, 56, 56], f16, stride=(351232, 3136, 56, 1)), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 28, 56, 56], f16, stride=(702464, 3136, 56, 1)), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([128, 56, 28, 28], f16, stride=(351232, 784, 28, 1)), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([128, 112, 14, 14], f16, stride=(175616, 196, 14, 1)), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([128, 112, 7, 7], f16, stride=(43904, 49, 7, 1)), T([128, 112, 14, 14], f16, stride=(175616, 196, 14, 1)), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 56, 14, 14], f16, stride=(87808, 196, 14, 1)), T([128, 56, 28, 28], f16, stride=(351232, 784, 28, 1)), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 28, 28, 28], f16, stride=(175616, 784, 28, 1)), T([128, 28, 56, 56], f16, stride=(702464, 3136, 56, 1)), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 14, 56, 56], f16, stride=(351232, 3136, 56, 1)), T([128, 14, 56, 56], f16, stride=(351232, 3136, 56, 1)), [3, 3], [1, 1], [1, 1], False, True, None), {}) +Operator: 
aten.cat.default +cnt: 2, (([T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16)], 1), {}) +cnt: 4, (([T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16, stride=(351232, 3136, 56, 1))], 1), {}) +cnt: 1, (([T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16)], 1), {}) +cnt: 6, (([T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16, stride=(175616, 784, 28, 1))], 1), {}) +cnt: 1, (([T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16)], 1), {}) +cnt: 10, (([T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16, stride=(87808, 196, 14, 1))], 1), {}) +cnt: 1, (([T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16)], 1), {}) +cnt: 4, (([T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16, stride=(43904, 49, 7, 
1))], 1), {}) +cnt: 1, (([T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16)], 1), {}) +cnt: 1, (([T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16), T([128, 56, 28, 28], f16)], 1), {}) +cnt: 1, (([T([128, 28, 56, 56], f16), T([128, 28, 56, 56], f16), T([128, 28, 56, 56], f16), T([128, 28, 56, 56], f16), T([128, 28, 56, 56], f16), T([128, 28, 56, 56], f16), T([128, 28, 56, 56], f16), T([128, 28, 56, 56], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([112, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([128, 14, 56, 56], f16, stride=(351232, 3136, 56, 1)), T([14, 14, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 112, 56, 56], f16), T([256, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 256, 56, 56], f16), T([112, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 12, ((T([128, 14, 56, 56], f16), T([14, 14, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([224, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 28, 56, 56], f16, stride=(702464, 3136, 56, 1)), T([28, 28, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 224, 28, 28], f16), T([512, 
224, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 512, 28, 28], f16), T([224, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 28, 28, 28], f16, stride=(175616, 784, 28, 1)), T([28, 28, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 18, ((T([128, 28, 28, 28], f16), T([28, 28, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([448, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 56, 28, 28], f16, stride=(351232, 784, 28, 1)), T([56, 56, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([128, 448, 14, 14], f16), T([1024, 448, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 1024, 14, 14], f16), T([448, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 56, 14, 14], f16, stride=(87808, 196, 14, 1)), T([56, 56, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 30, ((T([128, 56, 14, 14], f16), T([56, 56, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 14, 14], f16), T([896, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 112, 14, 14], f16, stride=(175616, 196, 14, 1)), T([112, 112, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 896, 7, 7], f16), T([2048, 896, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 2048, 7, 7], f16), T([896, 
2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 112, 7, 7], f16, stride=(43904, 49, 7, 1)), T([112, 112, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 12, ((T([128, 112, 7, 7], f16), T([112, 112, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([128, 2048, 7, 7], f16), T([128, 896, 7, 7], f16), T([2048, 896, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 12, ((T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([112, 112, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16, stride=(43904, 49, 7, 1)), T([112, 112, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 896, 7, 7], f16), T([128, 2048, 7, 7], f16), T([896, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 2048, 7, 7], f16), T([128, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([128, 112, 7, 7], f16), T([128, 112, 14, 14], f16, stride=(175616, 196, 14, 1)), T([112, 112, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 896, 14, 14], f16), T([128, 1024, 14, 14], f16), T([896, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([128, 1024, 14, 14], f16), T([128, 448, 14, 14], f16), T([1024, 448, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 30, ((T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([56, 56, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16, stride=(87808, 196, 
14, 1)), T([56, 56, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 448, 14, 14], f16), T([128, 1024, 14, 14], f16), T([448, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1024, 14, 14], f16), T([128, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([128, 56, 14, 14], f16), T([128, 56, 28, 28], f16, stride=(351232, 784, 28, 1)), T([56, 56, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 448, 28, 28], f16), T([128, 512, 28, 28], f16), T([448, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 512, 28, 28], f16), T([128, 224, 28, 28], f16), T([512, 224, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 18, ((T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([28, 28, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16, stride=(175616, 784, 28, 1)), T([28, 28, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 224, 28, 28], f16), T([128, 512, 28, 28], f16), T([224, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([128, 256, 56, 56], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([128, 28, 28, 28], f16), T([128, 28, 56, 56], f16, stride=(702464, 3136, 56, 1)), T([28, 28, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 224, 56, 56], f16), T([128, 256, 56, 56], f16), T([224, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, 
False]), {}) +cnt: 3, ((T([128, 256, 56, 56], f16), T([128, 112, 56, 56], f16), T([256, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 12, ((T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([14, 14, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 9, ((T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16, stride=(351232, 3136, 56, 1)), T([14, 14, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 112, 56, 56], f16), T([128, 256, 56, 56], f16), T([112, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 64, 56, 56], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 56, 56], f16), T([128, 64, 56, 56], f16), T([112, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([128, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), 
T([128, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 112, 56, 56], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), True, 0.1, 1e-05), {}) +cnt: 21, ((T([128, 14, 56, 56], f16), T([14], f16), T([14], f16), T([14], f16), T([14], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 224, 56, 56], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), True, 0.1, 1e-05), {}) +cnt: 28, ((T([128, 28, 28, 28], f16), T([28], f16), T([28], f16), T([28], f16), T([28], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 224, 28, 28], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 448, 28, 28], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), True, 0.1, 1e-05), {}) +cnt: 42, ((T([128, 56, 14, 14], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([128, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 448, 14, 14], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 896, 14, 14], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f16), True, 0.1, 1e-05), {}) +cnt: 21, ((T([128, 112, 7, 7], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 896, 7, 7], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f16), True, 0.1, 
1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([128, 2048, 7, 7], f16), T([128, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 21, ((T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 896, 7, 7], f16), T([128, 896, 7, 7], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f32), T([896], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 896, 14, 14], f16), T([128, 896, 14, 14], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f32), T([896], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([128, 1024, 14, 14], f16), T([128, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 42, ((T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f32), T([56], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 448, 14, 14], f16), T([128, 448, 14, 14], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 448, 28, 28], f16), T([128, 448, 28, 28], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 28, ((T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), T([28], f16), T([28], f16), T([28], f16), T([28], f32), T([28], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 224, 28, 28], f16), T([128, 224, 28, 28], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), 
True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 224, 56, 56], f16), T([128, 224, 56, 56], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 21, ((T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), T([14], f16), T([14], f16), T([14], f16), T([14], f32), T([14], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 112, 56, 56], f16), T([128, 112, 56, 56], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 64, 112, 112], f16),), {}) +cnt: 3, ((T([128, 112, 56, 56], f16),), {}) +cnt: 21, ((T([128, 14, 56, 56], f16),), {}) +cnt: 3, ((T([128, 256, 56, 56], f16),), {}) +cnt: 1, ((T([128, 224, 56, 56], f16),), {}) +cnt: 28, ((T([128, 28, 28, 28], f16),), {}) +cnt: 4, ((T([128, 512, 28, 28], f16),), {}) +cnt: 3, ((T([128, 224, 28, 28], f16),), {}) +cnt: 1, ((T([128, 448, 28, 28], f16),), {}) +cnt: 42, ((T([128, 56, 14, 14], f16),), {}) +cnt: 6, ((T([128, 1024, 14, 14], f16),), {}) +cnt: 5, ((T([128, 448, 14, 14], f16),), {}) +cnt: 1, ((T([128, 896, 14, 14], f16),), {}) +cnt: 21, ((T([128, 112, 7, 7], f16),), {}) +cnt: 3, ((T([128, 2048, 7, 7], f16),), {}) +cnt: 2, ((T([128, 896, 7, 7], f16),), {}) +Operator: aten.split.Tensor +cnt: 3, ((T([128, 112, 56, 56], f16), 14, 1), {}) 
+cnt: 1, ((T([128, 224, 56, 56], f16), 28, 1), {}) +cnt: 3, ((T([128, 224, 28, 28], f16), 28, 1), {}) +cnt: 1, ((T([128, 448, 28, 28], f16), 56, 1), {}) +cnt: 5, ((T([128, 448, 14, 14], f16), 56, 1), {}) +cnt: 1, ((T([128, 896, 14, 14], f16), 112, 1), {}) +cnt: 2, ((T([128, 896, 7, 7], f16), 112, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 3, ((T([128, 2048, 7, 7], f16), T([128, 2048, 7, 7], f16), 0), {}) +cnt: 9, ((T([128, 112, 7, 7], f16, stride=(43904, 49, 7, 1)), T([128, 112, 7, 7], f16), 0), {}) +cnt: 12, ((T([128, 112, 7, 7], f16), T([128, 112, 7, 7], f16), 0), {}) +cnt: 2, ((T([128, 896, 7, 7], f16), T([128, 896, 7, 7], f16), 0), {}) +cnt: 1, ((T([128, 896, 14, 14], f16), T([128, 896, 14, 14], f16), 0), {}) +cnt: 6, ((T([128, 1024, 14, 14], f16), T([128, 1024, 14, 14], f16), 0), {}) +cnt: 12, ((T([128, 56, 14, 14], f16, stride=(87808, 196, 14, 1)), T([128, 56, 14, 14], f16), 0), {}) +cnt: 30, ((T([128, 56, 14, 14], f16), T([128, 56, 14, 14], f16), 0), {}) +cnt: 5, ((T([128, 448, 14, 14], f16), T([128, 448, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 448, 28, 28], f16), T([128, 448, 28, 28], f16), 0), {}) +cnt: 4, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16), 0), {}) +cnt: 10, ((T([128, 28, 28, 28], f16, stride=(175616, 784, 28, 1)), T([128, 28, 28, 28], f16), 0), {}) +cnt: 18, ((T([128, 28, 28, 28], f16), T([128, 28, 28, 28], f16), 0), {}) +cnt: 3, ((T([128, 224, 28, 28], f16), T([128, 224, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 224, 56, 56], f16), T([128, 224, 56, 56], f16), 0), {}) +cnt: 3, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16), 0), {}) +cnt: 9, ((T([128, 14, 56, 56], f16, stride=(351232, 3136, 56, 1)), T([128, 14, 56, 56], f16), 0), {}) +cnt: 12, ((T([128, 14, 56, 56], f16), T([128, 14, 56, 56], f16), 0), {}) +cnt: 3, ((T([128, 112, 56, 56], f16), T([128, 112, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 
112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/res2next50_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/res2next50_training.txt new file mode 100644 index 000000000..d498c8050 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/res2next50_training.txt @@ -0,0 +1,197 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 4, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1))), {}) +cnt: 6, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16, stride=(200704, 784, 28, 1))), {}) +cnt: 10, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16, stride=(100352, 196, 14, 1))), {}) +cnt: 4, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16, stride=(50176, 49, 7, 1))), {}) +cnt: 4, ((T([128, 256, 7, 7], f16, stride=(50176, 49, 7, 1)), T([128, 256, 7, 7], f16)), {}) +cnt: 2, ((T([128, 2048, 7, 7], f16), T([128, 2048, 7, 7], f16)), {}) +cnt: 6, ((T([128, 1024, 14, 14], f16), T([128, 1024, 14, 14], f16)), {}) +cnt: 10, ((T([128, 128, 14, 14], f16, stride=(100352, 196, 14, 1)), T([128, 128, 14, 14], f16)), {}) +cnt: 4, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16)), {}) +cnt: 6, ((T([128, 64, 28, 28], f16, stride=(200704, 784, 28, 1)), T([128, 64, 28, 28], f16)), {}) +cnt: 3, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), {}) +cnt: 4, ((T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1)), T([128, 32, 56, 56], f16)), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 85, ((T([], i64), 1), {}) +cnt: 3, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16)), {}) +cnt: 4, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16)), {}) +cnt: 6, ((T([128, 1024, 14, 14], f16), 
T([128, 1024, 14, 14], f16)), {}) +cnt: 3, ((T([128, 2048, 7, 7], f16), T([128, 2048, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1)), [3, 3], [1, 1], [1, 1]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16, stride=(802816, 3136, 56, 1)), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([128, 128, 28, 28], f16, stride=(401408, 784, 28, 1)), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([128, 256, 14, 14], f16, stride=(200704, 196, 14, 1)), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([128, 256, 7, 7], f16, stride=(50176, 49, 7, 1)), T([128, 256, 14, 14], f16, stride=(200704, 196, 14, 1)), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 128, 14, 14], f16, stride=(100352, 196, 14, 1)), T([128, 128, 28, 28], f16, stride=(401408, 784, 28, 1)), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 64, 28, 28], f16, stride=(200704, 784, 28, 1)), T([128, 64, 56, 56], f16, stride=(802816, 3136, 56, 1)), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1)), T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1)), [3, 3], [1, 1], [1, 1], False, True, None), {}) +Operator: aten.cat.default +cnt: 2, (([T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16)], 1), {}) +cnt: 4, (([T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1))], 1), {}) +cnt: 1, (([T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16)], 1), {}) +cnt: 6, (([T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16, stride=(200704, 784, 28, 1))], 1), {}) +cnt: 1, 
(([T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16)], 1), {}) +cnt: 10, (([T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16, stride=(100352, 196, 14, 1))], 1), {}) +cnt: 1, (([T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16)], 1), {}) +cnt: 4, (([T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16, stride=(50176, 49, 7, 1))], 1), {}) +cnt: 1, (([T([128, 256, 14, 14], f16), T([128, 256, 14, 14], f16), T([128, 256, 14, 14], f16), T([128, 256, 14, 14], f16)], 1), {}) +cnt: 1, (([T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16)], 1), {}) +cnt: 1, (([T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1)), T([32, 4, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 3, ((T([128, 128, 56, 56], f16), T([256, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 256, 56, 56], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 32, 56, 56], f16), T([32, 4, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], 
[1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 64, 56, 56], f16, stride=(802816, 3136, 56, 1)), T([64, 8, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 4, ((T([128, 256, 28, 28], f16), T([512, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 512, 28, 28], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 64, 28, 28], f16, stride=(200704, 784, 28, 1)), T([64, 8, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 6, ((T([128, 64, 28, 28], f16), T([64, 8, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([512, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 128, 28, 28], f16, stride=(401408, 784, 28, 1)), T([128, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 6, ((T([128, 512, 14, 14], f16), T([1024, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 128, 14, 14], f16, stride=(100352, 196, 14, 1)), T([128, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 10, ((T([128, 128, 14, 14], f16), T([128, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 1, ((T([128, 1024, 14, 14], f16), T([1024, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 256, 14, 14], f16, stride=(200704, 196, 14, 1)), T([256, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 3, ((T([128, 1024, 7, 7], f16), T([2048, 1024, 1, 1], 
f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 2048, 7, 7], f16), T([1024, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 256, 7, 7], f16, stride=(50176, 49, 7, 1)), T([256, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 4, ((T([128, 256, 7, 7], f16), T([256, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([128, 2048, 7, 7], f16), T([128, 1024, 7, 7], f16), T([2048, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([256, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 2, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16, stride=(50176, 49, 7, 1)), T([256, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 2, ((T([128, 1024, 7, 7], f16), T([128, 2048, 7, 7], f16), T([1024, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 2048, 7, 7], f16), T([128, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 256, 7, 7], f16), T([128, 256, 14, 14], f16, stride=(200704, 196, 14, 1)), T([256, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 1, ((T([128, 1024, 14, 14], f16), T([128, 1024, 14, 14], f16), T([1024, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([128, 1024, 14, 14], f16), T([128, 512, 14, 14], f16), T([1024, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 10, ((T([128, 
128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 5, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16, stride=(100352, 196, 14, 1)), T([128, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 5, ((T([128, 512, 14, 14], f16), T([128, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1024, 14, 14], f16), T([128, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 128, 14, 14], f16), T([128, 128, 28, 28], f16, stride=(401408, 784, 28, 1)), T([128, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16), T([512, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 512, 28, 28], f16), T([128, 256, 28, 28], f16), T([512, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([64, 8, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 3, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16, stride=(200704, 784, 28, 1)), T([64, 8, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 3, ((T([128, 256, 28, 28], f16), T([128, 512, 28, 28], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), T([128, 256, 56, 56], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 64, 28, 28], f16), T([128, 64, 56, 56], f16, stride=(802816, 3136, 56, 1)), T([64, 8, 3, 3], 
f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16), T([256, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 256, 56, 56], f16), T([128, 128, 56, 56], f16), T([256, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([32, 4, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 5, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1)), T([32, 4, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 2, ((T([128, 128, 56, 56], f16), T([128, 256, 56, 56], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), T([128, 64, 56, 56], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 64, 56, 56], f16), T([128, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([128, 64, 56, 56], 
i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 9, ((T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 12, ((T([128, 64, 28, 28], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([128, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 18, ((T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([128, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 9, ((T([128, 256, 7, 7], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([128, 2048, 7, 7], f16), T([128, 2048, 7, 7], f16), 
T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 9, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([128, 1024, 14, 14], f16), T([128, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 18, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 512, 14, 14], f16), T([128, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 12, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 256, 28, 28], f16), T([128, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 9, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), T([128], f16), 
T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 64, 112, 112], f16),), {}) +cnt: 3, ((T([128, 128, 56, 56], f16),), {}) +cnt: 9, ((T([128, 32, 56, 56], f16),), {}) +cnt: 4, ((T([128, 256, 56, 56], f16),), {}) +cnt: 12, ((T([128, 64, 28, 28], f16),), {}) +cnt: 5, ((T([128, 512, 28, 28], f16),), {}) +cnt: 3, ((T([128, 256, 28, 28], f16),), {}) +cnt: 18, ((T([128, 128, 14, 14], f16),), {}) +cnt: 7, ((T([128, 1024, 14, 14], f16),), {}) +cnt: 5, ((T([128, 512, 14, 14], f16),), {}) +cnt: 9, ((T([128, 256, 7, 7], f16),), {}) +cnt: 3, ((T([128, 2048, 7, 7], f16),), {}) +cnt: 2, ((T([128, 1024, 7, 7], f16),), {}) +Operator: aten.split.Tensor +cnt: 3, ((T([128, 128, 56, 56], f16), 32, 1), {}) +cnt: 1, ((T([128, 256, 56, 56], f16), 64, 1), {}) +cnt: 3, ((T([128, 256, 28, 28], f16), 64, 1), {}) +cnt: 1, ((T([128, 512, 28, 28], f16), 128, 1), {}) +cnt: 5, ((T([128, 512, 14, 14], f16), 128, 1), {}) +cnt: 1, ((T([128, 1024, 14, 14], f16), 256, 1), {}) +cnt: 2, ((T([128, 1024, 7, 7], f16), 256, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 3, ((T([128, 2048, 7, 7], f16), T([128, 2048, 7, 7], f16), 0), {}) +cnt: 5, ((T([128, 256, 7, 7], f16, stride=(50176, 49, 7, 1)), T([128, 256, 7, 7], f16), 0), {}) +cnt: 4, ((T([128, 256, 7, 7], f16), T([128, 256, 7, 7], f16), 0), {}) +cnt: 2, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16), 0), {}) +cnt: 7, ((T([128, 1024, 14, 14], f16), T([128, 
1024, 14, 14], f16), 0), {}) +cnt: 8, ((T([128, 128, 14, 14], f16, stride=(100352, 196, 14, 1)), T([128, 128, 14, 14], f16), 0), {}) +cnt: 10, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), 0), {}) +cnt: 5, ((T([128, 512, 14, 14], f16), T([128, 512, 14, 14], f16), 0), {}) +cnt: 5, ((T([128, 512, 28, 28], f16), T([128, 512, 28, 28], f16), 0), {}) +cnt: 6, ((T([128, 64, 28, 28], f16, stride=(200704, 784, 28, 1)), T([128, 64, 28, 28], f16), 0), {}) +cnt: 6, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), 0), {}) +cnt: 3, ((T([128, 256, 28, 28], f16), T([128, 256, 28, 28], f16), 0), {}) +cnt: 4, ((T([128, 256, 56, 56], f16), T([128, 256, 56, 56], f16), 0), {}) +cnt: 5, ((T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1)), T([128, 32, 56, 56], f16), 0), {}) +cnt: 4, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), 0), {}) +cnt: 3, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/resmlp_12_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/resmlp_12_224_training.txt new file mode 100644 index 000000000..3c47d598f --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/resmlp_12_224_training.txt @@ -0,0 +1,75 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 12, ((T([128, 196, 1536], f16), [128, 196, 1536]), {}) +cnt: 12, ((T([128, 384, 196], f16), [49152, 196]), {}) +Operator: aten.add.Tensor +cnt: 12, ((T([128, 196, 384], f16, stride=(75264, 1, 196)), T([128, 196, 384], f16, stride=(75264, 1, 196))), {}) +cnt: 12, ((T([128, 196, 1536], f16), T([1536], f16)), {}) +cnt: 12, ((T([128, 196, 384], f16, stride=(75264, 1, 196)), T([128, 
196, 384], f16)), {}) +cnt: 12, ((T([128, 196, 384], f16), T([128, 196, 384], f16)), {}) +cnt: 12, ((T([128, 196, 384], f16), T([128, 196, 384], f16, stride=(75264, 1, 196))), {}) +Operator: aten.addcmul.default +cnt: 25, ((T([1, 1, 384], f16), T([1, 1, 384], f16), T([128, 196, 384], f16, stride=(75264, 1, 196))), {}) +Operator: aten.addmm.default +cnt: 12, ((T([196], f16), T([49152, 196], f16), T([196, 196], f16, stride=(1, 196))), {}) +cnt: 12, ((T([384], f16), T([25088, 1536], f16), T([1536, 384], f16, stride=(1, 1536))), {}) +cnt: 1, ((T([1000], f16), T([128, 384], f16), T([384, 1000], f16, stride=(1, 384))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([128, 196, 384], f16, stride=(75264, 1, 196)), T([128, 384, 1536], f16, stride=(0, 1, 384))), {}) +cnt: 12, ((T([128, 384, 196], f16), T([128, 196, 1536], f16)), {}) +cnt: 12, ((T([128, 196, 1536], f16), T([128, 1536, 384], f16, stride=(0, 384, 1))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([384, 3, 16, 16], f16), T([384], f16), [16, 16], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 384, 14, 14], f16, stride=(75264, 1, 5376, 384)), T([128, 3, 224, 224], f16), T([384, 3, 16, 16], f16), [384], [16, 16], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +cnt: 12, ((T([1536, 384], f16), T([1536, 384], f16, stride=(1, 1536))), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 196, 384], f16, stride=(384, 0, 1)), 196), {}) +Operator: aten.gelu.default +cnt: 12, ((T([128, 196, 1536], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([128, 196, 1536], f16), T([128, 196, 1536], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 196, 384], f16, 
stride=(75264, 1, 196)), [1]), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 384], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 384], f16)), {}) +cnt: 12, ((T([25088, 384], f16), T([384, 1536], f16)), {}) +cnt: 12, ((T([384, 25088], f16, stride=(1, 384)), T([25088, 1536], f16)), {}) +cnt: 12, ((T([49152, 196], f16), T([196, 196], f16)), {}) +cnt: 12, ((T([196, 49152], f16, stride=(1, 196)), T([49152, 196], f16)), {}) +Operator: aten.mul.Scalar +cnt: 25, ((T([128, 196, 384], f16, stride=(75264, 1, 196)), 1), {}) +cnt: 25, ((T([1, 1, 384], f16), 1), {}) +Operator: aten.mul.Tensor +cnt: 12, ((T([384], f16), T([128, 196, 384], f16, stride=(75264, 1, 196))), {}) +cnt: 12, ((T([384], f16), T([128, 196, 384], f16)), {}) +cnt: 25, ((T([128, 196, 384], f16), T([128, 196, 384], f16, stride=(75264, 1, 196))), {}) +cnt: 13, ((T([128, 196, 384], f16), T([1, 1, 384], f16)), {}) +cnt: 24, ((T([128, 196, 384], f16), T([384], f16)), {}) +cnt: 12, ((T([128, 196, 384], f16), T([128, 196, 384], f16)), {}) +cnt: 12, ((T([128, 196, 384], f16, stride=(75264, 1, 196)), T([128, 196, 384], f16, stride=(75264, 1, 196))), {}) +cnt: 12, ((T([128, 196, 384], f16, stride=(75264, 1, 196)), T([1, 1, 384], f16)), {}) +Operator: aten.new_empty_strided.default +cnt: 12, ((T([1536, 384], f16, stride=(1, 1536)), [1536, 384], [384, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 50, ((T([128, 196, 384], f16), [0, 1], True), {}) +cnt: 12, ((T([25088, 384], f16), [0], True), {}) +cnt: 12, ((T([128, 196, 1536], f16), [0, 1], True), {}) +cnt: 12, ((T([128, 384, 1536], f16), [0], True), {}) +cnt: 12, ((T([49152, 196], f16), 
[0], True), {}) +cnt: 24, ((T([128, 196, 384], f16, stride=(75264, 1, 196)), [0, 1], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/resnest101e_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/resnest101e_training.txt new file mode 100644 index 000000000..03e1db4dc --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/resnest101e_training.txt @@ -0,0 +1,269 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([32, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([32, 1000], f16), T([32, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 3, ((T([32, 2, 1, 64], f16), 1, False), {}) +cnt: 4, ((T([32, 2, 1, 128], f16), 1, False), {}) +cnt: 23, ((T([32, 2, 1, 256], f16), 1, False), {}) +cnt: 3, ((T([32, 2, 1, 512], f16), 1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 3, ((T([32, 2, 1, 512], f16), T([32, 2, 1, 512], f16), 1, f16), {}) +cnt: 23, ((T([32, 2, 1, 256], f16), T([32, 2, 1, 256], f16), 1, f16), {}) +cnt: 4, ((T([32, 2, 1, 128], f16), T([32, 2, 1, 128], f16), 1, f16), {}) +cnt: 3, ((T([32, 2, 1, 64], f16), T([32, 2, 1, 64], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([32, 2, 512, 8, 8], f16), T([32, 2, 512, 8, 8], f16, stride=(32768, 0, 64, 8, 1))), {}) +cnt: 2, ((T([32, 2048, 8, 8], f16), T([32, 2048, 8, 8], f16)), {}) +cnt: 1, ((T([32, 2, 512, 16, 16], f16), T([32, 2, 512, 16, 16], f16, stride=(131072, 0, 256, 16, 1))), {}) +cnt: 23, ((T([32, 1024, 16, 16], f16), T([32, 1024, 16, 16], f16)), {}) +cnt: 22, ((T([32, 2, 256, 16, 16], f16), T([32, 2, 256, 16, 16], f16, stride=(65536, 0, 256, 16, 1))), {}) +cnt: 1, ((T([32, 2, 256, 32, 32], f16), T([32, 2, 256, 32, 32], f16, stride=(262144, 0, 1024, 32, 1))), {}) +cnt: 4, ((T([32, 512, 32, 32], f16), T([32, 512, 32, 32], f16)), {}) +cnt: 3, ((T([32, 2, 128, 32, 32], f16), T([32, 2, 128, 32, 32], f16, stride=(131072, 0, 1024, 32, 1))), 
{}) +cnt: 1, ((T([32, 2, 128, 64, 64], f16), T([32, 2, 128, 64, 64], f16, stride=(524288, 0, 4096, 64, 1))), {}) +cnt: 3, ((T([32, 256, 64, 64], f16), T([32, 256, 64, 64], f16)), {}) +cnt: 3, ((T([32, 2, 64, 64, 64], f16), T([32, 2, 64, 64, 64], f16, stride=(262144, 0, 4096, 64, 1))), {}) +cnt: 1, ((T([32, 128, 64, 64], f16), T([32, 128, 64, 64], f16)), {}) +Operator: aten.add_.Tensor +cnt: 139, ((T([], i64), 1), {}) +cnt: 3, ((T([32, 256, 64, 64], f16), T([32, 256, 64, 64], f16)), {}) +cnt: 4, ((T([32, 512, 32, 32], f16), T([32, 512, 32, 32], f16)), {}) +cnt: 23, ((T([32, 1024, 16, 16], f16), T([32, 1024, 16, 16], f16)), {}) +cnt: 3, ((T([32, 2048, 8, 8], f16), T([32, 2048, 8, 8], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([32, 128, 64, 64], f16), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([32, 256, 64, 64], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([32, 256, 32, 32], f16), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([32, 512, 32, 32], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([32, 512, 16, 16], f16), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([32, 1024, 16, 16], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([32, 1024, 8, 8], f16), T([32, 1024, 16, 16], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([32, 512, 8, 8], f16), T([32, 512, 16, 16], f16), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([32, 512, 16, 16], f16), T([32, 512, 32, 32], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([32, 256, 16, 16], f16), T([32, 256, 32, 32], f16), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([32, 256, 32, 32], f16), T([32, 256, 64, 64], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([32, 128, 32, 32], f16), T([32, 128, 64, 64], f16), [3, 3], [2, 2], [1, 1], 
False, True, None), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 256, 256], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 256, 256], f16), T([64, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 128, 128], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 128, 128], f16), T([128, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 64, 64], f16), T([64, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 64, 64, 64], f16), T([128, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 3, ((T([32, 64, 1, 1], f16), T([32, 64, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 32, 1, 1], f16), T([128, 32, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 64, 64, 64], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 64, 64], f16), T([256, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 256, 64, 64], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 64, 64], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 64, 64], f16), T([256, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 4, ((T([32, 128, 1, 1], f16), T([64, 128, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 64, 1, 1], f16), T([256, 64, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 128, 32, 32], f16), T([512, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 32, 32], f16), T([512, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, 
[0, 0], 1), {}) +cnt: 3, ((T([32, 512, 32, 32], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 128, 32, 32], f16), T([256, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 1, ((T([32, 512, 32, 32], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 32, 32], f16), T([512, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 23, ((T([32, 256, 1, 1], f16), T([128, 256, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 23, ((T([32, 128, 1, 1], f16), T([512, 128, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 23, ((T([32, 256, 16, 16], f16), T([1024, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 16, 16], f16), T([1024, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 22, ((T([32, 1024, 16, 16], f16), T([256, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 22, ((T([32, 256, 16, 16], f16), T([512, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 1, ((T([32, 1024, 16, 16], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 16, 16], f16), T([1024, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 3, ((T([32, 512, 1, 1], f16), T([256, 512, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), T([1024, 256, 1, 1], f16), T([1024], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 512, 8, 8], f16), T([2048, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 8, 8], f16), T([2048, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 2048, 8, 8], f16), T([512, 2048, 1, 1], f16), None, 
[1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 512, 8, 8], f16), T([1024, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([32, 2048, 8, 8], f16), T([32, 512, 8, 8], f16), T([2048, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 1024, 1, 1], f16), T([32, 256, 1, 1], f16), T([1024, 256, 1, 1], f16), [1024], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), T([32, 512, 1, 1], f16), T([256, 512, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 1024, 8, 8], f16), T([32, 512, 8, 8], f16), T([1024, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 2, ((T([32, 512, 8, 8], f16), T([32, 2048, 8, 8], f16), T([512, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2048, 8, 8], f16), T([32, 1024, 8, 8], f16), T([2048, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 16, 16], f16), T([32, 512, 16, 16], f16), T([1024, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 16, 16], f16), T([32, 1024, 16, 16], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 23, ((T([32, 1024, 16, 16], f16), T([32, 256, 16, 16], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 23, ((T([32, 512, 1, 1], f16), T([32, 128, 1, 1], f16), T([512, 128, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 23, ((T([32, 128, 1, 1], f16), T([32, 256, 1, 1], f16), T([128, 256, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, 
True]), {}) +cnt: 22, ((T([32, 512, 16, 16], f16), T([32, 256, 16, 16], f16), T([512, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 22, ((T([32, 256, 16, 16], f16), T([32, 1024, 16, 16], f16), T([256, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 16, 16], f16), T([32, 512, 16, 16], f16), T([1024, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 32, 32], f16), T([32, 256, 32, 32], f16), T([512, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 32, 32], f16), T([32, 512, 32, 32], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([32, 512, 32, 32], f16), T([32, 128, 32, 32], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([32, 256, 1, 1], f16), T([32, 64, 1, 1], f16), T([256, 64, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([32, 64, 1, 1], f16), T([32, 128, 1, 1], f16), T([64, 128, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 256, 32, 32], f16), T([32, 128, 32, 32], f16), T([256, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 3, ((T([32, 128, 32, 32], f16), T([32, 512, 32, 32], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 32, 32], f16), T([32, 256, 32, 32], f16), T([512, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 64, 64], f16), T([32, 128, 64, 64], f16), T([256, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 64, 
64], f16), T([32, 256, 64, 64], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 256, 64, 64], f16), T([32, 64, 64, 64], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 128, 1, 1], f16), T([32, 32, 1, 1], f16), T([128, 32, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 32, 1, 1], f16), T([32, 64, 1, 1], f16), T([32, 64, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 128, 64, 64], f16), T([32, 64, 64, 64], f16), T([128, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 2, ((T([32, 64, 64, 64], f16), T([32, 256, 64, 64], f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 64, 64], f16), T([32, 128, 64, 64], f16), T([256, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 64, 64], f16), T([32, 128, 64, 64], f16), T([64, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 128, 128], f16), T([32, 64, 128, 128], f16), T([128, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 128, 128], f16), T([32, 64, 128, 128], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 128, 128], f16), T([32, 3, 256, 256], f16), T([64, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 256, 256], f16), T([32, 3, 256, 256], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 2048, 8, 8], f16, stride=(2048, 1, 0, 0)), 64), {}) +cnt: 2, ((T([32, 512, 8, 8], f16, 
stride=(512, 1, 0, 0)), 64), {}) +cnt: 1, ((T([32, 512, 16, 16], f16, stride=(512, 1, 0, 0)), 256), {}) +cnt: 22, ((T([32, 256, 16, 16], f16, stride=(256, 1, 0, 0)), 256), {}) +cnt: 1, ((T([32, 256, 32, 32], f16, stride=(256, 1, 0, 0)), 1024), {}) +cnt: 3, ((T([32, 128, 32, 32], f16, stride=(128, 1, 0, 0)), 1024), {}) +cnt: 1, ((T([32, 128, 64, 64], f16, stride=(128, 1, 0, 0)), 4096), {}) +cnt: 3, ((T([32, 64, 64, 64], f16, stride=(64, 1, 0, 0)), 4096), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([32], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([32, 128, 128, 128], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([32, 128, 64, 64], f16), T([32, 128, 128, 128], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([32, 128, 64, 64], i64)), {}) +Operator: aten.mean.dim +cnt: 3, ((T([32, 64, 64, 64], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 128, 64, 64], f16), [2, 3], True), {}) +cnt: 3, ((T([32, 128, 32, 32], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 256, 32, 32], f16), [2, 3], True), {}) +cnt: 22, ((T([32, 256, 16, 16], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 512, 16, 16], f16), [2, 3], True), {}) +cnt: 2, ((T([32, 512, 8, 8], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 2048, 8, 8], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(1, 1000)), T([32, 2048], f16)), {}) +Operator: aten.mul.Tensor +cnt: 3, ((T([32, 2, 64, 64, 64], f16), T([32, 2, 64, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 128, 64, 64], f16), T([32, 2, 128, 1, 1], f16)), {}) +cnt: 3, ((T([32, 2, 128, 32, 32], f16), T([32, 2, 128, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 256, 32, 32], f16), T([32, 2, 256, 1, 1], f16)), {}) +cnt: 22, ((T([32, 2, 256, 16, 16], f16), T([32, 2, 256, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 512, 16, 16], f16), T([32, 2, 512, 1, 1], f16)), {}) +cnt: 2, ((T([32, 2, 512, 8, 8], f16), T([32, 
2, 512, 1, 1], f16)), {}) +cnt: 2, ((T([32, 2, 512, 8, 8], f16, stride=(32768, 0, 64, 8, 1)), T([32, 2, 512, 8, 8], f16)), {}) +cnt: 2, ((T([32, 2, 512, 8, 8], f16, stride=(32768, 0, 64, 8, 1)), T([32, 2, 512, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 512, 16, 16], f16, stride=(131072, 0, 256, 16, 1)), T([32, 2, 512, 16, 16], f16)), {}) +cnt: 1, ((T([32, 2, 512, 16, 16], f16, stride=(131072, 0, 256, 16, 1)), T([32, 2, 512, 1, 1], f16)), {}) +cnt: 22, ((T([32, 2, 256, 16, 16], f16, stride=(65536, 0, 256, 16, 1)), T([32, 2, 256, 16, 16], f16)), {}) +cnt: 22, ((T([32, 2, 256, 16, 16], f16, stride=(65536, 0, 256, 16, 1)), T([32, 2, 256, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 256, 32, 32], f16, stride=(262144, 0, 1024, 32, 1)), T([32, 2, 256, 32, 32], f16)), {}) +cnt: 1, ((T([32, 2, 256, 32, 32], f16, stride=(262144, 0, 1024, 32, 1)), T([32, 2, 256, 1, 1], f16)), {}) +cnt: 3, ((T([32, 2, 128, 32, 32], f16, stride=(131072, 0, 1024, 32, 1)), T([32, 2, 128, 32, 32], f16)), {}) +cnt: 3, ((T([32, 2, 128, 32, 32], f16, stride=(131072, 0, 1024, 32, 1)), T([32, 2, 128, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 128, 64, 64], f16, stride=(524288, 0, 4096, 64, 1)), T([32, 2, 128, 64, 64], f16)), {}) +cnt: 1, ((T([32, 2, 128, 64, 64], f16, stride=(524288, 0, 4096, 64, 1)), T([32, 2, 128, 1, 1], f16)), {}) +cnt: 3, ((T([32, 2, 64, 64, 64], f16, stride=(262144, 0, 4096, 64, 1)), T([32, 2, 64, 64, 64], f16)), {}) +cnt: 3, ((T([32, 2, 64, 64, 64], f16, stride=(262144, 0, 4096, 64, 1)), T([32, 2, 64, 1, 1], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([32, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 128, 128, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), 
T([128], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 32, 1, 1], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([32, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 64, 1, 1], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([32, 512, 32, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 23, ((T([32, 128, 1, 1], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 25, ((T([32, 1024, 16, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 22, ((T([32, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 23, ((T([32, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 2048, 8, 8], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 1024, 8, 8], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([32, 2048, 8, 8], f16), T([32, 2048, 8, 8], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), 
T([32, 256, 1, 1], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 1024, 8, 8], f16), T([32, 1024, 8, 8], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 512, 8, 8], f16), T([32, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 25, ((T([32, 1024, 16, 16], f16), T([32, 1024, 16, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 23, ((T([32, 512, 16, 16], f16), T([32, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 23, ((T([32, 128, 1, 1], f16), T([32, 128, 1, 1], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 22, ((T([32, 256, 16, 16], f16), T([32, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([32, 512, 32, 32], f16), T([32, 512, 32, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 256, 32, 32], f16), T([32, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 64, 1, 1], f16), T([32, 64, 1, 1], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 128, 32, 32], f16), T([32, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([32, 256, 64, 64], f16), T([32, 256, 64, 64], f16), T([256], f16), 
T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 128, 64, 64], f16), T([32, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 32, 1, 1], f16), T([32, 32, 1, 1], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 64, 64, 64], f16), T([32, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 128, 128], f16), T([32, 128, 128, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 64, 128, 128], f16), T([32, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([32, 1000], f16), T([32], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([32, 1000], f16), T([32], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 2, ((T([32, 64, 128, 128], f16),), {}) +cnt: 1, ((T([32, 128, 128, 128], f16),), {}) +cnt: 3, ((T([32, 64, 64, 64], f16),), {}) +cnt: 4, ((T([32, 128, 64, 64], f16),), {}) +cnt: 3, ((T([32, 32, 1, 1], f16),), {}) +cnt: 4, ((T([32, 256, 64, 64], f16),), {}) +cnt: 4, ((T([32, 64, 1, 1], f16),), {}) +cnt: 5, ((T([32, 512, 32, 32], f16),), {}) +cnt: 3, ((T([32, 128, 32, 32], f16),), {}) +cnt: 4, ((T([32, 256, 32, 32], f16),), {}) +cnt: 23, ((T([32, 128, 1, 1], f16),), {}) +cnt: 24, ((T([32, 1024, 16, 16], f16),), {}) +cnt: 22, ((T([32, 256, 16, 16], f16),), {}) +cnt: 23, ((T([32, 512, 16, 16], f16),), {}) +cnt: 3, ((T([32, 256, 1, 1], f16),), {}) +cnt: 3, ((T([32, 2048, 8, 8], f16),), {}) +cnt: 2, ((T([32, 512, 8, 8], f16),), {}) +cnt: 2, ((T([32, 
1024, 8, 8], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16), [0], True), {}) +cnt: 2, ((T([32, 2, 512, 8, 8], f16), [3, 4], True), {}) +cnt: 1, ((T([32, 2, 512, 16, 16], f16), [3, 4], True), {}) +cnt: 22, ((T([32, 2, 256, 16, 16], f16), [3, 4], True), {}) +cnt: 1, ((T([32, 2, 256, 32, 32], f16), [3, 4], True), {}) +cnt: 3, ((T([32, 2, 128, 32, 32], f16), [3, 4], True), {}) +cnt: 1, ((T([32, 2, 128, 64, 64], f16), [3, 4], True), {}) +cnt: 3, ((T([32, 2, 64, 64, 64], f16), [3, 4], True), {}) +Operator: aten.sum.dim_IntList +cnt: 6, ((T([32, 2, 64, 64, 64], f16), [1]), {}) +cnt: 2, ((T([32, 2, 128, 64, 64], f16), [1]), {}) +cnt: 6, ((T([32, 2, 128, 32, 32], f16), [1]), {}) +cnt: 2, ((T([32, 2, 256, 32, 32], f16), [1]), {}) +cnt: 44, ((T([32, 2, 256, 16, 16], f16), [1]), {}) +cnt: 2, ((T([32, 2, 512, 16, 16], f16), [1]), {}) +cnt: 4, ((T([32, 2, 512, 8, 8], f16), [1]), {}) +Operator: aten.threshold_backward.default +cnt: 3, ((T([32, 2048, 8, 8], f16), T([32, 2048, 8, 8], f16), 0), {}) +cnt: 3, ((T([32, 256, 1, 1], f16), T([32, 256, 1, 1], f16), 0), {}) +cnt: 2, ((T([32, 1024, 8, 8], f16), T([32, 1024, 8, 8], f16), 0), {}) +cnt: 2, ((T([32, 512, 8, 8], f16), T([32, 512, 8, 8], f16), 0), {}) +cnt: 24, ((T([32, 1024, 16, 16], f16), T([32, 1024, 16, 16], f16), 0), {}) +cnt: 23, ((T([32, 512, 16, 16], f16), T([32, 512, 16, 16], f16), 0), {}) +cnt: 23, ((T([32, 128, 1, 1], f16), T([32, 128, 1, 1], f16), 0), {}) +cnt: 22, ((T([32, 256, 16, 16], f16), T([32, 256, 16, 16], f16), 0), {}) +cnt: 5, ((T([32, 512, 32, 32], f16), T([32, 512, 32, 32], f16), 0), {}) +cnt: 4, ((T([32, 256, 32, 32], f16), T([32, 256, 32, 32], f16), 0), {}) +cnt: 4, ((T([32, 64, 1, 1], f16), T([32, 64, 1, 1], f16), 0), {}) +cnt: 3, ((T([32, 128, 32, 32], f16), T([32, 128, 32, 32], f16), 0), {}) +cnt: 4, ((T([32, 256, 64, 64], f16), T([32, 256, 64, 64], f16), 0), {}) +cnt: 4, ((T([32, 128, 64, 64], f16), T([32, 128, 64, 64], f16), 0), {}) +cnt: 3, ((T([32, 32, 1, 1], f16), T([32, 32, 
1, 1], f16), 0), {}) +cnt: 3, ((T([32, 64, 64, 64], f16), T([32, 64, 64, 64], f16), 0), {}) +cnt: 1, ((T([32, 128, 128, 128], f16), T([32, 128, 128, 128], f16), 0), {}) +cnt: 2, ((T([32, 64, 128, 128], f16), T([32, 64, 128, 128], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/resnet18_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/resnet18_training.txt new file mode 100644 index 000000000..ef201d6c1 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/resnet18_training.txt @@ -0,0 +1,88 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([128, 512, 7, 7], f16), T([128, 512, 7, 7], f16)), {}) +cnt: 2, ((T([128, 256, 14, 14], f16), T([128, 256, 14, 14], f16)), {}) +cnt: 2, ((T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16)), {}) +cnt: 3, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 20, ((T([], i64), 1), {}) +cnt: 2, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16)), {}) +cnt: 2, ((T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16)), {}) +cnt: 2, ((T([128, 256, 14, 14], f16), T([128, 256, 14, 14], f16)), {}) +cnt: 2, ((T([128, 512, 7, 7], f16), T([128, 512, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 512], f16), T([512, 1000], f16, stride=(1, 512))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 64, 56, 56], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], 
False, [0, 0], 1), {}) +cnt: 3, ((T([128, 128, 28, 28], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 28, 28], f16), T([256, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 256, 14, 14], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 28, 28], f16), T([256, 128, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 14, 14], f16), T([512, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 512, 7, 7], f16), T([512, 512, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 14, 14], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([128, 512, 7, 7], f16), T([128, 512, 7, 7], f16), T([512, 512, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 7, 7], f16), T([128, 256, 14, 14], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 512, 7, 7], f16), T([128, 256, 14, 14], f16), T([512, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 256, 14, 14], f16), T([128, 256, 14, 14], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 14, 14], f16), T([128, 128, 28, 28], f16), T([256, 128, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 256, 14, 14], f16), T([128, 128, 28, 28], f16), T([256, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) 
+cnt: 3, ((T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 28, 28], f16), T([128, 64, 56, 56], f16), T([128, 64, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 28, 28], f16), T([128, 64, 56, 56], f16), T([128, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 512, 7, 7], f16, stride=(512, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([128, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 512, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 512], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 512], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 128, 28, 28], f16), T([128], f16), T([128], f16), 
T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 5, ((T([128, 512, 7, 7], f16), T([128, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 256, 14, 14], f16), T([128, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 64, 112, 112], f16),), {}) +cnt: 4, ((T([128, 64, 56, 56], f16),), {}) +cnt: 4, ((T([128, 128, 28, 28], f16),), {}) +cnt: 4, ((T([128, 256, 14, 14], f16),), {}) +cnt: 4, ((T([128, 512, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 4, ((T([128, 512, 7, 7], f16), T([128, 512, 7, 7], f16), 0), {}) +cnt: 4, ((T([128, 256, 14, 14], f16), T([128, 256, 14, 14], f16), 0), {}) +cnt: 4, 
((T([128, 128, 28, 28], f16), T([128, 128, 28, 28], f16), 0), {}) +cnt: 4, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 64, 112, 112], f16), T([128, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/rexnet_100_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/rexnet_100_training.txt new file mode 100644 index 000000000..739188b28 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/rexnet_100_training.txt @@ -0,0 +1,573 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 49, ((T([], i64), 1), {}) +cnt: 2, ((T([128, 27, 56, 56], f16, stride=(119168, 3136, 56, 1)), T([128, 27, 56, 56], f16)), {}) +cnt: 2, ((T([128, 50, 28, 28], f16, stride=(47824, 784, 28, 1)), T([128, 50, 28, 28], f16)), {}) +cnt: 2, ((T([128, 72, 14, 14], f16, stride=(16464, 196, 14, 1)), T([128, 72, 14, 14], f16)), {}) +cnt: 2, ((T([128, 84, 14, 14], f16, stride=(18620, 196, 14, 1)), T([128, 84, 14, 14], f16)), {}) +cnt: 2, ((T([128, 95, 14, 14], f16, stride=(20776, 196, 14, 1)), T([128, 95, 14, 14], f16)), {}) +cnt: 2, ((T([128, 106, 14, 14], f16, stride=(22932, 196, 14, 1)), T([128, 106, 14, 14], f16)), {}) +cnt: 2, ((T([128, 117, 14, 14], f16, stride=(25088, 196, 14, 1)), T([128, 117, 14, 14], f16)), {}) +cnt: 2, ((T([128, 140, 7, 7], f16, stride=(7399, 49, 7, 1)), T([128, 140, 7, 7], f16)), {}) +cnt: 2, ((T([128, 151, 7, 7], f16, stride=(7938, 49, 7, 1)), T([128, 151, 7, 7], f16)), {}) +cnt: 2, ((T([128, 162, 7, 7], f16, stride=(8526, 49, 7, 1)), T([128, 162, 7, 7], f16)), {}) +cnt: 2, ((T([128, 174, 7, 7], f16, stride=(9065, 49, 7, 1)), T([128, 174, 7, 7], f16)), {}) +cnt: 1, ((T([128, 185, 7, 7], f16), T([128, 185, 7, 7], f16)), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16), 
T([128, 1044, 7, 7], f16)), {}) +cnt: 1, ((T([128, 174, 7, 7], f16), T([128, 174, 7, 7], f16)), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), T([128, 972, 7, 7], f16)), {}) +cnt: 1, ((T([128, 162, 7, 7], f16), T([128, 162, 7, 7], f16)), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), T([128, 906, 7, 7], f16)), {}) +cnt: 1, ((T([128, 151, 7, 7], f16), T([128, 151, 7, 7], f16)), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), T([128, 840, 7, 7], f16)), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), T([128, 768, 7, 7], f16)), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16)), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), T([128, 702, 14, 14], f16)), {}) +cnt: 1, ((T([128, 117, 14, 14], f16), T([128, 117, 14, 14], f16)), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), T([128, 636, 14, 14], f16)), {}) +cnt: 1, ((T([128, 106, 14, 14], f16), T([128, 106, 14, 14], f16)), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), T([128, 570, 14, 14], f16)), {}) +cnt: 1, ((T([128, 95, 14, 14], f16), T([128, 95, 14, 14], f16)), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), T([128, 504, 14, 14], f16)), {}) +cnt: 1, ((T([128, 84, 14, 14], f16), T([128, 84, 14, 14], f16)), {}) +cnt: 1, ((T([128, 432, 14, 14], f16), T([128, 432, 14, 14], f16)), {}) +cnt: 1, ((T([128, 366, 14, 14], f16), T([128, 366, 14, 14], f16)), {}) +cnt: 1, ((T([128, 61, 28, 28], f16), T([128, 61, 28, 28], f16)), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), T([128, 300, 28, 28], f16)), {}) +cnt: 1, ((T([128, 228, 28, 28], f16), T([128, 228, 28, 28], f16)), {}) +cnt: 1, ((T([128, 38, 56, 56], f16), T([128, 38, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 13, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.cat.default +cnt: 1, (([T([128, 27, 56, 56], f16), T([128, 11, 56, 56], f16, stride=(119168, 3136, 56, 1))], 1), {}) +cnt: 1, (([T([128, 50, 28, 28], f16), T([128, 11, 28, 28], f16, stride=(47824, 784, 28, 1))], 1), {}) 
+cnt: 1, (([T([128, 72, 14, 14], f16), T([128, 12, 14, 14], f16, stride=(16464, 196, 14, 1))], 1), {}) +cnt: 1, (([T([128, 84, 14, 14], f16), T([128, 11, 14, 14], f16, stride=(18620, 196, 14, 1))], 1), {}) +cnt: 1, (([T([128, 95, 14, 14], f16), T([128, 11, 14, 14], f16, stride=(20776, 196, 14, 1))], 1), {}) +cnt: 1, (([T([128, 106, 14, 14], f16), T([128, 11, 14, 14], f16, stride=(22932, 196, 14, 1))], 1), {}) +cnt: 1, (([T([128, 117, 14, 14], f16), T([128, 11, 14, 14], f16, stride=(25088, 196, 14, 1))], 1), {}) +cnt: 1, (([T([128, 140, 7, 7], f16), T([128, 11, 7, 7], f16, stride=(7399, 49, 7, 1))], 1), {}) +cnt: 1, (([T([128, 151, 7, 7], f16), T([128, 11, 7, 7], f16, stride=(7938, 49, 7, 1))], 1), {}) +cnt: 1, (([T([128, 162, 7, 7], f16), T([128, 12, 7, 7], f16, stride=(8526, 49, 7, 1))], 1), {}) +cnt: 1, (([T([128, 174, 7, 7], f16), T([128, 11, 7, 7], f16, stride=(9065, 49, 7, 1))], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +cnt: 1, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 96, 112, 112], f16),), {}) +cnt: 1, ((T([128, 162, 56, 56], f16),), {}) +cnt: 1, ((T([128, 228, 56, 56], f16),), {}) +cnt: 1, ((T([128, 300, 28, 28], f16),), {}) +cnt: 1, ((T([128, 366, 28, 28], f16),), {}) +cnt: 1, ((T([128, 432, 14, 14], f16),), {}) +cnt: 1, ((T([128, 504, 14, 14], f16),), {}) +cnt: 1, ((T([128, 570, 14, 14], f16),), {}) +cnt: 1, ((T([128, 636, 14, 14], f16),), {}) +cnt: 1, ((T([128, 702, 14, 14], f16),), {}) +cnt: 1, ((T([128, 768, 14, 14], f16),), {}) +cnt: 1, ((T([128, 840, 7, 7], f16),), {}) +cnt: 1, ((T([128, 906, 7, 7], f16),), {}) +cnt: 1, ((T([128, 972, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 
32), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([96, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([96, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 96), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([27, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 27, 56, 56], f16), T([162, 27, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 162, 56, 56], f16), T([162, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 162), {}) +cnt: 1, ((T([128, 162, 56, 56], f16), T([38, 162, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 38, 56, 56], f16), T([228, 38, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 228, 56, 56], f16), T([228, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 228), {}) +cnt: 1, ((T([128, 228, 1, 1], f16), T([19, 228, 1, 1], f16), T([19], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 19, 1, 1], f16), T([228, 19, 1, 1], f16), T([228], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 228, 28, 28], f16), T([50, 228, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 50, 28, 28], f16), T([300, 50, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), T([300, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 300), {}) +cnt: 1, ((T([128, 300, 1, 1], f16), T([25, 300, 1, 1], f16), T([25], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 25, 1, 1], f16), T([300, 25, 1, 1], f16), T([300], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), T([61, 300, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], 
False, [0, 0], 1), {}) +cnt: 1, ((T([128, 61, 28, 28], f16), T([366, 61, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 366, 28, 28], f16), T([366, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 366), {}) +cnt: 1, ((T([128, 366, 1, 1], f16), T([30, 366, 1, 1], f16), T([30], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 30, 1, 1], f16), T([366, 30, 1, 1], f16), T([366], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 366, 14, 14], f16), T([72, 366, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 72, 14, 14], f16), T([432, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 432, 14, 14], f16), T([432, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 432), {}) +cnt: 1, ((T([128, 432, 1, 1], f16), T([36, 432, 1, 1], f16), T([36], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 36, 1, 1], f16), T([432, 36, 1, 1], f16), T([432], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 432, 14, 14], f16), T([84, 432, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 84, 14, 14], f16), T([504, 84, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), T([504, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 504), {}) +cnt: 1, ((T([128, 504, 1, 1], f16), T([42, 504, 1, 1], f16), T([42], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 42, 1, 1], f16), T([504, 42, 1, 1], f16), T([504], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), T([95, 504, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 95, 14, 14], f16), T([570, 95, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), T([570, 1, 3, 3], f16), None, 
[1, 1], [1, 1], [1, 1], False, [0, 0], 570), {}) +cnt: 1, ((T([128, 570, 1, 1], f16), T([47, 570, 1, 1], f16), T([47], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 47, 1, 1], f16), T([570, 47, 1, 1], f16), T([570], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), T([106, 570, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 106, 14, 14], f16), T([636, 106, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), T([636, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 636), {}) +cnt: 1, ((T([128, 636, 1, 1], f16), T([53, 636, 1, 1], f16), T([53], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 53, 1, 1], f16), T([636, 53, 1, 1], f16), T([636], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), T([117, 636, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 117, 14, 14], f16), T([702, 117, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), T([702, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 702), {}) +cnt: 1, ((T([128, 702, 1, 1], f16), T([58, 702, 1, 1], f16), T([58], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 58, 1, 1], f16), T([702, 58, 1, 1], f16), T([702], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), T([128, 702, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([768, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([768, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 768), {}) +cnt: 1, ((T([128, 768, 1, 1], f16), T([64, 768, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 1, 
1], f16), T([768, 64, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), T([140, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 140, 7, 7], f16), T([840, 140, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), T([840, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 840), {}) +cnt: 1, ((T([128, 840, 1, 1], f16), T([70, 840, 1, 1], f16), T([70], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 70, 1, 1], f16), T([840, 70, 1, 1], f16), T([840], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), T([151, 840, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 151, 7, 7], f16), T([906, 151, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), T([906, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 906), {}) +cnt: 1, ((T([128, 906, 1, 1], f16), T([75, 906, 1, 1], f16), T([75], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 75, 1, 1], f16), T([906, 75, 1, 1], f16), T([906], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), T([162, 906, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 162, 7, 7], f16), T([972, 162, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), T([972, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 972), {}) +cnt: 1, ((T([128, 972, 1, 1], f16), T([81, 972, 1, 1], f16), T([81], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 81, 1, 1], f16), T([972, 81, 1, 1], f16), T([972], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), T([174, 972, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) 
+cnt: 1, ((T([128, 174, 7, 7], f16), T([1044, 174, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16), T([1044, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1044), {}) +cnt: 1, ((T([128, 1044, 1, 1], f16), T([87, 1044, 1, 1], f16), T([87], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 87, 1, 1], f16), T([1044, 87, 1, 1], f16), T([1044], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16), T([185, 1044, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 185, 7, 7], f16), T([1280, 185, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 185, 7, 7], f16), T([1280, 185, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 185, 7, 7], f16), T([128, 1044, 7, 7], f16), T([185, 1044, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1044, 1, 1], f16), T([128, 87, 1, 1], f16), T([1044, 87, 1, 1], f16), [1044], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 87, 1, 1], f16), T([128, 1044, 1, 1], f16), T([87, 1044, 1, 1], f16), [87], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16), T([128, 1044, 7, 7], f16), T([1044, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1044, [True, True, False]), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16), T([128, 174, 7, 7], f16), T([1044, 174, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 174, 7, 7], f16), T([128, 972, 7, 7], f16), T([174, 972, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 972, 1, 1], f16), T([128, 81, 1, 1], f16), T([972, 81, 1, 1], f16), 
[972], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 81, 1, 1], f16), T([128, 972, 1, 1], f16), T([81, 972, 1, 1], f16), [81], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), T([128, 972, 7, 7], f16), T([972, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 972, [True, True, False]), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), T([128, 162, 7, 7], f16), T([972, 162, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 162, 7, 7], f16), T([128, 906, 7, 7], f16), T([162, 906, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 906, 1, 1], f16), T([128, 75, 1, 1], f16), T([906, 75, 1, 1], f16), [906], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 75, 1, 1], f16), T([128, 906, 1, 1], f16), T([75, 906, 1, 1], f16), [75], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), T([128, 906, 7, 7], f16), T([906, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 906, [True, True, False]), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), T([128, 151, 7, 7], f16), T([906, 151, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 151, 7, 7], f16), T([128, 840, 7, 7], f16), T([151, 840, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 840, 1, 1], f16), T([128, 70, 1, 1], f16), T([840, 70, 1, 1], f16), [840], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 70, 1, 1], f16), T([128, 840, 1, 1], f16), T([70, 840, 1, 1], f16), [70], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), T([128, 840, 7, 7], f16), T([840, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 840, [True, True, 
False]), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), T([128, 140, 7, 7], f16), T([840, 140, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 140, 7, 7], f16), T([128, 768, 7, 7], f16), T([140, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 768, 1, 1], f16), T([128, 64, 1, 1], f16), T([768, 64, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 1, 1], f16), T([128, 768, 1, 1], f16), T([64, 768, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), T([128, 768, 14, 14], f16), T([768, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 768, [True, True, False]), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([128, 128, 14, 14], f16), T([768, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([128, 702, 14, 14], f16), T([128, 702, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 702, 1, 1], f16), T([128, 58, 1, 1], f16), T([702, 58, 1, 1], f16), [702], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 58, 1, 1], f16), T([128, 702, 1, 1], f16), T([58, 702, 1, 1], f16), [58], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), T([128, 702, 14, 14], f16), T([702, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 702, [True, True, False]), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), T([128, 117, 14, 14], f16), T([702, 117, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 117, 14, 14], f16), T([128, 636, 14, 14], f16), T([117, 636, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 636, 1, 
1], f16), T([128, 53, 1, 1], f16), T([636, 53, 1, 1], f16), [636], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 53, 1, 1], f16), T([128, 636, 1, 1], f16), T([53, 636, 1, 1], f16), [53], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), T([128, 636, 14, 14], f16), T([636, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 636, [True, True, False]), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), T([128, 106, 14, 14], f16), T([636, 106, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 106, 14, 14], f16), T([128, 570, 14, 14], f16), T([106, 570, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 570, 1, 1], f16), T([128, 47, 1, 1], f16), T([570, 47, 1, 1], f16), [570], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 47, 1, 1], f16), T([128, 570, 1, 1], f16), T([47, 570, 1, 1], f16), [47], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), T([128, 570, 14, 14], f16), T([570, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 570, [True, True, False]), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), T([128, 95, 14, 14], f16), T([570, 95, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 95, 14, 14], f16), T([128, 504, 14, 14], f16), T([95, 504, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 504, 1, 1], f16), T([128, 42, 1, 1], f16), T([504, 42, 1, 1], f16), [504], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 42, 1, 1], f16), T([128, 504, 1, 1], f16), T([42, 504, 1, 1], f16), [42], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), T([128, 504, 14, 14], f16), 
T([504, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 504, [True, True, False]), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), T([128, 84, 14, 14], f16), T([504, 84, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 84, 14, 14], f16), T([128, 432, 14, 14], f16), T([84, 432, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 432, 1, 1], f16), T([128, 36, 1, 1], f16), T([432, 36, 1, 1], f16), [432], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 36, 1, 1], f16), T([128, 432, 1, 1], f16), T([36, 432, 1, 1], f16), [36], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 432, 14, 14], f16), T([128, 432, 14, 14], f16), T([432, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 432, [True, True, False]), {}) +cnt: 1, ((T([128, 432, 14, 14], f16), T([128, 72, 14, 14], f16), T([432, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 72, 14, 14], f16), T([128, 366, 14, 14], f16), T([72, 366, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 366, 1, 1], f16), T([128, 30, 1, 1], f16), T([366, 30, 1, 1], f16), [366], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 30, 1, 1], f16), T([128, 366, 1, 1], f16), T([30, 366, 1, 1], f16), [30], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 366, 14, 14], f16), T([128, 366, 28, 28], f16), T([366, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 366, [True, True, False]), {}) +cnt: 1, ((T([128, 366, 28, 28], f16), T([128, 61, 28, 28], f16), T([366, 61, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 61, 28, 28], f16), T([128, 300, 28, 28], f16), T([61, 300, 1, 1], f16), [0], [1, 1], 
[0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 300, 1, 1], f16), T([128, 25, 1, 1], f16), T([300, 25, 1, 1], f16), [300], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 25, 1, 1], f16), T([128, 300, 1, 1], f16), T([25, 300, 1, 1], f16), [25], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), T([128, 300, 28, 28], f16), T([300, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 300, [True, True, False]), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), T([128, 50, 28, 28], f16), T([300, 50, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 50, 28, 28], f16), T([128, 228, 28, 28], f16), T([50, 228, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 228, 1, 1], f16), T([128, 19, 1, 1], f16), T([228, 19, 1, 1], f16), [228], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 19, 1, 1], f16), T([128, 228, 1, 1], f16), T([19, 228, 1, 1], f16), [19], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 228, 28, 28], f16), T([128, 228, 56, 56], f16), T([228, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 228, [True, True, False]), {}) +cnt: 1, ((T([128, 228, 56, 56], f16), T([128, 38, 56, 56], f16), T([228, 38, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 38, 56, 56], f16), T([128, 162, 56, 56], f16), T([38, 162, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 162, 56, 56], f16), T([128, 162, 56, 56], f16), T([162, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 162, [True, True, False]), {}) +cnt: 1, ((T([128, 162, 56, 56], f16), T([128, 27, 56, 56], f16), T([162, 27, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, 
[True, True, False]), {}) +cnt: 1, ((T([128, 27, 56, 56], f16), T([128, 96, 56, 56], f16), T([27, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 112, 112], f16), T([96, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 16, 112, 112], f16), T([96, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1280, 7, 7], f16, stride=(1280, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16, stride=(1044, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 972, 7, 7], f16, stride=(972, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 906, 7, 7], f16, stride=(906, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 840, 7, 7], f16, stride=(840, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 768, 7, 7], f16, stride=(768, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 702, 14, 14], f16, stride=(702, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 636, 14, 14], f16, stride=(636, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 570, 14, 14], f16, stride=(570, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 504, 14, 14], f16, stride=(504, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 432, 14, 14], f16, stride=(432, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 366, 14, 14], f16, stride=(366, 1, 0, 0)), 196), {}) +cnt: 
1, ((T([128, 300, 28, 28], f16, stride=(300, 1, 0, 0)), 784), {}) +cnt: 1, ((T([128, 228, 28, 28], f16, stride=(228, 1, 0, 0)), 784), {}) +Operator: aten.hardtanh.default +cnt: 1, ((T([128, 32, 112, 112], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 162, 56, 56], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 228, 28, 28], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 366, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 432, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16), 0.0, 6.0), {}) +Operator: aten.hardtanh_backward.default +cnt: 1, ((T([128, 1044, 7, 7], f16), T([128, 1044, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), T([128, 972, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), T([128, 906, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), T([128, 840, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), T([128, 768, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), T([128, 702, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), T([128, 636, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), T([128, 570, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), T([128, 504, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 432, 14, 14], f16), T([128, 432, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 366, 14, 14], f16), T([128, 366, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), T([128, 
300, 28, 28], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 228, 28, 28], f16), T([128, 228, 28, 28], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 162, 56, 56], f16), T([128, 162, 56, 56], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), 0.0, 6.0), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), 0.0, 6.0), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 228, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 366, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 432, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1280], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([128, 228, 28, 28], f16), T([128, 228, 1, 1], f16)), {}) +cnt: 2, ((T([128, 300, 28, 28], f16), T([128, 300, 1, 1], f16)), {}) +cnt: 2, ((T([128, 366, 14, 14], f16), T([128, 366, 1, 1], f16)), {}) +cnt: 2, ((T([128, 432, 14, 14], f16), T([128, 432, 1, 1], f16)), {}) +cnt: 2, ((T([128, 504, 14, 14], f16), T([128, 504, 1, 1], f16)), {}) +cnt: 2, ((T([128, 570, 14, 14], f16), T([128, 570, 1, 1], f16)), {}) +cnt: 2, ((T([128, 636, 14, 14], f16), T([128, 636, 1, 1], f16)), {}) +cnt: 2, ((T([128, 702, 14, 14], f16), T([128, 702, 1, 
1], f16)), {}) +cnt: 2, ((T([128, 768, 7, 7], f16), T([128, 768, 1, 1], f16)), {}) +cnt: 2, ((T([128, 840, 7, 7], f16), T([128, 840, 1, 1], f16)), {}) +cnt: 2, ((T([128, 906, 7, 7], f16), T([128, 906, 1, 1], f16)), {}) +cnt: 2, ((T([128, 972, 7, 7], f16), T([128, 972, 1, 1], f16)), {}) +cnt: 2, ((T([128, 1044, 7, 7], f16), T([128, 1044, 1, 1], f16)), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16), T([128, 1044, 7, 7], f16)), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), T([128, 972, 7, 7], f16)), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), T([128, 906, 7, 7], f16)), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), T([128, 840, 7, 7], f16)), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), T([128, 768, 7, 7], f16)), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), T([128, 702, 14, 14], f16)), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), T([128, 636, 14, 14], f16)), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), T([128, 570, 14, 14], f16)), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), T([128, 504, 14, 14], f16)), {}) +cnt: 1, ((T([128, 432, 14, 14], f16), T([128, 432, 14, 14], f16)), {}) +cnt: 1, ((T([128, 366, 14, 14], f16), T([128, 366, 14, 14], f16)), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), T([128, 300, 28, 28], f16)), {}) +cnt: 1, ((T([128, 228, 28, 28], f16), T([128, 228, 28, 28], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 27, 56, 56], f16), T([27], f16), T([27], f16), T([27], f16), T([27], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 162, 56, 56], f16), T([162], f16), T([162], f16), T([162], f16), 
T([162], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 38, 56, 56], f16), T([38], f16), T([38], f16), T([38], f16), T([38], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 228, 56, 56], f16), T([228], f16), T([228], f16), T([228], f16), T([228], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 228, 28, 28], f16), T([228], f16), T([228], f16), T([228], f16), T([228], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 19, 1, 1], f16), T([19], f16), T([19], f16), T([19], f16), T([19], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 50, 28, 28], f16), T([50], f16), T([50], f16), T([50], f16), T([50], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 300, 28, 28], f16), T([300], f16), T([300], f16), T([300], f16), T([300], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 25, 1, 1], f16), T([25], f16), T([25], f16), T([25], f16), T([25], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 61, 28, 28], f16), T([61], f16), T([61], f16), T([61], f16), T([61], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 366, 28, 28], f16), T([366], f16), T([366], f16), T([366], f16), T([366], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 366, 14, 14], f16), T([366], f16), T([366], f16), T([366], f16), T([366], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 30, 1, 1], f16), T([30], f16), T([30], f16), T([30], f16), T([30], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 72, 14, 14], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 432, 14, 14], f16), T([432], f16), T([432], f16), T([432], f16), T([432], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 36, 1, 1], f16), T([36], f16), T([36], f16), T([36], f16), T([36], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 84, 14, 14], f16), T([84], f16), T([84], f16), T([84], f16), T([84], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 504, 14, 14], f16), T([504], f16), T([504], f16), T([504], f16), T([504], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 42, 1, 1], f16), T([42], f16), T([42], 
f16), T([42], f16), T([42], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 95, 14, 14], f16), T([95], f16), T([95], f16), T([95], f16), T([95], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 570, 14, 14], f16), T([570], f16), T([570], f16), T([570], f16), T([570], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 47, 1, 1], f16), T([47], f16), T([47], f16), T([47], f16), T([47], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 106, 14, 14], f16), T([106], f16), T([106], f16), T([106], f16), T([106], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 636, 14, 14], f16), T([636], f16), T([636], f16), T([636], f16), T([636], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 53, 1, 1], f16), T([53], f16), T([53], f16), T([53], f16), T([53], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 117, 14, 14], f16), T([117], f16), T([117], f16), T([117], f16), T([117], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 702, 14, 14], f16), T([702], f16), T([702], f16), T([702], f16), T([702], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 58, 1, 1], f16), T([58], f16), T([58], f16), T([58], f16), T([58], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 64, 1, 1], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 140, 7, 7], f16), T([140], f16), T([140], f16), T([140], f16), T([140], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 840, 7, 7], f16), T([840], f16), T([840], f16), T([840], f16), T([840], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 70, 1, 1], f16), T([70], f16), T([70], f16), T([70], f16), T([70], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 151, 7, 7], f16), 
T([151], f16), T([151], f16), T([151], f16), T([151], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 906, 7, 7], f16), T([906], f16), T([906], f16), T([906], f16), T([906], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 75, 1, 1], f16), T([75], f16), T([75], f16), T([75], f16), T([75], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 162, 7, 7], f16), T([162], f16), T([162], f16), T([162], f16), T([162], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 972, 7, 7], f16), T([972], f16), T([972], f16), T([972], f16), T([972], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 81, 1, 1], f16), T([81], f16), T([81], f16), T([81], f16), T([81], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 174, 7, 7], f16), T([174], f16), T([174], f16), T([174], f16), T([174], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 1044, 7, 7], f16), T([1044], f16), T([1044], f16), T([1044], f16), T([1044], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 87, 1, 1], f16), T([87], f16), T([87], f16), T([87], f16), T([87], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 185, 7, 7], f16), T([185], f16), T([185], f16), T([185], f16), T([185], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 185, 7, 7], f16), T([128, 185, 7, 7], f16), T([185], f16), T([185], f16), T([185], f16), T([185], f32), T([185], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 87, 1, 1], f16), T([128, 87, 1, 1], f16), T([87], f16), T([87], f16), T([87], f16), T([87], f32), T([87], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 1044, 7, 7], f16), T([128, 1044, 7, 7], f16), T([1044], f16), T([1044], f16), T([1044], f16), T([1044], f32), T([1044], 
f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 174, 7, 7], f16), T([128, 174, 7, 7], f16), T([174], f16), T([174], f16), T([174], f16), T([174], f32), T([174], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 81, 1, 1], f16), T([128, 81, 1, 1], f16), T([81], f16), T([81], f16), T([81], f16), T([81], f32), T([81], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 972, 7, 7], f16), T([128, 972, 7, 7], f16), T([972], f16), T([972], f16), T([972], f16), T([972], f32), T([972], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 162, 7, 7], f16), T([128, 162, 7, 7], f16), T([162], f16), T([162], f16), T([162], f16), T([162], f32), T([162], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 75, 1, 1], f16), T([128, 75, 1, 1], f16), T([75], f16), T([75], f16), T([75], f16), T([75], f32), T([75], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 906, 7, 7], f16), T([128, 906, 7, 7], f16), T([906], f16), T([906], f16), T([906], f16), T([906], f32), T([906], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 151, 7, 7], f16), T([128, 151, 7, 7], f16), T([151], f16), T([151], f16), T([151], f16), T([151], f32), T([151], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 70, 1, 1], f16), T([128, 70, 1, 1], f16), T([70], f16), T([70], f16), T([70], f16), T([70], f32), T([70], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 840, 7, 7], f16), T([128, 840, 7, 7], f16), T([840], f16), T([840], f16), T([840], f16), T([840], f32), T([840], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 140, 7, 7], f16), T([128, 140, 7, 7], f16), T([140], f16), T([140], f16), T([140], f16), T([140], f32), T([140], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 1, 1], f16), T([128, 64, 1, 1], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), T([128, 
768, 7, 7], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([128, 768, 14, 14], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 14, 14], f16), T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 58, 1, 1], f16), T([128, 58, 1, 1], f16), T([58], f16), T([58], f16), T([58], f16), T([58], f32), T([58], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 702, 14, 14], f16), T([128, 702, 14, 14], f16), T([702], f16), T([702], f16), T([702], f16), T([702], f32), T([702], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 117, 14, 14], f16), T([128, 117, 14, 14], f16), T([117], f16), T([117], f16), T([117], f16), T([117], f32), T([117], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 53, 1, 1], f16), T([128, 53, 1, 1], f16), T([53], f16), T([53], f16), T([53], f16), T([53], f32), T([53], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 636, 14, 14], f16), T([128, 636, 14, 14], f16), T([636], f16), T([636], f16), T([636], f16), T([636], f32), T([636], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 106, 14, 14], f16), T([128, 106, 14, 14], f16), T([106], f16), T([106], f16), T([106], f16), T([106], f32), T([106], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 47, 1, 1], f16), T([128, 47, 1, 1], f16), T([47], f16), T([47], f16), T([47], f16), T([47], f32), T([47], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 570, 14, 14], f16), T([128, 570, 14, 14], f16), T([570], f16), T([570], f16), T([570], f16), T([570], f32), T([570], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 95, 14, 14], f16), T([128, 95, 14, 14], f16), T([95], f16), T([95], f16), T([95], 
f16), T([95], f32), T([95], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 42, 1, 1], f16), T([128, 42, 1, 1], f16), T([42], f16), T([42], f16), T([42], f16), T([42], f32), T([42], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 504, 14, 14], f16), T([128, 504, 14, 14], f16), T([504], f16), T([504], f16), T([504], f16), T([504], f32), T([504], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 84, 14, 14], f16), T([128, 84, 14, 14], f16), T([84], f16), T([84], f16), T([84], f16), T([84], f32), T([84], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 36, 1, 1], f16), T([128, 36, 1, 1], f16), T([36], f16), T([36], f16), T([36], f16), T([36], f32), T([36], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 432, 14, 14], f16), T([128, 432, 14, 14], f16), T([432], f16), T([432], f16), T([432], f16), T([432], f32), T([432], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 72, 14, 14], f16), T([128, 72, 14, 14], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 30, 1, 1], f16), T([128, 30, 1, 1], f16), T([30], f16), T([30], f16), T([30], f16), T([30], f32), T([30], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 366, 14, 14], f16), T([128, 366, 14, 14], f16), T([366], f16), T([366], f16), T([366], f16), T([366], f32), T([366], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 366, 28, 28], f16), T([128, 366, 28, 28], f16), T([366], f16), T([366], f16), T([366], f16), T([366], f32), T([366], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 61, 28, 28], f16), T([128, 61, 28, 28], f16), T([61], f16), T([61], f16), T([61], f16), T([61], f32), T([61], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 25, 1, 1], f16), T([128, 25, 1, 1], f16), T([25], f16), T([25], f16), T([25], f16), T([25], f32), T([25], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, 
((T([128, 300, 28, 28], f16), T([128, 300, 28, 28], f16), T([300], f16), T([300], f16), T([300], f16), T([300], f32), T([300], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 50, 28, 28], f16), T([128, 50, 28, 28], f16), T([50], f16), T([50], f16), T([50], f16), T([50], f32), T([50], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 19, 1, 1], f16), T([128, 19, 1, 1], f16), T([19], f16), T([19], f16), T([19], f16), T([19], f32), T([19], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 228, 28, 28], f16), T([128, 228, 28, 28], f16), T([228], f16), T([228], f16), T([228], f16), T([228], f32), T([228], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 228, 56, 56], f16), T([128, 228, 56, 56], f16), T([228], f16), T([228], f16), T([228], f16), T([228], f32), T([228], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 38, 56, 56], f16), T([128, 38, 56, 56], f16), T([38], f16), T([38], f16), T([38], f16), T([38], f32), T([38], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 162, 56, 56], f16), T([128, 162, 56, 56], f16), T([162], f16), T([162], f16), T([162], f16), T([162], f32), T([162], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 27, 56, 56], f16), T([128, 27, 56, 56], f16), T([27], f16), T([27], f16), T([27], f16), T([27], f32), T([27], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), 
T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 19, 1, 1], f16),), {}) +cnt: 1, ((T([128, 25, 1, 1], f16),), {}) +cnt: 1, ((T([128, 30, 1, 1], f16),), {}) +cnt: 1, ((T([128, 36, 1, 1], f16),), {}) +cnt: 1, ((T([128, 42, 1, 1], f16),), {}) +cnt: 1, ((T([128, 47, 1, 1], f16),), {}) +cnt: 1, ((T([128, 53, 1, 1], f16),), {}) +cnt: 1, ((T([128, 58, 1, 1], f16),), {}) +cnt: 1, ((T([128, 64, 1, 1], f16),), {}) +cnt: 1, ((T([128, 70, 1, 1], f16),), {}) +cnt: 1, ((T([128, 75, 1, 1], f16),), {}) +cnt: 1, ((T([128, 81, 1, 1], f16),), {}) +cnt: 1, ((T([128, 87, 1, 1], f16),), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([128, 228, 1, 1], f16),), {}) +cnt: 1, ((T([128, 300, 1, 1], f16),), {}) +cnt: 1, ((T([128, 366, 1, 1], f16),), {}) +cnt: 1, ((T([128, 432, 1, 1], f16),), {}) +cnt: 1, ((T([128, 504, 1, 1], f16),), {}) +cnt: 1, ((T([128, 570, 1, 1], f16),), {}) +cnt: 1, ((T([128, 636, 1, 1], f16),), {}) +cnt: 1, ((T([128, 702, 1, 1], f16),), {}) +cnt: 1, ((T([128, 768, 1, 1], f16),), {}) +cnt: 1, ((T([128, 840, 1, 1], f16),), {}) +cnt: 1, ((T([128, 906, 1, 1], f16),), {}) +cnt: 1, ((T([128, 972, 1, 1], f16),), {}) +cnt: 1, ((T([128, 1044, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 1, ((T([128, 1044, 1, 1], f16), T([128, 1044, 1, 1], f16)), {}) +cnt: 1, ((T([128, 972, 1, 1], f16), T([128, 972, 1, 1], f16)), {}) +cnt: 1, ((T([128, 906, 1, 1], f16), T([128, 906, 1, 1], f16)), {}) +cnt: 1, ((T([128, 840, 1, 1], f16), T([128, 840, 1, 1], f16)), {}) +cnt: 1, ((T([128, 768, 1, 1], f16), T([128, 768, 1, 1], f16)), {}) +cnt: 1, ((T([128, 702, 1, 1], f16), T([128, 702, 1, 1], f16)), {}) +cnt: 1, ((T([128, 636, 1, 1], 
f16), T([128, 636, 1, 1], f16)), {}) +cnt: 1, ((T([128, 570, 1, 1], f16), T([128, 570, 1, 1], f16)), {}) +cnt: 1, ((T([128, 504, 1, 1], f16), T([128, 504, 1, 1], f16)), {}) +cnt: 1, ((T([128, 432, 1, 1], f16), T([128, 432, 1, 1], f16)), {}) +cnt: 1, ((T([128, 366, 1, 1], f16), T([128, 366, 1, 1], f16)), {}) +cnt: 1, ((T([128, 300, 1, 1], f16), T([128, 300, 1, 1], f16)), {}) +cnt: 1, ((T([128, 228, 1, 1], f16), T([128, 228, 1, 1], f16)), {}) +Operator: aten.silu_.default +cnt: 1, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 96, 112, 112], f16),), {}) +cnt: 1, ((T([128, 162, 56, 56], f16),), {}) +cnt: 1, ((T([128, 228, 56, 56], f16),), {}) +cnt: 1, ((T([128, 300, 28, 28], f16),), {}) +cnt: 1, ((T([128, 366, 28, 28], f16),), {}) +cnt: 1, ((T([128, 432, 14, 14], f16),), {}) +cnt: 1, ((T([128, 504, 14, 14], f16),), {}) +cnt: 1, ((T([128, 570, 14, 14], f16),), {}) +cnt: 1, ((T([128, 636, 14, 14], f16),), {}) +cnt: 1, ((T([128, 702, 14, 14], f16),), {}) +cnt: 1, ((T([128, 768, 14, 14], f16),), {}) +cnt: 1, ((T([128, 840, 7, 7], f16),), {}) +cnt: 1, ((T([128, 906, 7, 7], f16),), {}) +cnt: 1, ((T([128, 972, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16),), {}) +Operator: aten.silu_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 1280, 7, 7], f16)), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16), T([128, 1044, 7, 7], f16)), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), T([128, 972, 7, 7], f16)), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), T([128, 906, 7, 7], f16)), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), T([128, 840, 7, 7], f16)), {}) +cnt: 1, ((T([128, 768, 14, 14], f16), T([128, 768, 14, 14], f16)), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), T([128, 702, 14, 14], f16)), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), T([128, 636, 14, 14], f16)), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), T([128, 570, 14, 14], f16)), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), T([128, 504, 14, 14], f16)), {}) +cnt: 1, ((T([128, 432, 
14, 14], f16), T([128, 432, 14, 14], f16)), {}) +cnt: 1, ((T([128, 366, 28, 28], f16), T([128, 366, 28, 28], f16)), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), T([128, 300, 28, 28], f16)), {}) +cnt: 1, ((T([128, 228, 56, 56], f16), T([128, 228, 56, 56], f16)), {}) +cnt: 1, ((T([128, 162, 56, 56], f16), T([128, 162, 56, 56], f16)), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 96, 112, 112], f16)), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16)), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([128, 11, 7, 7], f16, stride=(9065, 49, 7, 1)), [128, 185, 7, 7], 1, 174, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 185, 7, 7], f16), [128, 185, 7, 7], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 174, 7, 7], f16, stride=(9065, 49, 7, 1)), [128, 185, 7, 7], 1, 0, 174, 1), {}) +cnt: 1, ((T([128, 12, 7, 7], f16, stride=(8526, 49, 7, 1)), [128, 174, 7, 7], 1, 162, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 174, 7, 7], f16), [128, 174, 7, 7], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 162, 7, 7], f16, stride=(8526, 49, 7, 1)), [128, 174, 7, 7], 1, 0, 162, 1), {}) +cnt: 1, ((T([128, 11, 7, 7], f16, stride=(7938, 49, 7, 1)), [128, 162, 7, 7], 1, 151, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 162, 7, 7], f16), [128, 162, 7, 7], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 151, 7, 7], f16, stride=(7938, 49, 7, 1)), [128, 162, 7, 7], 1, 0, 151, 1), {}) +cnt: 1, ((T([128, 11, 7, 7], f16, stride=(7399, 49, 7, 1)), [128, 151, 7, 7], 1, 140, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 151, 7, 7], f16), [128, 151, 7, 7], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 140, 7, 7], f16, stride=(7399, 49, 7, 1)), [128, 151, 7, 7], 1, 0, 140, 1), {}) +cnt: 1, ((T([128, 11, 14, 14], f16, stride=(25088, 196, 14, 1)), [128, 128, 14, 14], 1, 117, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 128, 14, 14], f16), [128, 128, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 117, 14, 14], f16, 
stride=(25088, 196, 14, 1)), [128, 128, 14, 14], 1, 0, 117, 1), {}) +cnt: 1, ((T([128, 11, 14, 14], f16, stride=(22932, 196, 14, 1)), [128, 117, 14, 14], 1, 106, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 117, 14, 14], f16), [128, 117, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 106, 14, 14], f16, stride=(22932, 196, 14, 1)), [128, 117, 14, 14], 1, 0, 106, 1), {}) +cnt: 1, ((T([128, 11, 14, 14], f16, stride=(20776, 196, 14, 1)), [128, 106, 14, 14], 1, 95, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 106, 14, 14], f16), [128, 106, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 95, 14, 14], f16, stride=(20776, 196, 14, 1)), [128, 106, 14, 14], 1, 0, 95, 1), {}) +cnt: 1, ((T([128, 11, 14, 14], f16, stride=(18620, 196, 14, 1)), [128, 95, 14, 14], 1, 84, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 95, 14, 14], f16), [128, 95, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 84, 14, 14], f16, stride=(18620, 196, 14, 1)), [128, 95, 14, 14], 1, 0, 84, 1), {}) +cnt: 1, ((T([128, 12, 14, 14], f16, stride=(16464, 196, 14, 1)), [128, 84, 14, 14], 1, 72, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 84, 14, 14], f16), [128, 84, 14, 14], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 72, 14, 14], f16, stride=(16464, 196, 14, 1)), [128, 84, 14, 14], 1, 0, 72, 1), {}) +cnt: 1, ((T([128, 11, 28, 28], f16, stride=(47824, 784, 28, 1)), [128, 61, 28, 28], 1, 50, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 61, 28, 28], f16), [128, 61, 28, 28], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 50, 28, 28], f16, stride=(47824, 784, 28, 1)), [128, 61, 28, 28], 1, 0, 50, 1), {}) +cnt: 1, ((T([128, 11, 56, 56], f16, stride=(119168, 3136, 56, 1)), [128, 38, 56, 56], 1, 27, 9223372036854775807, 1), {}) +cnt: 2, ((T([128, 38, 56, 56], f16), [128, 38, 56, 56], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([128, 27, 56, 56], f16, stride=(119168, 3136, 56, 1)), [128, 38, 56, 56], 1, 0, 27, 1), {}) +Operator: 
aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 1, ((T([128, 1044, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 972, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 906, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 840, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 702, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 636, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 570, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 504, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 432, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 366, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 300, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 228, 28, 28], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 87, 1, 1], f16), T([128, 87, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 81, 1, 1], f16), T([128, 81, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 75, 1, 1], f16), T([128, 75, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 70, 1, 1], f16), T([128, 70, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 64, 1, 1], f16), T([128, 64, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 58, 1, 1], f16), T([128, 58, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 53, 1, 1], f16), T([128, 53, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 47, 1, 1], f16), T([128, 47, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 42, 1, 1], f16), T([128, 42, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 36, 1, 1], f16), T([128, 36, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 30, 1, 1], f16), T([128, 30, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 25, 1, 1], f16), T([128, 25, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 19, 1, 1], f16), T([128, 19, 1, 1], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/sebotnet33ts_256_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/sebotnet33ts_256_training.txt new file mode 100644 index 000000000..cdfa544bf --- 
/dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/sebotnet33ts_256_training.txt @@ -0,0 +1,334 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 1, ((T([256, 1024, 1024], f16), -1, False), {}) +cnt: 2, ((T([256, 256, 256], f16), -1, False), {}) +cnt: 1, ((T([256, 64, 64], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 1, ((T([256, 64, 64], f16), T([256, 64, 64], f16), -1, f16), {}) +cnt: 2, ((T([256, 256, 256], f16), T([256, 256, 256], f16), -1, f16), {}) +cnt: 1, ((T([256, 1024, 1024], f16), T([256, 1024, 1024], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 3, ((T([64, 128, 32, 32], f16), [256, 32, 1024]), {}) +cnt: 1, ((T([256, 1024, 1024], f16), [256, 1024, 1024]), {}) +cnt: 2, ((T([256, 32, 32, 32], f16), [262144, 32]), {}) +cnt: 2, ((T([262144, 63], f16), [256, 32, 32, 63]), {}) +cnt: 1, ((T([256, 32, 32, 32, 32], f16), [256, 1024, 1024]), {}) +cnt: 1, ((T([256, 1024, 32], f16), [256, 1024, 32]), {}) +cnt: 3, ((T([256, 32, 1024], f16), [64, 128, 32, 32]), {}) +cnt: 3, ((T([64, 256, 16, 16], f16), [256, 64, 256]), {}) +cnt: 2, ((T([256, 256, 256], f16), [256, 256, 256]), {}) +cnt: 2, ((T([256, 16, 16, 64], f16), [65536, 64]), {}) +cnt: 4, ((T([65536, 31], f16), [256, 16, 16, 31]), {}) +cnt: 2, ((T([256, 16, 16, 16, 16], f16), [256, 256, 256]), {}) +cnt: 1, ((T([256, 256, 64], f16), [256, 256, 64]), {}) +cnt: 3, ((T([256, 64, 256], f16), [64, 256, 16, 16]), {}) +cnt: 3, ((T([64, 512, 16, 16], f16), [256, 128, 256]), {}) +cnt: 2, ((T([256, 16, 16, 128], f16), [65536, 128]), {}) +cnt: 1, ((T([256, 256, 128], f16), [256, 256, 128]), {}) +cnt: 3, ((T([256, 128, 256], f16), [64, 512, 16, 16]), {}) +cnt: 3, ((T([64, 512, 8, 8], f16), [256, 128, 64]), {}) +cnt: 1, ((T([256, 64, 64], f16), [256, 64, 64]), {}) 
+cnt: 2, ((T([256, 8, 8, 128], f16), [16384, 128]), {}) +cnt: 2, ((T([16384, 15], f16), [256, 8, 8, 15]), {}) +cnt: 1, ((T([256, 8, 8, 8, 8], f16), [256, 64, 64]), {}) +cnt: 1, ((T([256, 64, 128], f16), [256, 64, 128]), {}) +cnt: 3, ((T([256, 128, 64], f16), [64, 512, 8, 8]), {}) +cnt: 1, ((T([256, 8, 8, 128], f16), [256, 64, 128]), {}) +cnt: 1, ((T([256, 16, 16, 128], f16), [256, 256, 128]), {}) +cnt: 1, ((T([256, 16, 16, 64], f16), [256, 256, 64]), {}) +cnt: 1, ((T([256, 32, 32, 32], f16), [256, 1024, 32]), {}) +Operator: aten.add.Tensor +cnt: 38, ((T([], i64), 1), {}) +cnt: 4, ((T([64, 256, 64, 64], f16), T([64, 256, 64, 64], f16)), {}) +cnt: 6, ((T([64, 512, 32, 32], f16), T([64, 512, 32, 32], f16)), {}) +cnt: 1, ((T([256, 32, 32, 32, 32], f16, stride=(66528, 63, 2079, 1, 0)), T([256, 32, 32, 32, 32], f16, stride=(66528, 2079, 63, 0, 1))), {}) +cnt: 1, ((T([256, 1024, 1024], f16), T([256, 1024, 1024], f16)), {}) +cnt: 6, ((T([64, 1024, 16, 16], f16), T([64, 1024, 16, 16], f16)), {}) +cnt: 2, ((T([256, 16, 16, 16, 16], f16, stride=(8432, 31, 527, 1, 0)), T([256, 16, 16, 16, 16], f16, stride=(8432, 527, 31, 0, 1))), {}) +cnt: 2, ((T([256, 256, 256], f16), T([256, 256, 256], f16)), {}) +cnt: 3, ((T([64, 1536, 8, 8], f16), T([64, 1536, 8, 8], f16)), {}) +cnt: 1, ((T([256, 8, 8, 8, 8], f16, stride=(1080, 15, 135, 1, 0)), T([256, 8, 8, 8, 8], f16, stride=(1080, 135, 15, 0, 1))), {}) +cnt: 1, ((T([256, 64, 64], f16), T([256, 64, 64], f16)), {}) +cnt: 1, ((T([256, 8, 8, 128], f16, stride=(8192, 128, 1024, 1)), T([256, 8, 8, 128], f16)), {}) +cnt: 1, ((T([256, 64, 128], f16), T([256, 64, 128], f16)), {}) +cnt: 1, ((T([256, 16, 16, 128], f16, stride=(32768, 128, 2048, 1)), T([256, 16, 16, 128], f16)), {}) +cnt: 1, ((T([256, 256, 128], f16), T([256, 256, 128], f16)), {}) +cnt: 1, ((T([256, 16, 16, 64], f16, stride=(16384, 64, 1024, 1)), T([256, 16, 16, 64], f16)), {}) +cnt: 1, ((T([256, 256, 64], f16), T([256, 256, 64], f16)), {}) +cnt: 2, ((T([64, 256, 16, 16], f16), 
T([64, 256, 16, 16], f16)), {}) +cnt: 1, ((T([256, 32, 32, 32], f16, stride=(32768, 32, 1024, 1)), T([256, 32, 32, 32], f16)), {}) +cnt: 1, ((T([256, 1024, 32], f16), T([256, 1024, 32], f16)), {}) +cnt: 2, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16)), {}) +cnt: 3, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([64, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([64, 512, 16, 16], f16), [2, 2], [2, 2]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([64, 512, 8, 8], f16), T([64, 512, 16, 16], f16), [2, 2], [2, 2], [0, 0], False, True, None), {}) +Operator: aten.bmm.default +cnt: 2, ((T([256, 1024, 32], f16, stride=(32768, 1, 1024)), T([256, 32, 1024], f16)), {}) +cnt: 2, ((T([256, 1024, 1024], f16), T([256, 1024, 32], f16, stride=(32768, 1, 1024))), {}) +cnt: 2, ((T([256, 256, 64], f16, stride=(16384, 1, 256)), T([256, 64, 256], f16)), {}) +cnt: 2, ((T([256, 256, 256], f16), T([256, 256, 64], f16, stride=(16384, 1, 256))), {}) +cnt: 2, ((T([256, 256, 128], f16, stride=(32768, 1, 256)), T([256, 128, 256], f16)), {}) +cnt: 2, ((T([256, 256, 256], f16), T([256, 256, 128], f16, stride=(32768, 1, 256))), {}) +cnt: 2, ((T([256, 64, 128], f16, stride=(8192, 1, 64)), T([256, 128, 64], f16)), {}) +cnt: 2, ((T([256, 64, 64], f16), T([256, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 1, ((T([256, 64, 64], f16, stride=(4096, 1, 64)), T([256, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 1, ((T([256, 128, 64], f16), T([256, 64, 64], f16)), {}) +cnt: 1, ((T([256, 256, 256], f16, stride=(65536, 1, 256)), T([256, 256, 128], f16, stride=(32768, 1, 256))), {}) +cnt: 1, ((T([256, 128, 256], f16), T([256, 256, 256], f16)), {}) +cnt: 1, ((T([256, 256, 256], f16, stride=(65536, 1, 256)), T([256, 256, 64], f16, stride=(16384, 1, 256))), {}) +cnt: 1, ((T([256, 64, 256], f16), T([256, 256, 256], f16)), {}) +cnt: 1, ((T([256, 1024, 
1024], f16, stride=(1048576, 1, 1024)), T([256, 1024, 32], f16, stride=(32768, 1, 1024))), {}) +cnt: 1, ((T([256, 32, 1024], f16), T([256, 1024, 1024], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16)], 1), {}) +cnt: 1, (([T([64, 512, 16, 16], f16), T([64, 512, 16, 16], f16), T([64, 512, 16, 16], f16)], 1), {}) +cnt: 1, (([T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16)], 1), {}) +cnt: 1, (([T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 256, 256], f16),), {}) +cnt: 1, ((T([64, 24, 128, 128], f16),), {}) +cnt: 1, ((T([64, 32, 128, 128], f16),), {}) +cnt: 5, ((T([64, 64, 64, 64], f16),), {}) +cnt: 2, ((T([64, 256, 64, 64], f16),), {}) +cnt: 1, ((T([64, 128, 64, 64], f16),), {}) +cnt: 5, ((T([64, 128, 32, 32], f16),), {}) +cnt: 3, ((T([64, 512, 32, 32], f16),), {}) +cnt: 1, ((T([64, 256, 32, 32], f16),), {}) +cnt: 5, ((T([64, 256, 16, 16], f16),), {}) +cnt: 3, ((T([64, 1024, 16, 16], f16),), {}) +cnt: 1, ((T([64, 512, 16, 16], f16),), {}) +cnt: 3, ((T([64, 512, 8, 8], f16),), {}) +cnt: 2, ((T([64, 1536, 8, 8], f16),), {}) +cnt: 1, ((T([64, 1280, 8, 8], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 2, ((T([8192, 32, 63], f16), [0, 1], 0.0), {}) +cnt: 2, ((T([8192, 2048], f16), [0, 31], 0.0), {}) +cnt: 4, ((T([4096, 16, 31], f16), [0, 1], 0.0), {}) +cnt: 4, ((T([4096, 512], f16), [0, 15], 0.0), {}) +cnt: 2, ((T([2048, 8, 15], f16), [0, 1], 0.0), {}) +cnt: 2, ((T([2048, 128], f16), [0, 7], 0.0), {}) +cnt: 2, ((T([2048, 135], f16), [0, -7]), {}) +cnt: 2, ((T([2048, 8, 16], f16), [0, -1]), {}) +cnt: 4, ((T([4096, 527], f16), [0, -15]), {}) +cnt: 4, ((T([4096, 16, 32], f16), [0, -1]), {}) +cnt: 2, ((T([8192, 2079], f16), [0, -31]), {}) +cnt: 2, ((T([8192, 32, 64], f16), [0, -1]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 256, 256], f16), T([24, 
3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 24, 128, 128], f16), T([32, 24, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([64, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 64, 64], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 64, 64, 64], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 64, 1, 1], f16), T([8, 64, 1, 1], f16), T([8], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 8, 1, 1], f16), T([64, 8, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 64, 64, 64], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 64, 64], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 64, 64], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([128, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 128, 1, 1], f16), T([8, 128, 1, 1], f16), T([8], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 8, 1, 1], f16), T([128, 8, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 128, 32, 32], f16), T([512, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 64, 64], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 512, 32, 32], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 32, 32], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 32, 32], f16), T([384, 128, 1, 1], 
f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 32, 32], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([256, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 256, 1, 1], f16), T([16, 256, 1, 1], f16), T([16], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 16, 1, 1], f16), T([256, 16, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 256, 16, 16], f16), T([1024, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 32, 32], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 1024, 16, 16], f16), T([256, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 16, 16], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 16, 16], f16), T([768, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 1024, 16, 16], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([1536, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 512, 8, 8], f16), T([1536, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 1024, 16, 16], f16), T([1536, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 1536, 8, 8], f16), T([512, 1536, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 1536, 8, 8], f16), T([1280, 1536, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 1280, 8, 8], f16), T([64, 1536, 8, 8], f16), T([1280, 1536, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], 
False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 1536, 8, 8], f16), T([64, 512, 8, 8], f16), T([1536, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 8, 8], f16), T([64, 1536, 8, 8], f16), T([512, 1536, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 1536, 8, 8], f16), T([64, 1024, 16, 16], f16), T([1536, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 1536, 16, 16], f16), T([64, 512, 16, 16], f16), T([1536, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([64, 1024, 16, 16], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 1024, 16, 16], f16), T([64, 256, 16, 16], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 768, 16, 16], f16), T([64, 256, 16, 16], f16), T([768, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 256, 16, 16], f16), T([64, 1024, 16, 16], f16), T([256, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 256, 1, 1], f16), T([64, 16, 1, 1], f16), T([256, 16, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([64, 16, 1, 1], f16), T([64, 256, 1, 1], f16), T([16, 256, 1, 1], f16), [16], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 1024, 16, 16], f16), T([64, 512, 32, 32], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, 
False]), {}) +cnt: 1, ((T([64, 256, 16, 16], f16), T([64, 256, 32, 32], f16), T([256, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([64, 512, 32, 32], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 512, 32, 32], f16), T([64, 128, 32, 32], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 384, 32, 32], f16), T([64, 128, 32, 32], f16), T([384, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 128, 32, 32], f16), T([64, 512, 32, 32], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 128, 1, 1], f16), T([64, 8, 1, 1], f16), T([128, 8, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([64, 8, 1, 1], f16), T([64, 128, 1, 1], f16), T([8, 128, 1, 1], f16), [8], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 512, 32, 32], f16), T([64, 256, 64, 64], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 32, 32], f16), T([64, 128, 64, 64], f16), T([128, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([64, 256, 64, 64], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 256, 64, 64], f16), T([64, 64, 64, 64], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 64, 1, 1], f16), 
T([64, 8, 1, 1], f16), T([64, 8, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([64, 8, 1, 1], f16), T([64, 64, 1, 1], f16), T([8, 64, 1, 1], f16), [8], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 64, 64], f16), T([64, 256, 64, 64], f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 64, 64], f16), T([64, 32, 128, 128], f16), T([64, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([64, 24, 128, 128], f16), T([32, 24, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 24, 128, 128], f16), T([64, 3, 256, 256], f16), T([24, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 256, 256], f16), T([64, 3, 256, 256], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 1280, 8, 8], f16, stride=(1280, 1, 0, 0)), 64), {}) +cnt: 2, ((T([64, 256, 16, 16], f16, stride=(256, 1, 0, 0)), 256), {}) +cnt: 2, ((T([64, 128, 32, 32], f16, stride=(128, 1, 0, 0)), 1024), {}) +cnt: 2, ((T([64, 64, 64, 64], f16, stride=(64, 1, 0, 0)), 4096), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 2, ((T([64, 64, 64, 64], f16), [2, 3], True), {}) +cnt: 2, ((T([64, 128, 32, 32], f16), [2, 3], True), {}) +cnt: 2, ((T([64, 256, 16, 16], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 1280, 8, 8], f16), [-1, -2], True), {}) +Operator: 
aten.mm.default +cnt: 2, ((T([262144, 32], f16), T([32, 63], f16, stride=(1, 32))), {}) +cnt: 2, ((T([65536, 64], f16), T([64, 31], f16, stride=(1, 64))), {}) +cnt: 2, ((T([65536, 128], f16), T([128, 31], f16, stride=(1, 128))), {}) +cnt: 2, ((T([16384, 128], f16), T([128, 15], f16, stride=(1, 128))), {}) +cnt: 1, ((T([64, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 1280], f16)), {}) +cnt: 2, ((T([15, 16384], f16, stride=(1, 15)), T([16384, 128], f16)), {}) +cnt: 2, ((T([16384, 15], f16), T([15, 128], f16)), {}) +cnt: 2, ((T([31, 65536], f16, stride=(1, 31)), T([65536, 128], f16)), {}) +cnt: 2, ((T([65536, 31], f16), T([31, 128], f16)), {}) +cnt: 2, ((T([31, 65536], f16, stride=(1, 31)), T([65536, 64], f16)), {}) +cnt: 2, ((T([65536, 31], f16), T([31, 64], f16)), {}) +cnt: 2, ((T([63, 262144], f16, stride=(1, 63)), T([262144, 32], f16)), {}) +cnt: 2, ((T([262144, 63], f16), T([63, 32], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([64, 64, 64, 64], f16), T([64, 64, 1, 1], f16)), {}) +cnt: 4, ((T([64, 128, 32, 32], f16), T([64, 128, 1, 1], f16)), {}) +cnt: 2, ((T([256, 1024, 1024], f16), 0.1767766952966369), {}) +cnt: 4, ((T([64, 256, 16, 16], f16), T([64, 256, 1, 1], f16)), {}) +cnt: 2, ((T([256, 256, 256], f16), 0.125), {}) +cnt: 2, ((T([256, 256, 256], f16), 0.08838834764831845), {}) +cnt: 2, ((T([256, 64, 64], f16), 0.08838834764831845), {}) +cnt: 2, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16)), {}) +cnt: 2, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16)), {}) +cnt: 2, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([64, 24, 128, 128], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([64, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], 
f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([64, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 512, 32, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([64, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([64, 1024, 16, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([64, 1536, 8, 8], f16), T([1536], f16), T([1536], f16), T([1536], f16), T([1536], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([64, 1280, 8, 8], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([64, 1280, 8, 8], f16), T([64, 1280, 8, 8], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([64, 1536, 8, 8], f16), T([64, 1536, 8, 8], f16), T([1536], f16), T([1536], f16), T([1536], f16), T([1536], f32), T([1536], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), 
T([64, 512, 16, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 1024, 16, 16], f16), T([64, 1024, 16, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([64, 256, 32, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([64, 512, 32, 32], f16), T([64, 512, 32, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 128, 64, 64], f16), T([64, 128, 64, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([64, 256, 64, 64], f16), T([64, 256, 64, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([64, 32, 128, 128], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([64, 24, 128, 128], f16), T([64, 24, 128, 128], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), 
T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 4, ((T([64, 8, 1, 1], f16),), {}) +cnt: 2, ((T([64, 16, 1, 1], f16),), {}) +Operator: aten.sigmoid.default +cnt: 2, ((T([64, 64, 1, 1], f16),), {}) +cnt: 2, ((T([64, 128, 1, 1], f16),), {}) +cnt: 2, ((T([64, 256, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 2, ((T([64, 256, 1, 1], f16), T([64, 256, 1, 1], f16)), {}) +cnt: 2, ((T([64, 128, 1, 1], f16), T([64, 128, 1, 1], f16)), {}) +cnt: 2, ((T([64, 64, 1, 1], f16), T([64, 64, 1, 1], f16)), {}) +Operator: aten.silu_.default +cnt: 1, ((T([64, 24, 128, 128], f16),), {}) +cnt: 1, ((T([64, 32, 128, 128], f16),), {}) +cnt: 5, ((T([64, 64, 64, 64], f16),), {}) +cnt: 2, ((T([64, 256, 64, 64], f16),), {}) +cnt: 1, ((T([64, 128, 64, 64], f16),), {}) +cnt: 5, ((T([64, 128, 32, 32], f16),), {}) +cnt: 3, ((T([64, 512, 32, 32], f16),), {}) +cnt: 1, ((T([64, 256, 32, 32], f16),), {}) +cnt: 5, ((T([64, 256, 16, 16], f16),), {}) +cnt: 3, ((T([64, 1024, 16, 16], f16),), {}) +cnt: 1, ((T([64, 512, 16, 16], f16),), {}) +cnt: 3, ((T([64, 512, 8, 8], f16),), {}) +cnt: 2, ((T([64, 1536, 8, 8], f16),), {}) +cnt: 1, ((T([64, 1280, 8, 8], f16),), {}) +Operator: aten.silu_backward.default +cnt: 1, ((T([64, 1280, 8, 8], f16), T([64, 1280, 8, 8], f16)), {}) +cnt: 2, ((T([64, 1536, 8, 8], f16), T([64, 1536, 8, 8], f16)), {}) +cnt: 3, ((T([64, 512, 8, 8], f16), T([64, 512, 8, 8], f16)), {}) +cnt: 1, ((T([64, 512, 16, 16], f16), T([64, 512, 16, 16], f16)), {}) +cnt: 3, ((T([64, 1024, 16, 16], f16), T([64, 1024, 16, 16], f16)), {}) +cnt: 5, ((T([64, 256, 16, 16], f16), T([64, 256, 16, 16], f16)), {}) +cnt: 1, ((T([64, 256, 32, 32], f16), T([64, 256, 32, 32], f16)), {}) +cnt: 3, ((T([64, 512, 32, 32], f16), T([64, 512, 32, 32], f16)), {}) +cnt: 5, ((T([64, 128, 32, 32], f16), T([64, 128, 32, 32], f16)), {}) +cnt: 1, ((T([64, 128, 64, 64], 
f16), T([64, 128, 64, 64], f16)), {}) +cnt: 2, ((T([64, 256, 64, 64], f16), T([64, 256, 64, 64], f16)), {}) +cnt: 5, ((T([64, 64, 64, 64], f16), T([64, 64, 64, 64], f16)), {}) +cnt: 1, ((T([64, 32, 128, 128], f16), T([64, 32, 128, 128], f16)), {}) +cnt: 1, ((T([64, 24, 128, 128], f16), T([64, 24, 128, 128], f16)), {}) +Operator: aten.slice_backward.default +cnt: 2, ((T([2048, 8, 8], f16), [2048, 8, 15], 2, 7, 9223372036854775807, 1), {}) +cnt: 2, ((T([2048, 8, 15], f16), [2048, 9, 15], 1, 0, 8, 1), {}) +cnt: 2, ((T([2048, 9, 15], f16), [2048, 9, 15], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([4096, 16, 16], f16), [4096, 16, 31], 2, 15, 9223372036854775807, 1), {}) +cnt: 4, ((T([4096, 16, 31], f16), [4096, 17, 31], 1, 0, 16, 1), {}) +cnt: 4, ((T([4096, 17, 31], f16), [4096, 17, 31], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([8192, 32, 32], f16), [8192, 32, 63], 2, 31, 9223372036854775807, 1), {}) +cnt: 2, ((T([8192, 32, 63], f16), [8192, 33, 63], 1, 0, 32, 1), {}) +cnt: 2, ((T([8192, 33, 63], f16), [8192, 33, 63], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.split_with_sizes.default +cnt: 1, ((T([64, 384, 32, 32], f16), [128, 128, 128], 1), {}) +cnt: 1, ((T([64, 768, 16, 16], f16), [256, 256, 256], 1), {}) +cnt: 1, ((T([64, 1536, 16, 16], f16), [512, 512, 512], 1), {}) +cnt: 1, ((T([64, 1536, 8, 8], f16), [512, 512, 512], 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 1, ((T([256, 8, 8, 8, 8], f16, stride=(4096, 64, 1, 512, 8)), [2], True), {}) +cnt: 1, ((T([256, 8, 8, 8, 8], f16, stride=(4096, 512, 8, 64, 1)), [2], True), {}) +cnt: 2, ((T([256, 16, 16, 16, 16], f16, stride=(65536, 256, 1, 4096, 16)), [2], True), {}) +cnt: 2, ((T([256, 16, 16, 16, 16], f16, stride=(65536, 4096, 16, 256, 1)), [2], True), {}) +cnt: 2, ((T([64, 256, 16, 16], f16), [2, 3], True), {}) +cnt: 1, ((T([256, 32, 32, 32, 32], f16, stride=(1048576, 1024, 1, 32768, 32)), [2], True), {}) +cnt: 1, ((T([256, 32, 32, 32, 32], f16, 
stride=(1048576, 32768, 32, 1024, 1)), [2], True), {}) +cnt: 2, ((T([64, 128, 32, 32], f16), [2, 3], True), {}) +cnt: 2, ((T([64, 64, 64, 64], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([64, 16, 1, 1], f16), T([64, 16, 1, 1], f16), 0), {}) +cnt: 4, ((T([64, 8, 1, 1], f16), T([64, 8, 1, 1], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/selecsls42b_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/selecsls42b_training.txt new file mode 100644 index 000000000..bc42466c1 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/selecsls42b_training.txt @@ -0,0 +1,167 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([128, 152, 14, 14], f16, stride=(178752, 196, 14, 1)), T([128, 152, 14, 14], f16)), {}) +cnt: 2, ((T([128, 304, 14, 14], f16, stride=(178752, 196, 14, 1)), T([128, 304, 14, 14], f16)), {}) +cnt: 1, ((T([128, 152, 14, 14], f16, stride=(119168, 196, 14, 1)), T([128, 152, 14, 14], f16)), {}) +cnt: 1, ((T([128, 304, 14, 14], f16, stride=(119168, 196, 14, 1)), T([128, 304, 14, 14], f16)), {}) +cnt: 1, ((T([128, 72, 28, 28], f16, stride=(338688, 784, 28, 1)), T([128, 72, 28, 28], f16)), {}) +cnt: 2, ((T([128, 144, 28, 28], f16, stride=(338688, 784, 28, 1)), T([128, 144, 28, 28], f16)), {}) +cnt: 1, ((T([128, 72, 28, 28], f16, stride=(225792, 784, 28, 1)), T([128, 72, 28, 28], f16)), {}) +cnt: 1, ((T([128, 144, 28, 28], f16, stride=(225792, 784, 28, 1)), T([128, 144, 28, 28], f16)), {}) +cnt: 1, ((T([128, 32, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([128, 32, 56, 56], f16)), {}) +cnt: 2, ((T([128, 64, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([128, 64, 56, 56], f16)), {}) +cnt: 1, ((T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1)), 
T([128, 32, 56, 56], f16)), {}) +cnt: 1, ((T([128, 64, 56, 56], f16, stride=(401408, 3136, 56, 1)), T([128, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 41, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {}) +Operator: aten.cat.default +cnt: 1, (([T([128, 64, 56, 56], f16), T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([128, 64, 56, 56], f16), T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([128, 64, 56, 56], f16)], 1), {}) +cnt: 1, (([T([128, 144, 28, 28], f16), T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16)], 1), {}) +cnt: 1, (([T([128, 144, 28, 28], f16), T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16), T([128, 144, 28, 28], f16)], 1), {}) +cnt: 1, (([T([128, 304, 14, 14], f16), T([128, 152, 14, 14], f16), T([128, 152, 14, 14], f16)], 1), {}) +cnt: 1, (([T([128, 304, 14, 14], f16), T([128, 152, 14, 14], f16), T([128, 152, 14, 14], f16), T([128, 304, 14, 14], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([64, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 64, 56, 56], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 64, 56, 56], f16), T([32, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 32, 56, 56], f16), T([64, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([64, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 56, 56], 
f16), T([128, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([144, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 144, 28, 28], f16), T([144, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 144, 28, 28], f16), T([72, 144, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 72, 28, 28], f16), T([144, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 288, 28, 28], f16), T([144, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([144, 144, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 432, 28, 28], f16), T([288, 432, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 288, 28, 28], f16), T([304, 288, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 304, 14, 14], f16), T([304, 304, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 304, 14, 14], f16), T([152, 304, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 152, 14, 14], f16), T([304, 152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 608, 14, 14], f16), T([304, 608, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 304, 14, 14], f16), T([304, 304, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 912, 14, 14], f16), T([480, 912, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([960, 480, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([1024, 960, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1024, 7, 
7], f16), T([1280, 1024, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1280, 4, 4], f16), T([1024, 1280, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1024, 4, 4], f16), T([128, 1280, 4, 4], f16), T([1024, 1280, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1280, 4, 4], f16), T([128, 1024, 7, 7], f16), T([1280, 1024, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 960, 7, 7], f16), T([1024, 960, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([128, 480, 14, 14], f16), T([960, 480, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 912, 14, 14], f16), T([480, 912, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 152, 14, 14], f16), T([128, 304, 14, 14], f16), T([152, 304, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 304, 14, 14], f16), T([128, 152, 14, 14], f16), T([304, 152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 304, 14, 14], f16), T([128, 304, 14, 14], f16), T([304, 304, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 304, 14, 14], f16), T([128, 304, 14, 14], f16), T([304, 304, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 304, 14, 14], f16), T([128, 608, 14, 14], f16), T([304, 608, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 304, 14, 14], f16), T([128, 288, 28, 28], f16), T([304, 288, 3, 3], 
f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 288, 28, 28], f16), T([128, 432, 28, 28], f16), T([288, 432, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 72, 28, 28], f16), T([128, 144, 28, 28], f16), T([72, 144, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 144, 28, 28], f16), T([128, 72, 28, 28], f16), T([144, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16), T([144, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16), T([144, 144, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 288, 28, 28], f16), T([144, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 128, 56, 56], f16), T([144, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 192, 56, 56], f16), T([128, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 32, 56, 56], f16), T([128, 64, 56, 56], f16), T([32, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 64, 56, 56], f16), T([128, 32, 56, 56], f16), T([64, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], 
[1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 128, 56, 56], f16), T([64, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 64, 56, 56], f16), T([128, 32, 112, 112], f16), T([64, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1024, 4, 4], f16, stride=(1024, 1, 0, 0)), 16), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 1024, 4, 4], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1024], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1024], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([128, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 288, 28, 28], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([128, 304, 14, 14], f16), T([304], f16), 
T([304], f16), T([304], f16), T([304], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 152, 14, 14], f16), T([152], f16), T([152], f16), T([152], f16), T([152], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1280, 4, 4], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1024, 4, 4], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1024, 4, 4], f16), T([128, 1024, 4, 4], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 1280, 4, 4], f16), T([128, 1280, 4, 4], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 152, 14, 14], f16), T([128, 152, 14, 14], f16), T([152], f16), T([152], f16), T([152], f16), T([152], f32), T([152], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([128, 304, 14, 14], f16), T([128, 304, 14, 
14], f16), T([304], f16), T([304], f16), T([304], f16), T([304], f32), T([304], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 288, 28, 28], f16), T([128, 288, 28, 28], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f32), T([288], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 32, 112, 112], f16),), {}) +cnt: 7, ((T([128, 64, 56, 56], f16),), {}) +cnt: 4, ((T([128, 32, 56, 56], f16),), {}) +cnt: 1, ((T([128, 128, 56, 56], f16),), {}) +cnt: 7, ((T([128, 144, 28, 28], f16),), {}) +cnt: 4, ((T([128, 72, 28, 28], f16),), {}) +cnt: 1, ((T([128, 288, 28, 28], f16),), {}) +cnt: 7, ((T([128, 304, 14, 14], f16),), {}) +cnt: 4, 
((T([128, 152, 14, 14], f16),), {}) +cnt: 1, ((T([128, 480, 14, 14], f16),), {}) +cnt: 1, ((T([128, 960, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 4, 4], f16),), {}) +cnt: 1, ((T([128, 1024, 4, 4], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 1024, 4, 4], f16), T([128, 1024, 4, 4], f16), 0), {}) +cnt: 1, ((T([128, 1280, 4, 4], f16), T([128, 1280, 4, 4], f16), 0), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16), 0), {}) +cnt: 1, ((T([128, 960, 7, 7], f16), T([128, 960, 7, 7], f16), 0), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 152, 14, 14], f16, stride=(178752, 196, 14, 1)), T([128, 152, 14, 14], f16), 0), {}) +cnt: 7, ((T([128, 304, 14, 14], f16), T([128, 304, 14, 14], f16), 0), {}) +cnt: 2, ((T([128, 152, 14, 14], f16), T([128, 152, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 152, 14, 14], f16, stride=(119168, 196, 14, 1)), T([128, 152, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 288, 28, 28], f16), T([128, 288, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 72, 28, 28], f16, stride=(338688, 784, 28, 1)), T([128, 72, 28, 28], f16), 0), {}) +cnt: 7, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16), 0), {}) +cnt: 2, ((T([128, 72, 28, 28], f16), T([128, 72, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 72, 28, 28], f16, stride=(225792, 784, 28, 1)), T([128, 72, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 128, 56, 56], f16), T([128, 128, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 32, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([128, 32, 56, 56], f16), 0), {}) +cnt: 7, ((T([128, 64, 56, 56], f16), T([128, 64, 56, 56], f16), 0), {}) +cnt: 2, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 32, 56, 56], f16, stride=(401408, 3136, 56, 1)), T([128, 32, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 
32, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/spnasnet_100_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/spnasnet_100_training.txt new file mode 100644 index 000000000..5ffc25e3d --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/spnasnet_100_training.txt @@ -0,0 +1,182 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 64, ((T([], i64), 1), {}) +cnt: 4, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 6, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16)), {}) +cnt: 6, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16)), {}) +cnt: 6, ((T([128, 96, 14, 14], f16), T([128, 96, 14, 14], f16)), {}) +cnt: 6, ((T([128, 192, 7, 7], f16), T([128, 192, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([48, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([48, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 48), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([24, 48, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([72, 24, 1, 1], f16), None, [1, 
1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 72, 56, 56], f16), T([72, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 72), {}) +cnt: 2, ((T([128, 72, 56, 56], f16), T([24, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([144, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([144, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 144), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([40, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 40, 28, 28], f16), T([120, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 120, 28, 28], f16), T([120, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 120), {}) +cnt: 3, ((T([128, 120, 28, 28], f16), T([40, 120, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([240, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 240), {}) +cnt: 4, ((T([128, 240, 14, 14], f16), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 80, 14, 14], f16), T([240, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 240, 14, 14], f16), T([240, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([480, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([480, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 480), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([96, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 96, 14, 14], f16), T([288, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 
1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 288, 14, 14], f16), T([288, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 288), {}) +cnt: 3, ((T([128, 288, 14, 14], f16), T([96, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 14, 14], f16), T([576, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 576, 14, 14], f16), T([576, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 576), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([192, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1152), {}) +cnt: 3, ((T([128, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([1152, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1152), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([320, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([128, 1152, 7, 7], f16), T([320, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 4, ((T([128, 1152, 7, 7], f16), T([128, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, 
[True, True, False]), {}) +cnt: 3, ((T([128, 192, 7, 7], f16), T([128, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 7, 7], f16), T([128, 576, 7, 7], f16), T([192, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([128, 576, 14, 14], f16), T([576, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 576, [True, True, False]), {}) +cnt: 1, ((T([128, 576, 14, 14], f16), T([128, 96, 14, 14], f16), T([576, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 96, 14, 14], f16), T([128, 288, 14, 14], f16), T([96, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 288, 14, 14], f16), T([128, 288, 14, 14], f16), T([288, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 288, [True, True, False]), {}) +cnt: 3, ((T([128, 288, 14, 14], f16), T([128, 96, 14, 14], f16), T([288, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 14, 14], f16), T([128, 480, 14, 14], f16), T([96, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([128, 80, 14, 14], f16), T([480, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 80, 14, 14], f16), T([128, 240, 14, 14], f16), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) 
+cnt: 3, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), T([240, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 3, ((T([128, 240, 14, 14], f16), T([128, 80, 14, 14], f16), T([240, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 28, 28], f16), T([240, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 40, 28, 28], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 40, 28, 28], f16), T([128, 120, 28, 28], f16), T([40, 120, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), T([120, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 3, ((T([128, 120, 28, 28], f16), T([128, 40, 28, 28], f16), T([120, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([128, 144, 28, 28], f16), T([40, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 56, 56], f16), T([144, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([128, 24, 56, 56], f16), T([144, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 72, 56, 56], f16), T([24, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), T([72, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 2, ((T([128, 72, 56, 
56], f16), T([128, 24, 56, 56], f16), T([72, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 48, 56, 56], f16), T([24, 48, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 112, 112], f16), T([48, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 48, [True, True, False]), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([128, 16, 112, 112], f16), T([48, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1280, 7, 7], f16, stride=(1280, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 1280, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1280], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([48], f16), T([48], f16), T([48], 
f16), T([48], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([128, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 96, 14, 14], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([128, 288, 14, 14], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16), 
T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([128, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f32), T([1152], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 192, 7, 7], f16), T([128, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([128, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 576, 14, 14], f16), T([128, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 96, 14, 14], f16), T([128, 96, 14, 14], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([128, 288, 14, 14], f16), T([128, 288, 14, 14], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f32), T([288], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, 
((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([128, 48, 112, 112], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], 
f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 2, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 48, 112, 112], f16),), {}) +cnt: 1, ((T([128, 48, 56, 56], f16),), {}) +cnt: 4, ((T([128, 72, 56, 56], f16),), {}) +cnt: 1, ((T([128, 144, 56, 56], f16),), {}) +cnt: 1, ((T([128, 144, 28, 28], f16),), {}) +cnt: 6, ((T([128, 120, 28, 28], f16),), {}) +cnt: 1, ((T([128, 240, 28, 28], f16),), {}) +cnt: 7, ((T([128, 240, 14, 14], f16),), {}) +cnt: 2, ((T([128, 480, 14, 14], f16),), {}) +cnt: 6, ((T([128, 288, 14, 14], f16),), {}) +cnt: 1, ((T([128, 576, 14, 14], f16),), {}) +cnt: 1, ((T([128, 576, 7, 7], f16),), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16),), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 1280, 7, 7], f16), 0), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), 0), {}) +cnt: 1, ((T([128, 576, 7, 7], f16), T([128, 576, 7, 7], f16), 0), {}) +cnt: 1, ((T([128, 576, 14, 14], f16), T([128, 576, 14, 14], f16), 0), {}) +cnt: 6, ((T([128, 288, 14, 14], f16), T([128, 288, 14, 14], f16), 0), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), 0), {}) +cnt: 7, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16), 0), {}) +cnt: 6, ((T([128, 120, 28, 28], f16), T([128, 120, 28, 
28], f16), 0), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16), 0), {}) +cnt: 4, ((T([128, 72, 56, 56], f16), T([128, 72, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 48, 56, 56], f16), T([128, 48, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 48, 112, 112], f16), T([128, 48, 112, 112], f16), 0), {}) +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/swin_base_patch4_window7_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/swin_base_patch4_window7_224_training.txt new file mode 100644 index 000000000..6076086ba --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/swin_base_patch4_window7_224_training.txt @@ -0,0 +1,341 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 2, ((T([4096, 4, 49, 49], f16), -1, False), {}) +cnt: 2, ((T([1024, 8, 49, 49], f16), -1, False), {}) +cnt: 18, ((T([256, 16, 49, 49], f16), -1, False), {}) +cnt: 2, ((T([64, 32, 49, 49], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 2, ((T([64, 32, 49, 49], f16), T([64, 32, 49, 49], f16), -1, f16), {}) +cnt: 18, ((T([256, 16, 49, 49], f16), T([256, 16, 49, 49], f16), -1, f16), {}) +cnt: 2, ((T([1024, 8, 49, 49], f16), T([1024, 8, 49, 49], f16), -1, f16), {}) +cnt: 2, ((T([4096, 4, 49, 49], f16), T([4096, 4, 49, 49], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 6, ((T([4096, 4, 49, 32], f16), [16384, 49, 32]), {}) +cnt: 2, ((T([4096, 4, 32, 49], f16), [16384, 32, 49]), {}) +cnt: 2, ((T([16384, 49, 49], f16), [4096, 4, 49, 49]), {}) +cnt: 2, ((T([16384, 49, 32], f16), [4096, 4, 49, 32]), {}) +cnt: 2, ((T([4096, 49, 4, 32], f16), 
[4096, 49, 128]), {}) +cnt: 1, ((T([50176, 256], f16), [64, 784, 256]), {}) +cnt: 6, ((T([1024, 8, 49, 32], f16), [8192, 49, 32]), {}) +cnt: 2, ((T([1024, 8, 32, 49], f16), [8192, 32, 49]), {}) +cnt: 2, ((T([8192, 49, 49], f16), [1024, 8, 49, 49]), {}) +cnt: 2, ((T([8192, 49, 32], f16), [1024, 8, 49, 32]), {}) +cnt: 2, ((T([1024, 49, 8, 32], f16), [1024, 49, 256]), {}) +cnt: 1, ((T([12544, 512], f16), [64, 196, 512]), {}) +cnt: 54, ((T([256, 16, 49, 32], f16), [4096, 49, 32]), {}) +cnt: 18, ((T([256, 16, 32, 49], f16), [4096, 32, 49]), {}) +cnt: 18, ((T([4096, 49, 49], f16), [256, 16, 49, 49]), {}) +cnt: 18, ((T([4096, 49, 32], f16), [256, 16, 49, 32]), {}) +cnt: 18, ((T([256, 49, 16, 32], f16), [256, 49, 512]), {}) +cnt: 1, ((T([3136, 1024], f16), [64, 49, 1024]), {}) +cnt: 6, ((T([64, 32, 49, 32], f16), [2048, 49, 32]), {}) +cnt: 2, ((T([64, 32, 32, 49], f16), [2048, 32, 49]), {}) +cnt: 2, ((T([2048, 49, 49], f16), [64, 32, 49, 49]), {}) +cnt: 2, ((T([2048, 49, 32], f16), [64, 32, 49, 32]), {}) +cnt: 2, ((T([64, 49, 32, 32], f16), [64, 49, 1024]), {}) +cnt: 2, ((T([64, 49, 3, 32, 32], f16), [64, 49, 3072]), {}) +cnt: 18, ((T([64, 2, 2, 7, 7, 512], f16), [256, 7, 7, 512]), {}) +cnt: 18, ((T([256, 49, 3, 16, 32], f16), [256, 49, 1536]), {}) +cnt: 18, ((T([64, 2, 7, 2, 7, 512], f16), [64, 14, 14, 512]), {}) +cnt: 2, ((T([64, 4, 4, 7, 7, 256], f16), [1024, 7, 7, 256]), {}) +cnt: 2, ((T([1024, 49, 3, 8, 32], f16), [1024, 49, 768]), {}) +cnt: 2, ((T([64, 4, 7, 4, 7, 256], f16), [64, 28, 28, 256]), {}) +cnt: 2, ((T([64, 8, 8, 7, 7, 128], f16), [4096, 7, 7, 128]), {}) +cnt: 2, ((T([4096, 49, 3, 4, 32], f16), [4096, 49, 384]), {}) +cnt: 2, ((T([64, 8, 7, 8, 7, 128], f16), [64, 56, 56, 128]), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([4096, 4, 49, 49], f16), T([1, 4, 49, 49], f16)), {}) +cnt: 8, ((T([64, 3136, 128], f16), T([64, 3136, 128], f16)), {}) +cnt: 1, ((T([64, 64, 4, 49, 49], f16), T([1, 64, 1, 49, 49], f16)), {}) +cnt: 2, ((T([1024, 8, 49, 49], f16), T([1, 8, 
49, 49], f16)), {}) +cnt: 8, ((T([64, 784, 256], f16), T([64, 784, 256], f16)), {}) +cnt: 1, ((T([64, 16, 8, 49, 49], f16), T([1, 16, 1, 49, 49], f16)), {}) +cnt: 18, ((T([256, 16, 49, 49], f16), T([1, 16, 49, 49], f16)), {}) +cnt: 72, ((T([64, 196, 512], f16), T([64, 196, 512], f16)), {}) +cnt: 9, ((T([64, 4, 16, 49, 49], f16), T([1, 4, 1, 49, 49], f16)), {}) +cnt: 2, ((T([64, 32, 49, 49], f16), T([1, 32, 49, 49], f16)), {}) +cnt: 8, ((T([64, 49, 1024], f16), T([64, 49, 1024], f16)), {}) +cnt: 3, ((T([64, 14, 14, 512], f16), T([64, 14, 14, 512], f16)), {}) +cnt: 3, ((T([64, 28, 28, 256], f16), T([64, 28, 28, 256], f16)), {}) +cnt: 3, ((T([64, 56, 56, 128], f16), T([64, 56, 56, 128], f16)), {}) +Operator: aten.addmm.default +cnt: 2, ((T([384], f16), T([200704, 128], f16), T([128, 384], f16, stride=(1, 128))), {}) +cnt: 2, ((T([128], f16), T([200704, 128], f16), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 2, ((T([512], f16), T([200704, 128], f16), T([128, 512], f16, stride=(1, 128))), {}) +cnt: 2, ((T([128], f16), T([200704, 512], f16), T([512, 128], f16, stride=(1, 512))), {}) +cnt: 2, ((T([768], f16), T([50176, 256], f16), T([256, 768], f16, stride=(1, 256))), {}) +cnt: 2, ((T([256], f16), T([50176, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +cnt: 2, ((T([1024], f16), T([50176, 256], f16), T([256, 1024], f16, stride=(1, 256))), {}) +cnt: 2, ((T([256], f16), T([50176, 1024], f16), T([1024, 256], f16, stride=(1, 1024))), {}) +cnt: 18, ((T([1536], f16), T([12544, 512], f16), T([512, 1536], f16, stride=(1, 512))), {}) +cnt: 18, ((T([512], f16), T([12544, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 18, ((T([2048], f16), T([12544, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 18, ((T([512], f16), T([12544, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +cnt: 2, ((T([3072], f16), T([3136, 1024], f16), T([1024, 3072], f16, stride=(1, 1024))), {}) +cnt: 2, ((T([1024], f16), T([3136, 1024], f16), T([1024, 1024], f16, 
stride=(1, 1024))), {}) +cnt: 2, ((T([4096], f16), T([3136, 1024], f16), T([1024, 4096], f16, stride=(1, 1024))), {}) +cnt: 2, ((T([1024], f16), T([3136, 4096], f16), T([4096, 1024], f16, stride=(1, 4096))), {}) +cnt: 1, ((T([1000], f16), T([64, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {}) +Operator: aten.bernoulli_.float +cnt: 2, ((T([64, 1, 1], f16), 0.9956521736457944), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9913043472915888), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9869565209373832), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9826086945831776), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9782608672976494), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9739130418747663), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9695652164518833), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9652173891663551), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.960869561880827), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9565217345952988), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9521739110350609), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9478260837495327), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9434782564640045), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9391304329037666), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9347826093435287), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9304347857832909), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9260869547724724), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9217391312122345), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.917391300201416), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9130434766411781), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9086956530809402), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9043478220701218), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.8999999985098839), {}) +Operator: aten.bmm.default +cnt: 2, ((T([16384, 49, 32], f16), T([16384, 32, 49], f16)), {}) +cnt: 2, ((T([16384, 49, 49], f16), T([16384, 49, 32], f16)), {}) +cnt: 2, ((T([8192, 49, 32], f16), T([8192, 32, 49], f16)), {}) +cnt: 2, ((T([8192, 49, 49], f16), T([8192, 49, 32], f16)), {}) +cnt: 18, ((T([4096, 49, 32], f16), T([4096, 32, 49], f16)), {}) +cnt: 18, ((T([4096, 
49, 49], f16), T([4096, 49, 32], f16)), {}) +cnt: 2, ((T([2048, 49, 32], f16), T([2048, 32, 49], f16)), {}) +cnt: 2, ((T([2048, 49, 49], f16), T([2048, 49, 32], f16)), {}) +cnt: 2, ((T([2048, 49, 49], f16, stride=(2401, 1, 49)), T([2048, 49, 32], f16)), {}) +cnt: 2, ((T([2048, 49, 32], f16), T([2048, 32, 49], f16, stride=(1568, 1, 32))), {}) +cnt: 2, ((T([2048, 32, 49], f16, stride=(1568, 1, 32)), T([2048, 49, 49], f16)), {}) +cnt: 2, ((T([2048, 49, 49], f16), T([2048, 49, 32], f16, stride=(1568, 1, 49))), {}) +cnt: 18, ((T([4096, 49, 49], f16, stride=(2401, 1, 49)), T([4096, 49, 32], f16)), {}) +cnt: 18, ((T([4096, 49, 32], f16), T([4096, 32, 49], f16, stride=(1568, 1, 32))), {}) +cnt: 18, ((T([4096, 32, 49], f16, stride=(1568, 1, 32)), T([4096, 49, 49], f16)), {}) +cnt: 18, ((T([4096, 49, 49], f16), T([4096, 49, 32], f16, stride=(1568, 1, 49))), {}) +cnt: 2, ((T([8192, 49, 49], f16, stride=(2401, 1, 49)), T([8192, 49, 32], f16)), {}) +cnt: 2, ((T([8192, 49, 32], f16), T([8192, 32, 49], f16, stride=(1568, 1, 32))), {}) +cnt: 2, ((T([8192, 32, 49], f16, stride=(1568, 1, 32)), T([8192, 49, 49], f16)), {}) +cnt: 2, ((T([8192, 49, 49], f16), T([8192, 49, 32], f16, stride=(1568, 1, 49))), {}) +cnt: 2, ((T([16384, 49, 49], f16, stride=(2401, 1, 49)), T([16384, 49, 32], f16)), {}) +cnt: 2, ((T([16384, 49, 32], f16), T([16384, 32, 49], f16, stride=(1568, 1, 32))), {}) +cnt: 2, ((T([16384, 32, 49], f16, stride=(1568, 1, 32)), T([16384, 49, 49], f16)), {}) +cnt: 2, ((T([16384, 49, 49], f16), T([16384, 49, 32], f16, stride=(1568, 1, 49))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 28, 28, 128], f16, stride=(401408, 14336, 256, 1)), T([64, 28, 28, 128], f16, stride=(401408, 14336, 256, 1)), T([64, 28, 28, 128], f16, stride=(401408, 14336, 256, 1)), T([64, 28, 28, 128], f16, stride=(401408, 14336, 256, 1))], -1), {}) +cnt: 1, (([T([64, 14, 14, 256], f16, stride=(200704, 14336, 512, 1)), T([64, 14, 14, 256], f16, stride=(200704, 14336, 512, 1)), T([64, 14, 14, 256], 
f16, stride=(200704, 14336, 512, 1)), T([64, 14, 14, 256], f16, stride=(200704, 14336, 512, 1))], -1), {}) +cnt: 1, (([T([64, 7, 7, 512], f16, stride=(100352, 14336, 1024, 1)), T([64, 7, 7, 512], f16, stride=(100352, 14336, 1024, 1)), T([64, 7, 7, 512], f16, stride=(100352, 14336, 1024, 1)), T([64, 7, 7, 512], f16, stride=(100352, 14336, 1024, 1))], -1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([128, 3, 4, 4], f16), T([128], f16), [4, 4], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 128, 56, 56], f16, stride=(401408, 1, 7168, 128)), T([64, 3, 224, 224], f16), T([128, 3, 4, 4], f16), [128], [4, 4], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 49, 1024], f16, stride=(1024, 0, 1)), 49), {}) +Operator: aten.div_.Tensor +cnt: 2, ((T([64, 1, 1], f16), 0.9956521736457944), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9913043472915888), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9869565209373832), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9826086945831776), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9782608672976494), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9739130418747663), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9695652164518833), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9652173891663551), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.960869561880827), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9565217345952988), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9521739110350609), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9478260837495327), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9434782564640045), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9391304329037666), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9347826093435287), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9304347857832909), {}) +cnt: 2, ((T([64, 1, 1], f16), 
0.9260869547724724), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9217391312122345), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.917391300201416), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9130434766411781), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9086956530809402), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.9043478220701218), {}) +cnt: 2, ((T([64, 1, 1], f16), 0.8999999985098839), {}) +Operator: aten.gelu.default +cnt: 2, ((T([64, 3136, 512], f16),), {}) +cnt: 2, ((T([64, 784, 1024], f16),), {}) +cnt: 18, ((T([64, 196, 2048], f16),), {}) +cnt: 2, ((T([64, 49, 4096], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 2, ((T([64, 49, 4096], f16), T([64, 49, 4096], f16)), {}) +cnt: 18, ((T([64, 196, 2048], f16), T([64, 196, 2048], f16)), {}) +cnt: 2, ((T([64, 784, 1024], f16), T([64, 784, 1024], f16)), {}) +cnt: 2, ((T([64, 3136, 512], f16), T([64, 3136, 512], f16)), {}) +Operator: aten.index.Tensor +cnt: 2, ((T([169, 4], f16), [T([2401], i64)]), {}) +cnt: 2, ((T([169, 8], f16), [T([2401], i64)]), {}) +cnt: 18, ((T([169, 16], f16), [T([2401], i64)]), {}) +cnt: 2, ((T([169, 32], f16), [T([2401], i64)]), {}) +Operator: aten.index_put.default +cnt: 2, ((T([169, 32], f16), [T([2401], i64)], T([2401, 32], f16, stride=(1, 2401)), True), {}) +cnt: 18, ((T([169, 16], f16), [T([2401], i64)], T([2401, 16], f16, stride=(1, 2401)), True), {}) +cnt: 2, ((T([169, 8], f16), [T([2401], i64)], T([2401, 8], f16, stride=(1, 2401)), True), {}) +cnt: 2, ((T([169, 4], f16), [T([2401], i64)], T([2401, 4], f16, stride=(1, 2401)), True), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 49, 1024], f16), [1]), {}) +Operator: aten.mm.default +cnt: 1, ((T([50176, 512], f16), T([512, 256], f16, stride=(1, 512))), {}) +cnt: 1, ((T([12544, 1024], f16), T([1024, 512], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([3136, 2048], f16), T([2048, 1024], f16, stride=(1, 2048))), {}) +cnt: 1, ((T([64, 1000], f16), T([1000, 1024], f16)), {}) +cnt: 1, ((T([1000, 64], 
f16, stride=(1, 1000)), T([64, 1024], f16)), {}) +cnt: 2, ((T([3136, 1024], f16), T([1024, 4096], f16)), {}) +cnt: 2, ((T([1024, 3136], f16, stride=(1, 1024)), T([3136, 4096], f16)), {}) +cnt: 2, ((T([3136, 4096], f16), T([4096, 1024], f16)), {}) +cnt: 2, ((T([4096, 3136], f16, stride=(1, 4096)), T([3136, 1024], f16)), {}) +cnt: 2, ((T([3136, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 2, ((T([1024, 3136], f16, stride=(1, 1024)), T([3136, 1024], f16)), {}) +cnt: 2, ((T([3136, 3072], f16), T([3072, 1024], f16)), {}) +cnt: 2, ((T([3072, 3136], f16, stride=(1, 3072)), T([3136, 1024], f16)), {}) +cnt: 1, ((T([1024, 3136], f16, stride=(1, 1024)), T([3136, 2048], f16)), {}) +cnt: 1, ((T([3136, 1024], f16), T([1024, 2048], f16)), {}) +cnt: 18, ((T([12544, 512], f16), T([512, 2048], f16)), {}) +cnt: 18, ((T([512, 12544], f16, stride=(1, 512)), T([12544, 2048], f16)), {}) +cnt: 18, ((T([12544, 2048], f16), T([2048, 512], f16)), {}) +cnt: 18, ((T([2048, 12544], f16, stride=(1, 2048)), T([12544, 512], f16)), {}) +cnt: 18, ((T([12544, 512], f16), T([512, 512], f16)), {}) +cnt: 18, ((T([512, 12544], f16, stride=(1, 512)), T([12544, 512], f16)), {}) +cnt: 18, ((T([12544, 1536], f16), T([1536, 512], f16)), {}) +cnt: 18, ((T([1536, 12544], f16, stride=(1, 1536)), T([12544, 512], f16)), {}) +cnt: 1, ((T([512, 12544], f16, stride=(1, 512)), T([12544, 1024], f16)), {}) +cnt: 1, ((T([12544, 512], f16), T([512, 1024], f16)), {}) +cnt: 2, ((T([50176, 256], f16), T([256, 1024], f16)), {}) +cnt: 2, ((T([256, 50176], f16, stride=(1, 256)), T([50176, 1024], f16)), {}) +cnt: 2, ((T([50176, 1024], f16), T([1024, 256], f16)), {}) +cnt: 2, ((T([1024, 50176], f16, stride=(1, 1024)), T([50176, 256], f16)), {}) +cnt: 2, ((T([50176, 256], f16), T([256, 256], f16)), {}) +cnt: 2, ((T([256, 50176], f16, stride=(1, 256)), T([50176, 256], f16)), {}) +cnt: 2, ((T([50176, 768], f16), T([768, 256], f16)), {}) +cnt: 2, ((T([768, 50176], f16, stride=(1, 768)), T([50176, 256], f16)), {}) +cnt: 1, ((T([256, 
50176], f16, stride=(1, 256)), T([50176, 512], f16)), {}) +cnt: 1, ((T([50176, 256], f16), T([256, 512], f16)), {}) +cnt: 2, ((T([200704, 128], f16), T([128, 512], f16)), {}) +cnt: 2, ((T([128, 200704], f16, stride=(1, 128)), T([200704, 512], f16)), {}) +cnt: 2, ((T([200704, 512], f16), T([512, 128], f16)), {}) +cnt: 2, ((T([512, 200704], f16, stride=(1, 512)), T([200704, 128], f16)), {}) +cnt: 2, ((T([200704, 128], f16), T([128, 128], f16)), {}) +cnt: 2, ((T([128, 200704], f16, stride=(1, 128)), T([200704, 128], f16)), {}) +cnt: 2, ((T([200704, 384], f16), T([384, 128], f16)), {}) +cnt: 2, ((T([384, 200704], f16, stride=(1, 384)), T([200704, 128], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([4096, 4, 49, 32], f16, stride=(18816, 32, 384, 1)), 0.1767766952966369), {}) +cnt: 4, ((T([64, 3136, 128], f16), T([64, 1, 1], f16)), {}) +cnt: 2, ((T([1024, 8, 49, 32], f16, stride=(37632, 32, 768, 1)), 0.1767766952966369), {}) +cnt: 8, ((T([64, 784, 256], f16), T([64, 1, 1], f16)), {}) +cnt: 18, ((T([256, 16, 49, 32], f16, stride=(75264, 32, 1536, 1)), 0.1767766952966369), {}) +cnt: 72, ((T([64, 196, 512], f16), T([64, 1, 1], f16)), {}) +cnt: 2, ((T([64, 32, 49, 32], f16, stride=(150528, 32, 3072, 1)), 0.1767766952966369), {}) +cnt: 8, ((T([64, 49, 1024], f16), T([64, 1, 1], f16)), {}) +cnt: 2, ((T([64, 32, 49, 32], f16), 0.1767766952966369), {}) +cnt: 18, ((T([256, 16, 49, 32], f16), 0.1767766952966369), {}) +cnt: 2, ((T([1024, 8, 49, 32], f16), 0.1767766952966369), {}) +cnt: 2, ((T([4096, 4, 49, 32], f16), 0.1767766952966369), {}) +Operator: aten.native_layer_norm.default +cnt: 1, ((T([64, 3136, 128], f16, stride=(401408, 1, 3136)), [128], T([128], f16), T([128], f16), 1e-05), {}) +cnt: 4, ((T([64, 3136, 128], f16), [128], T([128], f16), T([128], f16), 1e-05), {}) +cnt: 1, ((T([64, 784, 512], f16), [512], T([512], f16), T([512], f16), 1e-05), {}) +cnt: 4, ((T([64, 784, 256], f16), [256], T([256], f16), T([256], f16), 1e-05), {}) +cnt: 1, ((T([64, 196, 1024], f16), 
[1024], T([1024], f16), T([1024], f16), 1e-05), {}) +cnt: 36, ((T([64, 196, 512], f16), [512], T([512], f16), T([512], f16), 1e-05), {}) +cnt: 1, ((T([64, 49, 2048], f16), [2048], T([2048], f16), T([2048], f16), 1e-05), {}) +cnt: 5, ((T([64, 49, 1024], f16), [1024], T([1024], f16), T([1024], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 5, ((T([64, 49, 1024], f16), T([64, 49, 1024], f16), [1024], T([64, 49, 1], f32), T([64, 49, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +cnt: 1, ((T([64, 49, 2048], f16), T([64, 49, 2048], f16), [2048], T([64, 49, 1], f32), T([64, 49, 1], f32), T([2048], f16), T([2048], f16), [True, True, True]), {}) +cnt: 36, ((T([64, 196, 512], f16), T([64, 196, 512], f16), [512], T([64, 196, 1], f32), T([64, 196, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 1, ((T([64, 196, 1024], f16), T([64, 196, 1024], f16), [1024], T([64, 196, 1], f32), T([64, 196, 1], f32), T([1024], f16), T([1024], f16), [True, True, True]), {}) +cnt: 4, ((T([64, 784, 256], f16), T([64, 784, 256], f16), [256], T([64, 784, 1], f32), T([64, 784, 1], f32), T([256], f16), T([256], f16), [True, True, True]), {}) +cnt: 1, ((T([64, 784, 512], f16), T([64, 784, 512], f16), [512], T([64, 784, 1], f32), T([64, 784, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 4, ((T([64, 3136, 128], f16), T([64, 3136, 128], f16), [128], T([64, 3136, 1], f32), T([64, 3136, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 1, ((T([64, 3136, 128], f16), T([64, 3136, 128], f16, stride=(401408, 1, 3136)), [128], T([64, 3136, 1], f32), T([64, 3136, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +Operator: aten.new_empty.default +cnt: 2, ((T([64, 3136, 128], f16), [64, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 4, ((T([64, 784, 256], f16), [64, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 
'pin_memory': False}) +cnt: 36, ((T([64, 196, 512], f16), [64, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 4, ((T([64, 49, 1024], f16), [64, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_zeros.default +cnt: 2, ((T([2401, 32], f16, stride=(1, 2401)), [169, 32]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 18, ((T([2401, 16], f16, stride=(1, 2401)), [169, 16]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 2, ((T([2401, 8], f16, stride=(1, 2401)), [169, 8]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 2, ((T([2401, 4], f16, stride=(1, 2401)), [169, 4]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.roll.default +cnt: 1, ((T([64, 56, 56, 128], f16), [-3, -3], [1, 2]), {}) +cnt: 1, ((T([64, 56, 56, 128], f16), [3, 3], [1, 2]), {}) +cnt: 1, ((T([64, 28, 28, 256], f16), [-3, -3], [1, 2]), {}) +cnt: 1, ((T([64, 28, 28, 256], f16), [3, 3], [1, 2]), {}) +cnt: 9, ((T([64, 14, 14, 512], f16), [-3, -3], [1, 2]), {}) +cnt: 9, ((T([64, 14, 14, 512], f16), [3, 3], [1, 2]), {}) +cnt: 9, ((T([64, 14, 14, 512], f16), [-3, -3], [2, 1]), {}) +cnt: 9, ((T([64, 14, 14, 512], f16), [3, 3], [2, 1]), {}) +cnt: 1, ((T([64, 28, 28, 256], f16), [-3, -3], [2, 1]), {}) +cnt: 1, ((T([64, 28, 28, 256], f16), [3, 3], [2, 1]), {}) +cnt: 1, ((T([64, 56, 56, 128], f16), [-3, -3], [2, 1]), {}) +cnt: 1, ((T([64, 56, 56, 128], f16), [3, 3], [2, 1]), {}) +Operator: aten.slice_backward.default +cnt: 4, ((T([64, 7, 7, 512], f16, stride=(100352, 14336, 2048, 1)), [64, 7, 7, 512], 3, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 7, 7, 512], f16), [64, 7, 14, 512], 2, 1, 
9223372036854775807, 2), {}) +cnt: 2, ((T([64, 7, 14, 512], f16), [64, 14, 14, 512], 1, 1, 9223372036854775807, 2), {}) +cnt: 4, ((T([64, 14, 14, 512], f16), [64, 14, 14, 512], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 7, 14, 512], f16), [64, 14, 14, 512], 1, 0, 9223372036854775807, 2), {}) +cnt: 2, ((T([64, 7, 7, 512], f16), [64, 7, 14, 512], 2, 0, 9223372036854775807, 2), {}) +cnt: 4, ((T([64, 14, 14, 256], f16, stride=(200704, 14336, 1024, 1)), [64, 14, 14, 256], 3, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 14, 14, 256], f16), [64, 14, 28, 256], 2, 1, 9223372036854775807, 2), {}) +cnt: 2, ((T([64, 14, 28, 256], f16), [64, 28, 28, 256], 1, 1, 9223372036854775807, 2), {}) +cnt: 4, ((T([64, 28, 28, 256], f16), [64, 28, 28, 256], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 14, 28, 256], f16), [64, 28, 28, 256], 1, 0, 9223372036854775807, 2), {}) +cnt: 2, ((T([64, 14, 14, 256], f16), [64, 14, 28, 256], 2, 0, 9223372036854775807, 2), {}) +cnt: 4, ((T([64, 28, 28, 128], f16, stride=(401408, 14336, 512, 1)), [64, 28, 28, 128], 3, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 28, 28, 128], f16), [64, 28, 56, 128], 2, 1, 9223372036854775807, 2), {}) +cnt: 2, ((T([64, 28, 56, 128], f16), [64, 56, 56, 128], 1, 1, 9223372036854775807, 2), {}) +cnt: 4, ((T([64, 56, 56, 128], f16), [64, 56, 56, 128], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 28, 56, 128], f16), [64, 56, 56, 128], 1, 0, 9223372036854775807, 2), {}) +cnt: 2, ((T([64, 28, 28, 128], f16), [64, 28, 56, 128], 2, 0, 9223372036854775807, 2), {}) +Operator: aten.stack.default +cnt: 2, (([T([64, 32, 49, 32], f16), T([64, 32, 49, 32], f16, stride=(50176, 1568, 1, 49)), T([64, 32, 49, 32], f16)],), {}) +cnt: 18, (([T([256, 16, 49, 32], f16), T([256, 16, 49, 32], f16, stride=(25088, 1568, 1, 49)), T([256, 16, 49, 32], f16)],), {}) +cnt: 2, (([T([1024, 8, 49, 32], f16), T([1024, 8, 49, 32], f16, stride=(12544, 1568, 1, 49)), T([1024, 8, 49, 32], f16)],), {}) +cnt: 2, (([T([4096, 4, 
49, 32], f16), T([4096, 4, 49, 32], f16, stride=(6272, 1568, 1, 49)), T([4096, 4, 49, 32], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 4, ((T([3136, 1024], f16), [0], True), {}) +cnt: 2, ((T([3136, 4096], f16), [0], True), {}) +cnt: 2, ((T([64, 32, 49, 49], f16), [0], True), {}) +cnt: 2, ((T([3136, 3072], f16), [0], True), {}) +cnt: 36, ((T([12544, 512], f16), [0], True), {}) +cnt: 18, ((T([12544, 2048], f16), [0], True), {}) +cnt: 18, ((T([256, 16, 49, 49], f16), [0], True), {}) +cnt: 18, ((T([12544, 1536], f16), [0], True), {}) +cnt: 4, ((T([50176, 256], f16), [0], True), {}) +cnt: 2, ((T([50176, 1024], f16), [0], True), {}) +cnt: 2, ((T([1024, 8, 49, 49], f16), [0], True), {}) +cnt: 2, ((T([50176, 768], f16), [0], True), {}) +cnt: 4, ((T([200704, 128], f16), [0], True), {}) +cnt: 2, ((T([200704, 512], f16), [0], True), {}) +cnt: 2, ((T([4096, 4, 49, 49], f16), [0], True), {}) +cnt: 2, ((T([200704, 384], f16), [0], True), {}) +Operator: aten.unbind.int +cnt: 2, ((T([3, 4096, 4, 49, 32], f16, stride=(128, 18816, 32, 384, 1)),), {}) +cnt: 2, ((T([3, 1024, 8, 49, 32], f16, stride=(256, 37632, 32, 768, 1)),), {}) +cnt: 18, ((T([3, 256, 16, 49, 32], f16, stride=(512, 75264, 32, 1536, 1)),), {}) +cnt: 2, ((T([3, 64, 32, 49, 32], f16, stride=(1024, 150528, 32, 3072, 1)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/swsl_resnext101_32x16d_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/swsl_resnext101_32x16d_training.txt new file mode 100644 index 000000000..58d92f4b5 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/swsl_resnext101_32x16d_training.txt @@ -0,0 +1,143 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([32, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([32, 1000], f16), T([32, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([32, 2048, 7, 7], f16), T([32, 
2048, 7, 7], f16)), {}) +cnt: 23, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 4, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 104, ((T([], i64), 1), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +cnt: 4, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)), {}) +cnt: 23, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([512, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 512, 56, 56], f16), T([512, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 3, ((T([32, 512, 56, 56], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 256, 56, 56], f16), T([512, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([1024, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 56, 56], f16), T([1024, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 4, ((T([32, 1024, 28, 28], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([512, 256, 1, 1], f16), None, 
[2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 1024, 28, 28], f16), T([1024, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([2048, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2048, 28, 28], f16), T([2048, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 23, ((T([32, 2048, 14, 14], f16), T([1024, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 22, ((T([32, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 22, ((T([32, 2048, 14, 14], f16), T([2048, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([4096, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 4096, 14, 14], f16), T([4096, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 3, ((T([32, 4096, 7, 7], f16), T([2048, 4096, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 2048, 7, 7], f16), T([4096, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 4096, 7, 7], f16), T([4096, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 4096, 7, 7], f16), T([2048, 4096, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 4096, 7, 7], f16), T([32, 4096, 7, 7], f16), T([4096, 
128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 2, ((T([32, 4096, 7, 7], f16), T([32, 2048, 7, 7], f16), T([4096, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 4096, 7, 7], f16), T([32, 4096, 14, 14], f16), T([4096, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([32, 4096, 14, 14], f16), T([32, 1024, 14, 14], f16), T([4096, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 23, ((T([32, 1024, 14, 14], f16), T([32, 2048, 14, 14], f16), T([1024, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 22, ((T([32, 2048, 14, 14], f16), T([32, 2048, 14, 14], f16), T([2048, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 22, ((T([32, 2048, 14, 14], f16), T([32, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2048, 14, 14], f16), T([32, 2048, 28, 28], f16), T([2048, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([32, 2048, 28, 28], f16), T([32, 512, 28, 28], f16), T([2048, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([32, 512, 28, 28], f16), T([32, 1024, 28, 28], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 1024, 28, 28], f16), T([32, 1024, 28, 28], f16), 
T([1024, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 3, ((T([32, 1024, 28, 28], f16), T([32, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 256, 56, 56], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 28, 28], f16), T([32, 1024, 56, 56], f16), T([1024, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 56, 56], f16), T([32, 256, 56, 56], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 512, 56, 56], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 512, 56, 56], f16), T([32, 512, 56, 56], f16), T([512, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 2, ((T([32, 512, 56, 56], f16), T([32, 256, 56, 56], f16), T([512, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 64, 56, 56], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 56, 56], f16), T([32, 64, 56, 56], f16), T([512, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([32], 
i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([32, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([32, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(1, 1000)), T([32, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +cnt: 6, ((T([32, 512, 56, 56], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 1024, 56, 56], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([32, 1024, 28, 28], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 2048, 28, 28], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +cnt: 45, ((T([32, 2048, 14, 14], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +cnt: 24, ((T([32, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 4096, 14, 14], f16), T([4096], f16), T([4096], f16), T([4096], f16), T([4096], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([32, 4096, 7, 7], f16), T([4096], f16), T([4096], f16), T([4096], f16), T([4096], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 2048, 7, 7], 
f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([32, 4096, 7, 7], f16), T([32, 4096, 7, 7], f16), T([4096], f16), T([4096], f16), T([4096], f16), T([4096], f32), T([4096], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 4096, 14, 14], f16), T([32, 4096, 14, 14], f16), T([4096], f16), T([4096], f16), T([4096], f16), T([4096], f32), T([4096], f32), True, 1e-05, [True, True, True]), {}) +cnt: 24, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 45, ((T([32, 2048, 14, 14], f16), T([32, 2048, 14, 14], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 2048, 28, 28], f16), T([32, 2048, 28, 28], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([32, 1024, 28, 28], f16), T([32, 1024, 28, 28], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 1024, 56, 56], f16), T([32, 1024, 56, 56], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([32, 512, 
56, 56], f16), T([32, 512, 56, 56], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([32, 1000], f16), T([32], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([32, 1000], f16), T([32], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([32, 64, 112, 112], f16),), {}) +cnt: 6, ((T([32, 512, 56, 56], f16),), {}) +cnt: 3, ((T([32, 256, 56, 56], f16),), {}) +cnt: 1, ((T([32, 1024, 56, 56], f16),), {}) +cnt: 7, ((T([32, 1024, 28, 28], f16),), {}) +cnt: 4, ((T([32, 512, 28, 28], f16),), {}) +cnt: 1, ((T([32, 2048, 28, 28], f16),), {}) +cnt: 45, ((T([32, 2048, 14, 14], f16),), {}) +cnt: 23, ((T([32, 1024, 14, 14], f16),), {}) +cnt: 1, ((T([32, 4096, 14, 14], f16),), {}) +cnt: 5, ((T([32, 4096, 7, 7], f16),), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), 0), {}) +cnt: 5, ((T([32, 4096, 7, 7], f16), T([32, 4096, 7, 7], f16), 0), {}) +cnt: 1, ((T([32, 4096, 14, 14], f16), T([32, 4096, 14, 14], f16), 0), {}) +cnt: 23, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), 0), {}) +cnt: 45, ((T([32, 2048, 14, 14], f16), T([32, 2048, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 2048, 28, 28], f16), T([32, 2048, 28, 28], f16), 0), {}) +cnt: 4, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), 0), {}) +cnt: 7, ((T([32, 1024, 28, 28], f16), T([32, 1024, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 1024, 56, 56], f16), T([32, 1024, 56, 56], f16), 0), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), 0), {}) +cnt: 6, 
((T([32, 512, 56, 56], f16), T([32, 512, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tf_efficientnet_b0_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tf_efficientnet_b0_training.txt new file mode 100644 index 000000000..b606244e7 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tf_efficientnet_b0_training.txt @@ -0,0 +1,312 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 49, ((T([], i64), 1), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16)), {}) +cnt: 4, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16)), {}) +cnt: 4, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16)), {}) +cnt: 6, ((T([128, 192, 7, 7], f16), T([128, 192, 7, 7], f16)), {}) +cnt: 4, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 3, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16)), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16)), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16)), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16)), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 
1, ((T([128, 3, 224, 224], f16),), {}) +cnt: 2, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 8, 1, 1], f16),), {}) +cnt: 1, ((T([128, 96, 112, 112], f16),), {}) +cnt: 1, ((T([128, 96, 56, 56], f16),), {}) +cnt: 1, ((T([128, 4, 1, 1], f16),), {}) +cnt: 3, ((T([128, 144, 56, 56], f16),), {}) +cnt: 2, ((T([128, 6, 1, 1], f16),), {}) +cnt: 1, ((T([128, 144, 28, 28], f16),), {}) +cnt: 3, ((T([128, 240, 28, 28], f16),), {}) +cnt: 2, ((T([128, 10, 1, 1], f16),), {}) +cnt: 1, ((T([128, 240, 14, 14], f16),), {}) +cnt: 6, ((T([128, 480, 14, 14], f16),), {}) +cnt: 3, ((T([128, 20, 1, 1], f16),), {}) +cnt: 5, ((T([128, 672, 14, 14], f16),), {}) +cnt: 3, ((T([128, 28, 1, 1], f16),), {}) +cnt: 1, ((T([128, 672, 7, 7], f16),), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16),), {}) +cnt: 4, ((T([128, 48, 1, 1], f16),), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([128, 3, 224, 224], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), [1, 2, 1, 2], 0.0), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 672, 14, 14], f16), [1, 2, 1, 2], 0.0), {}) +cnt: 1, ((T([128, 672, 17, 17], f16), [-1, -2, -1, -2]), {}) +cnt: 1, ((T([128, 240, 29, 29], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([128, 144, 59, 59], f16), [-1, -2, -1, -2]), {}) +cnt: 1, ((T([128, 96, 113, 113], f16), [0, -1, 0, -1]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 225, 225], f16), T([32, 3, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([8, 32, 1, 1], f16), T([8], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([32, 8, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 
112, 112], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([96, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 113, 113], f16), T([96, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 96), {}) +cnt: 1, ((T([128, 96, 1, 1], f16), T([4, 96, 1, 1], f16), T([4], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 4, 1, 1], f16), T([96, 4, 1, 1], f16), T([96], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([24, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([144, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([144, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 144), {}) +cnt: 2, ((T([128, 144, 1, 1], f16), T([6, 144, 1, 1], f16), T([6], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 6, 1, 1], f16), T([144, 6, 1, 1], f16), T([144], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([24, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 59, 59], f16), T([144, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 144), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([40, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([240, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 240), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([10, 240, 1, 1], f16), T([10], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 10, 1, 1], f16), T([240, 10, 1, 1], f16), T([240], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, 
((T([128, 240, 28, 28], f16), T([40, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 29, 29], f16), T([240, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 80, 14, 14], f16), T([480, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([480, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 480), {}) +cnt: 3, ((T([128, 480, 1, 1], f16), T([20, 480, 1, 1], f16), T([20], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 20, 1, 1], f16), T([480, 20, 1, 1], f16), T([480], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([80, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([480, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 480), {}) +cnt: 1, ((T([128, 480, 14, 14], f16), T([112, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 112, 14, 14], f16), T([672, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 3, ((T([128, 672, 1, 1], f16), T([28, 672, 1, 1], f16), T([28], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 28, 1, 1], f16), T([672, 28, 1, 1], f16), T([672], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([112, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 17, 17], f16), T([672, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 672), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([192, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 
0], 1), {}) +cnt: 4, ((T([128, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1152), {}) +cnt: 4, ((T([128, 1152, 1, 1], f16), T([48, 1152, 1, 1], f16), T([48], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 48, 1, 1], f16), T([1152, 48, 1, 1], f16), T([1152], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([1152, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1152), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([320, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([128, 1152, 7, 7], f16), T([320, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 1152, 1, 1], f16), T([128, 48, 1, 1], f16), T([1152, 48, 1, 1], f16), [1152], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([128, 48, 1, 1], f16), T([128, 1152, 1, 1], f16), T([48, 1152, 1, 1], f16), [48], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 4, ((T([128, 1152, 7, 7], f16), T([128, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) 
+cnt: 3, ((T([128, 192, 7, 7], f16), T([128, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 7, 7], f16), T([128, 672, 7, 7], f16), T([192, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 672, 1, 1], f16), T([128, 28, 1, 1], f16), T([672, 28, 1, 1], f16), [672], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 28, 1, 1], f16), T([128, 672, 1, 1], f16), T([28, 672, 1, 1], f16), [28], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 17, 17], f16), T([672, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 3, ((T([128, 672, 14, 14], f16), T([128, 112, 14, 14], f16), T([672, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 112, 14, 14], f16), T([128, 672, 14, 14], f16), T([112, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16), T([672, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 14, 14], f16), T([128, 480, 14, 14], f16), T([112, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 480, 1, 1], f16), T([128, 20, 1, 1], f16), T([480, 20, 1, 1], f16), [480], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 20, 1, 1], f16), T([128, 480, 1, 1], f16), T([20, 480, 1, 1], f16), [20], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 480, 14, 14], 
f16), T([128, 480, 14, 14], f16), T([480, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 3, ((T([128, 480, 14, 14], f16), T([128, 80, 14, 14], f16), T([480, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 80, 14, 14], f16), T([128, 480, 14, 14], f16), T([80, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 14, 14], f16), T([128, 240, 14, 14], f16), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([128, 10, 1, 1], f16), T([240, 10, 1, 1], f16), [240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 10, 1, 1], f16), T([128, 240, 1, 1], f16), T([10, 240, 1, 1], f16), [10], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 29, 29], f16), T([240, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 2, ((T([128, 240, 28, 28], f16), T([128, 40, 28, 28], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([128, 240, 28, 28], f16), T([40, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16), T([240, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 28, 28], f16), T([128, 144, 28, 28], f16), T([40, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 144, 1, 1], f16), T([128, 6, 1, 1], f16), 
T([144, 6, 1, 1], f16), [144], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 6, 1, 1], f16), T([128, 144, 1, 1], f16), T([6, 144, 1, 1], f16), [6], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 59, 59], f16), T([144, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 2, ((T([128, 144, 56, 56], f16), T([128, 24, 56, 56], f16), T([144, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 144, 56, 56], f16), T([24, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16), T([144, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 96, 56, 56], f16), T([24, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 1, 1], f16), T([128, 4, 1, 1], f16), T([96, 4, 1, 1], f16), [96], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 4, 1, 1], f16), T([128, 96, 1, 1], f16), T([4, 96, 1, 1], f16), [4], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 113, 113], f16), T([96, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 16, 112, 112], f16), T([96, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 32, 112, 112], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([128, 8, 1, 1], f16), T([32, 8, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], 
False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([128, 32, 1, 1], f16), T([8, 32, 1, 1], f16), [8], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 3, 225, 225], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1280, 7, 7], f16, stride=(1280, 1, 0, 0)), 49), {}) +cnt: 4, ((T([128, 1152, 7, 7], f16, stride=(1152, 1, 0, 0)), 49), {}) +cnt: 1, ((T([128, 672, 7, 7], f16, stride=(672, 1, 0, 0)), 49), {}) +cnt: 2, ((T([128, 672, 14, 14], f16, stride=(672, 1, 0, 0)), 196), {}) +cnt: 3, ((T([128, 480, 14, 14], f16, stride=(480, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 240, 14, 14], f16, stride=(240, 1, 0, 0)), 196), {}) +cnt: 1, ((T([128, 240, 28, 28], f16, stride=(240, 1, 0, 0)), 784), {}) +cnt: 1, ((T([128, 144, 28, 28], f16, stride=(144, 1, 0, 0)), 784), {}) +cnt: 1, ((T([128, 144, 56, 56], f16, stride=(144, 1, 0, 0)), 3136), {}) +cnt: 1, ((T([128, 96, 56, 56], f16, stride=(96, 1, 0, 0)), 3136), {}) +cnt: 1, ((T([128, 32, 112, 112], f16, stride=(32, 1, 0, 0)), 12544), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 32, 112, 112], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([128, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), [2, 3], 
True), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 4, ((T([128, 1152, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1280], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 1, 1], f16)), {}) +cnt: 2, ((T([128, 96, 56, 56], f16), T([128, 96, 1, 1], f16)), {}) +cnt: 2, ((T([128, 144, 56, 56], f16), T([128, 144, 1, 1], f16)), {}) +cnt: 2, ((T([128, 144, 28, 28], f16), T([128, 144, 1, 1], f16)), {}) +cnt: 2, ((T([128, 240, 28, 28], f16), T([128, 240, 1, 1], f16)), {}) +cnt: 2, ((T([128, 240, 14, 14], f16), T([128, 240, 1, 1], f16)), {}) +cnt: 6, ((T([128, 480, 14, 14], f16), T([128, 480, 1, 1], f16)), {}) +cnt: 4, ((T([128, 672, 14, 14], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 7, 7], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16), T([128, 1152, 1, 1], f16)), {}) +cnt: 4, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 3, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16)), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16)), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16)), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16)), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16)), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([16], f16), 
T([16], f16), T([16], f16), T([16], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 0.001), {}) +cnt: 6, ((T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 0.001), {}) +cnt: 3, ((T([128, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), True, 0.1, 0.001), {}) +cnt: 5, ((T([128, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 0.001), {}) +cnt: 4, ((T([128, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), True, 0.1, 0.001), {}) +cnt: 1, 
((T([128, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 0.001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 320, 7, 7], f16), T([128, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), True, 0.001, [True, True, True]), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f32), T([1152], f32), True, 0.001, [True, True, True]), {}) +cnt: 4, ((T([128, 192, 7, 7], f16), T([128, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 0.001, [True, True, True]), {}) +cnt: 5, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([128, 112, 14, 14], f16), T([128, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), True, 0.001, [True, True, True]), {}) +cnt: 6, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([128, 80, 14, 14], f16), T([128, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([128, 240, 28, 28], f16), 
T([128, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([128, 40, 28, 28], f16), T([128, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([128, 32, 1, 1], f16),), {}) +cnt: 1, ((T([128, 96, 1, 1], f16),), {}) +cnt: 2, ((T([128, 144, 1, 1], f16),), {}) +cnt: 2, ((T([128, 240, 1, 1], f16),), {}) +cnt: 
3, ((T([128, 480, 1, 1], f16),), {}) +cnt: 3, ((T([128, 672, 1, 1], f16),), {}) +cnt: 4, ((T([128, 1152, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 4, ((T([128, 1152, 1, 1], f16), T([128, 1152, 1, 1], f16)), {}) +cnt: 3, ((T([128, 672, 1, 1], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 3, ((T([128, 480, 1, 1], f16), T([128, 480, 1, 1], f16)), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([128, 240, 1, 1], f16)), {}) +cnt: 2, ((T([128, 144, 1, 1], f16), T([128, 144, 1, 1], f16)), {}) +cnt: 1, ((T([128, 96, 1, 1], f16), T([128, 96, 1, 1], f16)), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([128, 32, 1, 1], f16)), {}) +Operator: aten.silu_.default +cnt: 2, ((T([128, 32, 112, 112], f16),), {}) +cnt: 1, ((T([128, 8, 1, 1], f16),), {}) +cnt: 1, ((T([128, 96, 112, 112], f16),), {}) +cnt: 1, ((T([128, 96, 56, 56], f16),), {}) +cnt: 1, ((T([128, 4, 1, 1], f16),), {}) +cnt: 3, ((T([128, 144, 56, 56], f16),), {}) +cnt: 2, ((T([128, 6, 1, 1], f16),), {}) +cnt: 1, ((T([128, 144, 28, 28], f16),), {}) +cnt: 3, ((T([128, 240, 28, 28], f16),), {}) +cnt: 2, ((T([128, 10, 1, 1], f16),), {}) +cnt: 1, ((T([128, 240, 14, 14], f16),), {}) +cnt: 6, ((T([128, 480, 14, 14], f16),), {}) +cnt: 3, ((T([128, 20, 1, 1], f16),), {}) +cnt: 5, ((T([128, 672, 14, 14], f16),), {}) +cnt: 3, ((T([128, 28, 1, 1], f16),), {}) +cnt: 1, ((T([128, 672, 7, 7], f16),), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16),), {}) +cnt: 4, ((T([128, 48, 1, 1], f16),), {}) +cnt: 1, ((T([128, 1280, 7, 7], f16),), {}) +Operator: aten.silu_backward.default +cnt: 1, ((T([128, 1280, 7, 7], f16), T([128, 1280, 7, 7], f16)), {}) +cnt: 4, ((T([128, 48, 1, 1], f16), T([128, 48, 1, 1], f16)), {}) +cnt: 8, ((T([128, 1152, 7, 7], f16), T([128, 1152, 7, 7], f16)), {}) +cnt: 3, ((T([128, 28, 1, 1], f16), T([128, 28, 1, 1], f16)), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), T([128, 672, 7, 7], f16)), {}) +cnt: 5, ((T([128, 672, 14, 14], f16), T([128, 672, 14, 14], f16)), {}) +cnt: 3, ((T([128, 20, 1, 1], f16), T([128, 20, 1, 
1], f16)), {}) +cnt: 6, ((T([128, 480, 14, 14], f16), T([128, 480, 14, 14], f16)), {}) +cnt: 2, ((T([128, 10, 1, 1], f16), T([128, 10, 1, 1], f16)), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), T([128, 240, 14, 14], f16)), {}) +cnt: 3, ((T([128, 240, 28, 28], f16), T([128, 240, 28, 28], f16)), {}) +cnt: 2, ((T([128, 6, 1, 1], f16), T([128, 6, 1, 1], f16)), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), T([128, 144, 28, 28], f16)), {}) +cnt: 3, ((T([128, 144, 56, 56], f16), T([128, 144, 56, 56], f16)), {}) +cnt: 1, ((T([128, 4, 1, 1], f16), T([128, 4, 1, 1], f16)), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), T([128, 96, 56, 56], f16)), {}) +cnt: 1, ((T([128, 96, 112, 112], f16), T([128, 96, 112, 112], f16)), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([128, 8, 1, 1], f16)), {}) +cnt: 2, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 4, ((T([128, 1152, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 672, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([128, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 144, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 144, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 96, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), [2, 3], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tf_mixnet_l_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tf_mixnet_l_training.txt new file mode 100644 index 000000000..5612bc458 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tf_mixnet_l_training.txt @@ -0,0 +1,408 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 
1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 58, ((T([], i64), 1), {}) +cnt: 2, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16)), {}) +cnt: 2, ((T([64, 40, 56, 56], f16), T([64, 40, 56, 56], f16)), {}) +cnt: 6, ((T([64, 56, 28, 28], f16), T([64, 56, 28, 28], f16)), {}) +cnt: 6, ((T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16)), {}) +cnt: 6, ((T([64, 160, 14, 14], f16), T([64, 160, 14, 14], f16)), {}) +cnt: 6, ((T([64, 264, 7, 7], f16), T([64, 264, 7, 7], f16)), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), T([64, 1584, 7, 7], f16)), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([64, 960, 7, 7], f16)), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), T([64, 480, 14, 14], f16)), {}) +cnt: 4, ((T([64, 624, 14, 14], f16), T([64, 624, 14, 14], f16)), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([64, 336, 14, 14], f16)), {}) +cnt: 3, ((T([64, 336, 28, 28], f16), T([64, 336, 28, 28], f16)), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([64, 240, 28, 28], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([64, 1536], f16), T([1536, 1000], f16, stride=(1, 1536))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 96, 112, 112], f16), T([64, 96, 112, 112], f16)], 1), {}) +cnt: 1, (([T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16), T([64, 64, 56, 56], f16)], 1), {}) +cnt: 3, (([T([64, 20, 56, 56], f16), T([64, 20, 56, 56], f16)], 1), {}) +cnt: 2, (([T([64, 60, 56, 56], f16), T([64, 60, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 60, 28, 28], f16), T([64, 60, 28, 28], f16), T([64, 60, 28, 28], f16), T([64, 60, 28, 28], f16)], 1), {}) +cnt: 12, (([T([64, 168, 28, 28], f16), T([64, 168, 28, 28], f16)], 1), {}) +cnt: 6, (([T([64, 28, 28, 28], f16), T([64, 28, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 112, 14, 14], f16), T([64, 112, 14, 14], f16), T([64, 112, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 312, 14, 14], f16), T([64, 312, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 156, 14, 14], f16), T([64, 156, 14, 14], 
f16), T([64, 156, 14, 14], f16), T([64, 156, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 52, 14, 14], f16), T([64, 52, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 240, 14, 14], f16), T([64, 240, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 120, 14, 14], f16), T([64, 120, 14, 14], f16), T([64, 120, 14, 14], f16), T([64, 120, 14, 14], f16)], 1), {}) +cnt: 6, (([T([64, 80, 14, 14], f16), T([64, 80, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 240, 7, 7], f16), T([64, 240, 7, 7], f16), T([64, 240, 7, 7], f16), T([64, 240, 7, 7], f16)], 1), {}) +cnt: 6, (([T([64, 396, 7, 7], f16), T([64, 396, 7, 7], f16), T([64, 396, 7, 7], f16), T([64, 396, 7, 7], f16)], 1), {}) +cnt: 3, (([T([64, 132, 7, 7], f16), T([64, 132, 7, 7], f16)], 1), {}) +cnt: 3, (([T([64, 792, 7, 7], f16), T([64, 792, 7, 7], f16)], 1), {}) +cnt: 1, (([T([64, 240, 14, 14], f16), T([64, 240, 14, 14], f16), T([64, 240, 14, 14], f16), T([64, 240, 14, 14], f16)], 1), {}) +cnt: 1, (([T([64, 112, 28, 28], f16), T([64, 112, 28, 28], f16), T([64, 112, 28, 28], f16)], 1), {}) +cnt: 1, (([T([64, 60, 56, 56], f16), T([64, 60, 56, 56], f16), T([64, 60, 56, 56], f16), T([64, 60, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 96, 56, 56], f16), T([64, 96, 56, 56], f16)], 1), {}) +cnt: 1, (([T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16)], 1), {}) +cnt: 1, (([T([64, 16, 112, 112], f16), T([64, 16, 112, 112], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +cnt: 1, ((T([64, 240, 56, 56], f16),), {}) +cnt: 1, ((T([64, 240, 28, 28], f16),), {}) +cnt: 1, ((T([64, 20, 1, 1], f16),), {}) +cnt: 7, ((T([64, 336, 28, 28], f16),), {}) +cnt: 3, ((T([64, 28, 1, 1], f16),), {}) +cnt: 1, ((T([64, 336, 14, 14], f16),), {}) +cnt: 1, ((T([64, 14, 1, 1], f16),), {}) +cnt: 8, ((T([64, 624, 14, 14], f16),), {}) +cnt: 3, ((T([64, 26, 1, 1], f16),), {}) +cnt: 1, ((T([64, 52, 1, 1], f16),), {}) +cnt: 6, ((T([64, 480, 14, 14], f16),), {}) +cnt: 4, ((T([64, 80, 1, 1], f16),), {}) 
+cnt: 1, ((T([64, 960, 14, 14], f16),), {}) +cnt: 1, ((T([64, 960, 7, 7], f16),), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16),), {}) +cnt: 3, ((T([64, 132, 1, 1], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([64, 3, 224, 224], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([64, 64, 112, 112], f16, stride=(2408448, 12544, 112, 1)), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([64, 64, 112, 112], f16, stride=(2408448, 12544, 112, 1)), [1, 2, 1, 2], 0.0), {}) +cnt: 1, ((T([64, 64, 112, 112], f16, stride=(2408448, 12544, 112, 1)), [2, 3, 2, 3], 0.0), {}) +cnt: 1, ((T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), [1, 2, 1, 2], 0.0), {}) +cnt: 1, ((T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), [2, 3, 2, 3], 0.0), {}) +cnt: 1, ((T([64, 60, 56, 56], f16, stride=(752640, 3136, 56, 1)), [3, 4, 3, 4], 0.0), {}) +cnt: 1, ((T([64, 112, 28, 28], f16, stride=(263424, 784, 28, 1)), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([64, 112, 28, 28], f16, stride=(263424, 784, 28, 1)), [1, 2, 1, 2], 0.0), {}) +cnt: 1, ((T([64, 112, 28, 28], f16, stride=(263424, 784, 28, 1)), [2, 3, 2, 3], 0.0), {}) +cnt: 1, ((T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), [1, 2, 1, 2], 0.0), {}) +cnt: 1, ((T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), [2, 3, 2, 3], 0.0), {}) +cnt: 1, ((T([64, 240, 14, 14], f16, stride=(188160, 196, 14, 1)), [3, 4, 3, 4], 0.0), {}) +cnt: 1, ((T([64, 240, 21, 21], f16), [-3, -4, -3, -4]), {}) +cnt: 1, ((T([64, 240, 19, 19], f16), [-2, -3, -2, -3]), {}) +cnt: 1, ((T([64, 240, 17, 17], f16), [-1, -2, -1, -2]), {}) +cnt: 1, ((T([64, 240, 15, 15], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([64, 112, 33, 33], f16), [-2, -3, -2, -3]), {}) +cnt: 1, ((T([64, 112, 31, 31], f16), [-1, -2, -1, -2]), {}) +cnt: 1, ((T([64, 112, 29, 29], f16), [0, -1, 0, -1]), {}) +cnt: 1, 
((T([64, 60, 63, 63], f16), [-3, -4, -3, -4]), {}) +cnt: 1, ((T([64, 60, 61, 61], f16), [-2, -3, -2, -3]), {}) +cnt: 1, ((T([64, 60, 59, 59], f16), [-1, -2, -1, -2]), {}) +cnt: 1, ((T([64, 60, 57, 57], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([64, 64, 117, 117], f16), [-2, -3, -2, -3]), {}) +cnt: 1, ((T([64, 64, 115, 115], f16), [-1, -2, -1, -2]), {}) +cnt: 1, ((T([64, 64, 113, 113], f16), [0, -1, 0, -1]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 225, 225], f16), T([32, 3, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([32, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 16, 112, 112], f16, stride=(401408, 12544, 112, 1)), T([96, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 113, 113], f16), T([64, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([64, 64, 115, 115], f16), T([64, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([64, 64, 117, 117], f16), T([64, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 64), {}) +cnt: 2, ((T([64, 96, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([20, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 20, 56, 56], f16, stride=(125440, 3136, 56, 1)), T([60, 20, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 120, 56, 56], f16), T([120, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 120), {}) +cnt: 2, ((T([64, 60, 56, 56], f16, stride=(376320, 3136, 56, 1)), T([20, 60, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 40, 56, 56], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 60, 57, 57], f16), T([60, 
1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 60), {}) +cnt: 1, ((T([64, 60, 59, 59], f16), T([60, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 60), {}) +cnt: 1, ((T([64, 60, 61, 61], f16), T([60, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 60), {}) +cnt: 1, ((T([64, 60, 63, 63], f16), T([60, 1, 9, 9], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 60), {}) +cnt: 1, ((T([64, 240, 1, 1], f16), T([20, 240, 1, 1], f16), T([20], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 20, 1, 1], f16), T([240, 20, 1, 1], f16), T([240], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([56, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 28, 28, 28], f16, stride=(43904, 784, 28, 1)), T([168, 28, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([168, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 168), {}) +cnt: 3, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([168, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 168), {}) +cnt: 3, ((T([64, 336, 1, 1], f16), T([28, 336, 1, 1], f16), T([28], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 28, 1, 1], f16), T([336, 28, 1, 1], f16), T([336], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([28, 168, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 56, 28, 28], f16), T([336, 56, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 112, 29, 29], f16), T([112, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 112), {}) +cnt: 1, ((T([64, 112, 31, 31], f16), T([112, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 112), {}) +cnt: 1, ((T([64, 112, 33, 33], f16), T([112, 1, 7, 
7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 112), {}) +cnt: 1, ((T([64, 336, 1, 1], f16), T([14, 336, 1, 1], f16), T([14], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 14, 1, 1], f16), T([336, 14, 1, 1], f16), T([336], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([104, 336, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 52, 14, 14], f16, stride=(20384, 196, 14, 1)), T([312, 52, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 156), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 156), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 156), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 9, 9], f16), None, [1, 1], [4, 4], [1, 1], False, [0, 0], 156), {}) +cnt: 3, ((T([64, 624, 1, 1], f16), T([26, 624, 1, 1], f16), T([26], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 26, 1, 1], f16), T([624, 26, 1, 1], f16), T([624], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 312, 14, 14], f16, stride=(122304, 196, 14, 1)), T([52, 312, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 104, 14, 14], f16), T([624, 104, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 624, 14, 14], f16), T([624, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 624), {}) +cnt: 1, ((T([64, 624, 1, 1], f16), T([52, 624, 1, 1], f16), T([52], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 52, 1, 1], f16), T([624, 52, 1, 1], f16), T([624], f16), [1, 1], [0, 0], [1, 1], 
False, [0, 0], 1), {}) +cnt: 1, ((T([64, 624, 14, 14], f16), T([160, 624, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 80, 14, 14], f16, stride=(31360, 196, 14, 1)), T([240, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 120), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 120), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 120), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 9, 9], f16), None, [1, 1], [4, 4], [1, 1], False, [0, 0], 120), {}) +cnt: 3, ((T([64, 480, 1, 1], f16), T([80, 480, 1, 1], f16), T([80], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 80, 1, 1], f16), T([480, 80, 1, 1], f16), T([480], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 240, 14, 14], f16, stride=(94080, 196, 14, 1)), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 160, 14, 14], f16), T([960, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 240, 15, 15], f16), T([240, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([64, 240, 17, 17], f16), T([240, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([64, 240, 19, 19], f16), T([240, 1, 7, 7], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([64, 240, 21, 21], f16), T([240, 1, 9, 9], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([64, 960, 1, 1], f16), T([80, 960, 1, 1], f16), T([80], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 80, 1, 1], f16), 
T([960, 80, 1, 1], f16), T([960], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([264, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 264, 7, 7], f16), T([1584, 264, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 396), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 396), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 396), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 9, 9], f16), None, [1, 1], [4, 4], [1, 1], False, [0, 0], 396), {}) +cnt: 3, ((T([64, 1584, 1, 1], f16), T([132, 1584, 1, 1], f16), T([132], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 132, 1, 1], f16), T([1584, 132, 1, 1], f16), T([1584], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([64, 792, 7, 7], f16, stride=(77616, 49, 7, 1)), T([132, 792, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 264, 7, 7], f16), T([1536, 264, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 1536, 7, 7], f16), T([64, 264, 7, 7], f16), T([1536, 264, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([64, 132, 7, 7], f16, stride=(12936, 49, 7, 1)), T([64, 792, 7, 7], f16, stride=(77616, 49, 7, 1)), T([132, 792, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 1584, 1, 1], f16), T([64, 132, 1, 1], f16), T([1584, 132, 1, 1], f16), [1584], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 132, 1, 
1], f16), T([64, 1584, 1, 1], f16), T([132, 1584, 1, 1], f16), [132], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 9, 9], f16), [0], [1, 1], [4, 4], [1, 1], False, [0, 0], 396, [True, True, False]), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 396, [True, True, False]), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 396, [True, True, False]), {}) +cnt: 3, ((T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([64, 396, 7, 7], f16, stride=(77616, 49, 7, 1)), T([396, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 396, [True, True, False]), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), T([64, 264, 7, 7], f16), T([1584, 264, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 264, 7, 7], f16), T([64, 960, 7, 7], f16), T([264, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 960, 1, 1], f16), T([64, 80, 1, 1], f16), T([960, 80, 1, 1], f16), [960], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 80, 1, 1], f16), T([64, 960, 1, 1], f16), T([80, 960, 1, 1], f16), [80], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 240, 7, 7], f16, stride=(47040, 49, 7, 1)), T([64, 240, 21, 21], f16), T([240, 1, 9, 9], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([64, 240, 7, 7], f16, stride=(47040, 49, 7, 1)), T([64, 240, 19, 19], f16), T([240, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 240, [True, True, False]), {}) 
+cnt: 1, ((T([64, 240, 7, 7], f16, stride=(47040, 49, 7, 1)), T([64, 240, 17, 17], f16), T([240, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([64, 240, 7, 7], f16, stride=(47040, 49, 7, 1)), T([64, 240, 15, 15], f16), T([240, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([64, 160, 14, 14], f16), T([960, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([64, 80, 14, 14], f16, stride=(31360, 196, 14, 1)), T([64, 240, 14, 14], f16, stride=(94080, 196, 14, 1)), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 480, 1, 1], f16), T([64, 80, 1, 1], f16), T([480, 80, 1, 1], f16), [480], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 80, 1, 1], f16), T([64, 480, 1, 1], f16), T([80, 480, 1, 1], f16), [80], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 9, 9], f16), [0], [1, 1], [4, 4], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 3, ((T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([64, 120, 14, 14], f16, stride=(94080, 196, 14, 1)), T([120, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 6, ((T([64, 240, 14, 14], f16, 
stride=(94080, 196, 14, 1)), T([64, 80, 14, 14], f16, stride=(31360, 196, 14, 1)), T([240, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 160, 14, 14], f16), T([64, 624, 14, 14], f16), T([160, 624, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 624, 1, 1], f16), T([64, 52, 1, 1], f16), T([624, 52, 1, 1], f16), [624], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 52, 1, 1], f16), T([64, 624, 1, 1], f16), T([52, 624, 1, 1], f16), [52], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 624, 14, 14], f16), T([64, 624, 14, 14], f16), T([624, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 624, [True, True, False]), {}) +cnt: 1, ((T([64, 624, 14, 14], f16), T([64, 104, 14, 14], f16), T([624, 104, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([64, 52, 14, 14], f16, stride=(20384, 196, 14, 1)), T([64, 312, 14, 14], f16, stride=(122304, 196, 14, 1)), T([52, 312, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 624, 1, 1], f16), T([64, 26, 1, 1], f16), T([624, 26, 1, 1], f16), [624], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 26, 1, 1], f16), T([64, 624, 1, 1], f16), T([26, 624, 1, 1], f16), [26], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 9, 9], f16), [0], [1, 1], [4, 4], [1, 1], False, [0, 0], 156, [True, True, False]), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 156, [True, True, False]), {}) +cnt: 3, ((T([64, 156, 14, 
14], f16, stride=(122304, 196, 14, 1)), T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 156, [True, True, False]), {}) +cnt: 3, ((T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([64, 156, 14, 14], f16, stride=(122304, 196, 14, 1)), T([156, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 156, [True, True, False]), {}) +cnt: 6, ((T([64, 312, 14, 14], f16, stride=(122304, 196, 14, 1)), T([64, 52, 14, 14], f16, stride=(20384, 196, 14, 1)), T([312, 52, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 104, 14, 14], f16), T([64, 336, 14, 14], f16), T([104, 336, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 336, 1, 1], f16), T([64, 14, 1, 1], f16), T([336, 14, 1, 1], f16), [336], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 14, 1, 1], f16), T([64, 336, 1, 1], f16), T([14, 336, 1, 1], f16), [14], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 112, 14, 14], f16, stride=(65856, 196, 14, 1)), T([64, 112, 33, 33], f16), T([112, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 112, [True, True, False]), {}) +cnt: 1, ((T([64, 112, 14, 14], f16, stride=(65856, 196, 14, 1)), T([64, 112, 31, 31], f16), T([112, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 112, [True, True, False]), {}) +cnt: 1, ((T([64, 112, 14, 14], f16, stride=(65856, 196, 14, 1)), T([64, 112, 29, 29], f16), T([112, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 112, [True, True, False]), {}) +cnt: 1, ((T([64, 336, 28, 28], f16), T([64, 56, 28, 28], f16), T([336, 56, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([64, 28, 28, 28], f16, stride=(43904, 784, 28, 1)), T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([28, 168, 1, 1], 
f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([64, 336, 1, 1], f16), T([64, 28, 1, 1], f16), T([336, 28, 1, 1], f16), [336], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 28, 1, 1], f16), T([64, 336, 1, 1], f16), T([28, 336, 1, 1], f16), [28], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([168, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 168, [True, True, False]), {}) +cnt: 3, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([168, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 168, [True, True, False]), {}) +cnt: 6, ((T([64, 168, 28, 28], f16, stride=(263424, 784, 28, 1)), T([64, 28, 28, 28], f16, stride=(43904, 784, 28, 1)), T([168, 28, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 56, 28, 28], f16), T([64, 240, 28, 28], f16), T([56, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 240, 1, 1], f16), T([64, 20, 1, 1], f16), T([240, 20, 1, 1], f16), [240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 20, 1, 1], f16), T([64, 240, 1, 1], f16), T([20, 240, 1, 1], f16), [20], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 60, 28, 28], f16, stride=(188160, 784, 28, 1)), T([64, 60, 63, 63], f16), T([60, 1, 9, 9], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 60, [True, True, False]), {}) +cnt: 1, ((T([64, 60, 28, 28], f16, stride=(188160, 784, 28, 1)), T([64, 60, 61, 61], f16), T([60, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 60, [True, True, False]), {}) +cnt: 1, ((T([64, 60, 28, 28], f16, stride=(188160, 784, 28, 1)), T([64, 60, 59, 
59], f16), T([60, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 60, [True, True, False]), {}) +cnt: 1, ((T([64, 60, 28, 28], f16, stride=(188160, 784, 28, 1)), T([64, 60, 57, 57], f16), T([60, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 60, [True, True, False]), {}) +cnt: 1, ((T([64, 240, 56, 56], f16), T([64, 40, 56, 56], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 20, 56, 56], f16, stride=(125440, 3136, 56, 1)), T([64, 60, 56, 56], f16, stride=(376320, 3136, 56, 1)), T([20, 60, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 120, 56, 56], f16), T([64, 120, 56, 56], f16), T([120, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 2, ((T([64, 60, 56, 56], f16, stride=(376320, 3136, 56, 1)), T([64, 20, 56, 56], f16, stride=(125440, 3136, 56, 1)), T([60, 20, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([64, 20, 56, 56], f16, stride=(125440, 3136, 56, 1)), T([64, 96, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([20, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([64, 64, 117, 117], f16), T([64, 1, 7, 7], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([64, 64, 115, 115], f16), T([64, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 56, 56], f16, stride=(602112, 3136, 56, 1)), T([64, 64, 113, 113], f16), T([64, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 2, ((T([64, 96, 112, 112], f16, stride=(2408448, 12544, 112, 1)), T([64, 16, 112, 112], f16, stride=(401408, 12544, 112, 1)), T([96, 
16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), T([32, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([64, 32, 112, 112], f16), T([64, 3, 225, 225], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([64, 1536, 7, 7], f16, stride=(1536, 1, 0, 0)), 49), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16, stride=(1584, 1, 0, 0)), 49), {}) +cnt: 1, ((T([64, 960, 7, 7], f16, stride=(960, 1, 0, 0)), 49), {}) +cnt: 3, ((T([64, 480, 14, 14], f16, stride=(480, 1, 0, 0)), 196), {}) +cnt: 4, ((T([64, 624, 14, 14], f16, stride=(624, 1, 0, 0)), 196), {}) +cnt: 1, ((T([64, 336, 14, 14], f16, stride=(336, 1, 0, 0)), 196), {}) +cnt: 3, ((T([64, 336, 28, 28], f16, stride=(336, 1, 0, 0)), 784), {}) +cnt: 1, ((T([64, 240, 28, 28], f16, stride=(240, 1, 0, 0)), 784), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([64, 240, 28, 28], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 336, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), [2, 3], True), {}) +cnt: 4, ((T([64, 624, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 1536, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 1536], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 1536], f16)), {}) +Operator: 
aten.mul.Tensor +cnt: 2, ((T([64, 240, 28, 28], f16), T([64, 240, 1, 1], f16)), {}) +cnt: 6, ((T([64, 336, 28, 28], f16), T([64, 336, 1, 1], f16)), {}) +cnt: 2, ((T([64, 336, 14, 14], f16), T([64, 336, 1, 1], f16)), {}) +cnt: 8, ((T([64, 624, 14, 14], f16), T([64, 624, 1, 1], f16)), {}) +cnt: 6, ((T([64, 480, 14, 14], f16), T([64, 480, 1, 1], f16)), {}) +cnt: 2, ((T([64, 960, 7, 7], f16), T([64, 960, 1, 1], f16)), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16), T([64, 1584, 1, 1], f16)), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), T([64, 1584, 7, 7], f16)), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([64, 960, 7, 7], f16)), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), T([64, 480, 14, 14], f16)), {}) +cnt: 4, ((T([64, 624, 14, 14], f16), T([64, 624, 14, 14], f16)), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([64, 336, 14, 14], f16)), {}) +cnt: 3, ((T([64, 336, 28, 28], f16), T([64, 336, 28, 28], f16)), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([64, 240, 28, 28], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 3, ((T([64, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([64, 192, 112, 112], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([64, 40, 56, 56], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), True, 0.1, 0.001), {}) +cnt: 2, ((T([64, 120, 56, 56], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([64, 240, 56, 56], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 0.001), {}) +cnt: 4, ((T([64, 56, 28, 28], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f16), True, 0.1, 0.001), {}) +cnt: 7, ((T([64, 336, 28, 
28], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f16), True, 0.1, 0.001), {}) +cnt: 4, ((T([64, 104, 14, 14], f16), T([104], f16), T([104], f16), T([104], f16), T([104], f16), True, 0.1, 0.001), {}) +cnt: 8, ((T([64, 624, 14, 14], f16), T([624], f16), T([624], f16), T([624], f16), T([624], f16), True, 0.1, 0.001), {}) +cnt: 4, ((T([64, 160, 14, 14], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), True, 0.1, 0.001), {}) +cnt: 6, ((T([64, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), True, 0.1, 0.001), {}) +cnt: 4, ((T([64, 264, 7, 7], f16), T([264], f16), T([264], f16), T([264], f16), T([264], f16), True, 0.1, 0.001), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16), T([1584], f16), T([1584], f16), T([1584], f16), T([1584], f16), True, 0.1, 0.001), {}) +cnt: 1, ((T([64, 1536, 7, 7], f16), T([1536], f16), T([1536], f16), T([1536], f16), T([1536], f16), True, 0.1, 0.001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([64, 1536, 7, 7], f16), T([64, 1536, 7, 7], f16), T([1536], f16), T([1536], f16), T([1536], f16), T([1536], f32), T([1536], f32), True, 0.001, [True, True, True]), {}) +cnt: 4, ((T([64, 264, 7, 7], f16), T([64, 264, 7, 7], f16), T([264], f16), T([264], f16), T([264], f16), T([264], f32), T([264], f32), True, 0.001, [True, True, True]), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16), T([64, 1584, 7, 7], f16), T([1584], f16), T([1584], f16), T([1584], f16), T([1584], f32), T([1584], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([64, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), 
T([960], f32), T([960], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([64, 960, 14, 14], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), True, 0.001, [True, True, True]), {}) +cnt: 4, ((T([64, 160, 14, 14], f16), T([64, 160, 14, 14], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), True, 0.001, [True, True, True]), {}) +cnt: 6, ((T([64, 480, 14, 14], f16), T([64, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 0.001, [True, True, True]), {}) +cnt: 8, ((T([64, 624, 14, 14], f16), T([64, 624, 14, 14], f16), T([624], f16), T([624], f16), T([624], f16), T([624], f32), T([624], f32), True, 0.001, [True, True, True]), {}) +cnt: 4, ((T([64, 104, 14, 14], f16), T([64, 104, 14, 14], f16), T([104], f16), T([104], f16), T([104], f16), T([104], f32), T([104], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([64, 336, 14, 14], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 0.001, [True, True, True]), {}) +cnt: 7, ((T([64, 336, 28, 28], f16), T([64, 336, 28, 28], f16), T([336], f16), T([336], f16), T([336], f16), T([336], f32), T([336], f32), True, 0.001, [True, True, True]), {}) +cnt: 4, ((T([64, 56, 28, 28], f16), T([64, 56, 28, 28], f16), T([56], f16), T([56], f16), T([56], f16), T([56], f32), T([56], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([64, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([64, 240, 56, 56], f16), T([64, 240, 56, 56], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 0.001, [True, True, True]), {}) +cnt: 2, ((T([64, 40, 56, 56], f16), T([64, 40, 56, 56], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 0.001, [True, True, 
True]), {}) +cnt: 2, ((T([64, 120, 56, 56], f16), T([64, 120, 56, 56], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), T([64, 192, 56, 56], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 1, ((T([64, 192, 112, 112], f16), T([64, 192, 112, 112], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 0.001, [True, True, True]), {}) +cnt: 3, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 0.001, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 2, ((T([64, 32, 112, 112], f16),), {}) +cnt: 1, ((T([64, 192, 112, 112], f16),), {}) +cnt: 1, ((T([64, 192, 56, 56], f16),), {}) +cnt: 2, ((T([64, 120, 56, 56], f16),), {}) +cnt: 1, ((T([64, 1536, 7, 7], f16),), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([64, 240, 1, 1], f16),), {}) +cnt: 4, ((T([64, 336, 1, 1], f16),), {}) +cnt: 4, ((T([64, 624, 1, 1], f16),), {}) +cnt: 3, ((T([64, 480, 1, 1], f16),), {}) +cnt: 1, ((T([64, 960, 1, 1], f16),), {}) +cnt: 3, ((T([64, 1584, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 3, ((T([64, 1584, 1, 1], f16), T([64, 1584, 1, 1], f16)), {}) +cnt: 1, ((T([64, 960, 1, 1], f16), T([64, 960, 1, 1], f16)), {}) +cnt: 3, ((T([64, 480, 1, 1], f16), T([64, 480, 1, 1], f16)), {}) +cnt: 4, ((T([64, 624, 1, 1], f16), T([64, 624, 1, 1], f16)), {}) +cnt: 4, ((T([64, 336, 1, 1], f16), T([64, 336, 1, 1], f16)), {}) +cnt: 1, ((T([64, 240, 1, 1], f16), T([64, 240, 1, 1], f16)), {}) +Operator: aten.silu_.default +cnt: 1, ((T([64, 240, 56, 56], 
f16),), {}) +cnt: 1, ((T([64, 240, 28, 28], f16),), {}) +cnt: 1, ((T([64, 20, 1, 1], f16),), {}) +cnt: 7, ((T([64, 336, 28, 28], f16),), {}) +cnt: 3, ((T([64, 28, 1, 1], f16),), {}) +cnt: 1, ((T([64, 336, 14, 14], f16),), {}) +cnt: 1, ((T([64, 14, 1, 1], f16),), {}) +cnt: 8, ((T([64, 624, 14, 14], f16),), {}) +cnt: 3, ((T([64, 26, 1, 1], f16),), {}) +cnt: 1, ((T([64, 52, 1, 1], f16),), {}) +cnt: 6, ((T([64, 480, 14, 14], f16),), {}) +cnt: 4, ((T([64, 80, 1, 1], f16),), {}) +cnt: 1, ((T([64, 960, 14, 14], f16),), {}) +cnt: 1, ((T([64, 960, 7, 7], f16),), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16),), {}) +cnt: 3, ((T([64, 132, 1, 1], f16),), {}) +Operator: aten.silu_backward.default +cnt: 3, ((T([64, 132, 1, 1], f16), T([64, 132, 1, 1], f16)), {}) +cnt: 6, ((T([64, 1584, 7, 7], f16), T([64, 1584, 7, 7], f16)), {}) +cnt: 4, ((T([64, 80, 1, 1], f16), T([64, 80, 1, 1], f16)), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), T([64, 960, 7, 7], f16)), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), T([64, 960, 14, 14], f16)), {}) +cnt: 6, ((T([64, 480, 14, 14], f16), T([64, 480, 14, 14], f16)), {}) +cnt: 1, ((T([64, 52, 1, 1], f16), T([64, 52, 1, 1], f16)), {}) +cnt: 8, ((T([64, 624, 14, 14], f16), T([64, 624, 14, 14], f16)), {}) +cnt: 3, ((T([64, 26, 1, 1], f16), T([64, 26, 1, 1], f16)), {}) +cnt: 1, ((T([64, 14, 1, 1], f16), T([64, 14, 1, 1], f16)), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), T([64, 336, 14, 14], f16)), {}) +cnt: 7, ((T([64, 336, 28, 28], f16), T([64, 336, 28, 28], f16)), {}) +cnt: 3, ((T([64, 28, 1, 1], f16), T([64, 28, 1, 1], f16)), {}) +cnt: 1, ((T([64, 20, 1, 1], f16), T([64, 20, 1, 1], f16)), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), T([64, 240, 28, 28], f16)), {}) +cnt: 1, ((T([64, 240, 56, 56], f16), T([64, 240, 56, 56], f16)), {}) +Operator: aten.split_with_sizes.default +cnt: 1, ((T([64, 32, 112, 112], f16), [16, 16], 1), {}) +cnt: 1, ((T([64, 192, 112, 112], f16), [64, 64, 64], 1), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), [96, 96], 1), {}) +cnt: 1, ((T([64, 40, 
56, 56], f16), [20, 20], 1), {}) +cnt: 1, ((T([64, 120, 56, 56], f16), [60, 60], 1), {}) +cnt: 1, ((T([64, 240, 56, 56], f16), [60, 60, 60, 60], 1), {}) +cnt: 3, ((T([64, 56, 28, 28], f16), [28, 28], 1), {}) +cnt: 6, ((T([64, 336, 28, 28], f16), [168, 168], 1), {}) +cnt: 1, ((T([64, 336, 28, 28], f16), [112, 112, 112], 1), {}) +cnt: 3, ((T([64, 104, 14, 14], f16), [52, 52], 1), {}) +cnt: 3, ((T([64, 624, 14, 14], f16), [156, 156, 156, 156], 1), {}) +cnt: 3, ((T([64, 624, 14, 14], f16), [312, 312], 1), {}) +cnt: 3, ((T([64, 160, 14, 14], f16), [80, 80], 1), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), [120, 120, 120, 120], 1), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), [240, 240], 1), {}) +cnt: 1, ((T([64, 960, 14, 14], f16), [240, 240, 240, 240], 1), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), [396, 396, 396, 396], 1), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), [792, 792], 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 3, ((T([64, 1584, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 960, 7, 7], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 4, ((T([64, 624, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 336, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([64, 336, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([64, 240, 28, 28], f16), [2, 3], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([64, 1536, 7, 7], f16), T([64, 1536, 7, 7], f16), 0), {}) +cnt: 2, ((T([64, 120, 56, 56], f16), T([64, 120, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 192, 56, 56], f16), T([64, 192, 56, 56], f16), 0), {}) +cnt: 1, ((T([64, 192, 112, 112], f16), T([64, 192, 112, 112], f16), 0), {}) +cnt: 2, ((T([64, 32, 112, 112], f16), T([64, 32, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tinynet_a_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tinynet_a_training.txt new file mode 100644 index 000000000..c3f1255f4 --- 
/dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tinynet_a_training.txt @@ -0,0 +1,302 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 58, ((T([], i64), 1), {}) +cnt: 2, ((T([128, 24, 48, 48], f16), T([128, 24, 48, 48], f16)), {}) +cnt: 2, ((T([128, 40, 24, 24], f16), T([128, 40, 24, 24], f16)), {}) +cnt: 6, ((T([128, 80, 12, 12], f16), T([128, 80, 12, 12], f16)), {}) +cnt: 6, ((T([128, 112, 12, 12], f16), T([128, 112, 12, 12], f16)), {}) +cnt: 8, ((T([128, 192, 6, 6], f16), T([128, 192, 6, 6], f16)), {}) +cnt: 5, ((T([128, 1152, 6, 6], f16), T([128, 1152, 6, 6], f16)), {}) +cnt: 1, ((T([128, 672, 6, 6], f16), T([128, 672, 6, 6], f16)), {}) +cnt: 3, ((T([128, 672, 12, 12], f16), T([128, 672, 12, 12], f16)), {}) +cnt: 4, ((T([128, 480, 12, 12], f16), T([128, 480, 12, 12], f16)), {}) +cnt: 1, ((T([128, 240, 12, 12], f16), T([128, 240, 12, 12], f16)), {}) +cnt: 1, ((T([128, 240, 24, 24], f16), T([128, 240, 24, 24], f16)), {}) +cnt: 1, ((T([128, 144, 24, 24], f16), T([128, 144, 24, 24], f16)), {}) +cnt: 1, ((T([128, 144, 48, 48], f16), T([128, 144, 48, 48], f16)), {}) +cnt: 1, ((T([128, 96, 48, 48], f16), T([128, 96, 48, 48], f16)), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([128, 32, 96, 96], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 192, 192], f16),), {}) +cnt: 2, ((T([128, 32, 96, 96], f16),), {}) +cnt: 1, ((T([128, 8, 1, 1], f16),), {}) +cnt: 1, ((T([128, 96, 96, 96], f16),), {}) +cnt: 1, ((T([128, 96, 48, 48], f16),), {}) +cnt: 1, ((T([128, 4, 1, 1], f16),), {}) +cnt: 3, ((T([128, 144, 48, 48], f16),), {}) +cnt: 2, ((T([128, 6, 1, 1], f16),), {}) +cnt: 1, ((T([128, 144, 24, 24], f16),), {}) +cnt: 3, ((T([128, 
240, 24, 24], f16),), {}) +cnt: 2, ((T([128, 10, 1, 1], f16),), {}) +cnt: 1, ((T([128, 240, 12, 12], f16),), {}) +cnt: 8, ((T([128, 480, 12, 12], f16),), {}) +cnt: 4, ((T([128, 20, 1, 1], f16),), {}) +cnt: 7, ((T([128, 672, 12, 12], f16),), {}) +cnt: 4, ((T([128, 28, 1, 1], f16),), {}) +cnt: 1, ((T([128, 672, 6, 6], f16),), {}) +cnt: 10, ((T([128, 1152, 6, 6], f16),), {}) +cnt: 5, ((T([128, 48, 1, 1], f16),), {}) +cnt: 1, ((T([128, 1280, 6, 6], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 192, 192], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([8, 32, 1, 1], f16), T([8], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([32, 8, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), T([96, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 96, 96], f16), T([96, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 96), {}) +cnt: 1, ((T([128, 96, 1, 1], f16), T([4, 96, 1, 1], f16), T([4], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 4, 1, 1], f16), T([96, 4, 1, 1], f16), T([96], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 96, 48, 48], f16), T([24, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 24, 48, 48], f16), T([144, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 48, 48], f16), T([144, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 144), {}) +cnt: 2, ((T([128, 144, 1, 1], f16), T([6, 144, 1, 1], f16), T([6], f16), [1, 1], [0, 0], [1, 1], 
False, [0, 0], 1), {}) +cnt: 2, ((T([128, 6, 1, 1], f16), T([144, 6, 1, 1], f16), T([144], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 48, 48], f16), T([24, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 144, 48, 48], f16), T([144, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 144), {}) +cnt: 1, ((T([128, 144, 24, 24], f16), T([40, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 40, 24, 24], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 24, 24], f16), T([240, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 240), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([10, 240, 1, 1], f16), T([10], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 10, 1, 1], f16), T([240, 10, 1, 1], f16), T([240], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 24, 24], f16), T([40, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 240, 24, 24], f16), T([240, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([128, 240, 12, 12], f16), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 80, 12, 12], f16), T([480, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 480, 12, 12], f16), T([480, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 480), {}) +cnt: 4, ((T([128, 480, 1, 1], f16), T([20, 480, 1, 1], f16), T([20], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 20, 1, 1], f16), T([480, 20, 1, 1], f16), T([480], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 480, 12, 12], f16), T([80, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 480, 12, 12], f16), T([480, 1, 5, 5], f16), None, [1, 
1], [2, 2], [1, 1], False, [0, 0], 480), {}) +cnt: 1, ((T([128, 480, 12, 12], f16), T([112, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 112, 12, 12], f16), T([672, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 672, 12, 12], f16), T([672, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 4, ((T([128, 672, 1, 1], f16), T([28, 672, 1, 1], f16), T([28], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 28, 1, 1], f16), T([672, 28, 1, 1], f16), T([672], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 672, 12, 12], f16), T([112, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 672, 12, 12], f16), T([672, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 1, ((T([128, 672, 6, 6], f16), T([192, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 192, 6, 6], f16), T([1152, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 1152, 6, 6], f16), T([1152, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1152), {}) +cnt: 5, ((T([128, 1152, 1, 1], f16), T([48, 1152, 1, 1], f16), T([48], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([128, 48, 1, 1], f16), T([1152, 48, 1, 1], f16), T([1152], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 1152, 6, 6], f16), T([192, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1152, 6, 6], f16), T([1152, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1152), {}) +cnt: 1, ((T([128, 1152, 6, 6], f16), T([320, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 320, 6, 6], f16), T([1280, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default 
+cnt: 1, ((T([128, 1280, 6, 6], f16), T([128, 320, 6, 6], f16), T([1280, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 320, 6, 6], f16), T([128, 1152, 6, 6], f16), T([320, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 1152, 1, 1], f16), T([128, 48, 1, 1], f16), T([1152, 48, 1, 1], f16), [1152], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 5, ((T([128, 48, 1, 1], f16), T([128, 1152, 1, 1], f16), T([48, 1152, 1, 1], f16), [48], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1152, 6, 6], f16), T([128, 1152, 6, 6], f16), T([1152, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 5, ((T([128, 1152, 6, 6], f16), T([128, 192, 6, 6], f16), T([1152, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 192, 6, 6], f16), T([128, 1152, 6, 6], f16), T([192, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 1152, 6, 6], f16), T([128, 1152, 6, 6], f16), T([1152, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 6, 6], f16), T([128, 672, 6, 6], f16), T([192, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 672, 1, 1], f16), T([128, 28, 1, 1], f16), T([672, 28, 1, 1], f16), [672], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([128, 28, 1, 1], f16), T([128, 672, 1, 1], f16), T([28, 672, 1, 1], f16), [28], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 6, 6], f16), T([128, 672, 12, 12], f16), T([672, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 4, ((T([128, 672, 12, 12], f16), 
T([128, 112, 12, 12], f16), T([672, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 112, 12, 12], f16), T([128, 672, 12, 12], f16), T([112, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 672, 12, 12], f16), T([128, 672, 12, 12], f16), T([672, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 1, ((T([128, 112, 12, 12], f16), T([128, 480, 12, 12], f16), T([112, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 480, 1, 1], f16), T([128, 20, 1, 1], f16), T([480, 20, 1, 1], f16), [480], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([128, 20, 1, 1], f16), T([128, 480, 1, 1], f16), T([20, 480, 1, 1], f16), [20], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 480, 12, 12], f16), T([128, 480, 12, 12], f16), T([480, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 4, ((T([128, 480, 12, 12], f16), T([128, 80, 12, 12], f16), T([480, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 80, 12, 12], f16), T([128, 480, 12, 12], f16), T([80, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 480, 12, 12], f16), T([128, 480, 12, 12], f16), T([480, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 1, ((T([128, 80, 12, 12], f16), T([128, 240, 12, 12], f16), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([128, 10, 1, 1], f16), T([240, 10, 1, 1], f16), [240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 10, 1, 1], f16), T([128, 240, 1, 1], f16), T([10, 
240, 1, 1], f16), [10], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 12, 12], f16), T([128, 240, 24, 24], f16), T([240, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 2, ((T([128, 240, 24, 24], f16), T([128, 40, 24, 24], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 24, 24], f16), T([128, 240, 24, 24], f16), T([40, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 240, 24, 24], f16), T([128, 240, 24, 24], f16), T([240, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([128, 40, 24, 24], f16), T([128, 144, 24, 24], f16), T([40, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 144, 1, 1], f16), T([128, 6, 1, 1], f16), T([144, 6, 1, 1], f16), [144], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 6, 1, 1], f16), T([128, 144, 1, 1], f16), T([6, 144, 1, 1], f16), [6], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 144, 24, 24], f16), T([128, 144, 48, 48], f16), T([144, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 2, ((T([128, 144, 48, 48], f16), T([128, 24, 48, 48], f16), T([144, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 48, 48], f16), T([128, 144, 48, 48], f16), T([24, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 144, 48, 48], f16), T([128, 144, 48, 48], f16), T([144, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 48, 48], f16), T([128, 96, 48, 48], f16), T([24, 96, 1, 1], f16), [0], [1, 1], [0, 
0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 1, 1], f16), T([128, 4, 1, 1], f16), T([96, 4, 1, 1], f16), [96], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 4, 1, 1], f16), T([128, 96, 1, 1], f16), T([4, 96, 1, 1], f16), [4], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 48, 48], f16), T([128, 96, 96, 96], f16), T([96, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([128, 96, 96, 96], f16), T([128, 16, 96, 96], f16), T([96, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), T([128, 32, 96, 96], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([128, 8, 1, 1], f16), T([32, 8, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([128, 32, 1, 1], f16), T([8, 32, 1, 1], f16), [8], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([128, 32, 96, 96], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([128, 3, 192, 192], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 192, 192], f16), T([128, 3, 192, 192], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1280, 6, 6], f16, stride=(1280, 1, 0, 0)), 36), {}) +cnt: 5, ((T([128, 1152, 6, 6], f16, stride=(1152, 1, 0, 0)), 36), {}) +cnt: 1, ((T([128, 672, 6, 6], f16, stride=(672, 1, 0, 0)), 36), {}) +cnt: 3, ((T([128, 672, 12, 12], f16, stride=(672, 1, 0, 0)), 144), {}) +cnt: 4, ((T([128, 480, 12, 12], f16, stride=(480, 1, 0, 0)), 144), {}) +cnt: 1, ((T([128, 240, 
12, 12], f16, stride=(240, 1, 0, 0)), 144), {}) +cnt: 1, ((T([128, 240, 24, 24], f16, stride=(240, 1, 0, 0)), 576), {}) +cnt: 1, ((T([128, 144, 24, 24], f16, stride=(144, 1, 0, 0)), 576), {}) +cnt: 1, ((T([128, 144, 48, 48], f16, stride=(144, 1, 0, 0)), 2304), {}) +cnt: 1, ((T([128, 96, 48, 48], f16, stride=(96, 1, 0, 0)), 2304), {}) +cnt: 1, ((T([128, 32, 96, 96], f16, stride=(32, 1, 0, 0)), 9216), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 32, 96, 96], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 96, 48, 48], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 144, 48, 48], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 144, 24, 24], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 24, 24], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 12, 12], f16), [2, 3], True), {}) +cnt: 4, ((T([128, 480, 12, 12], f16), [2, 3], True), {}) +cnt: 3, ((T([128, 672, 12, 12], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 6, 6], f16), [2, 3], True), {}) +cnt: 5, ((T([128, 1152, 6, 6], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 1280, 6, 6], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 1280], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([128, 32, 96, 96], f16), T([128, 32, 1, 1], f16)), {}) +cnt: 2, ((T([128, 96, 48, 48], f16), T([128, 96, 1, 1], f16)), {}) +cnt: 2, ((T([128, 144, 48, 48], f16), T([128, 144, 1, 1], f16)), {}) +cnt: 2, ((T([128, 144, 24, 24], f16), T([128, 144, 1, 1], f16)), {}) +cnt: 2, ((T([128, 240, 24, 24], f16), T([128, 240, 1, 1], f16)), {}) +cnt: 2, ((T([128, 240, 12, 12], f16), T([128, 240, 1, 1], f16)), {}) +cnt: 8, ((T([128, 480, 12, 12], f16), T([128, 480, 1, 1], f16)), {}) +cnt: 6, ((T([128, 672, 12, 12], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 2, ((T([128, 672, 6, 6], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 10, ((T([128, 1152, 6, 6], f16), T([128, 
1152, 1, 1], f16)), {}) +cnt: 5, ((T([128, 1152, 6, 6], f16), T([128, 1152, 6, 6], f16)), {}) +cnt: 1, ((T([128, 672, 6, 6], f16), T([128, 672, 6, 6], f16)), {}) +cnt: 3, ((T([128, 672, 12, 12], f16), T([128, 672, 12, 12], f16)), {}) +cnt: 4, ((T([128, 480, 12, 12], f16), T([128, 480, 12, 12], f16)), {}) +cnt: 1, ((T([128, 240, 12, 12], f16), T([128, 240, 12, 12], f16)), {}) +cnt: 1, ((T([128, 240, 24, 24], f16), T([128, 240, 24, 24], f16)), {}) +cnt: 1, ((T([128, 144, 24, 24], f16), T([128, 144, 24, 24], f16)), {}) +cnt: 1, ((T([128, 144, 48, 48], f16), T([128, 144, 48, 48], f16)), {}) +cnt: 1, ((T([128, 96, 48, 48], f16), T([128, 96, 48, 48], f16)), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([128, 32, 96, 96], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([128, 32, 96, 96], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 96, 96, 96], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 96, 48, 48], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 24, 48, 48], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 144, 48, 48], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 144, 24, 24], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), True, 0.1, 1e-05), {}) +cnt: 2, ((T([128, 40, 24, 24], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), True, 0.1, 1e-05), {}) +cnt: 3, ((T([128, 240, 24, 24], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 240, 12, 12], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 80, 12, 12], 
f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([128, 480, 12, 12], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), True, 0.1, 1e-05), {}) +cnt: 4, ((T([128, 112, 12, 12], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), True, 0.1, 1e-05), {}) +cnt: 7, ((T([128, 672, 12, 12], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 672, 6, 6], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), True, 0.1, 1e-05), {}) +cnt: 5, ((T([128, 192, 6, 6], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 10, ((T([128, 1152, 6, 6], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 320, 6, 6], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), True, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1280, 6, 6], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1280, 6, 6], f16), T([128, 1280, 6, 6], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 320, 6, 6], f16), T([128, 320, 6, 6], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), True, 1e-05, [True, True, True]), {}) +cnt: 10, ((T([128, 1152, 6, 6], f16), T([128, 1152, 6, 6], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f32), T([1152], f32), True, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([128, 192, 6, 6], f16), T([128, 192, 6, 6], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 672, 6, 6], f16), T([128, 672, 6, 6], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, 
True]), {}) +cnt: 7, ((T([128, 672, 12, 12], f16), T([128, 672, 12, 12], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 112, 12, 12], f16), T([128, 112, 12, 12], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([128, 480, 12, 12], f16), T([128, 480, 12, 12], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), True, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([128, 80, 12, 12], f16), T([128, 80, 12, 12], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 240, 12, 12], f16), T([128, 240, 12, 12], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 240, 24, 24], f16), T([128, 240, 24, 24], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 40, 24, 24], f16), T([128, 40, 24, 24], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 144, 24, 24], f16), T([128, 144, 24, 24], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([128, 144, 48, 48], f16), T([128, 144, 48, 48], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 24, 48, 48], f16), T([128, 24, 48, 48], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 48, 48], f16), T([128, 96, 48, 48], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 96, 96, 96], 
f16), T([128, 96, 96, 96], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), T([128, 16, 96, 96], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([128, 32, 96, 96], f16), T([128, 32, 96, 96], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([128, 32, 1, 1], f16),), {}) +cnt: 1, ((T([128, 96, 1, 1], f16),), {}) +cnt: 2, ((T([128, 144, 1, 1], f16),), {}) +cnt: 2, ((T([128, 240, 1, 1], f16),), {}) +cnt: 4, ((T([128, 480, 1, 1], f16),), {}) +cnt: 4, ((T([128, 672, 1, 1], f16),), {}) +cnt: 5, ((T([128, 1152, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 5, ((T([128, 1152, 1, 1], f16), T([128, 1152, 1, 1], f16)), {}) +cnt: 4, ((T([128, 672, 1, 1], f16), T([128, 672, 1, 1], f16)), {}) +cnt: 4, ((T([128, 480, 1, 1], f16), T([128, 480, 1, 1], f16)), {}) +cnt: 2, ((T([128, 240, 1, 1], f16), T([128, 240, 1, 1], f16)), {}) +cnt: 2, ((T([128, 144, 1, 1], f16), T([128, 144, 1, 1], f16)), {}) +cnt: 1, ((T([128, 96, 1, 1], f16), T([128, 96, 1, 1], f16)), {}) +cnt: 1, ((T([128, 32, 1, 1], f16), T([128, 32, 1, 1], f16)), {}) +Operator: aten.silu_.default +cnt: 2, ((T([128, 32, 96, 96], f16),), {}) +cnt: 1, ((T([128, 8, 1, 1], f16),), {}) +cnt: 1, ((T([128, 96, 96, 96], f16),), {}) +cnt: 1, ((T([128, 96, 48, 48], f16),), {}) +cnt: 1, ((T([128, 4, 1, 1], f16),), {}) +cnt: 3, ((T([128, 144, 48, 48], f16),), {}) +cnt: 2, ((T([128, 6, 1, 1], f16),), {}) +cnt: 1, ((T([128, 144, 24, 24], f16),), {}) +cnt: 3, ((T([128, 240, 24, 24], f16),), {}) 
+cnt: 2, ((T([128, 10, 1, 1], f16),), {}) +cnt: 1, ((T([128, 240, 12, 12], f16),), {}) +cnt: 8, ((T([128, 480, 12, 12], f16),), {}) +cnt: 4, ((T([128, 20, 1, 1], f16),), {}) +cnt: 7, ((T([128, 672, 12, 12], f16),), {}) +cnt: 4, ((T([128, 28, 1, 1], f16),), {}) +cnt: 1, ((T([128, 672, 6, 6], f16),), {}) +cnt: 10, ((T([128, 1152, 6, 6], f16),), {}) +cnt: 5, ((T([128, 48, 1, 1], f16),), {}) +cnt: 1, ((T([128, 1280, 6, 6], f16),), {}) +Operator: aten.silu_backward.default +cnt: 1, ((T([128, 1280, 6, 6], f16), T([128, 1280, 6, 6], f16)), {}) +cnt: 5, ((T([128, 48, 1, 1], f16), T([128, 48, 1, 1], f16)), {}) +cnt: 10, ((T([128, 1152, 6, 6], f16), T([128, 1152, 6, 6], f16)), {}) +cnt: 4, ((T([128, 28, 1, 1], f16), T([128, 28, 1, 1], f16)), {}) +cnt: 1, ((T([128, 672, 6, 6], f16), T([128, 672, 6, 6], f16)), {}) +cnt: 7, ((T([128, 672, 12, 12], f16), T([128, 672, 12, 12], f16)), {}) +cnt: 4, ((T([128, 20, 1, 1], f16), T([128, 20, 1, 1], f16)), {}) +cnt: 8, ((T([128, 480, 12, 12], f16), T([128, 480, 12, 12], f16)), {}) +cnt: 2, ((T([128, 10, 1, 1], f16), T([128, 10, 1, 1], f16)), {}) +cnt: 1, ((T([128, 240, 12, 12], f16), T([128, 240, 12, 12], f16)), {}) +cnt: 3, ((T([128, 240, 24, 24], f16), T([128, 240, 24, 24], f16)), {}) +cnt: 2, ((T([128, 6, 1, 1], f16), T([128, 6, 1, 1], f16)), {}) +cnt: 1, ((T([128, 144, 24, 24], f16), T([128, 144, 24, 24], f16)), {}) +cnt: 3, ((T([128, 144, 48, 48], f16), T([128, 144, 48, 48], f16)), {}) +cnt: 1, ((T([128, 4, 1, 1], f16), T([128, 4, 1, 1], f16)), {}) +cnt: 1, ((T([128, 96, 48, 48], f16), T([128, 96, 48, 48], f16)), {}) +cnt: 1, ((T([128, 96, 96, 96], f16), T([128, 96, 96, 96], f16)), {}) +cnt: 1, ((T([128, 8, 1, 1], f16), T([128, 8, 1, 1], f16)), {}) +cnt: 2, ((T([128, 32, 96, 96], f16), T([128, 32, 96, 96], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 5, ((T([128, 1152, 6, 6], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 672, 6, 6], f16), [2, 3], True), {}) +cnt: 3, ((T([128, 672, 12, 
12], f16), [2, 3], True), {}) +cnt: 4, ((T([128, 480, 12, 12], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 12, 12], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 240, 24, 24], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 144, 24, 24], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 144, 48, 48], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 96, 48, 48], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), [2, 3], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tnt_s_patch16_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tnt_s_patch16_224_training.txt new file mode 100644 index 000000000..d7622dd4d --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/tnt_s_patch16_224_training.txt @@ -0,0 +1,146 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([12544, 4, 16, 16], f16), -1, False), {}) +cnt: 12, ((T([64, 6, 197, 197], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([64, 6, 197, 197], f16), T([64, 6, 197, 197], f16), -1, f16), {}) +cnt: 12, ((T([12544, 4, 16, 16], f16), T([12544, 4, 16, 16], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 1, ((T([64, 196, 384], f16), [12544, 24, 4, 4]), {}) +cnt: 1, ((T([12544, 16, 24], f16), [64, 196, 384]), {}) +cnt: 12, ((T([200704, 48], f16), [12544, 16, 48]), {}) +cnt: 12, ((T([200704, 24], f16), [12544, 16, 24]), {}) +cnt: 36, ((T([12544, 4, 16, 6], f16), [50176, 16, 6]), {}) +cnt: 12, ((T([12544, 4, 6, 16], f16), [50176, 6, 16]), {}) +cnt: 12, ((T([50176, 16, 16], f16), [12544, 4, 16, 16]), {}) +cnt: 12, ((T([50176, 16, 6], f16), [12544, 4, 16, 6]), {}) +cnt: 24, ((T([12544, 16, 4, 6], f16), [12544, 16, 24]), {}) +cnt: 12, ((T([12608, 768], f16), [64, 197, 768]), {}) +cnt: 12, 
((T([12608, 384], f16), [64, 197, 384]), {}) +cnt: 36, ((T([64, 6, 197, 64], f16), [384, 197, 64]), {}) +cnt: 12, ((T([64, 6, 64, 197], f16), [384, 64, 197]), {}) +cnt: 12, ((T([384, 197, 197], f16), [64, 6, 197, 197]), {}) +cnt: 12, ((T([384, 197, 64], f16), [64, 6, 197, 64]), {}) +cnt: 24, ((T([64, 197, 6, 64], f16), [64, 197, 384]), {}) +cnt: 12, ((T([64, 197, 2, 6, 64], f16), [64, 197, 768]), {}) +cnt: 12, ((T([64, 196, 384], f16), [12544, 384]), {}) +cnt: 12, ((T([12544, 16, 2, 4, 6], f16), [12544, 16, 48]), {}) +cnt: 1, ((T([12544, 24, 4, 4], f16), [64, 196, 384]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([12544, 24, 4, 4], f16), T([1, 24, 4, 4], f16)), {}) +cnt: 1, ((T([64, 197, 384], f16), T([1, 197, 384], f16)), {}) +cnt: 24, ((T([12544, 16, 24], f16, stride=(384, 1, 16)), T([12544, 16, 24], f16)), {}) +cnt: 12, ((T([64, 196, 384], f16, stride=(75648, 384, 1)), T([64, 196, 384], f16)), {}) +cnt: 72, ((T([64, 197, 384], f16), T([64, 197, 384], f16)), {}) +cnt: 48, ((T([12544, 16, 24], f16), T([12544, 16, 24], f16)), {}) +Operator: aten.addmm.default +cnt: 13, ((T([384], f16), T([12544, 384], f16), T([384, 384], f16, stride=(1, 384))), {}) +cnt: 12, ((T([24], f16), T([200704, 24], f16), T([24, 24], f16, stride=(1, 24))), {}) +cnt: 12, ((T([96], f16), T([200704, 24], f16), T([24, 96], f16, stride=(1, 24))), {}) +cnt: 12, ((T([24], f16), T([200704, 96], f16), T([96, 24], f16, stride=(1, 96))), {}) +cnt: 12, ((T([384], f16), T([12608, 384], f16), T([384, 384], f16, stride=(1, 384))), {}) +cnt: 12, ((T([1536], f16), T([12608, 384], f16), T([384, 1536], f16, stride=(1, 384))), {}) +cnt: 12, ((T([384], f16), T([12608, 1536], f16), T([1536, 384], f16, stride=(1, 1536))), {}) +cnt: 1, ((T([1000], f16), T([64, 384], f16, stride=(75648, 1)), T([384, 1000], f16, stride=(1, 384))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([50176, 16, 6], f16), T([50176, 6, 16], f16)), {}) +cnt: 12, ((T([50176, 16, 16], f16), T([50176, 16, 6], f16)), {}) +cnt: 12, ((T([384, 197, 
64], f16), T([384, 64, 197], f16)), {}) +cnt: 12, ((T([384, 197, 197], f16), T([384, 197, 64], f16)), {}) +cnt: 12, ((T([384, 197, 197], f16, stride=(38809, 1, 197)), T([384, 197, 64], f16)), {}) +cnt: 12, ((T([384, 197, 64], f16), T([384, 64, 197], f16, stride=(12608, 1, 64))), {}) +cnt: 12, ((T([384, 64, 197], f16, stride=(12608, 1, 64)), T([384, 197, 197], f16)), {}) +cnt: 12, ((T([384, 197, 197], f16), T([384, 197, 64], f16, stride=(12608, 1, 197))), {}) +cnt: 12, ((T([50176, 16, 16], f16, stride=(256, 1, 16)), T([50176, 16, 6], f16)), {}) +cnt: 12, ((T([50176, 16, 6], f16), T([50176, 6, 16], f16, stride=(96, 1, 6))), {}) +cnt: 12, ((T([50176, 6, 16], f16, stride=(96, 1, 6)), T([50176, 16, 16], f16)), {}) +cnt: 12, ((T([50176, 16, 16], f16), T([50176, 16, 6], f16, stride=(96, 1, 16))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 1, 384], f16, stride=(0, 384, 1)), T([64, 196, 384], f16)], 1), {}) +cnt: 12, (([T([64, 1, 384], f16, stride=(75648, 384, 1)), T([64, 196, 384], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([24, 3, 7, 7], f16), T([24], f16), [4, 4], [3, 3], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 24, 56, 56], f16), T([64, 3, 224, 224], f16), T([24, 3, 7, 7], f16), [24], [4, 4], [3, 3], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.gelu.default +cnt: 12, ((T([12544, 16, 96], f16),), {}) +cnt: 12, ((T([64, 197, 1536], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([64, 197, 1536], f16), T([64, 197, 1536], f16)), {}) +cnt: 12, ((T([12544, 16, 96], f16), T([12544, 16, 96], f16)), {}) +Operator: aten.im2col.default +cnt: 1, ((T([64, 24, 56, 56], f16), [4, 4], [1, 1], [0, 0], [4, 4]), {}) +Operator: aten.im2col_backward.default +cnt: 1, ((T([64, 
384, 196], f16, stride=(75264, 1, 384)), [56, 56], [4, 4], [1, 1], [0, 0], [4, 4]), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.mm.default +cnt: 12, ((T([200704, 24], f16), T([24, 48], f16, stride=(1, 24))), {}) +cnt: 12, ((T([200704, 24], f16), T([24, 24], f16, stride=(1, 24))), {}) +cnt: 12, ((T([12608, 384], f16), T([384, 768], f16, stride=(1, 384))), {}) +cnt: 12, ((T([12608, 384], f16), T([384, 384], f16, stride=(1, 384))), {}) +cnt: 1, ((T([64, 1000], f16), T([1000, 384], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 384], f16, stride=(75648, 1))), {}) +cnt: 12, ((T([12608, 384], f16), T([384, 1536], f16)), {}) +cnt: 12, ((T([384, 12608], f16, stride=(1, 384)), T([12608, 1536], f16)), {}) +cnt: 12, ((T([12608, 1536], f16), T([1536, 384], f16)), {}) +cnt: 12, ((T([1536, 12608], f16, stride=(1, 1536)), T([12608, 384], f16)), {}) +cnt: 24, ((T([12608, 384], f16), T([384, 384], f16)), {}) +cnt: 24, ((T([384, 12608], f16, stride=(1, 384)), T([12608, 384], f16)), {}) +cnt: 12, ((T([768, 12608], f16, stride=(1, 768)), T([12608, 384], f16)), {}) +cnt: 12, ((T([12608, 768], f16), T([768, 384], f16)), {}) +cnt: 13, ((T([12544, 384], f16), T([384, 384], f16)), {}) +cnt: 13, ((T([384, 12544], f16, stride=(1, 384)), T([12544, 384], f16)), {}) +cnt: 12, ((T([200704, 24], f16), T([24, 96], f16)), {}) +cnt: 12, ((T([24, 200704], f16, stride=(1, 24)), T([200704, 96], f16)), {}) +cnt: 12, ((T([200704, 96], f16), T([96, 24], f16)), {}) +cnt: 12, ((T([96, 200704], f16, stride=(1, 96)), T([200704, 24], f16)), {}) +cnt: 24, ((T([200704, 24], f16), T([24, 24], f16)), {}) +cnt: 24, ((T([24, 200704], f16, stride=(1, 24)), T([200704, 24], f16)), {}) +cnt: 12, ((T([48, 200704], f16, stride=(1, 48)), T([200704, 24], f16)), {}) +cnt: 12, ((T([200704, 48], f16), T([48, 24], f16)), {}) +Operator: aten.mul.Tensor +cnt: 24, ((T([12544, 4, 16, 16], f16), 0.408248290463863), {}) +cnt: 24, ((T([64, 6, 197, 197], f16), 0.125), 
{}) +Operator: aten.native_layer_norm.default +cnt: 2, ((T([64, 196, 384], f16), [384], T([384], f16), T([384], f16), 1e-05), {}) +cnt: 36, ((T([12544, 16, 24], f16, stride=(384, 1, 16)), [24], T([24], f16), T([24], f16), 1e-05), {}) +cnt: 25, ((T([64, 197, 384], f16), [384], T([384], f16), T([384], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([64, 197, 384], f16), T([64, 197, 384], f16), [384], T([64, 197, 1], f32), T([64, 197, 1], f32), T([384], f16), T([384], f16), [True, True, True]), {}) +cnt: 36, ((T([12544, 16, 24], f16), T([12544, 16, 24], f16, stride=(384, 1, 16)), [24], T([12544, 16, 1], f32), T([12544, 16, 1], f32), T([24], f16), T([24], f16), [True, True, True]), {}) +cnt: 1, ((T([64, 196, 384], f16, stride=(75648, 384, 1)), T([64, 196, 384], f16), [384], T([64, 196, 1], f32), T([64, 196, 1], f32), T([384], f16), T([384], f16), [True, True, True]), {}) +cnt: 1, ((T([64, 196, 384], f16), T([64, 196, 384], f16), [384], T([64, 196, 1], f32), T([64, 196, 1], f32), T([384], f16), T([384], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([64, 384], f16), [64, 197, 384], 1, 0), {}) +Operator: aten.slice_backward.default +cnt: 25, ((T([64, 197, 384], f16), [64, 197, 384], 0, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([64, 196, 384], f16, stride=(75648, 384, 1)), [64, 197, 384], 1, 1, 9223372036854775807, 1), {}) +cnt: 12, ((T([64, 1, 384], f16, stride=(75648, 384, 1)), [64, 197, 384], 1, 0, 1, 1), {}) +Operator: aten.stack.default +cnt: 12, (([T([64, 6, 197, 64], f16), T([64, 6, 197, 64], f16, stride=(75648, 12608, 1, 197))],), {}) +cnt: 12, (([T([12544, 4, 16, 6], f16), T([12544, 4, 16, 6], f16, stride=(384, 96, 1, 16))],), {}) +Operator: 
aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 24, ((T([12608, 384], f16), [0], True), {}) +cnt: 12, ((T([12608, 1536], f16), [0], True), {}) +cnt: 13, ((T([12544, 384], f16), [0], True), {}) +cnt: 24, ((T([200704, 24], f16), [0], True), {}) +cnt: 12, ((T([200704, 96], f16), [0], True), {}) +cnt: 1, ((T([64, 197, 384], f16), [0], True), {}) +cnt: 1, ((T([64, 1, 384], f16, stride=(75648, 384, 1)), [0], True), {}) +cnt: 1, ((T([12544, 24, 4, 4], f16, stride=(384, 1, 96, 24)), [0], True), {}) +Operator: aten.unbind.int +cnt: 12, ((T([2, 12544, 4, 16, 6], f16, stride=(24, 768, 6, 48, 1)),), {}) +cnt: 12, ((T([2, 64, 6, 197, 64], f16, stride=(384, 151296, 64, 768, 1)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/twins_pcpvt_base_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/twins_pcpvt_base_training.txt new file mode 100644 index 000000000..f3a99cba2 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/twins_pcpvt_base_training.txt @@ -0,0 +1,245 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([32, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([32, 1000], f16), T([32, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 3, ((T([32, 1, 3136, 49], f16), -1, False), {}) +cnt: 4, ((T([32, 2, 784, 49], f16), -1, False), {}) +cnt: 18, ((T([32, 5, 196, 49], f16), -1, False), {}) +cnt: 3, ((T([32, 8, 49, 49], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 3, ((T([32, 8, 49, 49], f16), T([32, 8, 49, 49], f16), -1, f16), {}) +cnt: 18, ((T([32, 5, 196, 49], f16), T([32, 5, 196, 49], f16), -1, f16), {}) +cnt: 4, ((T([32, 2, 784, 49], f16), T([32, 2, 784, 49], f16), -1, f16), {}) +cnt: 3, ((T([32, 1, 3136, 49], f16), T([32, 1, 3136, 49], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 3, ((T([32, 3136, 49], f16), [32, 1, 3136, 49]), {}) +cnt: 3, ((T([32, 3136, 64], 
f16), [32, 1, 3136, 64]), {}) +cnt: 8, ((T([32, 2, 784, 64], f16), [64, 784, 64]), {}) +cnt: 4, ((T([32, 2, 64, 49], f16), [64, 64, 49]), {}) +cnt: 4, ((T([64, 784, 49], f16), [32, 2, 784, 49]), {}) +cnt: 4, ((T([32, 2, 49, 64], f16), [64, 49, 64]), {}) +cnt: 4, ((T([64, 784, 64], f16), [32, 2, 784, 64]), {}) +cnt: 8, ((T([32, 784, 2, 64], f16), [32, 784, 128]), {}) +cnt: 36, ((T([32, 5, 196, 64], f16), [160, 196, 64]), {}) +cnt: 18, ((T([32, 5, 64, 49], f16), [160, 64, 49]), {}) +cnt: 18, ((T([160, 196, 49], f16), [32, 5, 196, 49]), {}) +cnt: 18, ((T([32, 5, 49, 64], f16), [160, 49, 64]), {}) +cnt: 18, ((T([160, 196, 64], f16), [32, 5, 196, 64]), {}) +cnt: 36, ((T([32, 196, 5, 64], f16), [32, 196, 320]), {}) +cnt: 9, ((T([32, 8, 49, 64], f16), [256, 49, 64]), {}) +cnt: 3, ((T([32, 8, 64, 49], f16), [256, 64, 49]), {}) +cnt: 3, ((T([256, 49, 49], f16), [32, 8, 49, 49]), {}) +cnt: 3, ((T([256, 49, 64], f16), [32, 8, 49, 64]), {}) +cnt: 6, ((T([32, 49, 8, 64], f16), [32, 49, 512]), {}) +cnt: 3, ((T([32, 49, 2, 8, 64], f16), [32, 49, 1024]), {}) +cnt: 36, ((T([32, 196, 320], f16), [6272, 320]), {}) +cnt: 18, ((T([32, 49, 2, 5, 64], f16), [32, 49, 640]), {}) +cnt: 8, ((T([32, 784, 128], f16), [25088, 128]), {}) +cnt: 4, ((T([32, 49, 2, 2, 64], f16), [32, 49, 256]), {}) +cnt: 6, ((T([32, 3136, 64], f16), [100352, 64]), {}) +cnt: 3, ((T([32, 49, 2, 1, 64], f16), [32, 49, 128]), {}) +Operator: aten.add.Tensor +cnt: 9, ((T([32, 3136, 64], f16), T([32, 3136, 64], f16)), {}) +cnt: 12, ((T([32, 784, 128], f16), T([32, 784, 128], f16)), {}) +cnt: 54, ((T([32, 196, 320], f16), T([32, 196, 320], f16)), {}) +cnt: 15, ((T([32, 49, 512], f16), T([32, 49, 512], f16)), {}) +cnt: 3, ((T([2, 32, 8, 49, 64], f16), T([2, 32, 8, 49, 64], f16)), {}) +cnt: 1, ((T([32, 512, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([32, 512, 7, 7], f16, stride=(25088, 1, 3584, 512))), {}) +cnt: 36, ((T([32, 196, 320], f16, stride=(62720, 1, 196)), T([32, 196, 320], f16)), {}) +cnt: 18, ((T([2, 32, 5, 49, 
64], f16), T([2, 32, 5, 49, 64], f16)), {}) +cnt: 1, ((T([32, 320, 14, 14], f16), T([32, 320, 14, 14], f16, stride=(62720, 1, 4480, 320))), {}) +cnt: 8, ((T([32, 784, 128], f16, stride=(100352, 1, 784)), T([32, 784, 128], f16)), {}) +cnt: 4, ((T([2, 32, 2, 49, 64], f16), T([2, 32, 2, 49, 64], f16)), {}) +cnt: 1, ((T([32, 128, 28, 28], f16), T([32, 128, 28, 28], f16, stride=(100352, 1, 3584, 128))), {}) +cnt: 6, ((T([32, 3136, 64], f16, stride=(200704, 1, 3136)), T([32, 3136, 64], f16)), {}) +cnt: 3, ((T([2, 32, 1, 49, 64], f16), T([2, 32, 1, 49, 64], f16)), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16, stride=(200704, 1, 3584, 64))), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([32, 64, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([32, 64, 56, 56], f16, stride=(200704, 1, 3584, 64))), {}) +cnt: 1, ((T([32, 128, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([32, 128, 28, 28], f16, stride=(100352, 1, 3584, 128))), {}) +cnt: 1, ((T([32, 320, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([32, 320, 14, 14], f16, stride=(62720, 1, 4480, 320))), {}) +cnt: 1, ((T([32, 512, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([32, 512, 7, 7], f16, stride=(25088, 1, 3584, 512))), {}) +Operator: aten.addmm.default +cnt: 6, ((T([64], f16), T([100352, 64], f16), T([64, 64], f16, stride=(1, 64))), {}) +cnt: 3, ((T([128], f16), T([1568, 64], f16), T([64, 128], f16, stride=(1, 64))), {}) +cnt: 3, ((T([512], f16), T([100352, 64], f16), T([64, 512], f16, stride=(1, 64))), {}) +cnt: 3, ((T([64], f16), T([100352, 512], f16), T([512, 64], f16, stride=(1, 512))), {}) +cnt: 8, ((T([128], f16), T([25088, 128], f16), T([128, 128], f16, stride=(1, 128))), {}) +cnt: 4, ((T([256], f16), T([1568, 128], f16), T([128, 256], f16, stride=(1, 128))), {}) +cnt: 4, ((T([1024], f16), T([25088, 128], f16), T([128, 1024], f16, stride=(1, 128))), {}) +cnt: 4, ((T([128], f16), T([25088, 1024], f16), T([1024, 128], f16, stride=(1, 1024))), {}) +cnt: 36, ((T([320], f16), T([6272, 320], 
f16), T([320, 320], f16, stride=(1, 320))), {}) +cnt: 18, ((T([640], f16), T([1568, 320], f16), T([320, 640], f16, stride=(1, 320))), {}) +cnt: 18, ((T([1280], f16), T([6272, 320], f16), T([320, 1280], f16, stride=(1, 320))), {}) +cnt: 18, ((T([320], f16), T([6272, 1280], f16), T([1280, 320], f16, stride=(1, 1280))), {}) +cnt: 6, ((T([512], f16), T([1568, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 3, ((T([1024], f16), T([1568, 512], f16), T([512, 1024], f16, stride=(1, 512))), {}) +cnt: 3, ((T([2048], f16), T([1568, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 3, ((T([512], f16), T([1568, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +cnt: 1, ((T([1000], f16), T([32, 512], f16), T([512, 1000], f16, stride=(1, 512))), {}) +Operator: aten.bmm.default +cnt: 6, ((T([32, 3136, 64], f16), T([32, 64, 49], f16, stride=(6272, 1, 128))), {}) +cnt: 6, ((T([32, 3136, 49], f16), T([32, 49, 64], f16, stride=(6272, 128, 1))), {}) +cnt: 4, ((T([64, 784, 64], f16), T([64, 64, 49], f16)), {}) +cnt: 4, ((T([64, 784, 49], f16), T([64, 49, 64], f16)), {}) +cnt: 18, ((T([160, 196, 64], f16), T([160, 64, 49], f16)), {}) +cnt: 18, ((T([160, 196, 49], f16), T([160, 49, 64], f16)), {}) +cnt: 3, ((T([256, 49, 64], f16), T([256, 64, 49], f16)), {}) +cnt: 3, ((T([256, 49, 49], f16), T([256, 49, 64], f16)), {}) +cnt: 3, ((T([256, 49, 49], f16, stride=(2401, 1, 49)), T([256, 49, 64], f16)), {}) +cnt: 3, ((T([256, 49, 64], f16), T([256, 64, 49], f16, stride=(3136, 1, 64))), {}) +cnt: 3, ((T([256, 64, 49], f16, stride=(3136, 1, 64)), T([256, 49, 49], f16)), {}) +cnt: 3, ((T([256, 49, 49], f16), T([256, 49, 64], f16, stride=(3136, 1, 49))), {}) +cnt: 18, ((T([160, 49, 196], f16, stride=(9604, 1, 49)), T([160, 196, 64], f16)), {}) +cnt: 18, ((T([160, 196, 64], f16), T([160, 64, 49], f16, stride=(3136, 1, 64))), {}) +cnt: 18, ((T([160, 64, 196], f16, stride=(12544, 1, 64)), T([160, 196, 49], f16)), {}) +cnt: 18, ((T([160, 196, 49], f16), T([160, 49, 64], 
f16, stride=(3136, 1, 49))), {}) +cnt: 4, ((T([64, 49, 784], f16, stride=(38416, 1, 49)), T([64, 784, 64], f16)), {}) +cnt: 4, ((T([64, 784, 64], f16), T([64, 64, 49], f16, stride=(3136, 1, 64))), {}) +cnt: 4, ((T([64, 64, 784], f16, stride=(50176, 1, 64)), T([64, 784, 49], f16)), {}) +cnt: 4, ((T([64, 784, 49], f16), T([64, 49, 64], f16, stride=(3136, 1, 49))), {}) +cnt: 3, ((T([32, 49, 3136], f16, stride=(153664, 1, 49)), T([32, 3136, 64], f16)), {}) +cnt: 3, ((T([32, 64, 3136], f16, stride=(200704, 1, 64)), T([32, 3136, 49], f16)), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([64, 3, 4, 4], f16), T([64], f16), [4, 4], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 64, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([64, 64, 8, 8], f16), T([64], f16), [8, 8], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([64, 1, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([128, 64, 2, 2], f16), T([128], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 128, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 128, 4, 4], f16), T([128], f16), [4, 4], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 1, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 128), {}) +cnt: 1, ((T([32, 128, 28, 28], f16), T([320, 128, 2, 2], f16), T([320], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 18, ((T([32, 320, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([320, 320, 2, 2], f16), T([320], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 320, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([320, 1, 3, 3], f16), T([320], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 320), {}) +cnt: 1, ((T([32, 320, 14, 14], f16), 
T([512, 320, 2, 2], f16), T([512], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([512, 1, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 512), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([32, 512, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([32, 512, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([512, 1, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 512, [True, True, True]), {}) +cnt: 1, ((T([32, 512, 7, 7], f16, stride=(25088, 1, 3584, 512)), T([32, 320, 14, 14], f16), T([512, 320, 2, 2], f16), [512], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 18, ((T([32, 320, 7, 7], f16, stride=(15680, 1, 2240, 320)), T([32, 320, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([320, 320, 2, 2], f16), [320], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 320, 14, 14], f16), T([32, 320, 14, 14], f16, stride=(62720, 1, 4480, 320)), T([320, 1, 3, 3], f16), [320], [1, 1], [1, 1], [1, 1], False, [0, 0], 320, [True, True, True]), {}) +cnt: 1, ((T([32, 320, 14, 14], f16), T([32, 128, 28, 28], f16), T([320, 128, 2, 2], f16), [320], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([32, 128, 7, 7], f16, stride=(6272, 1, 896, 128)), T([32, 128, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 128, 4, 4], f16), [128], [4, 4], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 28, 28], f16), T([32, 128, 28, 28], f16, stride=(100352, 1, 3584, 128)), T([128, 1, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 128, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 28, 28], f16), T([32, 64, 56, 56], f16), T([128, 64, 2, 2], f16), [128], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 64, 7, 7], f16, stride=(3136, 1, 448, 64)), T([32, 64, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([64, 64, 8, 
8], f16), [64], [8, 8], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16, stride=(200704, 1, 3584, 64)), T([64, 1, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 64, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 3, 224, 224], f16), T([64, 3, 4, 4], f16), [64], [4, 4], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +cnt: 18, ((T([320, 320, 2, 2], f16), T([320, 320, 2, 2], f16, stride=(1280, 1, 640, 320))), {}) +cnt: 4, ((T([128, 128, 4, 4], f16), T([128, 128, 4, 4], f16, stride=(2048, 1, 512, 128))), {}) +cnt: 3, ((T([64, 64, 8, 8], f16), T([64, 64, 8, 8], f16, stride=(4096, 1, 512, 64))), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 49, 512], f16, stride=(512, 0, 1)), 49), {}) +Operator: aten.gelu.default +cnt: 3, ((T([32, 3136, 512], f16),), {}) +cnt: 4, ((T([32, 784, 1024], f16),), {}) +cnt: 18, ((T([32, 196, 1280], f16),), {}) +cnt: 3, ((T([32, 49, 2048], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 3, ((T([32, 49, 2048], f16), T([32, 49, 2048], f16)), {}) +cnt: 18, ((T([32, 196, 1280], f16), T([32, 196, 1280], f16)), {}) +cnt: 4, ((T([32, 784, 1024], f16), T([32, 784, 1024], f16)), {}) +cnt: 3, ((T([32, 3136, 512], f16), T([32, 3136, 512], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([32], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 49, 512], f16), [1]), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16), T([1000, 512], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(1, 1000)), T([32, 512], f16)), {}) +cnt: 3, ((T([1568, 512], f16), T([512, 2048], f16)), {}) +cnt: 3, ((T([512, 1568], f16, stride=(1, 512)), T([1568, 2048], f16)), {}) +cnt: 3, ((T([1568, 2048], f16), T([2048, 512], f16)), {}) +cnt: 3, ((T([2048, 1568], f16, stride=(1, 2048)), T([1568, 512], f16)), {}) +cnt: 6, ((T([1568, 512], 
f16), T([512, 512], f16)), {}) +cnt: 6, ((T([512, 1568], f16, stride=(1, 512)), T([1568, 512], f16)), {}) +cnt: 3, ((T([1568, 1024], f16), T([1024, 512], f16)), {}) +cnt: 3, ((T([1024, 1568], f16, stride=(1, 1024)), T([1568, 512], f16)), {}) +cnt: 18, ((T([6272, 320], f16), T([320, 1280], f16)), {}) +cnt: 18, ((T([320, 6272], f16, stride=(1, 320)), T([6272, 1280], f16)), {}) +cnt: 18, ((T([6272, 1280], f16), T([1280, 320], f16)), {}) +cnt: 18, ((T([1280, 6272], f16, stride=(1, 1280)), T([6272, 320], f16)), {}) +cnt: 36, ((T([6272, 320], f16), T([320, 320], f16)), {}) +cnt: 36, ((T([320, 6272], f16, stride=(1, 320)), T([6272, 320], f16)), {}) +cnt: 18, ((T([1568, 640], f16), T([640, 320], f16)), {}) +cnt: 18, ((T([640, 1568], f16, stride=(1, 640)), T([1568, 320], f16)), {}) +cnt: 4, ((T([25088, 128], f16), T([128, 1024], f16)), {}) +cnt: 4, ((T([128, 25088], f16, stride=(1, 128)), T([25088, 1024], f16)), {}) +cnt: 4, ((T([25088, 1024], f16), T([1024, 128], f16)), {}) +cnt: 4, ((T([1024, 25088], f16, stride=(1, 1024)), T([25088, 128], f16)), {}) +cnt: 8, ((T([25088, 128], f16), T([128, 128], f16)), {}) +cnt: 8, ((T([128, 25088], f16, stride=(1, 128)), T([25088, 128], f16)), {}) +cnt: 4, ((T([1568, 256], f16), T([256, 128], f16)), {}) +cnt: 4, ((T([256, 1568], f16, stride=(1, 256)), T([1568, 128], f16)), {}) +cnt: 3, ((T([100352, 64], f16), T([64, 512], f16)), {}) +cnt: 3, ((T([64, 100352], f16, stride=(1, 64)), T([100352, 512], f16)), {}) +cnt: 3, ((T([100352, 512], f16), T([512, 64], f16)), {}) +cnt: 3, ((T([512, 100352], f16, stride=(1, 512)), T([100352, 64], f16)), {}) +cnt: 6, ((T([100352, 64], f16), T([64, 64], f16)), {}) +cnt: 6, ((T([64, 100352], f16, stride=(1, 64)), T([100352, 64], f16)), {}) +cnt: 3, ((T([1568, 128], f16), T([128, 64], f16)), {}) +cnt: 3, ((T([128, 1568], f16, stride=(1, 128)), T([1568, 64], f16)), {}) +Operator: aten.mul.Tensor +cnt: 6, ((T([32, 1, 3136, 49], f16), 0.125), {}) +cnt: 8, ((T([32, 2, 784, 49], f16), 0.125), {}) +cnt: 36, 
((T([32, 5, 196, 49], f16), 0.125), {}) +cnt: 6, ((T([32, 8, 49, 49], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 1, ((T([32, 3136, 64], f16, stride=(200704, 1, 3136)), [64], T([64], f16), T([64], f16), 1e-05), {}) +cnt: 6, ((T([32, 3136, 64], f16), [64], T([64], f16), T([64], f16), 1e-06), {}) +cnt: 3, ((T([32, 49, 64], f16), [64], T([64], f16), T([64], f16), 1e-05), {}) +cnt: 1, ((T([32, 784, 128], f16, stride=(100352, 1, 784)), [128], T([128], f16), T([128], f16), 1e-05), {}) +cnt: 8, ((T([32, 784, 128], f16), [128], T([128], f16), T([128], f16), 1e-06), {}) +cnt: 4, ((T([32, 49, 128], f16), [128], T([128], f16), T([128], f16), 1e-05), {}) +cnt: 1, ((T([32, 196, 320], f16, stride=(62720, 1, 196)), [320], T([320], f16), T([320], f16), 1e-05), {}) +cnt: 36, ((T([32, 196, 320], f16), [320], T([320], f16), T([320], f16), 1e-06), {}) +cnt: 18, ((T([32, 49, 320], f16), [320], T([320], f16), T([320], f16), 1e-05), {}) +cnt: 1, ((T([32, 49, 512], f16, stride=(25088, 1, 49)), [512], T([512], f16), T([512], f16), 1e-05), {}) +cnt: 7, ((T([32, 49, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 7, ((T([32, 49, 512], f16), T([32, 49, 512], f16), [512], T([32, 49, 1], f32), T([32, 49, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 1, ((T([32, 49, 512], f16), T([32, 49, 512], f16, stride=(25088, 1, 49)), [512], T([32, 49, 1], f32), T([32, 49, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 36, ((T([32, 196, 320], f16), T([32, 196, 320], f16), [320], T([32, 196, 1], f32), T([32, 196, 1], f32), T([320], f16), T([320], f16), [True, True, True]), {}) +cnt: 18, ((T([32, 49, 320], f16), T([32, 49, 320], f16), [320], T([32, 49, 1], f32), T([32, 49, 1], f32), T([320], f16), T([320], f16), [True, True, True]), {}) +cnt: 1, ((T([32, 196, 320], f16, stride=(62720, 1, 196)), T([32, 196, 320], f16, stride=(62720, 1, 196)), [320], T([32, 196, 1], f32), 
T([32, 196, 1], f32), T([320], f16), T([320], f16), [True, True, True]), {}) +cnt: 8, ((T([32, 784, 128], f16), T([32, 784, 128], f16), [128], T([32, 784, 1], f32), T([32, 784, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 4, ((T([32, 49, 128], f16), T([32, 49, 128], f16), [128], T([32, 49, 1], f32), T([32, 49, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 1, ((T([32, 784, 128], f16, stride=(100352, 1, 784)), T([32, 784, 128], f16, stride=(100352, 1, 784)), [128], T([32, 784, 1], f32), T([32, 784, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 6, ((T([32, 3136, 64], f16), T([32, 3136, 64], f16), [64], T([32, 3136, 1], f32), T([32, 3136, 1], f32), T([64], f16), T([64], f16), [True, True, True]), {}) +cnt: 3, ((T([32, 49, 64], f16), T([32, 49, 64], f16), [64], T([32, 49, 1], f32), T([32, 49, 1], f32), T([64], f16), T([64], f16), [True, True, True]), {}) +cnt: 1, ((T([32, 3136, 64], f16, stride=(200704, 1, 3136)), T([32, 3136, 64], f16, stride=(200704, 1, 3136)), [64], T([32, 3136, 1], f32), T([32, 3136, 1], f32), T([64], f16), T([64], f16), [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 18, ((T([320, 320, 2, 2], f16, stride=(1280, 1, 640, 320)), [320, 320, 2, 2], [1280, 4, 2, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 4, ((T([128, 128, 4, 4], f16, stride=(2048, 1, 512, 128)), [128, 128, 4, 4], [2048, 16, 4, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 3, ((T([64, 64, 8, 8], f16, stride=(4096, 1, 512, 64)), [64, 64, 8, 8], [4096, 64, 8, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([32, 1000], f16), T([32], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([32, 1000], f16), T([32], i64), None, 1, -100), {}) +Operator: aten.select_backward.default +cnt: 3, ((T([32, 8, 49, 64], f16), [2, 32, 8, 
49, 64], 0, 1), {}) +cnt: 3, ((T([32, 8, 49, 64], f16, stride=(25088, 3136, 1, 49)), [2, 32, 8, 49, 64], 0, 0), {}) +cnt: 18, ((T([32, 5, 49, 64], f16), [2, 32, 5, 49, 64], 0, 1), {}) +cnt: 18, ((T([32, 5, 49, 64], f16, stride=(15680, 3136, 1, 49)), [2, 32, 5, 49, 64], 0, 0), {}) +cnt: 4, ((T([32, 2, 49, 64], f16), [2, 32, 2, 49, 64], 0, 1), {}) +cnt: 4, ((T([32, 2, 49, 64], f16, stride=(6272, 3136, 1, 49)), [2, 32, 2, 49, 64], 0, 0), {}) +cnt: 3, ((T([32, 1, 49, 64], f16), [2, 32, 1, 49, 64], 0, 1), {}) +cnt: 3, ((T([32, 1, 49, 64], f16, stride=(3136, 3136, 1, 49)), [2, 32, 1, 49, 64], 0, 0), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16), [0], True), {}) +cnt: 9, ((T([1568, 512], f16), [0], True), {}) +cnt: 3, ((T([1568, 2048], f16), [0], True), {}) +cnt: 3, ((T([1568, 1024], f16), [0], True), {}) +cnt: 54, ((T([6272, 320], f16), [0], True), {}) +cnt: 18, ((T([6272, 1280], f16), [0], True), {}) +cnt: 18, ((T([1568, 640], f16), [0], True), {}) +cnt: 12, ((T([25088, 128], f16), [0], True), {}) +cnt: 4, ((T([25088, 1024], f16), [0], True), {}) +cnt: 4, ((T([1568, 256], f16), [0], True), {}) +cnt: 9, ((T([100352, 64], f16), [0], True), {}) +cnt: 3, ((T([100352, 512], f16), [0], True), {}) +cnt: 3, ((T([1568, 128], f16), [0], True), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/visformer_small_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/visformer_small_training.txt new file mode 100644 index 000000000..76ef9f176 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/visformer_small_training.txt @@ -0,0 +1,132 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([128, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 4, ((T([128, 6, 196, 196], f16), -1, False), {}) +cnt: 4, ((T([128, 6, 49, 49], f16), -1, False), {}) +Operator: 
aten._softmax_backward_data.default +cnt: 4, ((T([128, 6, 49, 49], f16), T([128, 6, 49, 49], f16), -1, f16), {}) +cnt: 4, ((T([128, 6, 196, 196], f16), T([128, 6, 196, 196], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 8, ((T([128, 6, 196, 64], f16), [768, 196, 64]), {}) +cnt: 4, ((T([128, 6, 64, 196], f16), [768, 64, 196]), {}) +cnt: 4, ((T([768, 196, 196], f16), [128, 6, 196, 196]), {}) +cnt: 4, ((T([768, 196, 64], f16), [128, 6, 196, 64]), {}) +cnt: 4, ((T([128, 6, 64, 196], f16), [128, 384, 14, 14]), {}) +cnt: 8, ((T([128, 6, 49, 128], f16), [768, 49, 128]), {}) +cnt: 4, ((T([128, 6, 128, 49], f16), [768, 128, 49]), {}) +cnt: 4, ((T([768, 49, 49], f16), [128, 6, 49, 49]), {}) +cnt: 4, ((T([768, 49, 128], f16), [128, 6, 49, 128]), {}) +cnt: 4, ((T([128, 6, 128, 49], f16), [128, 768, 7, 7]), {}) +cnt: 4, ((T([128, 3, 6, 128, 49], f16), [128, 2304, 7, 7]), {}) +cnt: 4, ((T([128, 3, 6, 64, 196], f16), [128, 1152, 14, 14]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([128, 192, 28, 28], f16), T([1, 192, 28, 28], f16)), {}) +cnt: 14, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16)), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), T([1, 384, 14, 14], f16)), {}) +cnt: 16, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16)), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), T([1, 768, 7, 7], f16)), {}) +cnt: 16, ((T([128, 768, 7, 7], f16), T([128, 768, 7, 7], f16)), {}) +Operator: aten.add_.Tensor +cnt: 28, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 768], f16), T([768, 1000], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 4, ((T([768, 196, 64], f16), T([768, 64, 196], f16)), {}) +cnt: 4, ((T([768, 196, 196], f16), T([768, 196, 64], f16)), {}) +cnt: 4, ((T([768, 49, 128], f16), T([768, 128, 49], f16)), {}) +cnt: 4, ((T([768, 49, 49], f16), T([768, 49, 128], f16)), {}) +cnt: 4, ((T([768, 49, 49], f16, stride=(2401, 1, 49)), T([768, 49, 128], f16, stride=(6272, 1, 49))), {}) +cnt: 4, ((T([768, 
49, 128], f16, stride=(6272, 1, 49)), T([768, 128, 49], f16, stride=(6272, 1, 128))), {}) +cnt: 4, ((T([768, 128, 49], f16, stride=(6272, 1, 128)), T([768, 49, 49], f16)), {}) +cnt: 4, ((T([768, 49, 49], f16), T([768, 49, 128], f16, stride=(6272, 1, 49))), {}) +cnt: 4, ((T([768, 196, 196], f16, stride=(38416, 1, 196)), T([768, 196, 64], f16, stride=(12544, 1, 196))), {}) +cnt: 4, ((T([768, 196, 64], f16, stride=(12544, 1, 196)), T([768, 64, 196], f16, stride=(12544, 1, 64))), {}) +cnt: 4, ((T([768, 64, 196], f16, stride=(12544, 1, 64)), T([768, 196, 196], f16)), {}) +cnt: 4, ((T([768, 196, 196], f16), T([768, 196, 64], f16, stride=(12544, 1, 196))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([32, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([192, 32, 4, 4], f16), T([192], f16), [4, 4], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 192, 28, 28], f16), T([384, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 384, 28, 28], f16), T([384, 48, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 7, ((T([128, 384, 28, 28], f16), T([192, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([384, 192, 2, 2], f16), T([384], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 384, 14, 14], f16), T([1152, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 384, 14, 14], f16), T([384, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 384, 14, 14], f16), T([1536, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 1536, 14, 14], f16), T([384, 1536, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 384, 14, 
14], f16), T([768, 384, 2, 2], f16), T([768], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 768, 7, 7], f16), T([2304, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 768, 7, 7], f16), T([768, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 768, 7, 7], f16), T([3072, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([128, 3072, 7, 7], f16), T([768, 3072, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 4, ((T([128, 768, 7, 7], f16), T([128, 3072, 7, 7], f16), T([768, 3072, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 3072, 7, 7], f16), T([128, 768, 7, 7], f16), T([3072, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 768, 7, 7], f16), T([128, 768, 7, 7], f16), T([768, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 2304, 7, 7], f16), T([128, 768, 7, 7], f16), T([2304, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), T([128, 384, 14, 14], f16), T([768, 384, 2, 2], f16), [768], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([128, 384, 14, 14], f16), T([128, 1536, 14, 14], f16), T([384, 1536, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 1536, 14, 14], f16), T([128, 384, 14, 14], f16), T([1536, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), T([384, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 1152, 14, 14], f16), T([128, 384, 14, 14], 
f16), T([1152, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), T([128, 192, 28, 28], f16), T([384, 192, 2, 2], f16), [384], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 7, ((T([128, 192, 28, 28], f16), T([128, 384, 28, 28], f16), T([192, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([128, 384, 28, 28], f16), T([128, 384, 28, 28], f16), T([384, 48, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 7, ((T([128, 384, 28, 28], f16), T([128, 192, 28, 28], f16), T([384, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), T([128, 32, 112, 112], f16), T([192, 32, 4, 4], f16), [192], [4, 4], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 3, 224, 224], f16), T([32, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 768, 7, 7], f16, stride=(768, 1, 0, 0)), 49), {}) +Operator: aten.gelu.default +cnt: 14, ((T([128, 384, 28, 28], f16),), {}) +cnt: 4, ((T([128, 1536, 14, 14], f16),), {}) +cnt: 4, ((T([128, 3072, 7, 7], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 4, ((T([128, 3072, 7, 7], f16), T([128, 3072, 7, 7], f16)), {}) +cnt: 4, ((T([128, 1536, 14, 14], f16), T([128, 1536, 14, 14], f16)), {}) +cnt: 14, ((T([128, 384, 28, 28], f16), T([128, 384, 28, 28], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([128], i64),), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 768, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16), T([1000, 768], f16)), {}) +cnt: 1, ((T([1000, 128], f16, 
stride=(1, 1000)), T([128, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 8, ((T([128, 6, 196, 196], f16), 0.125), {}) +cnt: 8, ((T([128, 6, 49, 49], f16), 0.08838834764831845), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {}) +cnt: 8, ((T([128, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), True, 0.1, 1e-05), {}) +cnt: 9, ((T([128, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {}) +cnt: 10, ((T([128, 768, 7, 7], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 10, ((T([128, 768, 7, 7], f16), T([128, 768, 7, 7], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), True, 1e-05, [True, True, True]), {}) +cnt: 9, ((T([128, 384, 14, 14], f16), T([128, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([128, 192, 28, 28], f16), T([128, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), True, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 32, 112, 112], f16),), {}) +Operator: aten.stack.default +cnt: 4, (([T([128, 6, 49, 128], f16), T([128, 6, 49, 128], f16, stride=(37632, 6272, 1, 49)), T([128, 6, 49, 128], f16)],), {}) +cnt: 4, (([T([128, 6, 196, 64], f16), T([128, 
6, 196, 64], f16, stride=(75264, 12544, 1, 196)), T([128, 6, 196, 64], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16), [0], True), {}) +cnt: 1, ((T([128, 768, 7, 7], f16), [0], True), {}) +cnt: 1, ((T([128, 384, 14, 14], f16), [0], True), {}) +cnt: 1, ((T([128, 192, 28, 28], f16), [0], True), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 32, 112, 112], f16), T([128, 32, 112, 112], f16), 0), {}) +Operator: aten.unbind.int +cnt: 4, ((T([3, 128, 6, 196, 64], f16, stride=(75264, 225792, 12544, 1, 196)),), {}) +cnt: 4, ((T([3, 128, 6, 49, 128], f16, stride=(37632, 112896, 6272, 1, 49)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/vit_base_patch16_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/vit_base_patch16_224_training.txt new file mode 100644 index 000000000..8d2c7bd9a --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/vit_base_patch16_224_training.txt @@ -0,0 +1,83 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 12, ((T([64, 12, 197, 197], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([64, 12, 197, 197], f16), T([64, 12, 197, 197], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([64, 12, 197, 64], f16), [768, 197, 64]), {}) +cnt: 12, ((T([64, 12, 64, 197], f16), [768, 64, 197]), {}) +cnt: 12, ((T([768, 197, 197], f16), [64, 12, 197, 197]), {}) +cnt: 12, ((T([768, 197, 64], f16), [64, 12, 197, 64]), {}) +cnt: 12, ((T([64, 197, 12, 64], f16), [64, 197, 768]), {}) +cnt: 12, ((T([64, 197, 3, 12, 64], f16), [64, 197, 2304]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([64, 197, 768], f16), T([1, 197, 768], f16)), {}) +cnt: 48, ((T([64, 197, 768], f16), T([64, 197, 768], f16)), {}) 
+Operator: aten.addmm.default +cnt: 12, ((T([2304], f16), T([12608, 768], f16), T([768, 2304], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([12608, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([12608, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([12608, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([1000], f16), T([64, 768], f16, stride=(151296, 1)), T([768, 1000], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([768, 197, 64], f16), T([768, 64, 197], f16)), {}) +cnt: 12, ((T([768, 197, 197], f16), T([768, 197, 64], f16)), {}) +cnt: 12, ((T([768, 197, 197], f16, stride=(38809, 1, 197)), T([768, 197, 64], f16)), {}) +cnt: 12, ((T([768, 197, 64], f16), T([768, 64, 197], f16, stride=(12608, 1, 64))), {}) +cnt: 12, ((T([768, 64, 197], f16, stride=(12608, 1, 64)), T([768, 197, 197], f16)), {}) +cnt: 12, ((T([768, 197, 197], f16), T([768, 197, 64], f16, stride=(12608, 1, 197))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 1, 768], f16, stride=(0, 768, 1)), T([64, 196, 768], f16, stride=(150528, 1, 196))], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([768, 3, 16, 16], f16), T([768], f16), [16, 16], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 768, 14, 14], f16, stride=(151296, 1, 10752, 768)), T([64, 3, 224, 224], f16), T([768, 3, 16, 16], f16), [768], [16, 16], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.gelu.default +cnt: 12, ((T([64, 197, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([64, 197, 3072], f16), T([64, 197, 3072], f16)), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) 
+Operator: aten.mm.default +cnt: 1, ((T([64, 1000], f16), T([1000, 768], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 768], f16, stride=(151296, 1))), {}) +cnt: 12, ((T([12608, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 12608], f16, stride=(1, 768)), T([12608, 3072], f16)), {}) +cnt: 12, ((T([12608, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 12608], f16, stride=(1, 3072)), T([12608, 768], f16)), {}) +cnt: 12, ((T([12608, 768], f16), T([768, 768], f16)), {}) +cnt: 12, ((T([768, 12608], f16, stride=(1, 768)), T([12608, 768], f16)), {}) +cnt: 12, ((T([12608, 2304], f16), T([2304, 768], f16)), {}) +cnt: 12, ((T([2304, 12608], f16, stride=(1, 2304)), T([12608, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 24, ((T([64, 12, 197, 197], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([64, 197, 768], f16), [768], T([768], f16), T([768], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([64, 197, 768], f16), T([64, 197, 768], f16), [768], T([64, 197, 1], f32), T([64, 197, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([64, 768], f16), [64, 197, 768], 1, 0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([64, 197, 768], f16), [64, 197, 768], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.stack.default +cnt: 12, (([T([64, 12, 197, 64], f16), T([64, 12, 197, 64], f16, stride=(151296, 12608, 1, 197)), T([64, 12, 197, 64], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 24, ((T([12608, 768], f16), [0], True), {}) +cnt: 12, ((T([12608, 3072], f16), [0], True), {}) +cnt: 12, ((T([12608, 2304], f16), [0], 
True), {}) +cnt: 1, ((T([64, 197, 768], f16), [0], True), {}) +cnt: 1, ((T([64, 1, 768], f16, stride=(151296, 768, 1)), [0], True), {}) +Operator: aten.unbind.int +cnt: 12, ((T([3, 64, 12, 197, 64], f16, stride=(768, 453888, 64, 2304, 1)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/timm_train/volo_d1_224_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/volo_d1_224_training.txt new file mode 100644 index 000000000..2f173f535 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/timm_train/volo_d1_224_training.txt @@ -0,0 +1,216 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([64, 1000], f16), 1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16), 1, f16), {}) +Operator: aten._softmax.default +cnt: 4, ((T([64, 6, 196, 9, 9], f16, stride=(95256, 81, 486, 9, 1)), -1, False), {}) +cnt: 14, ((T([64, 12, 196, 196], f16), -1, False), {}) +cnt: 2, ((T([64, 12, 1, 197], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 2, ((T([64, 12, 1, 197], f16), T([64, 12, 1, 197], f16), -1, f16), {}) +cnt: 14, ((T([64, 12, 196, 196], f16), T([64, 12, 196, 196], f16), -1, f16), {}) +cnt: 4, ((T([64, 6, 196, 9, 9], f16), T([64, 6, 196, 9, 9], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 12, ((T([50176, 192], f16), [64, 28, 28, 192]), {}) +cnt: 4, ((T([12544, 486], f16), [64, 14, 14, 486]), {}) +cnt: 8, ((T([64, 6, 196, 9, 32], f16), [75264, 9, 32]), {}) +cnt: 4, ((T([75264, 9, 32], f16), [64, 6, 196, 9, 32]), {}) +cnt: 8, ((T([64, 6, 32, 9, 196], f16), [64, 1728, 196]), {}) +cnt: 16, ((T([64, 28, 28, 192], f16), [50176, 192]), {}) +cnt: 4, ((T([50176, 576], f16), [64, 28, 28, 576]), {}) +cnt: 28, ((T([12544, 1152], f16), [64, 14, 14, 1152]), {}) +cnt: 42, ((T([64, 12, 196, 32], f16), [768, 196, 32]), {}) +cnt: 14, ((T([64, 12, 32, 196], f16), [768, 32, 196]), {}) +cnt: 14, ((T([768, 196, 196], f16), [64, 12, 196, 
196]), {}) +cnt: 14, ((T([768, 196, 32], f16), [64, 12, 196, 32]), {}) +cnt: 14, ((T([64, 196, 12, 32], f16), [64, 14, 14, 384]), {}) +cnt: 28, ((T([12544, 384], f16), [64, 14, 14, 384]), {}) +cnt: 2, ((T([12608, 768], f16), [64, 197, 768]), {}) +cnt: 2, ((T([64, 384], f16), [64, 1, 384]), {}) +cnt: 2, ((T([64, 12, 32, 197], f16), [768, 32, 197]), {}) +cnt: 2, ((T([768, 1, 197], f16), [64, 12, 1, 197]), {}) +cnt: 2, ((T([64, 12, 197, 32], f16), [768, 197, 32]), {}) +cnt: 2, ((T([768, 1, 32], f16), [64, 12, 1, 32]), {}) +cnt: 1, ((T([64, 196, 384], f16), [12544, 384]), {}) +cnt: 1, ((T([12544, 1000], f16), [64, 196, 1000]), {}) +cnt: 2, ((T([64, 197, 2, 12, 32], f16), [64, 197, 768]), {}) +cnt: 1, ((T([64, 14, 14, 384], f16), [12544, 384]), {}) +cnt: 14, ((T([64, 196, 3, 12, 32], f16), [64, 14, 14, 1152]), {}) +cnt: 4, ((T([64, 196, 6, 9, 9], f16), [64, 14, 14, 486]), {}) +Operator: aten.add.Tensor +cnt: 4, ((T([64, 14, 14, 486], f16), T([486], f16)), {}) +cnt: 8, ((T([64, 28, 28, 192], f16), T([192], f16)), {}) +cnt: 16, ((T([64, 28, 28, 192], f16, stride=(150528, 28, 1, 784)), T([64, 28, 28, 192], f16)), {}) +cnt: 4, ((T([64, 28, 28, 576], f16), T([576], f16)), {}) +cnt: 1, ((T([64, 14, 14, 384], f16, stride=(75264, 14, 1, 196)), T([1, 14, 14, 384], f16)), {}) +cnt: 28, ((T([64, 14, 14, 384], f16), T([384], f16)), {}) +cnt: 28, ((T([64, 14, 14, 384], f16, stride=(75264, 14, 1, 196)), T([64, 14, 14, 384], f16)), {}) +cnt: 14, ((T([64, 14, 14, 1152], f16), T([1152], f16)), {}) +cnt: 4, ((T([64, 1, 384], f16, stride=(75648, 384, 1)), T([64, 1, 384], f16)), {}) +cnt: 2, ((T([64, 1, 384], f16), T([64, 1, 384], f16)), {}) +cnt: 1, ((T([64, 196, 1000], f16), T([1000], f16)), {}) +cnt: 1, ((T([64, 1000], f16), T([64, 1000], f16)), {}) +cnt: 7, ((T([64, 197, 384], f16), T([64, 197, 384], f16)), {}) +cnt: 1, ((T([64, 14, 14, 384], f16, stride=(75648, 5376, 384, 1)), T([64, 14, 14, 384], f16)), {}) +cnt: 27, ((T([64, 14, 14, 384], f16), T([64, 14, 14, 384], f16)), {}) +cnt: 
4, ((T([64, 28, 28, 192], f16), T([64, 28, 28, 192], f16)), {}) +Operator: aten.add_.Tensor +cnt: 3, ((T([], i64), 1), {}) +Operator: aten.addmm.default +cnt: 2, ((T([384], f16), T([64, 384], f16), T([384, 384], f16, stride=(1, 384))), {}) +cnt: 2, ((T([1152], f16), T([64, 384], f16), T([384, 1152], f16, stride=(1, 384))), {}) +cnt: 2, ((T([384], f16), T([64, 1152], f16), T([1152, 384], f16, stride=(1, 1152))), {}) +cnt: 1, ((T([1000], f16), T([64, 384], f16, stride=(75648, 1)), T([384, 1000], f16, stride=(1, 384))), {}) +Operator: aten.avg_pool2d.default +cnt: 4, ((T([64, 192, 28, 28], f16, stride=(150528, 1, 5376, 192)), [2, 2], [2, 2], [0, 0], True), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 4, ((T([64, 192, 14, 14], f16, stride=(37632, 1, 2688, 192)), T([64, 192, 28, 28], f16, stride=(150528, 1, 5376, 192)), [2, 2], [2, 2], [0, 0], True, True, None), {}) +Operator: aten.bmm.default +cnt: 4, ((T([75264, 9, 9], f16), T([75264, 9, 32], f16)), {}) +cnt: 14, ((T([768, 196, 32], f16), T([768, 32, 196], f16)), {}) +cnt: 14, ((T([768, 196, 196], f16), T([768, 196, 32], f16)), {}) +cnt: 2, ((T([768, 1, 32], f16), T([768, 32, 197], f16)), {}) +cnt: 2, ((T([768, 1, 197], f16), T([768, 197, 32], f16)), {}) +cnt: 2, ((T([768, 197, 1], f16), T([768, 1, 32], f16)), {}) +cnt: 2, ((T([768, 1, 32], f16), T([768, 32, 197], f16, stride=(6304, 1, 32))), {}) +cnt: 2, ((T([768, 32, 1], f16), T([768, 1, 197], f16)), {}) +cnt: 2, ((T([768, 1, 197], f16), T([768, 197, 32], f16, stride=(6304, 1, 197))), {}) +cnt: 14, ((T([768, 196, 196], f16, stride=(38416, 1, 196)), T([768, 196, 32], f16)), {}) +cnt: 14, ((T([768, 196, 32], f16), T([768, 32, 196], f16, stride=(6272, 1, 32))), {}) +cnt: 14, ((T([768, 32, 196], f16, stride=(6272, 1, 32)), T([768, 196, 196], f16)), {}) +cnt: 14, ((T([768, 196, 196], f16), T([768, 196, 32], f16, stride=(6272, 1, 196))), {}) +cnt: 4, ((T([75264, 9, 9], f16, stride=(81, 1, 9)), T([75264, 9, 32], f16)), {}) +cnt: 4, ((T([75264, 9, 32], f16), 
T([75264, 32, 9], f16, stride=(288, 1, 32))), {}) +Operator: aten.cat.default +cnt: 1, (([T([64, 1, 384], f16, stride=(0, 384, 1)), T([64, 196, 384], f16, stride=(75264, 1, 196))], 1), {}) +cnt: 2, (([T([64, 1, 384], f16), T([64, 196, 384], f16, stride=(75648, 384, 1))], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 3, 224, 224], f16),), {}) +Operator: aten.col2im.default +cnt: 4, ((T([64, 1728, 196], f16), [28, 28], [3, 3], [1, 1], [1, 1], [2, 2]), {}) +Operator: aten.col2im_backward.default +cnt: 4, ((T([64, 192, 28, 28], f16, stride=(150528, 1, 5376, 192)), [3, 3], [1, 1], [1, 1], [2, 2]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 64, 112, 112], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([192, 64, 4, 4], f16), T([192], f16), [4, 4], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 192, 28, 28], f16), T([384, 192, 2, 2], f16), T([384], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([64, 384, 14, 14], f16, stride=(75264, 1, 5376, 384)), T([64, 192, 28, 28], f16), T([384, 192, 2, 2], f16), [384], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 192, 28, 28], f16), T([64, 64, 112, 112], f16), T([192, 64, 4, 4], f16), [192], [4, 4], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.gelu.default +cnt: 
4, ((T([64, 28, 28, 576], f16),), {}) +cnt: 14, ((T([64, 14, 14, 1152], f16),), {}) +cnt: 2, ((T([64, 1, 1152], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 2, ((T([64, 1, 1152], f16), T([64, 1, 1152], f16)), {}) +cnt: 14, ((T([64, 14, 14, 1152], f16), T([64, 14, 14, 1152], f16)), {}) +cnt: 4, ((T([64, 28, 28, 576], f16), T([64, 28, 28, 576], f16)), {}) +Operator: aten.im2col.default +cnt: 4, ((T([64, 192, 28, 28], f16, stride=(150528, 1, 5376, 192)), [3, 3], [1, 1], [1, 1], [2, 2]), {}) +Operator: aten.im2col_backward.default +cnt: 4, ((T([64, 1728, 196], f16), [28, 28], [3, 3], [1, 1], [1, 1], [2, 2]), {}) +Operator: aten.lift_fresh_copy.default +cnt: 1, ((T([64], i64),), {}) +Operator: aten.max.dim +cnt: 1, ((T([64, 196, 1000], f16), 1), {}) +Operator: aten.mm.default +cnt: 8, ((T([50176, 192], f16), T([192, 192], f16, stride=(1, 192))), {}) +cnt: 4, ((T([12544, 192], f16), T([192, 486], f16, stride=(1, 192))), {}) +cnt: 4, ((T([50176, 192], f16), T([192, 576], f16, stride=(1, 192))), {}) +cnt: 4, ((T([50176, 576], f16), T([576, 192], f16, stride=(1, 576))), {}) +cnt: 28, ((T([12544, 384], f16), T([384, 1152], f16, stride=(1, 384))), {}) +cnt: 14, ((T([12544, 384], f16), T([384, 384], f16, stride=(1, 384))), {}) +cnt: 14, ((T([12544, 1152], f16), T([1152, 384], f16, stride=(1, 1152))), {}) +cnt: 2, ((T([12608, 384], f16), T([384, 768], f16, stride=(1, 384))), {}) +cnt: 2, ((T([64, 384], f16, stride=(75648, 1)), T([384, 384], f16, stride=(1, 384))), {}) +cnt: 1, ((T([12544, 384], f16), T([384, 1000], f16, stride=(1, 384))), {}) +cnt: 1, ((T([1000, 12544], f16, stride=(1, 1000)), T([12544, 384], f16)), {}) +cnt: 1, ((T([12544, 1000], f16), T([1000, 384], f16)), {}) +cnt: 1, ((T([64, 1000], f16), T([1000, 384], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(1, 1000)), T([64, 384], f16, stride=(75648, 1))), {}) +cnt: 2, ((T([64, 384], f16, stride=(75648, 1)), T([384, 1152], f16)), {}) +cnt: 2, ((T([384, 64], f16, stride=(1, 75648)), T([64, 1152], f16)), 
{}) +cnt: 2, ((T([64, 1152], f16), T([1152, 384], f16)), {}) +cnt: 2, ((T([1152, 64], f16, stride=(1, 1152)), T([64, 384], f16)), {}) +cnt: 4, ((T([64, 384], f16), T([384, 384], f16)), {}) +cnt: 2, ((T([384, 64], f16, stride=(1, 384)), T([64, 384], f16)), {}) +cnt: 2, ((T([384, 64], f16, stride=(1, 384)), T([64, 384], f16, stride=(75648, 1))), {}) +cnt: 2, ((T([768, 12608], f16, stride=(1, 768)), T([12608, 384], f16)), {}) +cnt: 2, ((T([12608, 768], f16), T([768, 384], f16)), {}) +cnt: 14, ((T([384, 12544], f16, stride=(1, 384)), T([12544, 1152], f16)), {}) +cnt: 14, ((T([12544, 384], f16), T([384, 1152], f16)), {}) +cnt: 28, ((T([1152, 12544], f16, stride=(1, 1152)), T([12544, 384], f16)), {}) +cnt: 28, ((T([12544, 1152], f16), T([1152, 384], f16)), {}) +cnt: 14, ((T([384, 12544], f16, stride=(1, 384)), T([12544, 384], f16)), {}) +cnt: 14, ((T([12544, 384], f16), T([384, 384], f16)), {}) +cnt: 4, ((T([192, 50176], f16, stride=(1, 192)), T([50176, 576], f16)), {}) +cnt: 4, ((T([50176, 192], f16), T([192, 576], f16)), {}) +cnt: 4, ((T([576, 50176], f16, stride=(1, 576)), T([50176, 192], f16)), {}) +cnt: 4, ((T([50176, 576], f16), T([576, 192], f16)), {}) +cnt: 8, ((T([192, 50176], f16, stride=(1, 192)), T([50176, 192], f16)), {}) +cnt: 8, ((T([50176, 192], f16), T([192, 192], f16)), {}) +cnt: 4, ((T([486, 12544], f16, stride=(1, 486)), T([12544, 192], f16)), {}) +cnt: 4, ((T([12544, 486], f16), T([486, 192], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([64, 6, 196, 9, 9], f16, stride=(95256, 81, 486, 9, 1)), 0.1767766952966369), {}) +cnt: 28, ((T([64, 12, 196, 196], f16), 0.1767766952966369), {}) +cnt: 4, ((T([64, 12, 1, 32], f16), 0.1767766952966369), {}) +cnt: 2, ((T([64, 1000], f16), 0.5), {}) +cnt: 4, ((T([64, 6, 196, 9, 9], f16), 0.1767766952966369), {}) +Operator: aten.native_batch_norm.default +cnt: 3, ((T([64, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {}) +Operator: 
aten.native_batch_norm_backward.default +cnt: 3, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {}) +Operator: aten.native_layer_norm.default +cnt: 8, ((T([64, 28, 28, 192], f16, stride=(150528, 28, 1, 784)), [192], T([192], f16), T([192], f16), 1e-05), {}) +cnt: 28, ((T([64, 14, 14, 384], f16, stride=(75264, 14, 1, 196)), [384], T([384], f16), T([384], f16), 1e-05), {}) +cnt: 3, ((T([64, 197, 384], f16), [384], T([384], f16), T([384], f16), 1e-05), {}) +cnt: 2, ((T([64, 1, 384], f16), [384], T([384], f16), T([384], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 3, ((T([64, 197, 384], f16), T([64, 197, 384], f16), [384], T([64, 197, 1], f32), T([64, 197, 1], f32), T([384], f16), T([384], f16), [True, True, True]), {}) +cnt: 2, ((T([64, 1, 384], f16), T([64, 1, 384], f16), [384], T([64, 1, 1], f32), T([64, 1, 1], f32), T([384], f16), T([384], f16), [True, True, True]), {}) +cnt: 28, ((T([64, 14, 14, 384], f16), T([64, 14, 14, 384], f16, stride=(75264, 14, 1, 196)), [384], T([64, 14, 14, 1], f32), T([64, 14, 14, 1], f32), T([384], f16), T([384], f16), [True, True, True]), {}) +cnt: 8, ((T([64, 28, 28, 192], f16), T([64, 28, 28, 192], f16, stride=(150528, 28, 1, 784)), [192], T([64, 28, 28, 1], f32), T([64, 28, 28, 1], f32), T([192], f16), T([192], f16), [True, True, True]), {}) +Operator: aten.nll_loss_backward.default +cnt: 1, ((T([], f16), T([64, 1000], f16), T([64], i64), None, 1, -100, T([], f16)), {}) +Operator: aten.nll_loss_forward.default +cnt: 1, ((T([64, 1000], f16), T([64], i64), None, 1, -100), {}) +Operator: aten.relu_.default +cnt: 3, ((T([64, 64, 112, 112], f16),), {}) +Operator: aten.scatter.src +cnt: 1, ((T([64, 196, 1000], f16), 1, T([64, 1, 1000], i64), T([64, 1, 1000], f16)), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([64, 384], f16), [64, 197, 384], 1, 0), {}) +Operator: 
aten.slice_backward.default +cnt: 1, ((T([64, 196, 384], f16), [64, 197, 384], 1, 1, 9223372036854775807, 1), {}) +cnt: 8, ((T([64, 197, 384], f16), [64, 197, 384], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 196, 384], f16, stride=(75648, 384, 1)), [64, 197, 384], 1, 1, 9223372036854775807, 1), {}) +cnt: 2, ((T([64, 1, 384], f16), [64, 1, 384], 2, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([64, 1, 384], f16), [64, 197, 384], 1, 0, 1, 1), {}) +Operator: aten.stack.default +cnt: 2, (([T([64, 12, 197, 32], f16, stride=(75648, 6304, 1, 197)), T([64, 12, 197, 32], f16)],), {}) +cnt: 14, (([T([64, 12, 196, 32], f16), T([64, 12, 196, 32], f16, stride=(75264, 6272, 1, 196)), T([64, 12, 196, 32], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 196, 1000], f16), [0, 1], True), {}) +cnt: 1, ((T([64, 1000], f16), [0], True), {}) +cnt: 2, ((T([64, 384], f16, stride=(75648, 1)), [0], True), {}) +cnt: 2, ((T([64, 1152], f16), [0], True), {}) +cnt: 2, ((T([64, 384], f16), [0], True), {}) +cnt: 1, ((T([64, 1, 384], f16, stride=(75648, 384, 1)), [0], True), {}) +cnt: 1, ((T([64, 14, 14, 384], f16, stride=(75648, 5376, 384, 1)), [0, 1, 2], True), {}) +cnt: 14, ((T([64, 14, 14, 1152], f16), [0, 1, 2], True), {}) +cnt: 27, ((T([64, 14, 14, 384], f16), [0, 1, 2], True), {}) +cnt: 1, ((T([64, 14, 14, 384], f16), [0], True), {}) +cnt: 8, ((T([64, 28, 28, 192], f16, stride=(150528, 28, 1, 784)), [0, 1, 2], True), {}) +cnt: 4, ((T([64, 28, 28, 576], f16), [0, 1, 2], True), {}) +cnt: 4, ((T([64, 14, 14, 486], f16), [0, 1, 2], True), {}) +Operator: aten.threshold_backward.default +cnt: 3, ((T([64, 64, 112, 112], f16), T([64, 64, 112, 112], f16), 0), {}) +Operator: aten.unbind.int +cnt: 14, ((T([3, 64, 12, 196, 32], f16, stride=(384, 225792, 32, 1152, 1)),), {}) +cnt: 2, ((T([2, 64, 12, 197, 32], f16, stride=(384, 151296, 32, 768, 1)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/BERT_pytorch_training.txt 
b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/BERT_pytorch_training.txt new file mode 100644 index 000000000..6c1b78ab6 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/BERT_pytorch_training.txt @@ -0,0 +1,94 @@ +Operator: aten._softmax.default +cnt: 12, ((T([16, 12, 128, 128], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([16, 12, 128, 128], f16), T([16, 12, 128, 128], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([16, 12, 128, 64], f16), [192, 128, 64]), {}) +cnt: 12, ((T([16, 12, 64, 128], f16), [192, 64, 128]), {}) +cnt: 12, ((T([192, 128, 128], f16), [16, 12, 128, 128]), {}) +cnt: 12, ((T([192, 128, 64], f16), [16, 12, 128, 64]), {}) +cnt: 24, ((T([16, 128, 12, 64], f16), [16, 128, 768]), {}) +cnt: 12, ((T([16, 128, 768], f16), [2048, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([16, 128, 768], f16), T([1, 128, 768], f16)), {}) +cnt: 120, ((T([16, 128, 768], f16), T([16, 128, 768], f16)), {}) +cnt: 24, ((T([16, 128, 1], f16), 1e-06), {}) +cnt: 24, ((T([16, 128, 768], f16), T([768], f16)), {}) +cnt: 1, ((T([16, 128, 768], f16, stride=(0, 0, 0)), T([16, 128, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([768], f16), T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([2048, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([2048, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([192, 128, 64], f16), T([192, 64, 128], f16)), {}) +cnt: 12, ((T([192, 128, 128], f16), T([192, 128, 64], f16)), {}) +cnt: 12, ((T([192, 128, 128], f16, stride=(16384, 1, 128)), T([192, 128, 64], f16)), {}) +cnt: 12, ((T([192, 128, 64], f16), T([192, 64, 128], f16, stride=(8192, 1, 64))), {}) +cnt: 12, ((T([192, 64, 128], f16, stride=(8192, 1, 64)), T([192, 128, 128], f16)), {}) +cnt: 12, ((T([192, 128, 128], f16), T([192, 
128, 64], f16, stride=(8192, 1, 128))), {}) +Operator: aten.clone.default +cnt: 2, ((T([16, 128], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([16, 128], i64), T([16, 128], i64)), {}) +Operator: aten.div.Scalar +cnt: 24, ((T([16, 128, 768], f16, stride=(128, 1, 0)), 768), {}) +Operator: aten.div.Tensor +cnt: 96, ((T([16, 128, 768], f16), T([16, 128, 1], f16)), {}) +cnt: 24, ((T([16, 12, 128, 128], f16), 8.0), {}) +cnt: 2, ((T([], f16), 1572864), {}) +cnt: 24, ((T([16, 128, 1], f16), T([16, 128, 1], f16)), {}) +Operator: aten.embedding.default +cnt: 1, ((T([20005, 768], f16), T([16, 128], i64), 0), {}) +cnt: 1, ((T([3, 768], f16), T([16, 128], i64), 0), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([16, 128, 768], f16), T([16, 128], i64), 3, 0, False), {}) +cnt: 1, ((T([16, 128, 768], f16), T([16, 128], i64), 20005, 0, False), {}) +Operator: aten.eq.Scalar +cnt: 12, ((T([16, 1, 128, 128], b8), 0), {}) +cnt: 24, ((T([16, 128, 1], f16), 0), {}) +Operator: aten.gelu.default +cnt: 12, ((T([16, 128, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([16, 128, 3072], f16), T([16, 128, 3072], f16)), {}) +Operator: aten.gt.Scalar +cnt: 1, ((T([16, 128], i64), 0), {}) +Operator: aten.masked_fill.Scalar +cnt: 12, ((T([16, 12, 128, 128], f16), T([16, 1, 128, 128], b8), -65504.0), {}) +cnt: 12, ((T([16, 12, 128, 128], f16), T([16, 1, 128, 128], b8), 0), {}) +Operator: aten.masked_fill_.Scalar +cnt: 24, ((T([16, 128, 1], f16), T([16, 128, 1], b8), 0), {}) +Operator: aten.mean.dim +cnt: 48, ((T([16, 128, 768], f16), [-1], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 768], f16, stride=(0, 0)), T([768, 3072], f16)), {}) +cnt: 1, ((T([768, 2048], f16, stride=(0, 0)), T([2048, 3072], f16)), {}) +cnt: 12, ((T([2048, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 2048], f16, stride=(1, 3072)), T([2048, 768], f16)), {}) +cnt: 48, ((T([2048, 768], f16), T([768, 768], f16)), {}) +cnt: 48, ((T([768, 2048], f16, 
stride=(1, 768)), T([2048, 768], f16)), {}) +cnt: 11, ((T([2048, 768], f16), T([768, 3072], f16)), {}) +cnt: 11, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 3072], f16)), {}) +Operator: aten.mul.Scalar +cnt: 24, ((T([16, 128, 1], f16), 2), {}) +cnt: 24, ((T([16, 128, 1], f16), 0.002607561929595828), {}) +Operator: aten.mul.Tensor +cnt: 24, ((T([768], f16), T([16, 128, 768], f16)), {}) +cnt: 48, ((T([16, 128, 768], f16), T([16, 128, 768], f16)), {}) +cnt: 24, ((T([16, 128, 768], f16), T([768], f16)), {}) +cnt: 24, ((T([16, 128, 1], f16), T([16, 128, 768], f16)), {}) +Operator: aten.neg.default +cnt: 48, ((T([16, 128, 768], f16),), {}) +Operator: aten.repeat.default +cnt: 1, ((T([16, 1, 128], b8), [1, 128, 1]), {}) +Operator: aten.std.correction +cnt: 24, ((T([16, 128, 768], f16), [-1]), {'correction': 1, 'keepdim': True}) +Operator: aten.sub.Tensor +cnt: 48, ((T([16, 128, 768], f16), T([16, 128, 1], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([2048, 768], f16, stride=(0, 0)), [0], True), {}) +cnt: 12, ((T([2048, 3072], f16), [0], True), {}) +cnt: 48, ((T([16, 128, 768], f16), [0, 1], True), {}) +cnt: 48, ((T([16, 128, 768], f16), [2], True), {}) +cnt: 59, ((T([2048, 768], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([16, 128, 768], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/Background_Matting_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/Background_Matting_training.txt new file mode 100644 index 000000000..fbc1f47d5 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/Background_Matting_training.txt @@ -0,0 +1,119 @@ +Operator: aten.add.Tensor +cnt: 27, ((T([3, 256, 128, 128], f16), T([3, 256, 128, 128], f16)), {}) +cnt: 1, ((T([], f16), 0), {}) +cnt: 1, ((T([], f16), T([], f16)), {}) +cnt: 1, ((T([3, 256, 128, 128], f16, stride=(7340032, 16384, 128, 1)), T([3, 256, 128, 128], f16, stride=(8388608, 16384, 128, 1))), {}) 
+cnt: 2, ((T([3, 256, 128, 128], f16), T([3, 256, 128, 128], f16, stride=(8388608, 16384, 128, 1))), {}) +cnt: 1, ((T([3, 256, 128, 128], f16, stride=(8388608, 16384, 128, 1)), T([3, 256, 128, 128], f16, stride=(8388608, 16384, 128, 1))), {}) +cnt: 1, ((T([3, 128, 256, 256], f16, stride=(16777216, 65536, 256, 1)), T([3, 128, 256, 256], f16)), {}) +Operator: aten.cat.default +cnt: 2, (([T([3, 256, 128, 128], f16), T([3, 256, 128, 128], f16)], 1), {}) +cnt: 1, (([T([3, 256, 128, 128], f16), T([3, 256, 128, 128], f16, stride=(4194304, 1, 32768, 256))], 1), {}) +cnt: 1, (([T([3, 64, 128, 128], f16), T([3, 64, 128, 128], f16), T([3, 64, 128, 128], f16)], 1), {}) +cnt: 1, (([T([3, 256, 128, 128], f16), T([3, 192, 128, 128], f16)], 1), {}) +cnt: 1, (([T([3, 128, 256, 256], f16), T([3, 128, 256, 256], f16)], 1), {}) +Operator: aten.clone.default +cnt: 2, ((T([3, 3, 512, 512], f16),), {}) +cnt: 1, ((T([3, 1, 512, 512], f16),), {}) +cnt: 1, ((T([3, 4, 512, 512], f16),), {}) +Operator: aten.convolution.default +cnt: 2, ((T([3, 3, 518, 518], f16), T([64, 3, 7, 7], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([3, 64, 512, 512], f16), T([128, 64, 3, 3], f16), T([128], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([3, 128, 256, 256], f16), T([256, 128, 3, 3], f16), T([256], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([3, 1, 518, 518], f16), T([64, 1, 7, 7], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([3, 64, 512, 512], f16, stride=(16777216, 1, 32768, 64)), T([128, 64, 3, 3], f16), T([128], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([3, 128, 256, 256], f16, stride=(8388608, 1, 32768, 128)), T([256, 128, 3, 3], f16), T([256], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([3, 4, 518, 518], f16), T([64, 4, 7, 7], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([3, 512, 128, 128], f16), T([64, 512, 
1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([3, 448, 128, 128], f16), T([256, 448, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 26, ((T([3, 256, 130, 130], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([3, 256, 256, 256], f16), T([128, 256, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([3, 128, 512, 512], f16), T([64, 128, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([3, 64, 518, 518], f16), T([1, 64, 7, 7], f16), T([1], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([3, 256, 512, 512], f16), T([64, 256, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([3, 64, 518, 518], f16), T([3, 64, 7, 7], f16), T([3], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([3, 3, 512, 512], f16, stride=(0, 0, 0, 0)), T([3, 64, 518, 518], f16), T([3, 64, 7, 7], f16), [3], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([3, 64, 512, 512], f16), T([3, 256, 512, 512], f16), T([64, 256, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([3, 128, 256, 256], f16), T([3, 256, 256, 256], f16), T([128, 256, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 26, ((T([3, 256, 128, 128], f16), T([3, 256, 130, 130], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([3, 1, 512, 512], f16), T([3, 64, 518, 518], f16), T([1, 64, 7, 7], f16), [1], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([3, 64, 512, 512], f16), T([3, 128, 512, 512], f16), T([64, 128, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([3, 256, 
128, 128], f16), T([3, 448, 128, 128], f16), T([256, 448, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([3, 64, 128, 128], f16), T([3, 512, 128, 128], f16), T([64, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([3, 256, 128, 128], f16, stride=(4194304, 1, 32768, 256)), T([3, 128, 256, 256], f16, stride=(8388608, 1, 32768, 128)), T([256, 128, 3, 3], f16), [256], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([3, 128, 256, 256], f16, stride=(8388608, 1, 32768, 128)), T([3, 64, 512, 512], f16, stride=(16777216, 1, 32768, 64)), T([128, 64, 3, 3], f16), [128], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([3, 64, 512, 512], f16, stride=(16777216, 1, 32768, 64)), T([3, 1, 518, 518], f16), T([64, 1, 7, 7], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +cnt: 2, ((T([3, 256, 128, 128], f16), T([3, 128, 256, 256], f16), T([256, 128, 3, 3], f16), [256], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([3, 128, 256, 256], f16), T([3, 64, 512, 512], f16), T([128, 64, 3, 3], f16), [128], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([3, 64, 512, 512], f16), T([3, 3, 518, 518], f16), T([64, 3, 7, 7], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 2, ((T([3, 3, 512, 512], f16), T([3, 3, 512, 512], f16)), {}) +cnt: 1, ((T([3, 1, 512, 512], f16), T([3, 1, 512, 512], f16)), {}) +cnt: 1, ((T([3, 4, 512, 512], f16), T([3, 4, 512, 512], f16)), {}) +cnt: 1, ((T([256, 128, 3, 3], f16), T([256, 128, 3, 3], f16, stride=(1152, 1, 384, 128))), {}) +cnt: 1, ((T([128, 64, 3, 3], f16), T([128, 64, 3, 3], f16, stride=(576, 1, 192, 64))), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 786432), {}) +cnt: 2, ((T([], f16), 2359296), {}) +cnt: 2, ((T([], 
f16), 2), {}) +Operator: aten.native_batch_norm.default +cnt: 5, ((T([3, 64, 512, 512], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([3, 128, 256, 256], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 30, ((T([3, 256, 128, 128], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([3, 64, 512, 512], f16, stride=(16777216, 1, 32768, 64)), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([3, 128, 256, 256], f16, stride=(8388608, 1, 32768, 128)), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([3, 256, 128, 128], f16, stride=(4194304, 1, 32768, 256)), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([3, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([3, 64, 512, 512], f16), T([3, 64, 512, 512], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([3, 128, 256, 256], f16), T([3, 128, 256, 256], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 29, ((T([3, 256, 128, 128], f16), T([3, 256, 128, 128], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([3, 64, 128, 128], f16), T([3, 64, 128, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([3, 256, 128, 128], f16, stride=(4194304, 1, 32768, 256)), T([3, 256, 128, 128], f16, stride=(4194304, 1, 32768, 256)), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, 
True]), {}) +cnt: 1, ((T([3, 128, 256, 256], f16, stride=(8388608, 1, 32768, 128)), T([3, 128, 256, 256], f16, stride=(8388608, 1, 32768, 128)), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([3, 64, 512, 512], f16, stride=(16777216, 1, 32768, 64)), T([3, 64, 512, 512], f16, stride=(16777216, 1, 32768, 64)), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 1, ((T([256, 128, 3, 3], f16, stride=(1152, 1, 384, 128)), [256, 128, 3, 3], [1152, 9, 3, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([128, 64, 3, 3], f16, stride=(576, 1, 192, 64)), [128, 64, 3, 3], [576, 9, 3, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.reflection_pad2d.default +cnt: 2, ((T([3, 3, 512, 512], f16), [3, 3, 3, 3]), {}) +cnt: 1, ((T([3, 1, 512, 512], f16), [3, 3, 3, 3]), {}) +cnt: 1, ((T([3, 4, 512, 512], f16), [3, 3, 3, 3]), {}) +cnt: 26, ((T([3, 256, 128, 128], f16), [1, 1, 1, 1]), {}) +cnt: 2, ((T([3, 64, 512, 512], f16), [3, 3, 3, 3]), {}) +Operator: aten.reflection_pad2d_backward.default +cnt: 2, ((T([3, 64, 518, 518], f16), T([3, 64, 512, 512], f16), [3, 3, 3, 3]), {}) +cnt: 26, ((T([3, 256, 130, 130], f16), T([3, 256, 128, 128], f16), [1, 1, 1, 1]), {}) +Operator: aten.relu_.default +cnt: 5, ((T([3, 64, 512, 512], f16),), {}) +cnt: 5, ((T([3, 128, 256, 256], f16),), {}) +cnt: 17, ((T([3, 256, 128, 128], f16),), {}) +cnt: 1, ((T([3, 64, 512, 512], f16, stride=(16777216, 1, 32768, 64)),), {}) +cnt: 1, ((T([3, 128, 256, 256], f16, stride=(8388608, 1, 32768, 128)),), {}) +cnt: 1, ((T([3, 256, 128, 128], f16, stride=(4194304, 1, 32768, 256)),), {}) +cnt: 3, ((T([3, 64, 128, 128], f16),), {}) +Operator: aten.sum.default +cnt: 1, ((T([3, 1, 512, 512], f16),), {}) +cnt: 1, ((T([3, 3, 512, 512], f16),), {}) +Operator: aten.tanh.default +cnt: 1, 
((T([3, 1, 512, 512], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 1, ((T([3, 1, 512, 512], f16, stride=(0, 0, 0, 0)), T([3, 1, 512, 512], f16)), {}) +Operator: aten.threshold_backward.default +cnt: 4, ((T([3, 64, 512, 512], f16), T([3, 64, 512, 512], f16), 0), {}) +cnt: 1, ((T([3, 128, 256, 256], f16, stride=(16777216, 65536, 256, 1)), T([3, 128, 256, 256], f16), 0), {}) +cnt: 16, ((T([3, 256, 128, 128], f16), T([3, 256, 128, 128], f16), 0), {}) +cnt: 3, ((T([3, 128, 256, 256], f16), T([3, 128, 256, 256], f16), 0), {}) +cnt: 3, ((T([3, 64, 128, 128], f16, stride=(7340032, 16384, 128, 1)), T([3, 64, 128, 128], f16), 0), {}) +cnt: 1, ((T([3, 256, 128, 128], f16, stride=(8388608, 16384, 128, 1)), T([3, 256, 128, 128], f16, stride=(4194304, 1, 32768, 256)), 0), {}) +cnt: 1, ((T([3, 128, 256, 256], f16, stride=(8388608, 1, 32768, 128)), T([3, 128, 256, 256], f16, stride=(8388608, 1, 32768, 128)), 0), {}) +cnt: 1, ((T([3, 64, 512, 512], f16, stride=(16777216, 1, 32768, 64)), T([3, 64, 512, 512], f16, stride=(16777216, 1, 32768, 64)), 0), {}) +Operator: aten.upsample_bilinear2d.vec +cnt: 2, ((T([3, 256, 128, 128], f16), None, True, [2.0, 2.0]), {}) +cnt: 1, ((T([3, 128, 256, 256], f16), None, True, [2.0, 2.0]), {}) +cnt: 1, ((T([3, 256, 256, 256], f16), None, True, [2.0, 2.0]), {}) +Operator: aten.upsample_bilinear2d_backward.vec +cnt: 1, ((T([3, 256, 512, 512], f16), None, [3, 256, 256, 256], True, [2.0, 2.0]), {}) +cnt: 2, ((T([3, 256, 256, 256], f16), None, [3, 256, 128, 128], True, [2.0, 2.0]), {}) +cnt: 1, ((T([3, 128, 512, 512], f16), None, [3, 128, 256, 256], True, [2.0, 2.0]), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/LearningToPaint_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/LearningToPaint_training.txt new file mode 100644 index 000000000..272e9fb33 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/LearningToPaint_training.txt @@ -0,0 
+1,86 @@ +Operator: aten.add.Tensor +cnt: 1, ((T([96, 512, 4, 4], f16), T([96, 512, 4, 4], f16)), {}) +cnt: 2, ((T([96, 256, 8, 8], f16), T([96, 256, 8, 8], f16)), {}) +cnt: 2, ((T([96, 128, 16, 16], f16), T([96, 128, 16, 16], f16)), {}) +cnt: 2, ((T([96, 64, 32, 32], f16), T([96, 64, 32, 32], f16)), {}) +cnt: 1, ((T([96, 64, 64, 64], f16), T([96, 64, 64, 64], f16)), {}) +Operator: aten.add_.Tensor +cnt: 2, ((T([96, 64, 32, 32], f16), T([96, 64, 32, 32], f16)), {}) +cnt: 2, ((T([96, 128, 16, 16], f16), T([96, 128, 16, 16], f16)), {}) +cnt: 2, ((T([96, 256, 8, 8], f16), T([96, 256, 8, 8], f16)), {}) +cnt: 2, ((T([96, 512, 4, 4], f16), T([96, 512, 4, 4], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([65], f16), T([96, 512], f16), T([512, 65], f16, stride=(1, 512))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([96, 512, 4, 4], f16), [4, 4]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([96, 512, 1, 1], f16), T([96, 512, 4, 4], f16), [4, 4], [], [0, 0], False, True, None), {}) +Operator: aten.clone.default +cnt: 1, ((T([96, 9, 128, 128], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([96, 9, 128, 128], f16), T([64, 9, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 64, 64, 64], f16), T([64, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([96, 64, 32, 32], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 64, 64, 64], f16), T([64, 64, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 64, 32, 32], f16), T([128, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([96, 128, 16, 16], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 64, 32, 32], f16), T([128, 64, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 128, 16, 16], f16), T([256, 128, 3, 3], f16), 
None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([96, 256, 8, 8], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 128, 16, 16], f16), T([256, 128, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 256, 8, 8], f16), T([512, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([96, 512, 4, 4], f16), T([512, 512, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 256, 8, 8], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([96, 512, 4, 4], f16), T([96, 512, 4, 4], f16), T([512, 512, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 512, 4, 4], f16), T([96, 256, 8, 8], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 512, 4, 4], f16), T([96, 256, 8, 8], f16), T([512, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([96, 256, 8, 8], f16), T([96, 256, 8, 8], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 256, 8, 8], f16), T([96, 128, 16, 16], f16), T([256, 128, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 256, 8, 8], f16), T([96, 128, 16, 16], f16), T([256, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([96, 128, 16, 16], f16), T([96, 128, 16, 16], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 128, 16, 16], f16), T([96, 64, 32, 32], f16), T([128, 64, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 128, 16, 16], 
f16), T([96, 64, 32, 32], f16), T([128, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([96, 64, 32, 32], f16), T([96, 64, 32, 32], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 64, 32, 32], f16), T([96, 64, 64, 64], f16), T([64, 64, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 64, 32, 32], f16), T([96, 64, 64, 64], f16), T([64, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 64, 64, 64], f16), T([96, 9, 128, 128], f16), T([64, 9, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([96, 9, 128, 128], f16), T([96, 9, 128, 128], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 6240), {}) +Operator: aten.mm.default +cnt: 1, ((T([96, 65], f16), T([65, 512], f16)), {}) +cnt: 1, ((T([65, 96], f16, stride=(1, 65)), T([96, 512], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([96, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([96, 64, 32, 32], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([96, 128, 16, 16], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([96, 256, 8, 8], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([96, 512, 4, 4], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 5, ((T([96, 512, 4, 4], f16), T([96, 512, 4, 4], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([96, 256, 8, 8], f16), T([96, 256, 8, 8], 
f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([96, 128, 16, 16], f16), T([96, 128, 16, 16], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([96, 64, 32, 32], f16), T([96, 64, 32, 32], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([96, 64, 64, 64], f16), T([96, 64, 64, 64], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.relu.default +cnt: 1, ((T([96, 64, 64, 64], f16),), {}) +cnt: 4, ((T([96, 64, 32, 32], f16),), {}) +cnt: 4, ((T([96, 128, 16, 16], f16),), {}) +cnt: 4, ((T([96, 256, 8, 8], f16),), {}) +cnt: 4, ((T([96, 512, 4, 4], f16),), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([96, 65], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 1, ((T([96, 65], f16, stride=(0, 0)), T([96, 65], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([96, 65], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([96, 65], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 4, ((T([96, 512, 4, 4], f16), T([96, 512, 4, 4], f16), 0), {}) +cnt: 4, ((T([96, 256, 8, 8], f16), T([96, 256, 8, 8], f16), 0), {}) +cnt: 4, ((T([96, 128, 16, 16], f16), T([96, 128, 16, 16], f16), 0), {}) +cnt: 4, ((T([96, 64, 32, 32], f16), T([96, 64, 32, 32], f16), 0), {}) +cnt: 1, ((T([96, 64, 64, 64], f16), T([96, 64, 64, 64], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/Super_SloMo_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/Super_SloMo_training.txt new file mode 100644 index 000000000..ff432c07b --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/Super_SloMo_training.txt @@ -0,0 +1,255 @@ +Operator: aten._to_copy.default 
+cnt: 12, ((T([6, 352, 352], i64, stride=(0, 352, 1)),), {'dtype': f16}) +Operator: aten.abs.default +cnt: 5, ((T([6, 3, 352, 352], f16),), {}) +cnt: 2, ((T([6, 2, 352, 351], f16),), {}) +cnt: 2, ((T([6, 2, 351, 352], f16),), {}) +Operator: aten.add.Tensor +cnt: 22, ((T([6, 2, 352, 352], f16), T([6, 2, 352, 352], f16)), {}) +cnt: 8, ((T([6, 352, 352], f16), T([6, 352, 352], f16, stride=(247808, 352, 1))), {}) +cnt: 2, ((T([6, 2, 352, 352], f16, stride=(619520, 123904, 352, 1)), T([6, 2, 352, 352], f16)), {}) +cnt: 2, ((T([6, 3, 352, 352], f16), T([6, 3, 352, 352], f16)), {}) +cnt: 4, ((T([6, 1, 352, 352], f16), T([6, 1, 352, 352], f16)), {}) +cnt: 10, ((T([], f16), T([], f16)), {}) +cnt: 4, ((T([6, 352, 352], f16), T([6, 352, 352], f16, stride=(495616, 352, 1))), {}) +cnt: 1, ((T([], f16), 0), {}) +cnt: 1, ((T([6, 3, 352, 352], f16, stride=(0, 0, 0, 0)), T([6, 3, 352, 352], f16)), {}) +cnt: 2, ((T([6, 5, 352, 352], f16), T([6, 5, 352, 352], f16)), {}) +cnt: 2, ((T([6, 512, 22, 22], f16, stride=(495616, 484, 22, 1)), T([6, 512, 22, 22], f16)), {}) +cnt: 2, ((T([6, 256, 44, 44], f16, stride=(991232, 1936, 44, 1)), T([6, 256, 44, 44], f16)), {}) +cnt: 2, ((T([6, 128, 88, 88], f16, stride=(1982464, 7744, 88, 1)), T([6, 128, 88, 88], f16)), {}) +cnt: 2, ((T([6, 64, 176, 176], f16, stride=(3964928, 30976, 176, 1)), T([6, 64, 176, 176], f16)), {}) +cnt: 2, ((T([6, 32, 352, 352], f16, stride=(7929856, 123904, 352, 1)), T([6, 32, 352, 352], f16)), {}) +cnt: 4, ((T([6, 2, 352, 352], f16), T([6, 2, 352, 352], f16, stride=(2478080, 123904, 352, 1))), {}) +cnt: 2, ((T([6, 3, 352, 352], f16), T([6, 3, 352, 352], f16, stride=(2478080, 123904, 352, 1))), {}) +cnt: 1, ((T([6, 4, 352, 352], f16), T([6, 4, 352, 352], f16)), {}) +Operator: aten.avg_pool2d.default +cnt: 2, ((T([6, 32, 352, 352], f16), [2, 2]), {}) +cnt: 2, ((T([6, 64, 176, 176], f16), [2, 2]), {}) +cnt: 2, ((T([6, 128, 88, 88], f16), [2, 2]), {}) +cnt: 2, ((T([6, 256, 44, 44], f16), [2, 2]), {}) +cnt: 2, ((T([6, 512, 
22, 22], f16), [2, 2]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 2, ((T([6, 512, 11, 11], f16), T([6, 512, 22, 22], f16), [2, 2], [], [0, 0], False, True, None), {}) +cnt: 2, ((T([6, 256, 22, 22], f16), T([6, 256, 44, 44], f16), [2, 2], [], [0, 0], False, True, None), {}) +cnt: 2, ((T([6, 128, 44, 44], f16), T([6, 128, 88, 88], f16), [2, 2], [], [0, 0], False, True, None), {}) +cnt: 2, ((T([6, 64, 88, 88], f16), T([6, 64, 176, 176], f16), [2, 2], [], [0, 0], False, True, None), {}) +cnt: 2, ((T([6, 32, 176, 176], f16), T([6, 32, 352, 352], f16), [2, 2], [], [0, 0], False, True, None), {}) +Operator: aten.cat.default +cnt: 1, (([T([6, 3, 352, 352], f16), T([6, 3, 352, 352], f16)], 1), {}) +cnt: 2, (([T([6, 512, 22, 22], f16), T([6, 512, 22, 22], f16)], 1), {}) +cnt: 2, (([T([6, 256, 44, 44], f16), T([6, 256, 44, 44], f16)], 1), {}) +cnt: 2, (([T([6, 128, 88, 88], f16), T([6, 128, 88, 88], f16)], 1), {}) +cnt: 2, (([T([6, 64, 176, 176], f16), T([6, 64, 176, 176], f16)], 1), {}) +cnt: 2, (([T([6, 32, 352, 352], f16), T([6, 32, 352, 352], f16)], 1), {}) +cnt: 1, (([T([6, 3, 352, 352], f16), T([6, 3, 352, 352], f16), T([6, 2, 352, 352], f16, stride=(495616, 123904, 352, 1)), T([6, 2, 352, 352], f16, stride=(495616, 123904, 352, 1)), T([6, 2, 352, 352], f16), T([6, 2, 352, 352], f16), T([6, 3, 352, 352], f16), T([6, 3, 352, 352], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([6], i64),), {}) +cnt: 3, ((T([6, 3, 352, 352], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([6, 6, 352, 352], f16), T([32, 6, 7, 7], f16), T([32], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 32, 352, 352], f16), T([32, 32, 7, 7], f16), T([32], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 32, 176, 176], f16), T([64, 32, 5, 5], f16), T([64], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 64, 176, 176], f16), T([64, 64, 5, 5], f16), T([64], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 1), 
{}) +cnt: 2, ((T([6, 64, 88, 88], f16), T([128, 64, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 128, 88, 88], f16), T([128, 128, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 128, 44, 44], f16), T([256, 128, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 256, 44, 44], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 256, 22, 22], f16), T([512, 256, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([6, 512, 22, 22], f16), T([512, 512, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([6, 512, 11, 11], f16), T([512, 512, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 1024, 22, 22], f16), T([512, 1024, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([6, 512, 44, 44], f16), T([256, 512, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([6, 256, 88, 88], f16), T([128, 256, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([6, 128, 176, 176], f16), T([64, 128, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([6, 64, 352, 352], f16), T([32, 64, 3, 3], f16), T([32], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([6, 32, 352, 352], f16), T([4, 32, 3, 3], f16), T([4], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([6, 20, 352, 352], f16), T([32, 20, 7, 7], f16), T([32], f16), [1, 1], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([6, 32, 352, 352], f16), T([5, 32, 3, 3], f16), T([5], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 3, 352, 352], f16), T([64, 3, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, 
((T([6, 64, 352, 352], f16), T([64, 64, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 64, 176, 176], f16), T([128, 64, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 128, 176, 176], f16), T([128, 128, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 128, 88, 88], f16), T([256, 128, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([6, 256, 88, 88], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([6, 256, 44, 44], f16), T([512, 256, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([6, 512, 44, 44], f16), T([512, 512, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 2, ((T([6, 512, 44, 44], f16), T([6, 512, 44, 44], f16), T([512, 512, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, False, False]), {}) +cnt: 1, ((T([6, 512, 44, 44], f16), T([6, 256, 44, 44], f16), T([512, 256, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, False, False]), {}) +cnt: 2, ((T([6, 256, 88, 88], f16), T([6, 256, 88, 88], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, False, False]), {}) +cnt: 1, ((T([6, 256, 88, 88], f16), T([6, 128, 88, 88], f16), T([256, 128, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, False, False]), {}) +cnt: 1, ((T([6, 128, 176, 176], f16), T([6, 128, 176, 176], f16), T([128, 128, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, False, False]), {}) +cnt: 1, ((T([6, 128, 176, 176], f16), T([6, 64, 176, 176], f16), T([128, 64, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, False, False]), {}) +cnt: 1, ((T([6, 64, 352, 352], f16), T([6, 64, 352, 352], f16), T([64, 64, 3, 3], f16), 
[64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, False, False]), {}) +cnt: 1, ((T([6, 64, 352, 352], f16), T([6, 3, 352, 352], f16), T([64, 3, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, False, False]), {}) +cnt: 1, ((T([6, 5, 352, 352], f16), T([6, 32, 352, 352], f16), T([5, 32, 3, 3], f16), [5], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([6, 32, 352, 352], f16), T([6, 64, 352, 352], f16), T([32, 64, 3, 3], f16), [32], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([6, 64, 176, 176], f16), T([6, 128, 176, 176], f16), T([64, 128, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([6, 128, 88, 88], f16), T([6, 256, 88, 88], f16), T([128, 256, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([6, 256, 44, 44], f16), T([6, 512, 44, 44], f16), T([256, 512, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([6, 512, 22, 22], f16), T([6, 1024, 22, 22], f16), T([512, 1024, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([6, 512, 22, 22], f16), T([6, 512, 22, 22], f16), T([512, 512, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([6, 512, 11, 11], f16), T([6, 512, 11, 11], f16), T([512, 512, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([6, 512, 22, 22], f16), T([6, 256, 22, 22], f16), T([512, 256, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([6, 256, 44, 44], f16), T([6, 256, 44, 44], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([6, 256, 44, 44], f16), T([6, 128, 44, 44], f16), T([256, 128, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 
1, [True, True, True]), {}) +cnt: 2, ((T([6, 128, 88, 88], f16), T([6, 128, 88, 88], f16), T([128, 128, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([6, 128, 88, 88], f16), T([6, 64, 88, 88], f16), T([128, 64, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([6, 64, 176, 176], f16), T([6, 64, 176, 176], f16), T([64, 64, 5, 5], f16), [64], [1, 1], [2, 2], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([6, 64, 176, 176], f16), T([6, 32, 176, 176], f16), T([64, 32, 5, 5], f16), [64], [1, 1], [2, 2], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([6, 32, 352, 352], f16), T([6, 32, 352, 352], f16), T([32, 32, 7, 7], f16), [32], [1, 1], [3, 3], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([6, 32, 352, 352], f16), T([6, 20, 352, 352], f16), T([32, 20, 7, 7], f16), [32], [1, 1], [3, 3], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([6, 4, 352, 352], f16), T([6, 32, 352, 352], f16), T([4, 32, 3, 3], f16), [4], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([6, 32, 352, 352], f16), T([6, 6, 352, 352], f16), T([32, 6, 7, 7], f16), [32], [1, 1], [3, 3], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([6], i64), T([6], i64)), {}) +cnt: 3, ((T([6, 3, 352, 352], f16), T([6, 3, 352, 352], f16)), {}) +Operator: aten.div.Scalar +cnt: 2, ((T([6, 2, 351, 352], f16, stride=(0, 0, 0, 0)), 1482624), {}) +cnt: 2, ((T([6, 2, 352, 351], f16, stride=(0, 0, 0, 0)), 1482624), {}) +cnt: 5, ((T([6, 3, 352, 352], f16, stride=(0, 0, 0, 0)), 2230272), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([6, 352, 352], f16), 352), {}) +cnt: 4, ((T([6, 3, 352, 352], f16), T([6, 1, 352, 352], f16)), {}) +cnt: 2, ((T([], f16), 2230272), {}) +cnt: 2, ((T([], f16), 1), {}) +cnt: 2, ((T([], f16), 2), {}) +Operator: aten.grid_sampler_2d.default +cnt: 6, ((T([6, 
3, 352, 352], f16), T([6, 352, 352, 2], f16), 0, 0, False), {}) +Operator: aten.grid_sampler_2d_backward.default +cnt: 6, ((T([6, 3, 352, 352], f16), T([6, 3, 352, 352], f16), T([6, 352, 352, 2], f16), 0, 0, False, [False, True]), {}) +Operator: aten.index.Tensor +cnt: 8, ((T([7], f16), [T([6], i64)]), {}) +Operator: aten.leaky_relu.default +cnt: 8, ((T([6, 32, 352, 352], f16), 0.1), {}) +cnt: 8, ((T([6, 64, 176, 176], f16), 0.1), {}) +cnt: 8, ((T([6, 128, 88, 88], f16), 0.1), {}) +cnt: 8, ((T([6, 256, 44, 44], f16), 0.1), {}) +cnt: 8, ((T([6, 512, 22, 22], f16), 0.1), {}) +cnt: 4, ((T([6, 512, 11, 11], f16), 0.1), {}) +cnt: 1, ((T([6, 4, 352, 352], f16), 0.1), {}) +cnt: 1, ((T([6, 5, 352, 352], f16), 0.1), {}) +Operator: aten.leaky_relu_backward.default +cnt: 1, ((T([6, 5, 352, 352], f16), T([6, 5, 352, 352], f16), 0.1, False), {}) +cnt: 6, ((T([6, 32, 352, 352], f16), T([6, 32, 352, 352], f16), 0.1, False), {}) +cnt: 2, ((T([6, 32, 352, 352], f16, stride=(7929856, 123904, 352, 1)), T([6, 32, 352, 352], f16), 0.1, False), {}) +cnt: 6, ((T([6, 64, 176, 176], f16), T([6, 64, 176, 176], f16), 0.1, False), {}) +cnt: 2, ((T([6, 64, 176, 176], f16, stride=(3964928, 30976, 176, 1)), T([6, 64, 176, 176], f16), 0.1, False), {}) +cnt: 6, ((T([6, 128, 88, 88], f16), T([6, 128, 88, 88], f16), 0.1, False), {}) +cnt: 2, ((T([6, 128, 88, 88], f16, stride=(1982464, 7744, 88, 1)), T([6, 128, 88, 88], f16), 0.1, False), {}) +cnt: 6, ((T([6, 256, 44, 44], f16), T([6, 256, 44, 44], f16), 0.1, False), {}) +cnt: 2, ((T([6, 256, 44, 44], f16, stride=(991232, 1936, 44, 1)), T([6, 256, 44, 44], f16), 0.1, False), {}) +cnt: 6, ((T([6, 512, 22, 22], f16), T([6, 512, 22, 22], f16), 0.1, False), {}) +cnt: 2, ((T([6, 512, 22, 22], f16, stride=(495616, 484, 22, 1)), T([6, 512, 22, 22], f16), 0.1, False), {}) +cnt: 4, ((T([6, 512, 11, 11], f16), T([6, 512, 11, 11], f16), 0.1, False), {}) +cnt: 1, ((T([6, 4, 352, 352], f16), T([6, 4, 352, 352], f16), 0.1, False), {}) +Operator: 
aten.max_pool2d_with_indices.default +cnt: 2, ((T([6, 64, 352, 352], f16), [2, 2], [2, 2]), {}) +cnt: 2, ((T([6, 128, 176, 176], f16), [2, 2], [2, 2]), {}) +cnt: 2, ((T([6, 256, 88, 88], f16), [2, 2], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([6, 256, 44, 44], f16), T([6, 256, 88, 88], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([6, 256, 44, 44], i64)), {}) +cnt: 1, ((T([6, 128, 88, 88], f16), T([6, 128, 176, 176], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([6, 128, 88, 88], i64)), {}) +cnt: 1, ((T([6, 64, 176, 176], f16), T([6, 64, 352, 352], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([6, 64, 176, 176], i64)), {}) +Operator: aten.mean.default +cnt: 5, ((T([6, 3, 352, 352], f16),), {}) +cnt: 2, ((T([6, 2, 352, 351], f16),), {}) +cnt: 2, ((T([6, 2, 351, 352], f16),), {}) +Operator: aten.mse_loss.default +cnt: 1, ((T([6, 512, 44, 44], f16), T([6, 512, 44, 44], f16)), {}) +Operator: aten.mse_loss_backward.default +cnt: 1, ((T([], f16), T([6, 512, 44, 44], f16), T([6, 512, 44, 44], f16), 1), {}) +Operator: aten.mul.Tensor +cnt: 3, ((T([6], f16), T([6], f16)), {}) +cnt: 4, ((T([6, 1, 1, 1], f16), T([6, 2, 352, 352], f16, stride=(495616, 123904, 352, 1))), {}) +cnt: 12, ((T([6, 352, 352], f16), 2), {}) +cnt: 4, ((T([6, 1, 1, 1], f16), T([6, 1, 352, 352], f16)), {}) +cnt: 2, ((T([6, 1, 352, 352], f16), T([6, 3, 352, 352], f16)), {}) +cnt: 2, ((T([], f16), 204), {}) +cnt: 2, ((T([], f16), 102), {}) +cnt: 2, ((T([], f16), 0.005), {}) +cnt: 2, ((T([6, 2, 351, 352], f16), T([6, 2, 351, 352], f16)), {}) +cnt: 2, ((T([6, 2, 352, 351], f16), T([6, 2, 352, 351], f16)), {}) +cnt: 8, ((T([6, 3, 352, 352], f16), T([6, 3, 352, 352], f16)), {}) +cnt: 12, ((T([6, 352, 352], f16, stride=(247808, 704, 2)), 2), {}) +cnt: 4, ((T([6, 1, 352, 352], f16), T([6, 1, 1, 1], f16)), {}) +cnt: 2, ((T([6, 3, 352, 352], f16), T([6, 1, 352, 352], f16)), {}) +cnt: 4, ((T([6, 2, 352, 352], f16), T([6, 1, 1, 1], f16)), {}) +Operator: aten.neg.default 
+cnt: 1, ((T([6], f16),), {}) +cnt: 2, ((T([6, 2, 351, 352], f16),), {}) +cnt: 2, ((T([6, 2, 352, 351], f16),), {}) +cnt: 1, ((T([6, 3, 352, 352], f16),), {}) +cnt: 1, ((T([6, 1, 352, 352], f16),), {}) +Operator: aten.relu_.default +cnt: 4, ((T([6, 64, 352, 352], f16),), {}) +cnt: 4, ((T([6, 128, 176, 176], f16),), {}) +cnt: 6, ((T([6, 256, 88, 88], f16),), {}) +cnt: 4, ((T([6, 512, 44, 44], f16),), {}) +Operator: aten.rsub.Scalar +cnt: 4, ((T([6], f16), 1), {}) +cnt: 1, ((T([6, 1, 352, 352], f16), 1), {}) +Operator: aten.select_backward.default +cnt: 6, ((T([6, 352, 352], f16), [6, 2, 352, 352], 1, 1), {}) +cnt: 6, ((T([6, 352, 352], f16), [6, 2, 352, 352], 1, 0), {}) +Operator: aten.sgn.default +cnt: 2, ((T([6, 2, 351, 352], f16),), {}) +cnt: 2, ((T([6, 2, 352, 351], f16),), {}) +cnt: 5, ((T([6, 3, 352, 352], f16),), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([6, 1, 352, 352], f16, stride=(619520, 123904, 352, 1)),), {}) +Operator: aten.sigmoid_backward.default +cnt: 1, ((T([6, 1, 352, 352], f16), T([6, 1, 352, 352], f16)), {}) +Operator: aten.slice_backward.default +cnt: 4, ((T([6, 2, 351, 352], f16), [6, 2, 351, 352], 3, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([6, 2, 351, 352], f16), [6, 2, 352, 352], 2, 1, 9223372036854775807, 1), {}) +cnt: 8, ((T([6, 2, 352, 352], f16), [6, 2, 352, 352], 1, 0, 9223372036854775807, 1), {}) +cnt: 20, ((T([6, 2, 352, 352], f16), [6, 2, 352, 352], 0, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([6, 2, 351, 352], f16), [6, 2, 352, 352], 2, 0, -1, 1), {}) +cnt: 2, ((T([6, 2, 352, 351], f16), [6, 2, 352, 352], 3, 1, 9223372036854775807, 1), {}) +cnt: 8, ((T([6, 2, 352, 352], f16), [6, 2, 352, 352], 2, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([6, 2, 352, 351], f16), [6, 2, 352, 352], 3, 0, -1, 1), {}) +cnt: 12, ((T([6, 352, 352], f16), [6, 352, 352], 2, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([6, 352, 352], f16), [6, 352, 352], 1, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([6, 1, 352, 352], f16), [6, 1, 352, 
352], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([6, 1, 352, 352], f16), [6, 1, 352, 352], 2, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([6, 1, 352, 352], f16), [6, 5, 352, 352], 1, 4, 5, 1), {}) +cnt: 3, ((T([6, 5, 352, 352], f16), [6, 5, 352, 352], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([6, 2, 352, 352], f16), [6, 2, 352, 352], 3, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([6, 2, 352, 352], f16), [6, 5, 352, 352], 1, 2, 4, 1), {}) +cnt: 1, ((T([6, 2, 352, 352], f16), [6, 5, 352, 352], 1, 0, 2, 1), {}) +cnt: 1, ((T([6, 2, 352, 352], f16), [6, 4, 352, 352], 1, 2, 9223372036854775807, 1), {}) +cnt: 2, ((T([6, 4, 352, 352], f16), [6, 4, 352, 352], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([6, 2, 352, 352], f16), [6, 4, 352, 352], 1, 0, 2, 1), {}) +Operator: aten.stack.default +cnt: 6, (([T([6, 352, 352], f16), T([6, 352, 352], f16)], 3), {}) +Operator: aten.sub.Tensor +cnt: 12, ((T([6, 352, 352], f16), 0.5), {}) +cnt: 5, ((T([6, 3, 352, 352], f16), T([6, 3, 352, 352], f16)), {}) +cnt: 2, ((T([6, 2, 352, 351], f16, stride=(495616, 123904, 352, 1)), T([6, 2, 352, 351], f16, stride=(495616, 123904, 352, 1))), {}) +cnt: 2, ((T([6, 2, 351, 352], f16, stride=(495616, 123904, 352, 1)), T([6, 2, 351, 352], f16, stride=(495616, 123904, 352, 1))), {}) +Operator: aten.sum.SymInt +cnt: 3, ((T([6, 3, 352, 352], f16), [1], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([6, 3, 352, 352], f16),), {}) +cnt: 1, ((T([], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([6, 512, 44, 44], f16), T([6, 512, 44, 44], f16), 0), {}) +cnt: 3, ((T([6, 256, 88, 88], f16), T([6, 256, 88, 88], f16), 0), {}) +cnt: 2, ((T([6, 128, 176, 176], f16), T([6, 128, 176, 176], f16), 0), {}) +cnt: 2, ((T([6, 64, 352, 352], f16), T([6, 64, 352, 352], f16), 0), {}) +Operator: aten.unbind.int +cnt: 6, ((T([6, 352, 352, 2], f16), 3), {}) +Operator: aten.upsample_bilinear2d.vec +cnt: 2, ((T([6, 512, 11, 11], f16), None, False, [2.0, 2.0]), {}) +cnt: 2, ((T([6, 512, 
22, 22], f16), None, False, [2.0, 2.0]), {}) +cnt: 2, ((T([6, 256, 44, 44], f16), None, False, [2.0, 2.0]), {}) +cnt: 2, ((T([6, 128, 88, 88], f16), None, False, [2.0, 2.0]), {}) +cnt: 2, ((T([6, 64, 176, 176], f16), None, False, [2.0, 2.0]), {}) +Operator: aten.upsample_bilinear2d_backward.vec +cnt: 2, ((T([6, 64, 352, 352], f16), None, [6, 64, 176, 176], False, [2.0, 2.0]), {}) +cnt: 2, ((T([6, 128, 176, 176], f16), None, [6, 128, 88, 88], False, [2.0, 2.0]), {}) +cnt: 2, ((T([6, 256, 88, 88], f16), None, [6, 256, 44, 44], False, [2.0, 2.0]), {}) +cnt: 2, ((T([6, 512, 44, 44], f16), None, [6, 512, 22, 22], False, [2.0, 2.0]), {}) +cnt: 2, ((T([6, 512, 22, 22], f16), None, [6, 512, 11, 11], False, [2.0, 2.0]), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/alexnet_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/alexnet_training.txt new file mode 100644 index 000000000..a235e1b05 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/alexnet_training.txt @@ -0,0 +1,58 @@ +Operator: aten._adaptive_avg_pool2d.default +cnt: 1, ((T([128, 256, 6, 6], f16), [6, 6]), {}) +Operator: aten._adaptive_avg_pool2d_backward.default +cnt: 1, ((T([128, 256, 6, 6], f16), T([128, 256, 6, 6], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([4096], f16), T([128, 9216], f16), T([9216, 4096], f16, stride=(1, 9216))), {}) +cnt: 1, ((T([4096], f16), T([128, 4096], f16), T([4096, 4096], f16, stride=(1, 4096))), {}) +cnt: 1, ((T([1000], f16), T([128, 4096], f16), T([4096, 1000], f16, stride=(1, 4096))), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([64, 3, 11, 11], f16), T([64], f16), [4, 4], [2, 2], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 27, 27], f16), T([192, 64, 5, 5], f16), T([192], f16), [1, 1], [2, 2], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 192, 
13, 13], f16), T([384, 192, 3, 3], f16), T([384], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 384, 13, 13], f16), T([256, 384, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 13, 13], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 256, 13, 13], f16), T([128, 256, 13, 13], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 13, 13], f16), T([128, 384, 13, 13], f16), T([256, 384, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 384, 13, 13], f16), T([128, 192, 13, 13], f16), T([384, 192, 3, 3], f16), [384], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 192, 27, 27], f16), T([128, 64, 27, 27], f16), T([192, 64, 5, 5], f16), [192], [1, 1], [2, 2], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 55, 55], f16), T([128, 3, 224, 224], f16), T([64, 3, 11, 11], f16), [64], [4, 4], [2, 2], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 128000), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 64, 55, 55], f16), [3, 3], [2, 2]), {}) +cnt: 1, ((T([128, 192, 27, 27], f16), [3, 3], [2, 2]), {}) +cnt: 1, ((T([128, 256, 13, 13], f16), [3, 3], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 256, 6, 6], f16), T([128, 256, 13, 13], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 256, 6, 6], i64)), {}) +cnt: 1, ((T([128, 192, 13, 13], f16), T([128, 192, 27, 27], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 192, 13, 13], i64)), {}) +cnt: 1, ((T([128, 64, 27, 27], f16), 
T([128, 64, 55, 55], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([128, 64, 27, 27], i64)), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16, stride=(0, 0)), T([1000, 4096], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(0, 0)), T([128, 4096], f16)), {}) +cnt: 1, ((T([128, 4096], f16), T([4096, 4096], f16)), {}) +cnt: 1, ((T([4096, 128], f16, stride=(1, 4096)), T([128, 4096], f16)), {}) +cnt: 1, ((T([128, 4096], f16), T([4096, 9216], f16)), {}) +cnt: 1, ((T([4096, 128], f16, stride=(1, 4096)), T([128, 9216], f16)), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 64, 55, 55], f16),), {}) +cnt: 1, ((T([128, 192, 27, 27], f16),), {}) +cnt: 1, ((T([128, 384, 13, 13], f16),), {}) +cnt: 2, ((T([128, 256, 13, 13], f16),), {}) +cnt: 2, ((T([128, 4096], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16, stride=(0, 0)), [0], True), {}) +cnt: 2, ((T([128, 4096], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([128, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([128, 4096], f16), T([128, 4096], f16), 0), {}) +cnt: 2, ((T([128, 256, 13, 13], f16), T([128, 256, 13, 13], f16), 0), {}) +cnt: 1, ((T([128, 384, 13, 13], f16), T([128, 384, 13, 13], f16), 0), {}) +cnt: 1, ((T([128, 192, 27, 27], f16), T([128, 192, 27, 27], f16), 0), {}) +cnt: 1, ((T([128, 64, 55, 55], f16), T([128, 64, 55, 55], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/attention_is_all_you_need_pytorch_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/attention_is_all_you_need_pytorch_training.txt new file mode 100644 index 000000000..16700c6bb --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/attention_is_all_you_need_pytorch_training.txt @@ -0,0 +1,148 @@ +Operator: aten._softmax.default +cnt: 6, ((T([256, 8, 33, 33], f16), -1, False), {}) +cnt: 6, ((T([256, 8, 31, 31], f16), -1, False), {}) +cnt: 6, ((T([256, 8, 31, 
33], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 6, ((T([256, 8, 31, 33], f16), T([256, 8, 31, 33], f16), -1, f16), {}) +cnt: 6, ((T([256, 8, 31, 31], f16), T([256, 8, 31, 31], f16), -1, f16), {}) +cnt: 6, ((T([256, 8, 33, 33], f16), T([256, 8, 33, 33], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([1, 31, 31], f32),), {'dtype': torch.bool}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([8448, 512], f16), [256, 33, 512]), {}) +cnt: 24, ((T([256, 8, 33, 64], f16), [2048, 33, 64]), {}) +cnt: 12, ((T([256, 8, 64, 33], f16), [2048, 64, 33]), {}) +cnt: 6, ((T([2048, 33, 33], f16), [256, 8, 33, 33]), {}) +cnt: 6, ((T([2048, 33, 64], f16), [256, 8, 33, 64]), {}) +cnt: 36, ((T([7936, 512], f16), [256, 31, 512]), {}) +cnt: 30, ((T([256, 8, 31, 64], f16), [2048, 31, 64]), {}) +cnt: 6, ((T([256, 8, 64, 31], f16), [2048, 64, 31]), {}) +cnt: 6, ((T([2048, 31, 31], f16), [256, 8, 31, 31]), {}) +cnt: 12, ((T([2048, 31, 64], f16), [256, 8, 31, 64]), {}) +cnt: 6, ((T([2048, 31, 33], f16), [256, 8, 31, 33]), {}) +cnt: 1, ((T([7936, 9521], f16), [256, 31, 9521]), {}) +cnt: 18, ((T([256, 33, 8, 64], f16), [256, 33, 512]), {}) +cnt: 12, ((T([256, 33, 512], f16), [8448, 512]), {}) +cnt: 18, ((T([256, 31, 8, 64], f16), [256, 31, 512]), {}) +cnt: 6, ((T([256, 31, 512], f16), [7936, 512]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([256, 33, 512], f16), T([1, 33, 512], f16)), {}) +cnt: 1, ((T([256, 31, 512], f16), T([1, 31, 512], f16)), {}) +cnt: 30, ((T([256, 31, 512], f16), T([256, 31, 512], f16)), {}) +cnt: 35, ((T([256, 33, 512], f16), T([256, 33, 512], f16)), {}) +Operator: aten.add_.Tensor +cnt: 12, ((T([256, 33, 512], f16), T([256, 33, 512], f16)), {}) +cnt: 18, ((T([256, 31, 512], f16), T([256, 31, 512], f16)), {}) +Operator: aten.addmm.default +cnt: 6, ((T([2048], f16), T([8448, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 6, ((T([512], f16), T([8448, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) 
+cnt: 6, ((T([2048], f16), T([7936, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 6, ((T([512], f16), T([7936, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +Operator: aten.bitwise_and.Tensor +cnt: 1, ((T([256, 1, 31], b8, stride=(1, 7936, 256)), T([1, 31, 31], b8)), {}) +Operator: aten.bmm.default +cnt: 6, ((T([2048, 33, 64], f16), T([2048, 64, 33], f16)), {}) +cnt: 6, ((T([2048, 33, 33], f16), T([2048, 33, 64], f16)), {}) +cnt: 6, ((T([2048, 31, 64], f16), T([2048, 64, 31], f16)), {}) +cnt: 6, ((T([2048, 31, 31], f16), T([2048, 31, 64], f16)), {}) +cnt: 6, ((T([2048, 31, 64], f16), T([2048, 64, 33], f16)), {}) +cnt: 6, ((T([2048, 31, 33], f16), T([2048, 33, 64], f16)), {}) +cnt: 6, ((T([2048, 33, 31], f16, stride=(1023, 1, 33)), T([2048, 31, 64], f16)), {}) +cnt: 6, ((T([2048, 31, 64], f16), T([2048, 64, 33], f16, stride=(2112, 1, 64))), {}) +cnt: 6, ((T([2048, 64, 31], f16, stride=(1984, 1, 64)), T([2048, 31, 33], f16)), {}) +cnt: 6, ((T([2048, 31, 33], f16), T([2048, 33, 64], f16, stride=(2112, 1, 33))), {}) +cnt: 6, ((T([2048, 31, 31], f16, stride=(961, 1, 31)), T([2048, 31, 64], f16)), {}) +cnt: 6, ((T([2048, 31, 64], f16), T([2048, 64, 31], f16, stride=(1984, 1, 64))), {}) +cnt: 6, ((T([2048, 64, 31], f16, stride=(1984, 1, 64)), T([2048, 31, 31], f16)), {}) +cnt: 6, ((T([2048, 31, 31], f16), T([2048, 31, 64], f16, stride=(1984, 1, 31))), {}) +cnt: 6, ((T([2048, 33, 33], f16, stride=(1089, 1, 33)), T([2048, 33, 64], f16)), {}) +cnt: 6, ((T([2048, 33, 64], f16), T([2048, 64, 33], f16, stride=(2112, 1, 64))), {}) +cnt: 6, ((T([2048, 64, 33], f16, stride=(2112, 1, 64)), T([2048, 33, 33], f16)), {}) +cnt: 6, ((T([2048, 33, 33], f16), T([2048, 33, 64], f16, stride=(2112, 1, 33))), {}) +Operator: aten.clone.default +cnt: 1, ((T([256, 33], i64, stride=(1, 256)),), {}) +cnt: 1, ((T([256, 31], i64, stride=(1, 256)),), {}) +cnt: 1, ((T([1, 33, 512], f16),), {}) +cnt: 1, ((T([1, 31, 512], f16),), {}) +Operator: aten.copy_.default +cnt: 1, 
((T([256, 33], i64, stride=(1, 256)), T([256, 33], i64, stride=(1, 256))), {}) +cnt: 1, ((T([256, 31], i64, stride=(1, 256)), T([256, 31], i64, stride=(1, 256))), {}) +cnt: 12, ((T([256, 31, 512], f16), T([256, 31, 512], f16)), {}) +cnt: 6, ((T([7936, 512], f16), T([7936, 512], f16)), {}) +cnt: 12, ((T([256, 33, 512], f16), T([256, 33, 512], f16)), {}) +cnt: 6, ((T([8448, 512], f16), T([8448, 512], f16)), {}) +Operator: aten.div.Tensor +cnt: 6, ((T([256, 8, 33, 64], f16, stride=(16896, 64, 512, 1)), 8.0), {}) +cnt: 12, ((T([256, 8, 31, 64], f16, stride=(15872, 64, 512, 1)), 8.0), {}) +cnt: 2, ((T([], f16), 75558656), {}) +cnt: 12, ((T([256, 8, 31, 64], f16), 8.0), {}) +cnt: 6, ((T([256, 8, 33, 64], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([9521, 512], f16), T([256, 33], i64, stride=(1, 256)), 1), {}) +cnt: 1, ((T([9521, 512], f16), T([256, 31], i64, stride=(1, 256)), 1), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([256, 31, 512], f16), T([256, 31], i64, stride=(1, 256)), 9521, 1, False), {}) +cnt: 1, ((T([256, 33, 512], f16), T([256, 33], i64, stride=(1, 256)), 9521, 1, False), {}) +Operator: aten.eq.Scalar +cnt: 12, ((T([256, 1, 1, 33], b8, stride=(1, 8448, 8448, 256)), 0), {}) +cnt: 6, ((T([256, 1, 31, 31], b8, stride=(1, 7936, 256, 7936)), 0), {}) +Operator: aten.masked_fill.Scalar +cnt: 6, ((T([256, 8, 33, 33], f16), T([256, 1, 1, 33], b8, stride=(1, 8448, 8448, 256)), -65504.0), {}) +cnt: 6, ((T([256, 8, 31, 31], f16), T([256, 1, 31, 31], b8, stride=(1, 7936, 256, 7936)), -65504.0), {}) +cnt: 6, ((T([256, 8, 31, 33], f16), T([256, 1, 1, 33], b8, stride=(1, 8448, 8448, 256)), -65504.0), {}) +cnt: 6, ((T([256, 8, 31, 33], f16), T([256, 1, 1, 33], b8, stride=(1, 8448, 8448, 256)), 0), {}) +cnt: 6, ((T([256, 8, 31, 31], f16), T([256, 1, 31, 31], b8, stride=(1, 7936, 256, 7936)), 0), {}) +cnt: 6, ((T([256, 8, 33, 33], f16), T([256, 1, 1, 33], b8, stride=(1, 8448, 8448, 256)), 0), {}) +Operator: aten.mm.default +cnt: 36, 
((T([8448, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 36, ((T([7936, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 1, ((T([7936, 512], f16), T([512, 9521], f16, stride=(1, 512))), {}) +cnt: 1, ((T([9521, 7936], f16, stride=(1, 9521)), T([7936, 512], f16)), {}) +cnt: 1, ((T([7936, 9521], f16), T([9521, 512], f16)), {}) +cnt: 6, ((T([7936, 512], f16), T([512, 2048], f16)), {}) +cnt: 6, ((T([512, 7936], f16, stride=(1, 512)), T([7936, 2048], f16)), {}) +cnt: 6, ((T([7936, 2048], f16), T([2048, 512], f16)), {}) +cnt: 6, ((T([2048, 7936], f16, stride=(1, 2048)), T([7936, 512], f16)), {}) +cnt: 36, ((T([512, 7936], f16, stride=(1, 512)), T([7936, 512], f16)), {}) +cnt: 36, ((T([7936, 512], f16), T([512, 512], f16)), {}) +cnt: 36, ((T([512, 8448], f16, stride=(1, 512)), T([8448, 512], f16)), {}) +cnt: 36, ((T([8448, 512], f16), T([512, 512], f16)), {}) +cnt: 6, ((T([8448, 512], f16), T([512, 2048], f16)), {}) +cnt: 6, ((T([512, 8448], f16, stride=(1, 512)), T([8448, 2048], f16)), {}) +cnt: 6, ((T([8448, 2048], f16), T([2048, 512], f16)), {}) +cnt: 6, ((T([2048, 8448], f16, stride=(1, 2048)), T([8448, 512], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([256, 31, 9521], f16), 1.0), {}) +cnt: 1, ((T([256, 31, 9521], f16, stride=(0, 0, 0)), 1.0), {}) +Operator: aten.native_layer_norm.default +cnt: 13, ((T([256, 33, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {}) +cnt: 19, ((T([256, 31, 512], f16), [512], T([512], f16), T([512], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 19, ((T([256, 31, 512], f16), T([256, 31, 512], f16), [512], T([256, 31, 1], f32), T([256, 31, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 13, ((T([256, 33, 512], f16), T([256, 33, 512], f16), [512], T([256, 33, 1], f32), T([256, 33, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([256, 33], i64, stride=(1, 256)), 1), {}) +cnt: 1, ((T([256, 31], 
i64, stride=(1, 256)), 1), {}) +Operator: aten.new_empty_strided.default +cnt: 6, ((T([7936, 512], f16), [7936, 512], [512, 1]), {}) +cnt: 6, ((T([8448, 512], f16), [8448, 512], [512, 1]), {}) +Operator: aten.new_zeros.default +cnt: 6, ((T([256, 31, 512], f16), [4063232]), {}) +cnt: 6, ((T([256, 33, 512], f16), [4325376]), {}) +Operator: aten.relu.default +cnt: 6, ((T([256, 33, 2048], f16),), {}) +cnt: 6, ((T([256, 31, 2048], f16),), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([1, 31, 31], f32), 1), {}) +Operator: aten.sum.SymInt +cnt: 6, ((T([7936, 512], f16), [0], True), {}) +cnt: 6, ((T([7936, 2048], f16), [0], True), {}) +cnt: 6, ((T([8448, 512], f16), [0], True), {}) +cnt: 6, ((T([8448, 2048], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([7936, 9521], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 6, ((T([256, 31, 2048], f16), T([256, 31, 2048], f16), 0), {}) +cnt: 6, ((T([256, 33, 2048], f16), T([256, 33, 2048], f16), 0), {}) +Operator: aten.triu.default +cnt: 1, ((T([1, 31, 31], f32), 1), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/dcgan_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/dcgan_training.txt new file mode 100644 index 000000000..0adf5dcbf --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/dcgan_training.txt @@ -0,0 +1,42 @@ +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 64, 64], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 64, 64], f16), T([64, 3, 4, 4], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 32, 32], f16), T([128, 64, 4, 4], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 16, 16], f16), T([256, 128, 4, 4], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 8, 8], f16), T([512, 256, 4, 4], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 4, 4], 
f16), T([1, 512, 4, 4], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([32, 1, 1, 1], f16), T([32, 512, 4, 4], f16), T([1, 512, 4, 4], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 4, 4], f16), T([32, 256, 8, 8], f16), T([512, 256, 4, 4], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 8, 8], f16), T([32, 128, 16, 16], f16), T([256, 128, 4, 4], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 16, 16], f16), T([32, 64, 32, 32], f16), T([128, 64, 4, 4], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 32, 32], f16), T([32, 3, 64, 64], f16), T([64, 3, 4, 4], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 64, 64], f16), T([32, 3, 64, 64], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 32), {}) +Operator: aten.leaky_relu_.default +cnt: 1, ((T([32, 64, 32, 32], f16), 0.2), {}) +cnt: 1, ((T([32, 128, 16, 16], f16), 0.2), {}) +cnt: 1, ((T([32, 256, 8, 8], f16), 0.2), {}) +cnt: 1, ((T([32, 512, 4, 4], f16), 0.2), {}) +Operator: aten.leaky_relu_backward.default +cnt: 1, ((T([32, 512, 4, 4], f16), T([32, 512, 4, 4], f16), 0.2, True), {}) +cnt: 1, ((T([32, 256, 8, 8], f16), T([32, 256, 8, 8], f16), 0.2, True), {}) +cnt: 1, ((T([32, 128, 16, 16], f16), T([32, 128, 16, 16], f16), 0.2, True), {}) +cnt: 1, ((T([32, 64, 32, 32], f16), T([32, 64, 32, 32], f16), 0.2, True), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([32, 128, 16, 16], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 256, 8, 8], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 512, 4, 4], f16), T([512], f16), 
T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([32, 512, 4, 4], f16), T([32, 512, 4, 4], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 256, 8, 8], f16), T([32, 256, 8, 8], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 16, 16], f16), T([32, 128, 16, 16], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([32, 1, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 1, ((T([32, 1, 1, 1], f16, stride=(0, 0, 0, 0)), T([32, 1, 1, 1], f16)), {}) +Operator: aten.sum.default +cnt: 1, ((T([32, 1, 1, 1], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/densenet121_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/densenet121_training.txt new file mode 100644 index 000000000..80f89b783 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/densenet121_training.txt @@ -0,0 +1,609 @@ +Operator: aten.add.Tensor +cnt: 1, ((T([4, 512, 7, 7], f16, stride=(50176, 49, 7, 1)), T([4, 512, 7, 7], f16, stride=(48608, 49, 7, 1))), {}) +cnt: 15, ((T([4, 32, 7, 7], f16, stride=(50176, 49, 7, 1)), T([4, 32, 7, 7], f16, stride=(48608, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(47040, 49, 7, 1))), {}) +cnt: 14, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(47040, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(45472, 49, 7, 1))), {}) +cnt: 13, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(45472, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(43904, 49, 7, 1))), {}) +cnt: 
12, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(43904, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(42336, 49, 7, 1))), {}) +cnt: 11, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(42336, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(40768, 49, 7, 1))), {}) +cnt: 10, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(40768, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(39200, 49, 7, 1))), {}) +cnt: 9, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(39200, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(37632, 49, 7, 1))), {}) +cnt: 8, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(37632, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(36064, 49, 7, 1))), {}) +cnt: 7, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(36064, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(34496, 49, 7, 1))), {}) +cnt: 6, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(34496, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(32928, 49, 7, 1))), {}) +cnt: 5, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(32928, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(31360, 49, 7, 1))), {}) +cnt: 4, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(31360, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(29792, 49, 7, 1))), {}) +cnt: 3, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(29792, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(28224, 49, 7, 1))), {}) +cnt: 2, ((T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16, stride=(28224, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16, stride=(26656, 49, 7, 1))), {}) +cnt: 1, ((T([4, 32, 7, 7], f16), T([4, 32, 
7, 7], f16, stride=(26656, 49, 7, 1))), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16)), {}) +cnt: 1, ((T([4, 256, 14, 14], f16, stride=(200704, 196, 14, 1)), T([4, 256, 14, 14], f16, stride=(194432, 196, 14, 1))), {}) +cnt: 23, ((T([4, 32, 14, 14], f16, stride=(200704, 196, 14, 1)), T([4, 32, 14, 14], f16, stride=(194432, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(188160, 196, 14, 1))), {}) +cnt: 22, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(188160, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(181888, 196, 14, 1))), {}) +cnt: 21, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(181888, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(175616, 196, 14, 1))), {}) +cnt: 20, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(175616, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(169344, 196, 14, 1))), {}) +cnt: 19, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(169344, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(163072, 196, 14, 1))), {}) +cnt: 18, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(163072, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(156800, 196, 14, 1))), {}) +cnt: 17, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(156800, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(150528, 196, 14, 1))), {}) +cnt: 16, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(150528, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(144256, 196, 14, 1))), {}) +cnt: 15, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(144256, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(137984, 196, 14, 1))), {}) 
+cnt: 14, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(137984, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(131712, 196, 14, 1))), {}) +cnt: 13, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(131712, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(125440, 196, 14, 1))), {}) +cnt: 12, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(125440, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(119168, 196, 14, 1))), {}) +cnt: 11, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(119168, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(112896, 196, 14, 1))), {}) +cnt: 10, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(112896, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(106624, 196, 14, 1))), {}) +cnt: 9, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(106624, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(100352, 196, 14, 1))), {}) +cnt: 8, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(100352, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(94080, 196, 14, 1))), {}) +cnt: 7, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(94080, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(87808, 196, 14, 1))), {}) +cnt: 6, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(87808, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(81536, 196, 14, 1))), {}) +cnt: 5, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(81536, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(75264, 196, 14, 1))), {}) +cnt: 4, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(75264, 196, 
14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(68992, 196, 14, 1))), {}) +cnt: 3, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(68992, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(62720, 196, 14, 1))), {}) +cnt: 2, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(62720, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16, stride=(56448, 196, 14, 1))), {}) +cnt: 1, ((T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16, stride=(56448, 196, 14, 1))), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16)), {}) +cnt: 1, ((T([4, 128, 28, 28], f16, stride=(401408, 784, 28, 1)), T([4, 128, 28, 28], f16, stride=(376320, 784, 28, 1))), {}) +cnt: 11, ((T([4, 32, 28, 28], f16, stride=(401408, 784, 28, 1)), T([4, 32, 28, 28], f16, stride=(376320, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16, stride=(351232, 784, 28, 1))), {}) +cnt: 10, ((T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16, stride=(351232, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16, stride=(326144, 784, 28, 1))), {}) +cnt: 9, ((T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16, stride=(326144, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16, stride=(301056, 784, 28, 1))), {}) +cnt: 8, ((T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16, stride=(301056, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16, stride=(275968, 784, 28, 1))), {}) +cnt: 7, ((T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16, stride=(275968, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16, stride=(250880, 784, 28, 1))), {}) +cnt: 6, ((T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16, stride=(250880, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16, stride=(225792, 784, 28, 1))), {}) +cnt: 5, ((T([4, 32, 28, 28], f16), 
T([4, 32, 28, 28], f16, stride=(225792, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16, stride=(200704, 784, 28, 1))), {}) +cnt: 4, ((T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16, stride=(200704, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16, stride=(175616, 784, 28, 1))), {}) +cnt: 3, ((T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16, stride=(175616, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16, stride=(150528, 784, 28, 1))), {}) +cnt: 2, ((T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16, stride=(150528, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16, stride=(125440, 784, 28, 1))), {}) +cnt: 1, ((T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16, stride=(125440, 784, 28, 1))), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16)), {}) +cnt: 1, ((T([4, 64, 56, 56], f16, stride=(802816, 3136, 56, 1)), T([4, 64, 56, 56], f16, stride=(702464, 3136, 56, 1))), {}) +cnt: 5, ((T([4, 32, 56, 56], f16, stride=(802816, 3136, 56, 1)), T([4, 32, 56, 56], f16, stride=(702464, 3136, 56, 1))), {}) +cnt: 1, ((T([4, 64, 56, 56], f16), T([4, 64, 56, 56], f16, stride=(602112, 3136, 56, 1))), {}) +cnt: 4, ((T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16, stride=(602112, 3136, 56, 1))), {}) +cnt: 1, ((T([4, 64, 56, 56], f16), T([4, 64, 56, 56], f16, stride=(501760, 3136, 56, 1))), {}) +cnt: 3, ((T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16, stride=(501760, 3136, 56, 1))), {}) +cnt: 1, ((T([4, 64, 56, 56], f16), T([4, 64, 56, 56], f16, stride=(401408, 3136, 56, 1))), {}) +cnt: 2, ((T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16, stride=(401408, 3136, 56, 1))), {}) +cnt: 1, ((T([4, 64, 56, 56], f16), T([4, 64, 56, 56], f16, stride=(301056, 3136, 56, 1))), {}) +cnt: 1, ((T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16, stride=(301056, 3136, 56, 1))), {}) +cnt: 1, ((T([4, 64, 56, 56], f16), T([4, 64, 56, 56], f16)), {}) +Operator: 
aten.addmm.default +cnt: 1, ((T([1000], f16), T([4, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([4, 128, 56, 56], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([4, 256, 28, 28], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([4, 512, 14, 14], f16), [2, 2], [2, 2]), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 14, 14], f16), [2, 2], [2, 2], [0, 0], False, True, None), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 28, 28], f16), [2, 2], [2, 2], [0, 0], False, True, None), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 56, 56], f16), [2, 2], [2, 2], [0, 0], False, True, None), {}) +Operator: aten.cat.default +cnt: 1, (([T([4, 64, 56, 56], f16)], 1), {}) +cnt: 1, (([T([4, 64, 56, 56], f16), T([4, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([4, 64, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([4, 64, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([4, 64, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([4, 64, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([4, 64, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16), T([4, 32, 56, 56], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), 
T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 128, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], 
f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16), T([4, 32, 28, 28], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), 
T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 
14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), 
T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 
32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 256, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16), T([4, 32, 14, 14], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], 
f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), 
T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +cnt: 1, (([T([4, 512, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16), T([4, 32, 7, 7], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([4, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([4, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 64, 56, 56], f16), T([128, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([4, 128, 56, 56], f16), T([32, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 96, 56, 56], f16), T([128, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 128, 56, 56], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 160, 56, 56], f16), T([128, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 192, 56, 56], f16), T([128, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 224, 56, 56], f16), T([128, 224, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 56, 56], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([128, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 12, ((T([4, 
128, 28, 28], f16), T([32, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 160, 28, 28], f16), T([128, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 192, 28, 28], f16), T([128, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 224, 28, 28], f16), T([128, 224, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 28, 28], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 288, 28, 28], f16), T([128, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 320, 28, 28], f16), T([128, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 352, 28, 28], f16), T([128, 352, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 384, 28, 28], f16), T([128, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 416, 28, 28], f16), T([128, 416, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 448, 28, 28], f16), T([128, 448, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 480, 28, 28], f16), T([128, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 512, 28, 28], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 24, ((T([4, 128, 14, 14], f16), T([32, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 288, 14, 14], f16), T([128, 288, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 320, 14, 14], f16), T([128, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 352, 14, 14], f16), T([128, 
352, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 384, 14, 14], f16), T([128, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 416, 14, 14], f16), T([128, 416, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 448, 14, 14], f16), T([128, 448, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 480, 14, 14], f16), T([128, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 512, 14, 14], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 544, 14, 14], f16), T([128, 544, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 576, 14, 14], f16), T([128, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 608, 14, 14], f16), T([128, 608, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 640, 14, 14], f16), T([128, 640, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 672, 14, 14], f16), T([128, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 704, 14, 14], f16), T([128, 704, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 736, 14, 14], f16), T([128, 736, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 768, 14, 14], f16), T([128, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 800, 14, 14], f16), T([128, 800, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 832, 14, 14], f16), T([128, 832, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 864, 14, 14], f16), T([128, 864, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 896, 14, 14], f16), T([128, 896, 1, 1], f16), None, [1, 
1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 928, 14, 14], f16), T([128, 928, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 960, 14, 14], f16), T([128, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 992, 14, 14], f16), T([128, 992, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 16, ((T([4, 128, 7, 7], f16), T([32, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 544, 7, 7], f16), T([128, 544, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 576, 7, 7], f16), T([128, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 608, 7, 7], f16), T([128, 608, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 640, 7, 7], f16), T([128, 640, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 672, 7, 7], f16), T([128, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 704, 7, 7], f16), T([128, 704, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 736, 7, 7], f16), T([128, 736, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 768, 7, 7], f16), T([128, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 800, 7, 7], f16), T([128, 800, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 832, 7, 7], f16), T([128, 832, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 864, 7, 7], f16), T([128, 864, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, 
((T([4, 896, 7, 7], f16), T([128, 896, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 928, 7, 7], f16), T([128, 928, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 960, 7, 7], f16), T([128, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 992, 7, 7], f16), T([128, 992, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([4, 32, 7, 7], f16, stride=(50176, 49, 7, 1)), T([4, 128, 7, 7], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 992, 7, 7], f16), T([128, 992, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 15, ((T([4, 32, 7, 7], f16), T([4, 128, 7, 7], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 960, 7, 7], f16), T([128, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 928, 7, 7], f16), T([128, 928, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 896, 7, 7], f16), T([128, 896, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 864, 7, 7], f16), T([128, 864, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 832, 7, 7], f16), T([128, 832, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 800, 7, 7], f16), T([128, 800, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 768, 7, 7], f16), 
T([128, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 736, 7, 7], f16), T([128, 736, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 704, 7, 7], f16), T([128, 704, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 672, 7, 7], f16), T([128, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 640, 7, 7], f16), T([128, 640, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 608, 7, 7], f16), T([128, 608, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 576, 7, 7], f16), T([128, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 544, 7, 7], f16), T([128, 544, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 7, 7], f16), T([4, 512, 7, 7], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 512, 14, 14], f16), T([4, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 32, 14, 14], f16, stride=(200704, 196, 14, 1)), T([4, 128, 14, 14], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 992, 14, 14], f16), T([128, 992, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 23, ((T([4, 32, 14, 14], f16), T([4, 128, 14, 14], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], 
False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 960, 14, 14], f16), T([128, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 928, 14, 14], f16), T([128, 928, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 896, 14, 14], f16), T([128, 896, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 864, 14, 14], f16), T([128, 864, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 832, 14, 14], f16), T([128, 832, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 800, 14, 14], f16), T([128, 800, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 768, 14, 14], f16), T([128, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 736, 14, 14], f16), T([128, 736, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 704, 14, 14], f16), T([128, 704, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 672, 14, 14], f16), T([128, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 640, 14, 14], f16), T([128, 640, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 608, 14, 14], f16), T([128, 608, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, 
((T([4, 128, 14, 14], f16), T([4, 576, 14, 14], f16), T([128, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 544, 14, 14], f16), T([128, 544, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 512, 14, 14], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 480, 14, 14], f16), T([128, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 448, 14, 14], f16), T([128, 448, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 416, 14, 14], f16), T([128, 416, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 384, 14, 14], f16), T([128, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 352, 14, 14], f16), T([128, 352, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 320, 14, 14], f16), T([128, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 288, 14, 14], f16), T([128, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 14, 14], f16), T([4, 256, 14, 14], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 256, 28, 28], f16), T([4, 512, 28, 28], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 32, 28, 28], f16, stride=(401408, 784, 28, 1)), 
T([4, 128, 28, 28], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 480, 28, 28], f16), T([128, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 11, ((T([4, 32, 28, 28], f16), T([4, 128, 28, 28], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 448, 28, 28], f16), T([128, 448, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 416, 28, 28], f16), T([128, 416, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 384, 28, 28], f16), T([128, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 352, 28, 28], f16), T([128, 352, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 320, 28, 28], f16), T([128, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 288, 28, 28], f16), T([128, 288, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 256, 28, 28], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 224, 28, 28], f16), T([128, 224, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 192, 28, 28], f16), T([128, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 160, 28, 28], f16), T([128, 160, 1, 1], f16), [0], 
[1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 56, 56], f16), T([4, 256, 56, 56], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 32, 56, 56], f16, stride=(802816, 3136, 56, 1)), T([4, 128, 56, 56], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 56, 56], f16), T([4, 224, 56, 56], f16), T([128, 224, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([4, 32, 56, 56], f16), T([4, 128, 56, 56], f16), T([32, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 56, 56], f16), T([4, 192, 56, 56], f16), T([128, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 56, 56], f16), T([4, 160, 56, 56], f16), T([128, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 56, 56], f16), T([4, 128, 56, 56], f16), T([128, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 56, 56], f16), T([4, 96, 56, 56], f16), T([128, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 56, 56], f16), T([4, 64, 56, 56], f16), T([128, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 64, 112, 112], f16), T([4, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([4, 3, 224, 224], f16), T([4, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar 
+cnt: 1, ((T([4, 1024, 7, 7], f16, stride=(1024, 1, 0, 0)), 49), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 4000), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([4, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([4, 64, 56, 56], f16), T([4, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([4, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([4, 1024, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([4, 1000], f16, stride=(0, 0)), T([1000, 1024], f16)), {}) +cnt: 1, ((T([1000, 4], f16, stride=(0, 0)), T([4, 1024], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([4, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 7, ((T([4, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 160, 56, 56], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 192, 56, 56], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 224, 56, 56], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 13, ((T([4, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 160, 28, 28], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], 
f16), T([192], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 224, 28, 28], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 288, 28, 28], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 320, 28, 28], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 352, 28, 28], f16), T([352], f16), T([352], f16), T([352], f16), T([352], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 384, 28, 28], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 416, 28, 28], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 448, 28, 28], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 480, 28, 28], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 24, ((T([4, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 288, 14, 14], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 320, 14, 14], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 352, 14, 14], f16), T([352], f16), T([352], f16), T([352], f16), T([352], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 
416, 14, 14], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 448, 14, 14], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 544, 14, 14], f16), T([544], f16), T([544], f16), T([544], f16), T([544], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 608, 14, 14], f16), T([608], f16), T([608], f16), T([608], f16), T([608], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 640, 14, 14], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 704, 14, 14], f16), T([704], f16), T([704], f16), T([704], f16), T([704], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 736, 14, 14], f16), T([736], f16), T([736], f16), T([736], f16), T([736], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 768, 14, 14], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 800, 14, 14], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 832, 14, 14], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 864, 14, 14], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 896, 14, 14], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 928, 14, 14], f16), T([928], f16), T([928], f16), T([928], 
f16), T([928], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 960, 14, 14], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 992, 14, 14], f16), T([992], f16), T([992], f16), T([992], f16), T([992], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 16, ((T([4, 128, 7, 7], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 544, 7, 7], f16), T([544], f16), T([544], f16), T([544], f16), T([544], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 608, 7, 7], f16), T([608], f16), T([608], f16), T([608], f16), T([608], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 640, 7, 7], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 704, 7, 7], f16), T([704], f16), T([704], f16), T([704], f16), T([704], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 736, 7, 7], f16), T([736], f16), T([736], f16), T([736], f16), T([736], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 768, 7, 7], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 800, 7, 7], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 832, 7, 7], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 864, 7, 7], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 896, 7, 7], f16), 
T([896], f16), T([896], f16), T([896], f16), T([896], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 928, 7, 7], f16), T([928], f16), T([928], f16), T([928], f16), T([928], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 992, 7, 7], f16), T([992], f16), T([992], f16), T([992], f16), T([992], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([4, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([4, 1024, 7, 7], f16), T([4, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), False, 1e-05, [True, True, True]), {}) +cnt: 16, ((T([4, 128, 7, 7], f16), T([4, 128, 7, 7], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 992, 7, 7], f16), T([4, 992, 7, 7], f16), T([992], f16), T([992], f16), T([992], f16), T([992], f32), T([992], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 960, 7, 7], f16), T([4, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 928, 7, 7], f16), T([4, 928, 7, 7], f16), T([928], f16), T([928], f16), T([928], f16), T([928], f32), T([928], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 896, 7, 7], f16), T([4, 896, 7, 7], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f32), T([896], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 864, 7, 7], f16), T([4, 864, 7, 7], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f32), T([864], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 832, 7, 7], f16), T([4, 832, 7, 7], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f32), T([832], f32), False, 1e-05, [True, True, 
True]), {}) +cnt: 1, ((T([4, 800, 7, 7], f16), T([4, 800, 7, 7], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f32), T([800], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 768, 7, 7], f16), T([4, 768, 7, 7], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 736, 7, 7], f16), T([4, 736, 7, 7], f16), T([736], f16), T([736], f16), T([736], f16), T([736], f32), T([736], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 704, 7, 7], f16), T([4, 704, 7, 7], f16), T([704], f16), T([704], f16), T([704], f16), T([704], f32), T([704], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 672, 7, 7], f16), T([4, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 640, 7, 7], f16), T([4, 640, 7, 7], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f32), T([640], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 608, 7, 7], f16), T([4, 608, 7, 7], f16), T([608], f16), T([608], f16), T([608], f16), T([608], f32), T([608], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 576, 7, 7], f16), T([4, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 544, 7, 7], f16), T([4, 544, 7, 7], f16), T([544], f16), T([544], f16), T([544], f16), T([544], f32), T([544], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 1024, 14, 14], f16), T([4, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), False, 1e-05, [True, True, True]), {}) +cnt: 24, ((T([4, 128, 14, 14], f16), T([4, 128, 14, 14], f16), T([128], 
f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 992, 14, 14], f16), T([4, 992, 14, 14], f16), T([992], f16), T([992], f16), T([992], f16), T([992], f32), T([992], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 960, 14, 14], f16), T([4, 960, 14, 14], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 928, 14, 14], f16), T([4, 928, 14, 14], f16), T([928], f16), T([928], f16), T([928], f16), T([928], f32), T([928], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 896, 14, 14], f16), T([4, 896, 14, 14], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f32), T([896], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 864, 14, 14], f16), T([4, 864, 14, 14], f16), T([864], f16), T([864], f16), T([864], f16), T([864], f32), T([864], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 832, 14, 14], f16), T([4, 832, 14, 14], f16), T([832], f16), T([832], f16), T([832], f16), T([832], f32), T([832], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 800, 14, 14], f16), T([4, 800, 14, 14], f16), T([800], f16), T([800], f16), T([800], f16), T([800], f32), T([800], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 768, 14, 14], f16), T([4, 768, 14, 14], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 736, 14, 14], f16), T([4, 736, 14, 14], f16), T([736], f16), T([736], f16), T([736], f16), T([736], f32), T([736], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 704, 14, 14], f16), T([4, 704, 14, 14], f16), T([704], f16), T([704], f16), T([704], f16), T([704], f32), T([704], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 672, 14, 14], f16), T([4, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), 
T([672], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 640, 14, 14], f16), T([4, 640, 14, 14], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f32), T([640], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 608, 14, 14], f16), T([4, 608, 14, 14], f16), T([608], f16), T([608], f16), T([608], f16), T([608], f32), T([608], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 576, 14, 14], f16), T([4, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 544, 14, 14], f16), T([4, 544, 14, 14], f16), T([544], f16), T([544], f16), T([544], f16), T([544], f32), T([544], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 512, 14, 14], f16), T([4, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 480, 14, 14], f16), T([4, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 448, 14, 14], f16), T([4, 448, 14, 14], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 416, 14, 14], f16), T([4, 416, 14, 14], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f32), T([416], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 384, 14, 14], f16), T([4, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 352, 14, 14], f16), T([4, 352, 14, 14], f16), T([352], f16), T([352], f16), T([352], f16), T([352], f32), T([352], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 320, 14, 14], f16), T([4, 320, 14, 14], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), False, 1e-05, [True, True, True]), 
{}) +cnt: 1, ((T([4, 288, 14, 14], f16), T([4, 288, 14, 14], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f32), T([288], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 512, 28, 28], f16), T([4, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 13, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 480, 28, 28], f16), T([4, 480, 28, 28], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 448, 28, 28], f16), T([4, 448, 28, 28], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 416, 28, 28], f16), T([4, 416, 28, 28], f16), T([416], f16), T([416], f16), T([416], f16), T([416], f32), T([416], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 384, 28, 28], f16), T([4, 384, 28, 28], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 352, 28, 28], f16), T([4, 352, 28, 28], f16), T([352], f16), T([352], f16), T([352], f16), T([352], f32), T([352], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 320, 28, 28], f16), T([4, 320, 28, 28], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 288, 28, 28], f16), T([4, 288, 28, 28], f16), T([288], f16), T([288], f16), T([288], f16), T([288], f32), T([288], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 256, 28, 28], f16), T([4, 256, 
28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 224, 28, 28], f16), T([4, 224, 28, 28], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 192, 28, 28], f16), T([4, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 160, 28, 28], f16), T([4, 160, 28, 28], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 256, 56, 56], f16), T([4, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([4, 128, 56, 56], f16), T([4, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 224, 56, 56], f16), T([4, 224, 56, 56], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 192, 56, 56], f16), T([4, 192, 56, 56], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 160, 56, 56], f16), T([4, 160, 56, 56], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 96, 56, 56], f16), T([4, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 64, 56, 56], f16), T([4, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([4, 64, 112, 112], f16), T([4, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], 
f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.relu_.default +cnt: 1, ((T([4, 64, 112, 112], f16),), {}) +cnt: 1, ((T([4, 64, 56, 56], f16),), {}) +cnt: 7, ((T([4, 128, 56, 56], f16),), {}) +cnt: 1, ((T([4, 96, 56, 56], f16),), {}) +cnt: 1, ((T([4, 160, 56, 56], f16),), {}) +cnt: 1, ((T([4, 192, 56, 56], f16),), {}) +cnt: 1, ((T([4, 224, 56, 56], f16),), {}) +cnt: 1, ((T([4, 256, 56, 56], f16),), {}) +cnt: 13, ((T([4, 128, 28, 28], f16),), {}) +cnt: 1, ((T([4, 160, 28, 28], f16),), {}) +cnt: 1, ((T([4, 192, 28, 28], f16),), {}) +cnt: 1, ((T([4, 224, 28, 28], f16),), {}) +cnt: 1, ((T([4, 256, 28, 28], f16),), {}) +cnt: 1, ((T([4, 288, 28, 28], f16),), {}) +cnt: 1, ((T([4, 320, 28, 28], f16),), {}) +cnt: 1, ((T([4, 352, 28, 28], f16),), {}) +cnt: 1, ((T([4, 384, 28, 28], f16),), {}) +cnt: 1, ((T([4, 416, 28, 28], f16),), {}) +cnt: 1, ((T([4, 448, 28, 28], f16),), {}) +cnt: 1, ((T([4, 480, 28, 28], f16),), {}) +cnt: 1, ((T([4, 512, 28, 28], f16),), {}) +cnt: 1, ((T([4, 256, 14, 14], f16),), {}) +cnt: 24, ((T([4, 128, 14, 14], f16),), {}) +cnt: 1, ((T([4, 288, 14, 14], f16),), {}) +cnt: 1, ((T([4, 320, 14, 14], f16),), {}) +cnt: 1, ((T([4, 352, 14, 14], f16),), {}) +cnt: 1, ((T([4, 384, 14, 14], f16),), {}) +cnt: 1, ((T([4, 416, 14, 14], f16),), {}) +cnt: 1, ((T([4, 448, 14, 14], f16),), {}) +cnt: 1, ((T([4, 480, 14, 14], f16),), {}) +cnt: 1, ((T([4, 512, 14, 14], f16),), {}) +cnt: 1, ((T([4, 544, 14, 14], f16),), {}) +cnt: 1, ((T([4, 576, 14, 14], f16),), {}) +cnt: 1, ((T([4, 608, 14, 14], f16),), {}) +cnt: 1, ((T([4, 640, 14, 14], f16),), {}) +cnt: 1, ((T([4, 672, 14, 14], f16),), {}) +cnt: 1, ((T([4, 704, 14, 14], f16),), {}) +cnt: 1, ((T([4, 736, 14, 14], f16),), {}) +cnt: 1, ((T([4, 768, 14, 14], f16),), {}) +cnt: 1, ((T([4, 800, 14, 14], f16),), {}) +cnt: 1, ((T([4, 832, 14, 14], f16),), {}) +cnt: 1, ((T([4, 864, 14, 14], f16),), {}) +cnt: 1, ((T([4, 896, 14, 14], f16),), {}) +cnt: 1, ((T([4, 928, 14, 14], f16),), {}) +cnt: 1, ((T([4, 
960, 14, 14], f16),), {}) +cnt: 1, ((T([4, 992, 14, 14], f16),), {}) +cnt: 1, ((T([4, 1024, 14, 14], f16),), {}) +cnt: 1, ((T([4, 512, 7, 7], f16),), {}) +cnt: 16, ((T([4, 128, 7, 7], f16),), {}) +cnt: 1, ((T([4, 544, 7, 7], f16),), {}) +cnt: 1, ((T([4, 576, 7, 7], f16),), {}) +cnt: 1, ((T([4, 608, 7, 7], f16),), {}) +cnt: 1, ((T([4, 640, 7, 7], f16),), {}) +cnt: 1, ((T([4, 672, 7, 7], f16),), {}) +cnt: 1, ((T([4, 704, 7, 7], f16),), {}) +cnt: 1, ((T([4, 736, 7, 7], f16),), {}) +cnt: 1, ((T([4, 768, 7, 7], f16),), {}) +cnt: 1, ((T([4, 800, 7, 7], f16),), {}) +cnt: 1, ((T([4, 832, 7, 7], f16),), {}) +cnt: 1, ((T([4, 864, 7, 7], f16),), {}) +cnt: 1, ((T([4, 896, 7, 7], f16),), {}) +cnt: 1, ((T([4, 928, 7, 7], f16),), {}) +cnt: 1, ((T([4, 960, 7, 7], f16),), {}) +cnt: 1, ((T([4, 992, 7, 7], f16),), {}) +cnt: 1, ((T([4, 1024, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([4, 1000], f16, stride=(0, 0)), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([4, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([4, 1024, 7, 7], f16), T([4, 1024, 7, 7], f16), 0), {}) +cnt: 16, ((T([4, 128, 7, 7], f16), T([4, 128, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 992, 7, 7], f16), T([4, 992, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 960, 7, 7], f16), T([4, 960, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 928, 7, 7], f16), T([4, 928, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 896, 7, 7], f16), T([4, 896, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 864, 7, 7], f16), T([4, 864, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 832, 7, 7], f16), T([4, 832, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 800, 7, 7], f16), T([4, 800, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 768, 7, 7], f16), T([4, 768, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 736, 7, 7], f16), T([4, 736, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 704, 7, 7], f16), T([4, 704, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 672, 7, 7], f16), T([4, 672, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 640, 7, 7], f16), T([4, 640, 7, 7], f16), 0), {}) +cnt: 1, 
((T([4, 608, 7, 7], f16), T([4, 608, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 576, 7, 7], f16), T([4, 576, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 544, 7, 7], f16), T([4, 544, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 512, 7, 7], f16), T([4, 512, 7, 7], f16), 0), {}) +cnt: 1, ((T([4, 1024, 14, 14], f16), T([4, 1024, 14, 14], f16), 0), {}) +cnt: 24, ((T([4, 128, 14, 14], f16), T([4, 128, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 992, 14, 14], f16), T([4, 992, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 960, 14, 14], f16), T([4, 960, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 928, 14, 14], f16), T([4, 928, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 896, 14, 14], f16), T([4, 896, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 864, 14, 14], f16), T([4, 864, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 832, 14, 14], f16), T([4, 832, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 800, 14, 14], f16), T([4, 800, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 768, 14, 14], f16), T([4, 768, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 736, 14, 14], f16), T([4, 736, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 704, 14, 14], f16), T([4, 704, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 672, 14, 14], f16), T([4, 672, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 640, 14, 14], f16), T([4, 640, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 608, 14, 14], f16), T([4, 608, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 576, 14, 14], f16), T([4, 576, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 544, 14, 14], f16), T([4, 544, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 512, 14, 14], f16), T([4, 512, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 480, 14, 14], f16), T([4, 480, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 448, 14, 14], f16), T([4, 448, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 416, 14, 14], f16), T([4, 416, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 384, 14, 14], f16), T([4, 384, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 352, 14, 14], f16), T([4, 352, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 320, 14, 14], f16), T([4, 320, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 288, 14, 14], 
f16), T([4, 288, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 256, 14, 14], f16), T([4, 256, 14, 14], f16), 0), {}) +cnt: 1, ((T([4, 512, 28, 28], f16), T([4, 512, 28, 28], f16), 0), {}) +cnt: 13, ((T([4, 128, 28, 28], f16), T([4, 128, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 480, 28, 28], f16), T([4, 480, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 448, 28, 28], f16), T([4, 448, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 416, 28, 28], f16), T([4, 416, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 384, 28, 28], f16), T([4, 384, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 352, 28, 28], f16), T([4, 352, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 320, 28, 28], f16), T([4, 320, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 288, 28, 28], f16), T([4, 288, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 256, 28, 28], f16), T([4, 256, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 224, 28, 28], f16), T([4, 224, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 192, 28, 28], f16), T([4, 192, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 160, 28, 28], f16), T([4, 160, 28, 28], f16), 0), {}) +cnt: 1, ((T([4, 256, 56, 56], f16), T([4, 256, 56, 56], f16), 0), {}) +cnt: 7, ((T([4, 128, 56, 56], f16), T([4, 128, 56, 56], f16), 0), {}) +cnt: 1, ((T([4, 224, 56, 56], f16), T([4, 224, 56, 56], f16), 0), {}) +cnt: 1, ((T([4, 192, 56, 56], f16), T([4, 192, 56, 56], f16), 0), {}) +cnt: 1, ((T([4, 160, 56, 56], f16), T([4, 160, 56, 56], f16), 0), {}) +cnt: 1, ((T([4, 96, 56, 56], f16), T([4, 96, 56, 56], f16), 0), {}) +cnt: 1, ((T([4, 64, 56, 56], f16), T([4, 64, 56, 56], f16), 0), {}) +cnt: 1, ((T([4, 64, 112, 112], f16), T([4, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/fambench_dlrm_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/fambench_dlrm_training.txt new file mode 100644 index 000000000..89e383e39 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/fambench_dlrm_training.txt @@ -0,0 +1,1063 @@ +Operator: 
aten._embedding_bag.default +cnt: 2, ((T([965, 192], f16), T([54824], i64), T([1024], i64), False, 0, True, T([54824], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54798], i64), T([1024], i64), False, 0, True, T([54798], f16)), {}) +cnt: 5, ((T([965, 192], f16), T([54763], i64), T([1024], i64), False, 0, True, T([54763], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54783], i64), T([1024], i64), False, 0, True, T([54783], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54762], i64), T([1024], i64), False, 0, True, T([54762], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54862], i64), T([1024], i64), False, 0, True, T([54862], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54743], i64), T([1024], i64), False, 0, True, T([54743], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54750], i64), T([1024], i64), False, 0, True, T([54750], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54705], i64), T([1024], i64), False, 0, True, T([54705], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54735], i64), T([1024], i64), False, 0, True, T([54735], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54736], i64), T([1024], i64), False, 0, True, T([54736], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54775], i64), T([1024], i64), False, 0, True, T([54775], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54710], i64), T([1024], i64), False, 0, True, T([54710], f16)), {}) +cnt: 4, ((T([965, 192], f16), T([54753], i64), T([1024], i64), False, 0, True, T([54753], f16)), {}) +cnt: 4, ((T([965, 192], f16), T([54833], i64), T([1024], i64), False, 0, True, T([54833], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54767], i64), T([1024], i64), False, 0, True, T([54767], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54749], i64), T([1024], i64), False, 0, True, T([54749], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54795], i64), T([1024], i64), False, 0, True, T([54795], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54813], i64), T([1024], i64), False, 0, True, T([54813], f16)), {}) +cnt: 3, ((T([965, 192], f16), 
T([54730], i64), T([1024], i64), False, 0, True, T([54730], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54768], i64), T([1024], i64), False, 0, True, T([54768], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54826], i64), T([1024], i64), False, 0, True, T([54826], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54701], i64), T([1024], i64), False, 0, True, T([54701], f16)), {}) +cnt: 6, ((T([965, 192], f16), T([54761], i64), T([1024], i64), False, 0, True, T([54761], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54807], i64), T([1024], i64), False, 0, True, T([54807], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54744], i64), T([1024], i64), False, 0, True, T([54744], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54745], i64), T([1024], i64), False, 0, True, T([54745], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54723], i64), T([1024], i64), False, 0, True, T([54723], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54797], i64), T([1024], i64), False, 0, True, T([54797], f16)), {}) +cnt: 4, ((T([965, 192], f16), T([54786], i64), T([1024], i64), False, 0, True, T([54786], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54816], i64), T([1024], i64), False, 0, True, T([54816], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54725], i64), T([1024], i64), False, 0, True, T([54725], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54819], i64), T([1024], i64), False, 0, True, T([54819], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54855], i64), T([1024], i64), False, 0, True, T([54855], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54782], i64), T([1024], i64), False, 0, True, T([54782], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54712], i64), T([1024], i64), False, 0, True, T([54712], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54799], i64), T([1024], i64), False, 0, True, T([54799], f16)), {}) +cnt: 4, ((T([965, 192], f16), T([54801], i64), T([1024], i64), False, 0, True, T([54801], f16)), {}) +cnt: 5, ((T([965, 192], f16), T([54818], i64), T([1024], i64), False, 0, True, T([54818], 
f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54779], i64), T([1024], i64), False, 0, True, T([54779], f16)), {}) +cnt: 4, ((T([965, 192], f16), T([54719], i64), T([1024], i64), False, 0, True, T([54719], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54778], i64), T([1024], i64), False, 0, True, T([54778], f16)), {}) +cnt: 6, ((T([965, 192], f16), T([54760], i64), T([1024], i64), False, 0, True, T([54760], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54802], i64), T([1024], i64), False, 0, True, T([54802], f16)), {}) +cnt: 5, ((T([965, 192], f16), T([54776], i64), T([1024], i64), False, 0, True, T([54776], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54828], i64), T([1024], i64), False, 0, True, T([54828], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54715], i64), T([1024], i64), False, 0, True, T([54715], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54843], i64), T([1024], i64), False, 0, True, T([54843], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54756], i64), T([1024], i64), False, 0, True, T([54756], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54766], i64), T([1024], i64), False, 0, True, T([54766], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54697], i64), T([1024], i64), False, 0, True, T([54697], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54792], i64), T([1024], i64), False, 0, True, T([54792], f16)), {}) +cnt: 5, ((T([965, 192], f16), T([54793], i64), T([1024], i64), False, 0, True, T([54793], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54727], i64), T([1024], i64), False, 0, True, T([54727], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54733], i64), T([1024], i64), False, 0, True, T([54733], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54692], i64), T([1024], i64), False, 0, True, T([54692], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54758], i64), T([1024], i64), False, 0, True, T([54758], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54820], i64), T([1024], i64), False, 0, True, T([54820], f16)), {}) +cnt: 4, ((T([965, 192], f16), T([54787], i64), 
T([1024], i64), False, 0, True, T([54787], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54815], i64), T([1024], i64), False, 0, True, T([54815], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54814], i64), T([1024], i64), False, 0, True, T([54814], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54759], i64), T([1024], i64), False, 0, True, T([54759], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54757], i64), T([1024], i64), False, 0, True, T([54757], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54821], i64), T([1024], i64), False, 0, True, T([54821], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54769], i64), T([1024], i64), False, 0, True, T([54769], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54842], i64), T([1024], i64), False, 0, True, T([54842], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54718], i64), T([1024], i64), False, 0, True, T([54718], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54771], i64), T([1024], i64), False, 0, True, T([54771], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54844], i64), T([1024], i64), False, 0, True, T([54844], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54838], i64), T([1024], i64), False, 0, True, T([54838], f16)), {}) +cnt: 5, ((T([965, 192], f16), T([54781], i64), T([1024], i64), False, 0, True, T([54781], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54804], i64), T([1024], i64), False, 0, True, T([54804], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54788], i64), T([1024], i64), False, 0, True, T([54788], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54774], i64), T([1024], i64), False, 0, True, T([54774], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54829], i64), T([1024], i64), False, 0, True, T([54829], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54738], i64), T([1024], i64), False, 0, True, T([54738], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54777], i64), T([1024], i64), False, 0, True, T([54777], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54811], i64), T([1024], i64), False, 0, True, T([54811], f16)), {}) +cnt: 
2, ((T([965, 192], f16), T([54772], i64), T([1024], i64), False, 0, True, T([54772], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54800], i64), T([1024], i64), False, 0, True, T([54800], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54741], i64), T([1024], i64), False, 0, True, T([54741], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54794], i64), T([1024], i64), False, 0, True, T([54794], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54773], i64), T([1024], i64), False, 0, True, T([54773], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54803], i64), T([1024], i64), False, 0, True, T([54803], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54789], i64), T([1024], i64), False, 0, True, T([54789], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54707], i64), T([1024], i64), False, 0, True, T([54707], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54737], i64), T([1024], i64), False, 0, True, T([54737], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54722], i64), T([1024], i64), False, 0, True, T([54722], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54747], i64), T([1024], i64), False, 0, True, T([54747], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54770], i64), T([1024], i64), False, 0, True, T([54770], f16)), {}) +cnt: 4, ((T([965, 192], f16), T([54780], i64), T([1024], i64), False, 0, True, T([54780], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54731], i64), T([1024], i64), False, 0, True, T([54731], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54836], i64), T([1024], i64), False, 0, True, T([54836], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54839], i64), T([1024], i64), False, 0, True, T([54839], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54714], i64), T([1024], i64), False, 0, True, T([54714], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54785], i64), T([1024], i64), False, 0, True, T([54785], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54729], i64), T([1024], i64), False, 0, True, T([54729], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54812], i64), T([1024], i64), 
False, 0, True, T([54812], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54734], i64), T([1024], i64), False, 0, True, T([54734], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54791], i64), T([1024], i64), False, 0, True, T([54791], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54827], i64), T([1024], i64), False, 0, True, T([54827], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54717], i64), T([1024], i64), False, 0, True, T([54717], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54716], i64), T([1024], i64), False, 0, True, T([54716], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54830], i64), T([1024], i64), False, 0, True, T([54830], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54732], i64), T([1024], i64), False, 0, True, T([54732], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54835], i64), T([1024], i64), False, 0, True, T([54835], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54831], i64), T([1024], i64), False, 0, True, T([54831], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54748], i64), T([1024], i64), False, 0, True, T([54748], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54746], i64), T([1024], i64), False, 0, True, T([54746], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54711], i64), T([1024], i64), False, 0, True, T([54711], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54739], i64), T([1024], i64), False, 0, True, T([54739], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54713], i64), T([1024], i64), False, 0, True, T([54713], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54847], i64), T([1024], i64), False, 0, True, T([54847], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54809], i64), T([1024], i64), False, 0, True, T([54809], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54742], i64), T([1024], i64), False, 0, True, T([54742], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54704], i64), T([1024], i64), False, 0, True, T([54704], f16)), {}) +cnt: 3, ((T([965, 192], f16), T([54784], i64), T([1024], i64), False, 0, True, T([54784], f16)), {}) +cnt: 1, ((T([965, 
192], f16), T([54796], i64), T([1024], i64), False, 0, True, T([54796], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54754], i64), T([1024], i64), False, 0, True, T([54754], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54751], i64), T([1024], i64), False, 0, True, T([54751], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54764], i64), T([1024], i64), False, 0, True, T([54764], f16)), {}) +cnt: 2, ((T([965, 192], f16), T([54687], i64), T([1024], i64), False, 0, True, T([54687], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54740], i64), T([1024], i64), False, 0, True, T([54740], f16)), {}) +cnt: 1, ((T([965, 192], f16), T([54765], i64), T([1024], i64), False, 0, True, T([54765], f16)), {}) +Operator: aten._embedding_bag_per_sample_weights_backward.default +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54765], i64), T([1024], i64), T([54765], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54704], i64), T([1024], i64), T([54704], i64), 0), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54786], i64), T([1024], i64), T([54786], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54804], i64), T([1024], i64), T([54804], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54757], i64), T([1024], i64), T([54757], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54746], i64), T([1024], i64), T([54746], i64), 0), {}) +cnt: 5, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54781], i64), T([1024], i64), T([54781], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54687], i64), T([1024], i64), T([54687], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54738], i64), T([1024], i64), T([54738], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), 
T([54784], i64), T([1024], i64), T([54784], i64), 0), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54787], i64), T([1024], i64), T([54787], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54768], i64), T([1024], i64), T([54768], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54697], i64), T([1024], i64), T([54697], i64), 0), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54833], i64), T([1024], i64), T([54833], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54809], i64), T([1024], i64), T([54809], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54713], i64), T([1024], i64), T([54713], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54814], i64), T([1024], i64), T([54814], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54802], i64), T([1024], i64), T([54802], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54789], i64), T([1024], i64), T([54789], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54743], i64), T([1024], i64), T([54743], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54731], i64), T([1024], i64), T([54731], i64), 0), {}) +cnt: 6, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54760], i64), T([1024], i64), T([54760], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54771], i64), T([1024], i64), T([54771], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54723], i64), T([1024], i64), T([54723], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54812], i64), T([1024], i64), T([54812], i64), 0), {}) +cnt: 
3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54799], i64), T([1024], i64), T([54799], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54745], i64), T([1024], i64), T([54745], i64), 0), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54753], i64), T([1024], i64), T([54753], i64), 0), {}) +cnt: 5, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54763], i64), T([1024], i64), T([54763], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54795], i64), T([1024], i64), T([54795], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54740], i64), T([1024], i64), T([54740], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54707], i64), T([1024], i64), T([54707], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54798], i64), T([1024], i64), T([54798], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54751], i64), T([1024], i64), T([54751], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54788], i64), T([1024], i64), T([54788], i64), 0), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54780], i64), T([1024], i64), T([54780], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54824], i64), T([1024], i64), T([54824], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54764], i64), T([1024], i64), T([54764], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54797], i64), T([1024], i64), T([54797], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54739], i64), T([1024], i64), T([54739], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), 
T([54791], i64), T([1024], i64), T([54791], i64), 0), {}) +cnt: 5, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54776], i64), T([1024], i64), T([54776], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54754], i64), T([1024], i64), T([54754], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54777], i64), T([1024], i64), T([54777], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54794], i64), T([1024], i64), T([54794], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54742], i64), T([1024], i64), T([54742], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54748], i64), T([1024], i64), T([54748], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54729], i64), T([1024], i64), T([54729], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54815], i64), T([1024], i64), T([54815], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54796], i64), T([1024], i64), T([54796], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54730], i64), T([1024], i64), T([54730], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54773], i64), T([1024], i64), T([54773], i64), 0), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54801], i64), T([1024], i64), T([54801], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54744], i64), T([1024], i64), T([54744], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54847], i64), T([1024], i64), T([54847], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54766], i64), T([1024], i64), T([54766], i64), 0), {}) +cnt: 
3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54778], i64), T([1024], i64), T([54778], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54711], i64), T([1024], i64), T([54711], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54826], i64), T([1024], i64), T([54826], i64), 0), {}) +cnt: 5, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54793], i64), T([1024], i64), T([54793], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54792], i64), T([1024], i64), T([54792], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54831], i64), T([1024], i64), T([54831], i64), 0), {}) +cnt: 6, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54761], i64), T([1024], i64), T([54761], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54835], i64), T([1024], i64), T([54835], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54732], i64), T([1024], i64), T([54732], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54830], i64), T([1024], i64), T([54830], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54775], i64), T([1024], i64), T([54775], i64), 0), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54719], i64), T([1024], i64), T([54719], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54722], i64), T([1024], i64), T([54722], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54716], i64), T([1024], i64), T([54716], i64), 0), {}) +cnt: 5, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54818], i64), T([1024], i64), T([54818], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), 
T([54783], i64), T([1024], i64), T([54783], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54717], i64), T([1024], i64), T([54717], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54827], i64), T([1024], i64), T([54827], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54734], i64), T([1024], i64), T([54734], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54779], i64), T([1024], i64), T([54779], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54785], i64), T([1024], i64), T([54785], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54714], i64), T([1024], i64), T([54714], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54772], i64), T([1024], i64), T([54772], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54839], i64), T([1024], i64), T([54839], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54836], i64), T([1024], i64), T([54836], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54774], i64), T([1024], i64), T([54774], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54803], i64), T([1024], i64), T([54803], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54770], i64), T([1024], i64), T([54770], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54747], i64), T([1024], i64), T([54747], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54737], i64), T([1024], i64), T([54737], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54741], i64), T([1024], i64), T([54741], i64), 0), {}) +cnt: 
1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54800], i64), T([1024], i64), T([54800], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54811], i64), T([1024], i64), T([54811], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54758], i64), T([1024], i64), T([54758], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54829], i64), T([1024], i64), T([54829], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54838], i64), T([1024], i64), T([54838], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54759], i64), T([1024], i64), T([54759], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54733], i64), T([1024], i64), T([54733], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54844], i64), T([1024], i64), T([54844], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54718], i64), T([1024], i64), T([54718], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54842], i64), T([1024], i64), T([54842], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54769], i64), T([1024], i64), T([54769], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54821], i64), T([1024], i64), T([54821], i64), 0), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54782], i64), T([1024], i64), T([54782], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54710], i64), T([1024], i64), T([54710], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54820], i64), T([1024], i64), T([54820], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), 
T([54692], i64), T([1024], i64), T([54692], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54727], i64), T([1024], i64), T([54727], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54767], i64), T([1024], i64), T([54767], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54819], i64), T([1024], i64), T([54819], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54756], i64), T([1024], i64), T([54756], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54843], i64), T([1024], i64), T([54843], i64), 0), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54735], i64), T([1024], i64), T([54735], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54715], i64), T([1024], i64), T([54715], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54828], i64), T([1024], i64), T([54828], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54712], i64), T([1024], i64), T([54712], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54855], i64), T([1024], i64), T([54855], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54725], i64), T([1024], i64), T([54725], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54816], i64), T([1024], i64), T([54816], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54807], i64), T([1024], i64), T([54807], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54701], i64), T([1024], i64), T([54701], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54813], i64), T([1024], i64), T([54813], i64), 0), {}) +cnt: 
1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54749], i64), T([1024], i64), T([54749], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54736], i64), T([1024], i64), T([54736], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54705], i64), T([1024], i64), T([54705], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54750], i64), T([1024], i64), T([54750], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54862], i64), T([1024], i64), T([54862], i64), 0), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), T([965, 192], f16), T([54762], i64), T([1024], i64), T([54762], i64), 0), {}) +Operator: aten._sparse_coo_tensor_with_dims_and_tensors.default +cnt: 2, ((1, 1, [965, 192], T([1, 54765], i64), T([54765, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54704], i64), T([54704, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 8, ((1, 1, [965, 192], T([1, 54786], i64), T([54786, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54804], i64), T([54804, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54757], i64), T([54757, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54746], i64), T([54746, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 10, ((1, 1, [965, 192], T([1, 54781], i64), T([54781, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54687], i64), T([54687, 192], f16)), {'dtype': f16, 
'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54738], i64), T([54738, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54784], i64), T([54784, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 8, ((1, 1, [965, 192], T([1, 54787], i64), T([54787, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54768], i64), T([54768, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54697], i64), T([54697, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 8, ((1, 1, [965, 192], T([1, 54833], i64), T([54833, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54809], i64), T([54809, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54713], i64), T([54713, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54814], i64), T([54814, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54802], i64), T([54802, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54789], i64), T([54789, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54743], i64), T([54743, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54731], i64), T([54731, 192], f16)), {'dtype': 
f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 12, ((1, 1, [965, 192], T([1, 54760], i64), T([54760, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54771], i64), T([54771, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54723], i64), T([54723, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54812], i64), T([54812, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54799], i64), T([54799, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54745], i64), T([54745, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 8, ((1, 1, [965, 192], T([1, 54753], i64), T([54753, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 10, ((1, 1, [965, 192], T([1, 54763], i64), T([54763, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54795], i64), T([54795, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54740], i64), T([54740, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54707], i64), T([54707, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54798], i64), T([54798, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54751], i64), T([54751, 192], f16)), 
{'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54788], i64), T([54788, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 8, ((1, 1, [965, 192], T([1, 54780], i64), T([54780, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54824], i64), T([54824, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54764], i64), T([54764, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54797], i64), T([54797, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54739], i64), T([54739, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54791], i64), T([54791, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 10, ((1, 1, [965, 192], T([1, 54776], i64), T([54776, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54754], i64), T([54754, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54777], i64), T([54777, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54794], i64), T([54794, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54742], i64), T([54742, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54748], i64), T([54748, 192], 
f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54729], i64), T([54729, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54815], i64), T([54815, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54796], i64), T([54796, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54730], i64), T([54730, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54773], i64), T([54773, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 8, ((1, 1, [965, 192], T([1, 54801], i64), T([54801, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54744], i64), T([54744, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54847], i64), T([54847, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54766], i64), T([54766, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54778], i64), T([54778, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54711], i64), T([54711, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54826], i64), T([54826, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 10, ((1, 1, [965, 192], T([1, 54793], i64), T([54793, 
192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54792], i64), T([54792, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54831], i64), T([54831, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 12, ((1, 1, [965, 192], T([1, 54761], i64), T([54761, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54835], i64), T([54835, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54732], i64), T([54732, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54830], i64), T([54830, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54775], i64), T([54775, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 8, ((1, 1, [965, 192], T([1, 54719], i64), T([54719, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54722], i64), T([54722, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54716], i64), T([54716, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 10, ((1, 1, [965, 192], T([1, 54818], i64), T([54818, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54783], i64), T([54783, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54717], i64), 
T([54717, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54827], i64), T([54827, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54734], i64), T([54734, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54779], i64), T([54779, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54785], i64), T([54785, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54714], i64), T([54714, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54772], i64), T([54772, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54839], i64), T([54839, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54836], i64), T([54836, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54774], i64), T([54774, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54803], i64), T([54803, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54770], i64), T([54770, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54747], i64), T([54747, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54737], 
i64), T([54737, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54741], i64), T([54741, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54800], i64), T([54800, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54811], i64), T([54811, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54758], i64), T([54758, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54829], i64), T([54829, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54838], i64), T([54838, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54759], i64), T([54759, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54733], i64), T([54733, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54844], i64), T([54844, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54718], i64), T([54718, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54842], i64), T([54842, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54769], i64), T([54769, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 
54821], i64), T([54821, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 6, ((1, 1, [965, 192], T([1, 54782], i64), T([54782, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54710], i64), T([54710, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54820], i64), T([54820, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54692], i64), T([54692, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54727], i64), T([54727, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54767], i64), T([54767, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54819], i64), T([54819, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54756], i64), T([54756, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54843], i64), T([54843, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 4, ((1, 1, [965, 192], T([1, 54735], i64), T([54735, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54715], i64), T([54715, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54828], i64), T([54828, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], 
T([1, 54712], i64), T([54712, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54855], i64), T([54855, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54725], i64), T([54725, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54816], i64), T([54816, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54807], i64), T([54807, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54701], i64), T([54701, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54813], i64), T([54813, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54749], i64), T([54749, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54736], i64), T([54736, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54705], i64), T([54705, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54750], i64), T([54750, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54862], i64), T([54862, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +cnt: 2, ((1, 1, [965, 192], T([1, 54762], i64), T([54762, 192], f16)), {'dtype': f16, 'layout': torch.sparse_coo, 'device': 'cuda', 'pin_memory': None}) +Operator: aten.add.Tensor 
+cnt: 1, ((T([1024, 249, 192], f16), T([1024, 249, 192], f16, stride=(47808, 1, 249))), {}) +cnt: 1, ((T([1024, 192], f16, stride=(31068, 1)), T([1024, 192], f16, stride=(47808, 1))), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1500], f16), T([1024, 2000], f16), T([2000, 1500], f16, stride=(1, 2000))), {}) +cnt: 2, ((T([1500], f16), T([1024, 1500], f16), T([1500, 1500], f16, stride=(1, 1500))), {}) +cnt: 1, ((T([192], f16), T([1024, 1500], f16), T([1500, 192], f16, stride=(1, 1500))), {}) +cnt: 1, ((T([4000], f16), T([1024, 31068], f16), T([31068, 4000], f16, stride=(1, 31068))), {}) +cnt: 8, ((T([4000], f16), T([1024, 4000], f16), T([4000, 4000], f16, stride=(1, 4000))), {}) +cnt: 1, ((T([1], f16), T([1024, 4000], f16), T([4000, 1], f16)), {}) +Operator: aten.bmm.default +cnt: 1, ((T([1024, 249, 192], f16), T([1024, 192, 249], f16, stride=(47808, 1, 192))), {}) +cnt: 1, ((T([1024, 192, 249], f16, stride=(47808, 1, 192)), T([1024, 249, 249], f16)), {}) +cnt: 1, ((T([1024, 249, 249], f16), T([1024, 249, 192], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], 
f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], 
f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], 
f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16), T([1024, 192], f16)], 1), {}) +cnt: 1, (([T([1024, 192], f16), T([1024, 30876], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([1024, 2000], f16),), {}) +cnt: 1, ((T([248, 1024], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([1024, 2000], f16), T([1024, 2000], f16)), {}) +cnt: 1, ((T([248, 1024], i64), T([248, 1024], i64)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 1024), {}) +Operator: aten.gather.default +cnt: 2, ((T([965], f16), 0, T([54824], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54798], i64)), {}) +cnt: 5, ((T([965], f16), 0, T([54763], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54783], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54762], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54862], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54743], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54750], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54705], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54735], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54736], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54775], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54710], i64)), {}) +cnt: 4, ((T([965], f16), 0, T([54753], i64)), {}) +cnt: 4, ((T([965], f16), 0, T([54833], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54767], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54749], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54795], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54813], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54730], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54768], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54826], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54701], i64)), {}) +cnt: 6, ((T([965], f16), 0, T([54761], i64)), {}) +cnt: 1, ((T([965], f16), 0, 
T([54807], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54744], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54745], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54723], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54797], i64)), {}) +cnt: 4, ((T([965], f16), 0, T([54786], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54816], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54725], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54819], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54855], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54782], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54712], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54799], i64)), {}) +cnt: 4, ((T([965], f16), 0, T([54801], i64)), {}) +cnt: 5, ((T([965], f16), 0, T([54818], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54779], i64)), {}) +cnt: 4, ((T([965], f16), 0, T([54719], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54778], i64)), {}) +cnt: 6, ((T([965], f16), 0, T([54760], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54802], i64)), {}) +cnt: 5, ((T([965], f16), 0, T([54776], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54828], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54715], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54843], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54756], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54766], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54697], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54792], i64)), {}) +cnt: 5, ((T([965], f16), 0, T([54793], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54727], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54733], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54692], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54758], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54820], i64)), {}) +cnt: 4, ((T([965], f16), 0, T([54787], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54815], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54814], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54759], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54757], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54821], 
i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54769], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54842], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54718], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54771], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54844], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54838], i64)), {}) +cnt: 5, ((T([965], f16), 0, T([54781], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54804], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54788], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54774], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54829], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54738], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54777], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54811], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54772], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54800], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54741], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54794], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54773], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54803], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54789], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54707], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54737], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54722], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54747], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54770], i64)), {}) +cnt: 4, ((T([965], f16), 0, T([54780], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54731], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54836], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54839], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54714], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54785], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54729], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54812], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54734], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54791], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54827], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54717], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54716], i64)), {}) 
+cnt: 1, ((T([965], f16), 0, T([54830], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54732], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54835], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54831], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54748], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54746], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54711], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54739], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54713], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54847], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54809], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54742], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54704], i64)), {}) +cnt: 3, ((T([965], f16), 0, T([54784], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54796], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54754], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54751], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54764], i64)), {}) +cnt: 2, ((T([965], f16), 0, T([54687], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54740], i64)), {}) +cnt: 1, ((T([965], f16), 0, T([54765], i64)), {}) +Operator: aten.index.Tensor +cnt: 1, ((T([1024, 249, 249], f16), [None, T([30876], i64), T([30876], i64)]), {}) +Operator: aten.index_put.default +cnt: 1, ((T([1024, 249, 249], f16), [None, T([30876], i64), T([30876], i64)], T([1024, 30876], f16, stride=(31068, 1)), True), {}) +Operator: aten.index_select.default +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54765], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54704], i64)), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54786], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54804], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54757], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54746], i64)), {}) +cnt: 5, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54781], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54687], i64)), {}) 
+cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54738], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54784], i64)), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54787], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54768], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54697], i64)), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54833], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54809], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54713], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54814], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54802], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54789], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54743], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54731], i64)), {}) +cnt: 6, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54760], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54771], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54723], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54812], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54799], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54745], i64)), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54753], i64)), {}) +cnt: 5, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54763], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54795], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54740], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54707], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54798], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54751], i64)), {}) +cnt: 2, ((T([1024, 
192], f16, stride=(47808, 1)), 0, T([54788], i64)), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54780], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54824], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54764], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54797], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54739], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54791], i64)), {}) +cnt: 5, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54776], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54754], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54777], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54794], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54742], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54748], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54729], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54815], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54796], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54730], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54773], i64)), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54801], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54744], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54847], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54766], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54778], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54711], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54826], i64)), {}) +cnt: 5, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54793], i64)), {}) +cnt: 3, ((T([1024, 192], f16, 
stride=(47808, 1)), 0, T([54792], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54831], i64)), {}) +cnt: 6, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54761], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54835], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54732], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54830], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54775], i64)), {}) +cnt: 4, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54719], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54722], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54716], i64)), {}) +cnt: 5, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54818], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54783], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54717], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54827], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54734], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54779], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54785], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54714], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54772], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54839], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54836], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54774], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54803], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54770], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54747], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54737], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, 
T([54741], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54800], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54811], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54758], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54829], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54838], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54759], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54733], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54844], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54718], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54842], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54769], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54821], i64)), {}) +cnt: 3, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54782], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54710], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54820], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54692], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54727], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54767], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54819], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54756], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54843], i64)), {}) +cnt: 2, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54735], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54715], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54828], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54712], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54855], i64)), {}) 
+cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54725], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54816], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54807], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54701], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54813], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54749], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54736], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54705], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54750], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54862], i64)), {}) +cnt: 1, ((T([1024, 192], f16, stride=(47808, 1)), 0, T([54762], i64)), {}) +Operator: aten.mm.default +cnt: 1, ((T([1024, 1], f16), T([1, 4000], f16)), {}) +cnt: 1, ((T([1, 1024], f16), T([1024, 4000], f16)), {}) +cnt: 8, ((T([1024, 4000], f16), T([4000, 4000], f16)), {}) +cnt: 8, ((T([4000, 1024], f16, stride=(1, 4000)), T([1024, 4000], f16)), {}) +cnt: 1, ((T([1024, 4000], f16), T([4000, 31068], f16)), {}) +cnt: 1, ((T([4000, 1024], f16, stride=(1, 4000)), T([1024, 31068], f16)), {}) +cnt: 1, ((T([1024, 192], f16), T([192, 1500], f16)), {}) +cnt: 1, ((T([192, 1024], f16, stride=(1, 192)), T([1024, 1500], f16)), {}) +cnt: 2, ((T([1024, 1500], f16), T([1500, 1500], f16)), {}) +cnt: 2, ((T([1500, 1024], f16, stride=(1, 1500)), T([1024, 1500], f16)), {}) +cnt: 1, ((T([1500, 1024], f16, stride=(1, 1500)), T([1024, 2000], f16)), {}) +Operator: aten.mul_.Tensor +cnt: 1, ((T([54765, 192], f16), T([54765, 1], f16)), {}) +cnt: 2, ((T([54704, 192], f16), T([54704, 1], f16)), {}) +cnt: 4, ((T([54786, 192], f16), T([54786, 1], f16)), {}) +cnt: 2, ((T([54804, 192], f16), T([54804, 1], f16)), {}) +cnt: 3, ((T([54757, 192], f16), T([54757, 1], f16)), {}) +cnt: 2, ((T([54746, 192], f16), T([54746, 1], f16)), {}) +cnt: 5, ((T([54781, 
192], f16), T([54781, 1], f16)), {}) +cnt: 2, ((T([54687, 192], f16), T([54687, 1], f16)), {}) +cnt: 2, ((T([54738, 192], f16), T([54738, 1], f16)), {}) +cnt: 3, ((T([54784, 192], f16), T([54784, 1], f16)), {}) +cnt: 4, ((T([54787, 192], f16), T([54787, 1], f16)), {}) +cnt: 3, ((T([54768, 192], f16), T([54768, 1], f16)), {}) +cnt: 2, ((T([54697, 192], f16), T([54697, 1], f16)), {}) +cnt: 4, ((T([54833, 192], f16), T([54833, 1], f16)), {}) +cnt: 2, ((T([54809, 192], f16), T([54809, 1], f16)), {}) +cnt: 2, ((T([54713, 192], f16), T([54713, 1], f16)), {}) +cnt: 2, ((T([54814, 192], f16), T([54814, 1], f16)), {}) +cnt: 2, ((T([54802, 192], f16), T([54802, 1], f16)), {}) +cnt: 2, ((T([54789, 192], f16), T([54789, 1], f16)), {}) +cnt: 2, ((T([54743, 192], f16), T([54743, 1], f16)), {}) +cnt: 2, ((T([54731, 192], f16), T([54731, 1], f16)), {}) +cnt: 6, ((T([54760, 192], f16), T([54760, 1], f16)), {}) +cnt: 3, ((T([54771, 192], f16), T([54771, 1], f16)), {}) +cnt: 2, ((T([54723, 192], f16), T([54723, 1], f16)), {}) +cnt: 2, ((T([54812, 192], f16), T([54812, 1], f16)), {}) +cnt: 3, ((T([54799, 192], f16), T([54799, 1], f16)), {}) +cnt: 3, ((T([54745, 192], f16), T([54745, 1], f16)), {}) +cnt: 4, ((T([54753, 192], f16), T([54753, 1], f16)), {}) +cnt: 5, ((T([54763, 192], f16), T([54763, 1], f16)), {}) +cnt: 2, ((T([54795, 192], f16), T([54795, 1], f16)), {}) +cnt: 1, ((T([54740, 192], f16), T([54740, 1], f16)), {}) +cnt: 2, ((T([54707, 192], f16), T([54707, 1], f16)), {}) +cnt: 2, ((T([54798, 192], f16), T([54798, 1], f16)), {}) +cnt: 2, ((T([54751, 192], f16), T([54751, 1], f16)), {}) +cnt: 2, ((T([54788, 192], f16), T([54788, 1], f16)), {}) +cnt: 4, ((T([54780, 192], f16), T([54780, 1], f16)), {}) +cnt: 2, ((T([54824, 192], f16), T([54824, 1], f16)), {}) +cnt: 1, ((T([54764, 192], f16), T([54764, 1], f16)), {}) +cnt: 2, ((T([54797, 192], f16), T([54797, 1], f16)), {}) +cnt: 3, ((T([54739, 192], f16), T([54739, 1], f16)), {}) +cnt: 2, ((T([54791, 192], f16), T([54791, 1], 
f16)), {}) +cnt: 5, ((T([54776, 192], f16), T([54776, 1], f16)), {}) +cnt: 1, ((T([54754, 192], f16), T([54754, 1], f16)), {}) +cnt: 2, ((T([54777, 192], f16), T([54777, 1], f16)), {}) +cnt: 2, ((T([54794, 192], f16), T([54794, 1], f16)), {}) +cnt: 2, ((T([54742, 192], f16), T([54742, 1], f16)), {}) +cnt: 3, ((T([54748, 192], f16), T([54748, 1], f16)), {}) +cnt: 2, ((T([54729, 192], f16), T([54729, 1], f16)), {}) +cnt: 2, ((T([54815, 192], f16), T([54815, 1], f16)), {}) +cnt: 1, ((T([54796, 192], f16), T([54796, 1], f16)), {}) +cnt: 3, ((T([54730, 192], f16), T([54730, 1], f16)), {}) +cnt: 2, ((T([54773, 192], f16), T([54773, 1], f16)), {}) +cnt: 4, ((T([54801, 192], f16), T([54801, 1], f16)), {}) +cnt: 2, ((T([54744, 192], f16), T([54744, 1], f16)), {}) +cnt: 1, ((T([54847, 192], f16), T([54847, 1], f16)), {}) +cnt: 3, ((T([54766, 192], f16), T([54766, 1], f16)), {}) +cnt: 3, ((T([54778, 192], f16), T([54778, 1], f16)), {}) +cnt: 1, ((T([54711, 192], f16), T([54711, 1], f16)), {}) +cnt: 2, ((T([54826, 192], f16), T([54826, 1], f16)), {}) +cnt: 5, ((T([54793, 192], f16), T([54793, 1], f16)), {}) +cnt: 3, ((T([54792, 192], f16), T([54792, 1], f16)), {}) +cnt: 1, ((T([54831, 192], f16), T([54831, 1], f16)), {}) +cnt: 6, ((T([54761, 192], f16), T([54761, 1], f16)), {}) +cnt: 1, ((T([54835, 192], f16), T([54835, 1], f16)), {}) +cnt: 1, ((T([54732, 192], f16), T([54732, 1], f16)), {}) +cnt: 1, ((T([54830, 192], f16), T([54830, 1], f16)), {}) +cnt: 3, ((T([54775, 192], f16), T([54775, 1], f16)), {}) +cnt: 4, ((T([54719, 192], f16), T([54719, 1], f16)), {}) +cnt: 2, ((T([54722, 192], f16), T([54722, 1], f16)), {}) +cnt: 1, ((T([54716, 192], f16), T([54716, 1], f16)), {}) +cnt: 5, ((T([54818, 192], f16), T([54818, 1], f16)), {}) +cnt: 2, ((T([54783, 192], f16), T([54783, 1], f16)), {}) +cnt: 1, ((T([54717, 192], f16), T([54717, 1], f16)), {}) +cnt: 1, ((T([54827, 192], f16), T([54827, 1], f16)), {}) +cnt: 1, ((T([54734, 192], f16), T([54734, 1], f16)), {}) +cnt: 3, 
((T([54779, 192], f16), T([54779, 1], f16)), {}) +cnt: 1, ((T([54785, 192], f16), T([54785, 1], f16)), {}) +cnt: 1, ((T([54714, 192], f16), T([54714, 1], f16)), {}) +cnt: 2, ((T([54772, 192], f16), T([54772, 1], f16)), {}) +cnt: 1, ((T([54839, 192], f16), T([54839, 1], f16)), {}) +cnt: 1, ((T([54836, 192], f16), T([54836, 1], f16)), {}) +cnt: 2, ((T([54774, 192], f16), T([54774, 1], f16)), {}) +cnt: 2, ((T([54803, 192], f16), T([54803, 1], f16)), {}) +cnt: 1, ((T([54770, 192], f16), T([54770, 1], f16)), {}) +cnt: 1, ((T([54747, 192], f16), T([54747, 1], f16)), {}) +cnt: 1, ((T([54737, 192], f16), T([54737, 1], f16)), {}) +cnt: 1, ((T([54741, 192], f16), T([54741, 1], f16)), {}) +cnt: 1, ((T([54800, 192], f16), T([54800, 1], f16)), {}) +cnt: 1, ((T([54811, 192], f16), T([54811, 1], f16)), {}) +cnt: 2, ((T([54758, 192], f16), T([54758, 1], f16)), {}) +cnt: 1, ((T([54829, 192], f16), T([54829, 1], f16)), {}) +cnt: 1, ((T([54838, 192], f16), T([54838, 1], f16)), {}) +cnt: 2, ((T([54759, 192], f16), T([54759, 1], f16)), {}) +cnt: 2, ((T([54733, 192], f16), T([54733, 1], f16)), {}) +cnt: 1, ((T([54844, 192], f16), T([54844, 1], f16)), {}) +cnt: 1, ((T([54718, 192], f16), T([54718, 1], f16)), {}) +cnt: 1, ((T([54842, 192], f16), T([54842, 1], f16)), {}) +cnt: 1, ((T([54769, 192], f16), T([54769, 1], f16)), {}) +cnt: 1, ((T([54821, 192], f16), T([54821, 1], f16)), {}) +cnt: 3, ((T([54782, 192], f16), T([54782, 1], f16)), {}) +cnt: 2, ((T([54710, 192], f16), T([54710, 1], f16)), {}) +cnt: 1, ((T([54820, 192], f16), T([54820, 1], f16)), {}) +cnt: 1, ((T([54692, 192], f16), T([54692, 1], f16)), {}) +cnt: 1, ((T([54727, 192], f16), T([54727, 1], f16)), {}) +cnt: 2, ((T([54767, 192], f16), T([54767, 1], f16)), {}) +cnt: 2, ((T([54819, 192], f16), T([54819, 1], f16)), {}) +cnt: 1, ((T([54756, 192], f16), T([54756, 1], f16)), {}) +cnt: 1, ((T([54843, 192], f16), T([54843, 1], f16)), {}) +cnt: 2, ((T([54735, 192], f16), T([54735, 1], f16)), {}) +cnt: 1, ((T([54715, 192], f16), 
T([54715, 1], f16)), {}) +cnt: 1, ((T([54828, 192], f16), T([54828, 1], f16)), {}) +cnt: 1, ((T([54712, 192], f16), T([54712, 1], f16)), {}) +cnt: 1, ((T([54855, 192], f16), T([54855, 1], f16)), {}) +cnt: 1, ((T([54725, 192], f16), T([54725, 1], f16)), {}) +cnt: 1, ((T([54816, 192], f16), T([54816, 1], f16)), {}) +cnt: 1, ((T([54807, 192], f16), T([54807, 1], f16)), {}) +cnt: 1, ((T([54701, 192], f16), T([54701, 1], f16)), {}) +cnt: 1, ((T([54813, 192], f16), T([54813, 1], f16)), {}) +cnt: 1, ((T([54749, 192], f16), T([54749, 1], f16)), {}) +cnt: 1, ((T([54736, 192], f16), T([54736, 1], f16)), {}) +cnt: 1, ((T([54705, 192], f16), T([54705, 1], f16)), {}) +cnt: 1, ((T([54750, 192], f16), T([54750, 1], f16)), {}) +cnt: 1, ((T([54862, 192], f16), T([54862, 1], f16)), {}) +cnt: 1, ((T([54762, 192], f16), T([54762, 1], f16)), {}) +Operator: aten.new_zeros.default +cnt: 1, ((T([1024, 30876], f16, stride=(31068, 1)), [1024, 249, 249]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([54765], f16), [965]), {}) +cnt: 2, ((T([54704], f16), [965]), {}) +cnt: 4, ((T([54786], f16), [965]), {}) +cnt: 2, ((T([54804], f16), [965]), {}) +cnt: 3, ((T([54757], f16), [965]), {}) +cnt: 2, ((T([54746], f16), [965]), {}) +cnt: 5, ((T([54781], f16), [965]), {}) +cnt: 2, ((T([54687], f16), [965]), {}) +cnt: 2, ((T([54738], f16), [965]), {}) +cnt: 3, ((T([54784], f16), [965]), {}) +cnt: 4, ((T([54787], f16), [965]), {}) +cnt: 3, ((T([54768], f16), [965]), {}) +cnt: 2, ((T([54697], f16), [965]), {}) +cnt: 4, ((T([54833], f16), [965]), {}) +cnt: 2, ((T([54809], f16), [965]), {}) +cnt: 2, ((T([54713], f16), [965]), {}) +cnt: 2, ((T([54814], f16), [965]), {}) +cnt: 2, ((T([54802], f16), [965]), {}) +cnt: 2, ((T([54789], f16), [965]), {}) +cnt: 2, ((T([54743], f16), [965]), {}) +cnt: 2, ((T([54731], f16), [965]), {}) +cnt: 6, ((T([54760], f16), [965]), {}) +cnt: 3, ((T([54771], f16), [965]), {}) +cnt: 2, ((T([54723], f16), [965]), {}) +cnt: 2, ((T([54812], f16), [965]), 
{}) +cnt: 3, ((T([54799], f16), [965]), {}) +cnt: 3, ((T([54745], f16), [965]), {}) +cnt: 4, ((T([54753], f16), [965]), {}) +cnt: 5, ((T([54763], f16), [965]), {}) +cnt: 2, ((T([54795], f16), [965]), {}) +cnt: 1, ((T([54740], f16), [965]), {}) +cnt: 2, ((T([54707], f16), [965]), {}) +cnt: 2, ((T([54798], f16), [965]), {}) +cnt: 2, ((T([54751], f16), [965]), {}) +cnt: 2, ((T([54788], f16), [965]), {}) +cnt: 4, ((T([54780], f16), [965]), {}) +cnt: 2, ((T([54824], f16), [965]), {}) +cnt: 1, ((T([54764], f16), [965]), {}) +cnt: 2, ((T([54797], f16), [965]), {}) +cnt: 3, ((T([54739], f16), [965]), {}) +cnt: 2, ((T([54791], f16), [965]), {}) +cnt: 5, ((T([54776], f16), [965]), {}) +cnt: 1, ((T([54754], f16), [965]), {}) +cnt: 2, ((T([54777], f16), [965]), {}) +cnt: 2, ((T([54794], f16), [965]), {}) +cnt: 2, ((T([54742], f16), [965]), {}) +cnt: 3, ((T([54748], f16), [965]), {}) +cnt: 2, ((T([54729], f16), [965]), {}) +cnt: 2, ((T([54815], f16), [965]), {}) +cnt: 1, ((T([54796], f16), [965]), {}) +cnt: 3, ((T([54730], f16), [965]), {}) +cnt: 2, ((T([54773], f16), [965]), {}) +cnt: 4, ((T([54801], f16), [965]), {}) +cnt: 2, ((T([54744], f16), [965]), {}) +cnt: 1, ((T([54847], f16), [965]), {}) +cnt: 3, ((T([54766], f16), [965]), {}) +cnt: 3, ((T([54778], f16), [965]), {}) +cnt: 1, ((T([54711], f16), [965]), {}) +cnt: 2, ((T([54826], f16), [965]), {}) +cnt: 5, ((T([54793], f16), [965]), {}) +cnt: 3, ((T([54792], f16), [965]), {}) +cnt: 1, ((T([54831], f16), [965]), {}) +cnt: 6, ((T([54761], f16), [965]), {}) +cnt: 1, ((T([54835], f16), [965]), {}) +cnt: 1, ((T([54732], f16), [965]), {}) +cnt: 1, ((T([54830], f16), [965]), {}) +cnt: 3, ((T([54775], f16), [965]), {}) +cnt: 4, ((T([54719], f16), [965]), {}) +cnt: 2, ((T([54722], f16), [965]), {}) +cnt: 1, ((T([54716], f16), [965]), {}) +cnt: 5, ((T([54818], f16), [965]), {}) +cnt: 2, ((T([54783], f16), [965]), {}) +cnt: 1, ((T([54717], f16), [965]), {}) +cnt: 1, ((T([54827], f16), [965]), {}) +cnt: 1, ((T([54734], f16), [965]), 
{}) +cnt: 3, ((T([54779], f16), [965]), {}) +cnt: 1, ((T([54785], f16), [965]), {}) +cnt: 1, ((T([54714], f16), [965]), {}) +cnt: 2, ((T([54772], f16), [965]), {}) +cnt: 1, ((T([54839], f16), [965]), {}) +cnt: 1, ((T([54836], f16), [965]), {}) +cnt: 2, ((T([54774], f16), [965]), {}) +cnt: 2, ((T([54803], f16), [965]), {}) +cnt: 1, ((T([54770], f16), [965]), {}) +cnt: 1, ((T([54747], f16), [965]), {}) +cnt: 1, ((T([54737], f16), [965]), {}) +cnt: 1, ((T([54741], f16), [965]), {}) +cnt: 1, ((T([54800], f16), [965]), {}) +cnt: 1, ((T([54811], f16), [965]), {}) +cnt: 2, ((T([54758], f16), [965]), {}) +cnt: 1, ((T([54829], f16), [965]), {}) +cnt: 1, ((T([54838], f16), [965]), {}) +cnt: 2, ((T([54759], f16), [965]), {}) +cnt: 2, ((T([54733], f16), [965]), {}) +cnt: 1, ((T([54844], f16), [965]), {}) +cnt: 1, ((T([54718], f16), [965]), {}) +cnt: 1, ((T([54842], f16), [965]), {}) +cnt: 1, ((T([54769], f16), [965]), {}) +cnt: 1, ((T([54821], f16), [965]), {}) +cnt: 3, ((T([54782], f16), [965]), {}) +cnt: 2, ((T([54710], f16), [965]), {}) +cnt: 1, ((T([54820], f16), [965]), {}) +cnt: 1, ((T([54692], f16), [965]), {}) +cnt: 1, ((T([54727], f16), [965]), {}) +cnt: 2, ((T([54767], f16), [965]), {}) +cnt: 2, ((T([54819], f16), [965]), {}) +cnt: 1, ((T([54756], f16), [965]), {}) +cnt: 1, ((T([54843], f16), [965]), {}) +cnt: 2, ((T([54735], f16), [965]), {}) +cnt: 1, ((T([54715], f16), [965]), {}) +cnt: 1, ((T([54828], f16), [965]), {}) +cnt: 1, ((T([54712], f16), [965]), {}) +cnt: 1, ((T([54855], f16), [965]), {}) +cnt: 1, ((T([54725], f16), [965]), {}) +cnt: 1, ((T([54816], f16), [965]), {}) +cnt: 1, ((T([54807], f16), [965]), {}) +cnt: 1, ((T([54701], f16), [965]), {}) +cnt: 1, ((T([54813], f16), [965]), {}) +cnt: 1, ((T([54749], f16), [965]), {}) +cnt: 1, ((T([54736], f16), [965]), {}) +cnt: 1, ((T([54705], f16), [965]), {}) +cnt: 1, ((T([54750], f16), [965]), {}) +cnt: 1, ((T([54862], f16), [965]), {}) +cnt: 1, ((T([54762], f16), [965]), {}) +Operator: aten.relu.default +cnt: 
3, ((T([1024, 1500], f16),), {}) +cnt: 1, ((T([1024, 192], f16),), {}) +cnt: 9, ((T([1024, 4000], f16),), {}) +Operator: aten.scatter_add.default +cnt: 1, ((T([965], f16), 0, T([54765], i64), T([54765], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54704], i64), T([54704], f16)), {}) +cnt: 4, ((T([965], f16), 0, T([54786], i64), T([54786], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54804], i64), T([54804], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54757], i64), T([54757], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54746], i64), T([54746], f16)), {}) +cnt: 5, ((T([965], f16), 0, T([54781], i64), T([54781], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54687], i64), T([54687], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54738], i64), T([54738], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54784], i64), T([54784], f16)), {}) +cnt: 4, ((T([965], f16), 0, T([54787], i64), T([54787], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54768], i64), T([54768], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54697], i64), T([54697], f16)), {}) +cnt: 4, ((T([965], f16), 0, T([54833], i64), T([54833], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54809], i64), T([54809], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54713], i64), T([54713], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54814], i64), T([54814], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54802], i64), T([54802], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54789], i64), T([54789], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54743], i64), T([54743], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54731], i64), T([54731], f16)), {}) +cnt: 6, ((T([965], f16), 0, T([54760], i64), T([54760], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54771], i64), T([54771], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54723], i64), T([54723], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54812], i64), T([54812], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54799], i64), T([54799], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54745], i64), T([54745], f16)), {}) +cnt: 4, 
((T([965], f16), 0, T([54753], i64), T([54753], f16)), {}) +cnt: 5, ((T([965], f16), 0, T([54763], i64), T([54763], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54795], i64), T([54795], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54740], i64), T([54740], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54707], i64), T([54707], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54798], i64), T([54798], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54751], i64), T([54751], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54788], i64), T([54788], f16)), {}) +cnt: 4, ((T([965], f16), 0, T([54780], i64), T([54780], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54824], i64), T([54824], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54764], i64), T([54764], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54797], i64), T([54797], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54739], i64), T([54739], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54791], i64), T([54791], f16)), {}) +cnt: 5, ((T([965], f16), 0, T([54776], i64), T([54776], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54754], i64), T([54754], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54777], i64), T([54777], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54794], i64), T([54794], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54742], i64), T([54742], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54748], i64), T([54748], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54729], i64), T([54729], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54815], i64), T([54815], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54796], i64), T([54796], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54730], i64), T([54730], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54773], i64), T([54773], f16)), {}) +cnt: 4, ((T([965], f16), 0, T([54801], i64), T([54801], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54744], i64), T([54744], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54847], i64), T([54847], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54766], i64), T([54766], f16)), {}) +cnt: 3, ((T([965], f16), 0, 
T([54778], i64), T([54778], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54711], i64), T([54711], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54826], i64), T([54826], f16)), {}) +cnt: 5, ((T([965], f16), 0, T([54793], i64), T([54793], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54792], i64), T([54792], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54831], i64), T([54831], f16)), {}) +cnt: 6, ((T([965], f16), 0, T([54761], i64), T([54761], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54835], i64), T([54835], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54732], i64), T([54732], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54830], i64), T([54830], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54775], i64), T([54775], f16)), {}) +cnt: 4, ((T([965], f16), 0, T([54719], i64), T([54719], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54722], i64), T([54722], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54716], i64), T([54716], f16)), {}) +cnt: 5, ((T([965], f16), 0, T([54818], i64), T([54818], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54783], i64), T([54783], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54717], i64), T([54717], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54827], i64), T([54827], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54734], i64), T([54734], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54779], i64), T([54779], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54785], i64), T([54785], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54714], i64), T([54714], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54772], i64), T([54772], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54839], i64), T([54839], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54836], i64), T([54836], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54774], i64), T([54774], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54803], i64), T([54803], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54770], i64), T([54770], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54747], i64), T([54747], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54737], i64), T([54737], 
f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54741], i64), T([54741], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54800], i64), T([54800], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54811], i64), T([54811], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54758], i64), T([54758], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54829], i64), T([54829], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54838], i64), T([54838], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54759], i64), T([54759], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54733], i64), T([54733], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54844], i64), T([54844], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54718], i64), T([54718], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54842], i64), T([54842], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54769], i64), T([54769], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54821], i64), T([54821], f16)), {}) +cnt: 3, ((T([965], f16), 0, T([54782], i64), T([54782], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54710], i64), T([54710], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54820], i64), T([54820], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54692], i64), T([54692], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54727], i64), T([54727], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54767], i64), T([54767], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54819], i64), T([54819], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54756], i64), T([54756], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54843], i64), T([54843], f16)), {}) +cnt: 2, ((T([965], f16), 0, T([54735], i64), T([54735], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54715], i64), T([54715], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54828], i64), T([54828], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54712], i64), T([54712], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54855], i64), T([54855], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54725], i64), T([54725], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54816], i64), T([54816], f16)), {}) +cnt: 1, 
((T([965], f16), 0, T([54807], i64), T([54807], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54701], i64), T([54701], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54813], i64), T([54813], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54749], i64), T([54749], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54736], i64), T([54736], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54705], i64), T([54705], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54750], i64), T([54750], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54862], i64), T([54862], f16)), {}) +cnt: 1, ((T([965], f16), 0, T([54762], i64), T([54762], f16)), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([1024, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 1, ((T([1024, 1], f16, stride=(0, 0)), T([1024, 1], f16)), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([1024, 249, 249], f16), [1024, 249, 249], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([1024, 1], f16), [0], True), {}) +cnt: 9, ((T([1024, 4000], f16), [0], True), {}) +cnt: 1, ((T([1024, 192], f16), [0], True), {}) +cnt: 3, ((T([1024, 1500], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([1024, 1], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 9, ((T([1024, 4000], f16), T([1024, 4000], f16), 0), {}) +cnt: 1, ((T([1024, 192], f16), T([1024, 192], f16), 0), {}) +cnt: 3, ((T([1024, 1500], f16), T([1024, 1500], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/fastNLP_Bert_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/fastNLP_Bert_training.txt new file mode 100644 index 000000000..14639db6d --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/fastNLP_Bert_training.txt @@ -0,0 +1,157 @@ +Operator: aten._index_put_impl_.default +cnt: 1, ((T([6, 474, 768], f16), [T([6, 474], i64, stride=(1, 0)), T([6, 474], i64, stride=(475, 1))], T([6, 474, 768], f16), True, True), {}) +Operator: 
aten._softmax.default +cnt: 12, ((T([6, 12, 476, 476], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([6, 12, 476, 476], f16), T([6, 12, 476, 476], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([6, 474], i64),), {'dtype': i64, 'layout': torch.strided, 'device': "torch.device('cpu')"}) +cnt: 1, ((T([6], i64),), {'dtype': i64, 'device': 'cuda'}) +cnt: 1, ((T([6, 476], b8),), {'dtype': i64}) +cnt: 1, ((T([6, 1, 1, 476], i64),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([6, 12, 476, 64], f16), [72, 476, 64]), {}) +cnt: 12, ((T([6, 12, 64, 476], f16), [72, 64, 476]), {}) +cnt: 12, ((T([72, 476, 476], f16), [6, 12, 476, 476]), {}) +cnt: 12, ((T([72, 476, 64], f16), [6, 12, 476, 64]), {}) +cnt: 24, ((T([6, 476, 12, 64], f16), [6, 476, 768]), {}) +cnt: 12, ((T([6, 476, 768], f16), [2856, 768]), {}) +Operator: aten.add.Tensor +cnt: 6, ((T([], i64), 1), {}) +cnt: 6, ((T([], i64), 2), {}) +cnt: 1, ((T([6], i64), 1), {}) +cnt: 74, ((T([6, 476, 768], f16), T([6, 476, 768], f16)), {}) +cnt: 12, ((T([6, 12, 476, 476], f16), T([6, 1, 1, 476], f16)), {}) +cnt: 12, ((T([6, 476, 3072], f16), 1.0), {}) +cnt: 1, ((T([], f16), 0), {}) +cnt: 1, ((T([], f16), T([], f16)), {}) +cnt: 1, ((T([6, 474, 2], f16), T([6, 474, 2], f16)), {}) +cnt: 12, ((T([6, 476, 3072], f16), T([6, 476, 3072], f16)), {}) +Operator: aten.addmm.default +cnt: 48, ((T([768], f16), T([2856, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([2856, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([2856, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([768], f16), T([6, 768], f16, stride=(365568, 1)), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 1, ((T([2], f16), T([2844, 768], f16), T([768, 2], f16, stride=(1, 768))), {}) +Operator: aten.bitwise_xor.Tensor +cnt: 1, ((T([6, 1], i64, stride=(476, 1)), T([6, 476], i64)), {}) +Operator: 
aten.bmm.default +cnt: 12, ((T([72, 476, 64], f16), T([72, 64, 476], f16)), {}) +cnt: 12, ((T([72, 476, 476], f16), T([72, 476, 64], f16)), {}) +cnt: 12, ((T([72, 476, 476], f16, stride=(226576, 1, 476)), T([72, 476, 64], f16)), {}) +cnt: 12, ((T([72, 476, 64], f16), T([72, 64, 476], f16, stride=(30464, 1, 64))), {}) +cnt: 12, ((T([72, 64, 476], f16, stride=(30464, 1, 64)), T([72, 476, 476], f16)), {}) +cnt: 12, ((T([72, 476, 476], f16), T([72, 476, 64], f16, stride=(30464, 1, 476))), {}) +Operator: aten.cat.default +cnt: 1, (([T([6, 474, 768], f16)], -1), {}) +Operator: aten.clone.default +cnt: 1, ((T([6, 474], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([6, 474], i64), T([6, 474], i64)), {}) +cnt: 6, ((T([474], i64), T([474], i64)), {}) +cnt: 1, ((T([6, 474], i64, stride=(475, 1)), T([6, 474], i64)), {}) +cnt: 1, ((T([6, 474, 768], f16), T([6, 474, 768], f16)), {}) +cnt: 1, ((T([1, 6, 474, 768], f16), T([1, 6, 474, 768], f16)), {}) +Operator: aten.cumsum.default +cnt: 1, ((T([6, 476], i64), -1), {}) +cnt: 1, ((T([6, 474], i64), -1), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([6, 12, 476, 476], f16), 8.0), {}) +cnt: 24, ((T([6, 476, 3072], f16), 1.4142135623730951), {}) +cnt: 4, ((T([], f16), 2844), {}) +cnt: 2, ((T([], f16), 2), {}) +Operator: aten.embedding.default +cnt: 1, ((T([21128, 768], f16), T([6, 476], i64), 0), {}) +cnt: 1, ((T([512, 768], f16), T([6, 476], i64, stride=(0, 1))), {}) +cnt: 1, ((T([2, 768], f16), T([6, 476], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([6, 476, 768], f16), T([6, 476], i64), 2, -1, False), {}) +cnt: 1, ((T([6, 476, 768], f16), T([6, 476], i64, stride=(0, 1)), 512, -1, False), {}) +cnt: 1, ((T([6, 476, 768], f16), T([6, 476], i64), 21128, 0, False), {}) +Operator: aten.eq.Scalar +cnt: 1, ((T([6, 474], b8), False), {}) +cnt: 1, ((T([6, 476], i64), 511), {}) +cnt: 1, ((T([6, 474, 1], b8), False), {}) +Operator: aten.erf.default +cnt: 12, ((T([6, 476, 3072], f16),), {}) +Operator: 
aten.exp.default +cnt: 12, ((T([6, 476, 3072], f16),), {}) +Operator: aten.fill_.Scalar +cnt: 6, ((T([476], i64), 1), {}) +cnt: 1, ((T([6], i64, stride=(476,)), 2057), {}) +Operator: aten.flip.default +cnt: 2, ((T([6, 476], i64), [-1]), {}) +Operator: aten.fmod.Scalar +cnt: 1, ((T([6, 476], i64), 2), {}) +Operator: aten.ge.Scalar +cnt: 1, ((T([6, 474], i64, stride=(475, 1)), 474), {}) +Operator: aten.index.Tensor +cnt: 1, ((T([2869], i64), [T([6, 474], i64)]), {}) +cnt: 1, ((T([6, 474, 768], f16, stride=(365568, 768, 1)), [T([6, 474], i64, stride=(1, 0)), T([6, 474], i64, stride=(475, 1))]), {}) +Operator: aten.index_put_.default +cnt: 1, ((T([6, 476], i64), [T([6], i64), T([6], i64)], T([], i64)), {}) +Operator: aten.masked_fill.Scalar +cnt: 1, ((T([6, 474], i64), T([6, 474], b8), 0), {}) +cnt: 2, ((T([6, 474, 768], f16), T([6, 474, 1], b8), 0), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([6, 474], i64, stride=(475, 1)), T([6, 474], b8), 0), {}) +Operator: aten.max.default +cnt: 2, ((T([6], i64),), {}) +Operator: aten.mm.default +cnt: 1, ((T([2844, 2], f16), T([2, 768], f16)), {}) +cnt: 1, ((T([2, 2844], f16, stride=(1, 2)), T([2844, 768], f16)), {}) +cnt: 12, ((T([2856, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 2856], f16, stride=(1, 768)), T([2856, 3072], f16)), {}) +cnt: 12, ((T([2856, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 2856], f16, stride=(1, 3072)), T([2856, 768], f16)), {}) +cnt: 48, ((T([2856, 768], f16), T([768, 768], f16)), {}) +cnt: 48, ((T([768, 2856], f16, stride=(1, 768)), T([2856, 768], f16)), {}) +Operator: aten.mul.Scalar +cnt: 12, ((T([6, 476, 3072], f16), 1.1283791670955126), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([6, 1, 1, 476], f16), -10000.0), {}) +cnt: 24, ((T([6, 476, 3072], f16), 0.5), {}) +cnt: 48, ((T([6, 476, 3072], f16), T([6, 476, 3072], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([6, 476, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: 
aten.native_layer_norm_backward.default +cnt: 25, ((T([6, 476, 768], f16), T([6, 476, 768], f16), [768], T([6, 476, 1], f32), T([6, 476, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([6, 474], i64), 0), {}) +Operator: aten.neg.default +cnt: 12, ((T([6, 476, 3072], f16),), {}) +Operator: aten.new_empty_strided.default +cnt: 1, ((T([1, 6, 474, 768], f16), [1, 6, 474, 768], [2184192, 364032, 768, 1]), {}) +Operator: aten.new_full.default +cnt: 1, ((T([6, 474], i64), [6, 476], 2457), {'dtype': i64, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_zeros.default +cnt: 1, ((T([6, 476, 768], f16), [1, 6, 474, 768]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 1, ((T([6, 474], i64), [6, 475]), {'dtype': i64, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 1, ((T([6, 474, 768], f16), [6, 474, 768]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.pow.Tensor_Scalar +cnt: 12, ((T([6, 476, 3072], f16), 2), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([6, 1, 1, 476], f16), 1.0), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([6, 474], f16, stride=(0, 0)), [6, 474, 2], 2, 1), {}) +cnt: 1, ((T([6, 474], f16, stride=(0, 0)), [6, 474, 2], 2, 0), {}) +Operator: aten.slice_backward.default +cnt: 2, ((T([6, 474, 2], f16), [6, 474, 2], 1, 0, 9223372036854775807, 1), {}) +cnt: 2, ((T([6, 474, 2], f16), [6, 474, 2], 0, 0, 9223372036854775807, 1), {}) +cnt: 1, ((T([6, 474, 768], f16), [6, 476, 768], 1, 1, -1, 1), {}) +cnt: 1, ((T([6, 476, 768], f16), [6, 476, 768], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.stack.default +cnt: 1, (([T([6, 474, 768], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([2844, 2], f16), [0], True), {}) +cnt: 60, ((T([2856, 768], f16), [0], True), {}) +cnt: 12, ((T([2856, 3072], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 2, ((T([6, 474], f16, 
stride=(948, 2)),), {}) +Operator: aten.sum.dim_IntList +cnt: 1, ((T([6, 474], b8), [-1]), {}) +cnt: 2, ((T([6, 474], i64), [-1]), {}) +Operator: aten.tanh.default +cnt: 1, ((T([6, 768], f16),), {}) +Operator: aten.unbind.int +cnt: 1, ((T([1, 6, 474, 768], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Albert_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Albert_training.txt new file mode 100644 index 000000000..9dc41c8ff --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Albert_training.txt @@ -0,0 +1,110 @@ +Operator: aten._softmax.default +cnt: 12, ((T([8, 12, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([8, 12, 512, 512], f16), T([8, 12, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([8, 1, 1, 512], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([8, 12, 512, 64], f16), [96, 512, 64]), {}) +cnt: 12, ((T([8, 12, 64, 512], f16), [96, 64, 512]), {}) +cnt: 12, ((T([96, 512, 512], f16), [8, 12, 512, 512]), {}) +cnt: 12, ((T([96, 512, 64], f16), [8, 12, 512, 64]), {}) +cnt: 36, ((T([8, 512, 12, 64], f16), [8, 512, 768]), {}) +cnt: 12, ((T([8, 512, 768], f16), [4096, 768]), {}) +Operator: aten.add.Tensor +cnt: 4, ((T([8, 512, 128], f16), T([8, 512, 128], f16)), {}) +cnt: 12, ((T([8, 12, 512, 512], f16), T([8, 1, 1, 512], f16)), {}) +cnt: 72, ((T([8, 512, 768], f16), T([8, 512, 768], f16)), {}) +cnt: 36, ((T([8, 512, 3072], f16), T([8, 512, 3072], f16)), {}) +cnt: 12, ((T([8, 512, 3072], f16), 1.0), {}) +cnt: 1, ((T([8, 512, 128], f16), 1.0), {}) +cnt: 99, ((T([768], f16), T([768], f16)), {}) +cnt: 11, ((T([768, 3072], f16), T([768, 3072], f16)), {}) +cnt: 11, ((T([3072], f16), T([3072], f16)), {}) +cnt: 11, ((T([3072, 768], f16), T([3072, 768], f16)), {}) +cnt: 44, ((T([768, 768], f16), T([768, 768], f16)), {}) +cnt: 1, ((T([30000, 128], f16), 
T([30000, 128], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([8, 512, 128], f16), T([1, 512, 128], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([768], f16), T([4096, 128], f16), T([128, 768], f16, stride=(1, 128))), {}) +cnt: 48, ((T([768], f16), T([4096, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([4096, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([4096, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([128], f16), T([4096, 768], f16), T([768, 128], f16, stride=(1, 768))), {}) +cnt: 1, ((T([30000], f16), T([4096, 128], f16), T([128, 30000], f16, stride=(1, 128))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([96, 512, 64], f16), T([96, 64, 512], f16)), {}) +cnt: 12, ((T([96, 512, 512], f16), T([96, 512, 64], f16)), {}) +cnt: 12, ((T([96, 512, 512], f16, stride=(262144, 1, 512)), T([96, 512, 64], f16)), {}) +cnt: 12, ((T([96, 512, 64], f16), T([96, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 12, ((T([96, 64, 512], f16, stride=(32768, 1, 64)), T([96, 512, 512], f16)), {}) +cnt: 12, ((T([96, 512, 512], f16), T([96, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.clone.default +cnt: 1, ((T([8, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([8, 512], i64), T([8, 512], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([8, 12, 512, 512], f16), 8.0), {}) +cnt: 2, ((T([], f16), 122880000), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30000, 128], f16), T([8, 512], i64), 0), {}) +cnt: 1, ((T([2, 128], f16), T([8, 512], i64, stride=(0, 1))), {}) +cnt: 1, ((T([512, 128], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 128], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([8, 512, 128], f16), T([8, 512], i64, stride=(0, 1)), 2, -1, False), {}) +cnt: 1, ((T([8, 512, 128], f16), T([8, 512], i64), 30000, 0, False), {}) +Operator: aten.mm.default +cnt: 1, 
((T([4096, 30000], f16, stride=(0, 0)), T([30000, 128], f16)), {}) +cnt: 1, ((T([30000, 4096], f16, stride=(0, 0)), T([4096, 128], f16)), {}) +cnt: 1, ((T([4096, 128], f16), T([128, 768], f16)), {}) +cnt: 1, ((T([128, 4096], f16, stride=(1, 128)), T([4096, 768], f16)), {}) +cnt: 12, ((T([4096, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 4096], f16, stride=(1, 768)), T([4096, 3072], f16)), {}) +cnt: 12, ((T([4096, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 4096], f16, stride=(1, 3072)), T([4096, 768], f16)), {}) +cnt: 48, ((T([4096, 768], f16), T([768, 768], f16)), {}) +cnt: 48, ((T([768, 4096], f16, stride=(1, 768)), T([4096, 768], f16)), {}) +cnt: 1, ((T([4096, 768], f16), T([768, 128], f16)), {}) +cnt: 1, ((T([768, 4096], f16, stride=(1, 768)), T([4096, 128], f16)), {}) +Operator: aten.mul.Scalar +cnt: 1, ((T([8, 512, 128], f16), 3.0), {}) +cnt: 12, ((T([8, 512, 3072], f16), 3.0), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([8, 1, 1, 512], f16), -65504.0), {}) +cnt: 24, ((T([8, 512, 3072], f16), 0.5), {}) +cnt: 24, ((T([8, 512, 3072], f16), 0.044715), {}) +cnt: 24, ((T([8, 512, 3072], f16), 0.7978845608028654), {}) +cnt: 48, ((T([8, 512, 3072], f16), T([8, 512, 3072], f16)), {}) +cnt: 2, ((T([8, 512, 128], f16), 0.5), {}) +cnt: 2, ((T([8, 512, 128], f16), 0.044715), {}) +cnt: 2, ((T([8, 512, 128], f16), 0.7978845608028654), {}) +cnt: 4, ((T([8, 512, 128], f16), T([8, 512, 128], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 2, ((T([8, 512, 128], f16), [128], T([128], f16), T([128], f16), 1e-12), {}) +cnt: 24, ((T([8, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 2, ((T([8, 512, 128], f16), T([8, 512, 128], f16), [128], T([8, 512, 1], f32), T([8, 512, 1], f32), T([128], f16), T([128], f16), [True, True, True]), {}) +cnt: 24, ((T([8, 512, 768], f16), T([8, 512, 768], f16), [768], T([8, 512, 1], f32), T([8, 512, 1], f32), T([768], f16), T([768], 
f16), [True, True, True]), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 12, ((T([8, 512, 3072], f16), 3.0), {}) +cnt: 1, ((T([8, 512, 128], f16), 3.0), {}) +cnt: 1, ((T([8, 512, 128], f16), 2.0), {}) +cnt: 12, ((T([8, 512, 3072], f16), 2.0), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([8, 1, 1, 512], f16), 1.0), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([4096, 30000], f16, stride=(0, 0)), [0], True), {}) +cnt: 1, ((T([4096, 128], f16), [0], True), {}) +cnt: 61, ((T([4096, 768], f16), [0], True), {}) +cnt: 12, ((T([4096, 3072], f16), [0], True), {}) +cnt: 1, ((T([8, 512, 128], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([8, 512, 30000], f16),), {}) +Operator: aten.tanh.default +cnt: 12, ((T([8, 512, 3072], f16),), {}) +cnt: 1, ((T([8, 512, 128], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 1, ((T([8, 512, 128], f16), T([8, 512, 128], f16)), {}) +cnt: 12, ((T([8, 512, 3072], f16), T([8, 512, 3072], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Bart_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Bart_training.txt new file mode 100644 index 000000000..96ff5f455 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Bart_training.txt @@ -0,0 +1,76 @@ +Operator: aten._softmax.default +cnt: 18, ((T([48, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 18, ((T([48, 512, 512], f16), T([48, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([512, 512], f32),), {'dtype': f16}) +cnt: 1, ((T([4, 1, 512, 512], f16, stride=(0, 262144, 512, 1)),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 54, ((T([4, 512, 12, 64], f16), [4, 512, 768]), {}) +cnt: 1, ((T([2048, 50265], f16), [4, 512, 50265]), {}) +cnt: 18, ((T([4, 12, 512, 64], f16), [48, 512, 64]), {}) +cnt: 18, ((T([4, 512, 768], f16), [2048, 768]), {}) 
+Operator: aten.add.Tensor +cnt: 2, ((T([4, 512], i64, stride=(0, 1)), 2), {}) +cnt: 97, ((T([4, 512, 768], f16), T([4, 512, 768], f16)), {}) +cnt: 1, ((T([512], i64), 1), {}) +cnt: 6, ((T([4, 12, 512, 512], f16), T([4, 1, 512, 512], f16)), {}) +cnt: 1, ((T([4, 512, 50265], f16), T([1, 50265], f16)), {}) +cnt: 2, ((T([50265, 768], f16), T([50265, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 72, ((T([768], f16), T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([2048, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([2048, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +Operator: aten.any.default +cnt: 12, ((T([4, 512, 768], b8),), {}) +Operator: aten.bmm.default +cnt: 36, ((T([48, 512, 64], f16), T([48, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 36, ((T([48, 512, 512], f16), T([48, 512, 64], f16)), {}) +cnt: 18, ((T([48, 512, 512], f16, stride=(262144, 1, 512)), T([48, 512, 64], f16)), {}) +cnt: 18, ((T([48, 64, 512], f16, stride=(32768, 1, 64)), T([48, 512, 512], f16)), {}) +Operator: aten.clone.default +cnt: 2, ((T([4, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 2, ((T([4, 512], i64), T([4, 512], i64)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 102942720), {}) +Operator: aten.embedding.default +cnt: 2, ((T([50265, 768], f16), T([4, 512], i64), 1), {}) +cnt: 2, ((T([1026, 768], f16), T([4, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 2, ((T([4, 512, 768], f16), T([4, 512], i64), 1026, -1, False), {}) +cnt: 2, ((T([4, 512, 768], f16), T([4, 512], i64), 50265, 1, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([4, 512, 3072], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([4, 512, 3072], f16), T([4, 512, 3072], f16)), {}) +Operator: aten.isinf.default +cnt: 6, ((T([4, 512, 768], f16),), {}) +Operator: aten.isnan.default +cnt: 6, ((T([4, 512, 768], f16),), {}) +Operator: aten.lt.Tensor +cnt: 1, 
((T([512], i64), T([512, 1], i64)), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([512, 512], f32), T([512, 512], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 768], f16), T([768, 50265], f16, stride=(1, 768))), {}) +cnt: 1, ((T([50265, 2048], f16, stride=(0, 0)), T([2048, 768], f16)), {}) +cnt: 1, ((T([2048, 50265], f16, stride=(0, 0)), T([50265, 768], f16)), {}) +cnt: 12, ((T([2048, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 3072], f16)), {}) +cnt: 12, ((T([2048, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 2048], f16, stride=(1, 3072)), T([2048, 768], f16)), {}) +cnt: 72, ((T([2048, 768], f16), T([768, 768], f16)), {}) +cnt: 72, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([4, 512, 768], f16), 1.0), {}) +cnt: 36, ((T([4, 512, 768], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 32, ((T([4, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 32, ((T([4, 512, 768], f16), T([4, 512, 768], f16), [768], T([4, 512, 1], f32), T([4, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.sum.SymInt +cnt: 84, ((T([2048, 768], f16), [0], True), {}) +cnt: 12, ((T([2048, 3072], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([4, 512, 50265], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Bert_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Bert_training.txt new file mode 100644 index 000000000..59a786f12 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Bert_training.txt @@ -0,0 +1,76 @@ +Operator: aten._softmax.default +cnt: 12, ((T([4, 12, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([4, 12, 512, 512], f16), T([4, 12, 512, 
512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([4, 1, 1, 512], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([4, 12, 512, 64], f16), [48, 512, 64]), {}) +cnt: 12, ((T([4, 12, 64, 512], f16), [48, 64, 512]), {}) +cnt: 12, ((T([48, 512, 512], f16), [4, 12, 512, 512]), {}) +cnt: 12, ((T([48, 512, 64], f16), [4, 12, 512, 64]), {}) +cnt: 24, ((T([4, 512, 12, 64], f16), [4, 512, 768]), {}) +cnt: 12, ((T([4, 512, 768], f16), [2048, 768]), {}) +Operator: aten.add.Tensor +cnt: 73, ((T([4, 512, 768], f16), T([4, 512, 768], f16)), {}) +cnt: 12, ((T([4, 12, 512, 512], f16), T([4, 1, 1, 512], f16)), {}) +cnt: 1, ((T([30522, 768], f16), T([30522, 768], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([4, 512, 768], f16), T([1, 512, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 49, ((T([768], f16), T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([2048, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([2048, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([30522], f16), T([2048, 768], f16), T([768, 30522], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([48, 512, 64], f16), T([48, 64, 512], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16), T([48, 512, 64], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16, stride=(262144, 1, 512)), T([48, 512, 64], f16)), {}) +cnt: 12, ((T([48, 512, 64], f16), T([48, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 12, ((T([48, 64, 512], f16, stride=(32768, 1, 64)), T([48, 512, 512], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16), T([48, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.clone.default +cnt: 1, ((T([4, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([4, 512], i64), T([4, 512], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([4, 12, 512, 512], f16), 8.0), {}) +cnt: 2, ((T([], f16), 62509056), {}) +Operator: aten.embedding.default 
+cnt: 1, ((T([30522, 768], f16), T([4, 512], i64), 0), {}) +cnt: 1, ((T([2, 768], f16), T([4, 512], i64, stride=(0, 1))), {}) +cnt: 1, ((T([512, 768], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([4, 512, 768], f16), T([4, 512], i64, stride=(0, 1)), 2, -1, False), {}) +cnt: 1, ((T([4, 512, 768], f16), T([4, 512], i64), 30522, 0, False), {}) +Operator: aten.gelu.default +cnt: 12, ((T([4, 512, 3072], f16),), {}) +cnt: 1, ((T([4, 512, 768], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([4, 512, 768], f16), T([4, 512, 768], f16)), {}) +cnt: 12, ((T([4, 512, 3072], f16), T([4, 512, 3072], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 30522], f16, stride=(0, 0)), T([30522, 768], f16)), {}) +cnt: 1, ((T([30522, 2048], f16, stride=(0, 0)), T([2048, 768], f16)), {}) +cnt: 49, ((T([2048, 768], f16), T([768, 768], f16)), {}) +cnt: 49, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2048, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 3072], f16)), {}) +cnt: 12, ((T([2048, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 2048], f16, stride=(1, 3072)), T([2048, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([4, 1, 1, 512], f16), -65504.0), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([4, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([4, 512, 768], f16), T([4, 512, 768], f16), [768], T([4, 512, 1], f32), T([4, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([4, 1, 1, 512], f16), 1.0), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([2048, 30522], f16, stride=(0, 0)), [0], True), {}) +cnt: 61, ((T([2048, 768], f16), [0], True), {}) +cnt: 12, ((T([2048, 3072], f16), [0], True), 
{}) +cnt: 1, ((T([4, 512, 768], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([4, 512, 30522], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_BigBird_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_BigBird_training.txt new file mode 100644 index 000000000..924d9eb84 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_BigBird_training.txt @@ -0,0 +1,235 @@ +Operator: aten._softmax.default +cnt: 24, ((T([2, 12, 64, 1024], f16), -1, False), {}) +cnt: 24, ((T([2, 12, 64, 448], f16), -1, False), {}) +cnt: 12, ((T([2, 12, 12, 64, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 24, ((T([2, 12, 64, 1024], f16), T([2, 12, 64, 1024], f16), -1, f16), {}) +cnt: 24, ((T([2, 12, 64, 448], f16), T([2, 12, 64, 448], f16), -1, f16), {}) +cnt: 12, ((T([2, 12, 12, 64, 512], f16), T([2, 12, 12, 64, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 12, ((T([2, 1, 12, 64, 192], f32),), {'dtype': f16}) +cnt: 12, ((T([2, 1, 1024, 1], f32),), {'dtype': f16}) +cnt: 12, ((T([2, 1, 1, 1024], f32),), {'dtype': f16}) +cnt: 12, ((T([12, 14, 3], i32),), {'dtype': i64, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 24, ((T([2, 12, 16, 64, 64], f16), [384, 64, 64]), {}) +cnt: 96, ((T([2, 12, 64, 64], f16), [24, 64, 64]), {}) +cnt: 48, ((T([2, 12, 1024, 64], f16), [24, 1024, 64]), {}) +cnt: 24, ((T([2, 12, 12, 64, 64], f16), [288, 64, 64]), {}) +cnt: 24, ((T([2, 12, 12, 192, 64], f16), [288, 192, 64]), {}) +cnt: 24, ((T([2, 12, 12, 64, 64, 1], f16), [24, 768, 64]), {}) +cnt: 48, ((T([2, 12, 64, 64, 1, 1], f16), [24, 64, 64]), {}) +cnt: 24, ((T([2, 1024, 12, 64], f16), [2, 1024, 768]), {}) +cnt: 12, ((T([2, 1024, 768], f16), [2048, 768]), {}) +Operator: aten.add.Tensor +cnt: 76, ((T([2, 1024, 768], f16), T([2, 1024, 768], f16)), {}) +cnt: 24, ((T([1008], i64), T([1008], i64)), {}) +cnt: 36, ((T([2, 1024, 
3072], f16), T([2, 1024, 3072], f16)), {}) +cnt: 12, ((T([2, 1024, 3072], f16), 1.0), {}) +cnt: 1, ((T([2, 1024, 768], f16), 1.0), {}) +cnt: 360, ((T([2, 12, 16, 64, 64], f16), T([2, 12, 16, 64, 64], f16)), {}) +cnt: 36, ((T([2, 12, 12, 64, 512], f16), T([2, 12, 12, 64, 512], f16)), {}) +cnt: 48, ((T([2, 12, 14, 192, 64], f16), T([2, 12, 14, 192, 64], f16)), {}) +cnt: 36, ((T([2, 12, 12, 64, 64], f16), T([2, 12, 12, 64, 64], f16)), {}) +cnt: 24, ((T([2, 12, 1024, 64], f16), T([2, 12, 1024, 64], f16)), {}) +cnt: 12, ((T([2, 12, 1024, 64], f16, stride=(786432, 65536, 1, 1024)), T([2, 12, 1024, 64], f16, stride=(786432, 65536, 1, 1024))), {}) +cnt: 12, ((T([2, 12, 1024, 64], f16, stride=(786432, 65536, 1, 1024)), T([2, 12, 1024, 64], f16)), {}) +cnt: 1, ((T([50358, 768], f16), T([50358, 768], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([2, 1024, 768], f16), T([1, 1024, 768], f16)), {}) +cnt: 24, ((T([2, 12, 64, 1024], f16), T([2, 1, 1, 1024], f16)), {}) +cnt: 24, ((T([2, 12, 64, 448], f16), T([2, 12, 64, 448], f32)), {}) +cnt: 12, ((T([2, 12, 12, 64, 192], f16), T([2, 1, 12, 64, 192], f16)), {}) +cnt: 24, ((T([2, 12, 12, 64, 64], f16), T([2, 1, 1, 1, 64], f16)), {}) +cnt: 12, ((T([2, 12, 12, 64, 192], f16), T([2, 12, 12, 64, 192], f32)), {}) +cnt: 36, ((T([2, 12, 12, 64, 64], f16), T([2, 12, 12, 64, 64], f16)), {}) +Operator: aten.addmm.default +cnt: 49, ((T([768], f16), T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072], f16), T([2048, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([2048, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([768], f16), T([2, 768], f16, stride=(786432, 1)), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 1, ((T([50358], f16), T([2048, 768], f16), T([768, 50358], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 48, ((T([24, 64, 64], f16), T([24, 64, 1024], f16, stride=(65536, 1, 64))), {}) +cnt: 48, ((T([24, 64, 1024], f16), 
T([24, 1024, 64], f16)), {}) +cnt: 48, ((T([24, 64, 64], f16), T([24, 64, 448], f16, stride=(28672, 1, 64))), {}) +cnt: 48, ((T([24, 64, 448], f16), T([24, 448, 64], f16)), {}) +cnt: 48, ((T([288, 64, 64], f16), T([288, 64, 192], f16, stride=(12288, 1, 64))), {}) +cnt: 24, ((T([24, 768, 64], f16), T([24, 64, 64], f16)), {}) +cnt: 24, ((T([288, 64, 192], f16, stride=(32768, 512, 1)), T([288, 192, 64], f16)), {}) +cnt: 24, ((T([24, 768, 64], f16, stride=(393216, 512, 1)), T([24, 64, 64], f16)), {}) +cnt: 24, ((T([24, 1024, 64], f16, stride=(65536, 1, 1024)), T([24, 64, 64], f16)), {}) +cnt: 24, ((T([24, 64, 64], f16, stride=(4096, 1, 64)), T([24, 64, 1024], f16)), {}) +cnt: 24, ((T([24, 448, 64], f16, stride=(28672, 1, 448)), T([24, 64, 64], f16)), {}) +cnt: 24, ((T([24, 64, 64], f16, stride=(4096, 1, 64)), T([24, 64, 448], f16)), {}) +cnt: 24, ((T([24, 64, 768], f16, stride=(393216, 1, 512)), T([24, 768, 64], f16)), {}) +cnt: 48, ((T([24, 768, 64], f16), T([24, 64, 64], f16, stride=(4096, 1, 64))), {}) +cnt: 24, ((T([288, 192, 64], f16, stride=(32768, 1, 512)), T([288, 64, 64], f16)), {}) +cnt: 24, ((T([24, 64, 768], f16, stride=(49152, 1, 64)), T([24, 768, 64], f16)), {}) +cnt: 24, ((T([288, 64, 64], f16, stride=(4096, 1, 64)), T([288, 64, 192], f16)), {}) +cnt: 24, ((T([288, 64, 192], f16), T([288, 192, 64], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([2, 12, 64], f32, stride=(1024, 64, 1)), T([2, 12, 64], f32, stride=(1024, 64, 1)), T([2, 12, 64], f32, stride=(1024, 64, 1))], 2), {}) +cnt: 12, (([T([1, 12, 14, 3], i64), T([1, 12, 14, 3], i64)],), {}) +cnt: 48, (([T([2, 12, 64, 64], f16, stride=(786432, 64, 768, 1)), T([2, 12, 64, 64], f16, stride=(786432, 64, 768, 1)), T([2, 12, 64, 64], f16, stride=(786432, 64, 768, 1)), T([2, 12, 64, 64], f16, stride=(786432, 64, 768, 1)), T([2, 12, 192, 64], f16, stride=(2064384, 172032, 64, 1))], 2), {}) +cnt: 12, (([T([2, 1, 1, 192], f16, stride=(1024, 1024, 1024, 1)), T([2, 1, 1, 64], f16, stride=(1024, 1024, 1024, 
1)), T([2, 1, 1, 192], f16)], 3), {}) +cnt: 24, (([T([2, 12, 64, 256], f32), T([2, 12, 64, 192], f32, stride=(2064384, 172032, 192, 1))], 3), {}) +cnt: 24, (([T([2, 12, 12, 64, 64], f16, stride=(786432, 64, 49152, 768, 1)), T([2, 12, 12, 64, 64], f16, stride=(786432, 64, 49152, 768, 1)), T([2, 12, 12, 64, 64], f16, stride=(786432, 64, 49152, 768, 1))], 3), {}) +cnt: 12, (([T([2, 12, 12, 64, 64], f16), T([2, 12, 12, 64, 192], f16), T([2, 12, 12, 64, 192], f16), T([2, 12, 12, 64, 64], f16)], -1), {}) +cnt: 12, (([T([2, 1, 1, 64], f16, stride=(1024, 1024, 1024, 1)), T([2, 1, 1, 192], f16, stride=(1024, 1024, 1024, 1)), T([2, 1, 1, 192], f16)], 3), {}) +cnt: 12, (([T([2, 12, 1, 64, 64], f16), T([2, 12, 1, 64, 64], f16), T([2, 12, 12, 64, 64], f16), T([2, 12, 1, 64, 64], f16), T([2, 12, 1, 64, 64], f16)], 2), {}) +Operator: aten.clone.default +cnt: 1, ((T([2, 1024], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([2, 1024], i64), T([2, 1024], i64)), {}) +cnt: 12, ((T([2, 12, 12, 64, 64], f16), T([2, 12, 12, 64, 64], f16, stride=(786432, 64, 49152, 768, 1))), {}) +cnt: 36, ((T([288, 64, 64], f16), T([288, 64, 64], f16)), {}) +cnt: 36, ((T([2, 12, 12, 64, 64], f16), T([2, 12, 12, 64, 64], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 103133184), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50358, 768], f16), T([2, 1024], i64), 0), {}) +cnt: 1, ((T([2, 768], f16), T([2, 1024], i64, stride=(0, 1))), {}) +cnt: 1, ((T([4096, 768], f16), T([1, 1024], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 1024, 768], f16), T([1, 1024], i64), 4096, -1, False), {}) +cnt: 1, ((T([2, 1024, 768], f16), T([2, 1024], i64, stride=(0, 1)), 2, -1, False), {}) +cnt: 1, ((T([2, 1024, 768], f16), T([2, 1024], i64), 50358, 0, False), {}) +Operator: aten.floor_divide.default +cnt: 24, ((T([1008], i64), 42), {}) +Operator: aten.index.Tensor +cnt: 24, ((T([16, 64], f32), [T([504], i64)]), {}) +Operator: aten.index_add.default +cnt: 24, 
((T([384, 64, 64], f16), 0, T([1008], i64), T([1008, 64, 64], f16)), {}) +Operator: aten.index_select.default +cnt: 24, ((T([384, 64, 64], f16), 0, T([1008], i64)), {}) +Operator: aten.minimum.default +cnt: 24, ((T([2, 1, 1, 448], f16), T([2, 12, 64, 448], f32)), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 50358], f16, stride=(0, 0)), T([50358, 768], f16)), {}) +cnt: 1, ((T([50358, 2048], f16, stride=(0, 0)), T([2048, 768], f16)), {}) +cnt: 49, ((T([2048, 768], f16), T([768, 768], f16)), {}) +cnt: 49, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2048, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 3072], f16)), {}) +cnt: 12, ((T([2048, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 2048], f16, stride=(1, 3072)), T([2048, 768], f16)), {}) +Operator: aten.mul.Scalar +cnt: 1, ((T([2, 1024, 768], f16), 3.0), {}) +cnt: 12, ((T([2, 1024, 3072], f16), 3.0), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([2, 12, 64, 1], f32, stride=(1024, 64, 1, 1)), T([2, 12, 1, 192], f32)), {}) +cnt: 12, ((T([2, 1, 14, 64, 1], f32, stride=(1024, 1, 64, 1, 1)), T([2, 12, 14, 1, 192], f32)), {}) +cnt: 24, ((T([1008], i64), 16), {}) +cnt: 48, ((T([2, 12, 64, 1024], f16), 0.125), {}) +cnt: 24, ((T([2, 1, 1, 1024], f16), -10000.0), {}) +cnt: 48, ((T([2, 12, 64, 448], f16), 0.125), {}) +cnt: 24, ((T([2, 12, 64, 448], f32), -10000.0), {}) +cnt: 24, ((T([2, 12, 12, 64, 192], f16), 0.125), {}) +cnt: 24, ((T([2, 12, 12, 64, 64], f16), 0.125), {}) +cnt: 12, ((T([2, 1, 12, 64, 192], f16), -10000.0), {}) +cnt: 24, ((T([2, 1, 1, 1, 64], f16), -10000.0), {}) +cnt: 12, ((T([2, 12, 12, 64, 192], f32), -10000.0), {}) +cnt: 12, ((T([2, 12, 1024, 64], f16), T([2, 1, 1024, 1], f16)), {}) +cnt: 24, ((T([2, 1024, 3072], f16), 0.5), {}) +cnt: 24, ((T([2, 1024, 3072], f16), 0.044715), {}) +cnt: 24, ((T([2, 1024, 3072], f16), 0.7978845608028654), {}) +cnt: 48, ((T([2, 1024, 3072], f16), T([2, 1024, 3072], f16)), 
{}) +cnt: 2, ((T([2, 1024, 768], f16), 0.5), {}) +cnt: 2, ((T([2, 1024, 768], f16), 0.044715), {}) +cnt: 2, ((T([2, 1024, 768], f16), 0.7978845608028654), {}) +cnt: 4, ((T([2, 1024, 768], f16), T([2, 1024, 768], f16)), {}) +cnt: 12, ((T([2, 12, 1024, 64], f16, stride=(786432, 64, 768, 1)), T([2, 1, 1024, 1], f16)), {}) +cnt: 24, ((T([2, 12, 12, 64, 64], f16, stride=(4718592, 393216, 32768, 512, 1)), 0.125), {}) +cnt: 24, ((T([2, 12, 12, 64, 192], f16, stride=(4718592, 393216, 32768, 512, 1)), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([2, 1024, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([2, 1024, 768], f16), T([2, 1024, 768], f16), [768], T([2, 1024, 1], f32), T([2, 1024, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 36, ((T([288, 64, 64], f16), [288, 64, 64], [4096, 64, 1]), {}) +Operator: aten.new_ones.default +cnt: 24, ((T([2, 1, 1, 1024], f16), [2, 1, 1, 192]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 24, ((T([2, 12, 14, 64, 192], f32), [2, 12, 64, 256]), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_zeros.default +cnt: 12, ((T([2, 12, 12, 64, 64], f16, stride=(786432, 64, 49152, 768, 1)), [1179648]), {}) +cnt: 24, ((T([1008, 64, 64], f16), [384, 64, 64]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.pow.Tensor_Scalar +cnt: 12, ((T([2, 1024, 3072], f16), 3.0), {}) +cnt: 1, ((T([2, 1024, 768], f16), 3.0), {}) +cnt: 1, ((T([2, 1024, 768], f16), 2.0), {}) +cnt: 12, ((T([2, 1024, 3072], f16), 2.0), {}) +Operator: aten.rsub.Scalar +cnt: 24, ((T([2, 1, 1, 1024], f16), 1.0), {}) +cnt: 24, ((T([2, 12, 64, 448], f32), 1.0), {}) +cnt: 12, ((T([2, 1, 12, 64, 192], f16), 1.0), {}) +cnt: 24, ((T([2, 1, 1, 1, 64], f16, stride=(1024, 1024, 1024, 64, 1)), 1.0), {}) +cnt: 12, 
((T([2, 12, 12, 64, 192], f32, stride=(2064384, 172032, 12288, 192, 1)), 1.0), {}) +Operator: aten.select_backward.default +cnt: 24, ((T([2, 12, 64, 64], f16), [2, 12, 16, 64, 64], 2, -1), {}) +cnt: 12, ((T([2, 12, 64, 64], f16), [2, 12, 16, 64, 64], 2, -2), {}) +cnt: 12, ((T([2, 12, 192, 64], f16, stride=(344064, 28672, 64, 1)), [2, 12, 14, 192, 64], 2, -1), {}) +cnt: 24, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [2, 12, 16, 64, 64], 2, -1), {}) +cnt: 12, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [2, 12, 16, 64, 64], 2, -2), {}) +cnt: 12, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [2, 12, 16, 64, 64], 2, -3), {}) +cnt: 24, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [2, 12, 16, 64, 64], 2, 0), {}) +cnt: 12, ((T([2, 12, 192, 64], f16, stride=(344064, 28672, 1, 448)), [2, 12, 14, 192, 64], 2, -1), {}) +cnt: 24, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [2, 12, 16, 64, 64], 2, -1), {}) +cnt: 12, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [2, 12, 16, 64, 64], 2, -2), {}) +cnt: 12, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [2, 12, 16, 64, 64], 2, -3), {}) +cnt: 24, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [2, 12, 16, 64, 64], 2, 0), {}) +cnt: 24, ((T([2, 12, 64, 64], f16), [2, 12, 16, 64, 64], 2, 0), {}) +cnt: 12, ((T([2, 12, 64, 64], f16, stride=(49152, 4096, 1, 64)), [2, 12, 16, 64, 64], 2, -1), {}) +cnt: 12, ((T([2, 12, 64, 64], f16, stride=(49152, 4096, 1, 64)), [2, 12, 16, 64, 64], 2, 0), {}) +cnt: 12, ((T([2, 12, 64, 64], f16), [2, 12, 16, 64, 64], 2, 1), {}) +cnt: 12, ((T([2, 12, 192, 64], f16, stride=(344064, 28672, 64, 1)), [2, 12, 14, 192, 64], 2, 0), {}) +cnt: 12, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [2, 12, 16, 64, 64], 2, 2), {}) +cnt: 12, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 64, 1)), [2, 12, 16, 64, 64], 2, 1), {}) +cnt: 12, ((T([2, 12, 192, 64], f16, stride=(344064, 28672, 1, 448)), [2, 12, 14, 
192, 64], 2, 0), {}) +cnt: 12, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [2, 12, 16, 64, 64], 2, 2), {}) +cnt: 12, ((T([2, 12, 64, 64], f16, stride=(344064, 28672, 1, 448)), [2, 12, 16, 64, 64], 2, 1), {}) +Operator: aten.slice_backward.default +cnt: 372, ((T([2, 12, 16, 64, 64], f16), [2, 12, 16, 64, 64], 1, 0, 9223372036854775807, 1), {}) +cnt: 372, ((T([2, 12, 16, 64, 64], f16), [2, 12, 16, 64, 64], 0, 0, 9223372036854775807, 1), {}) +cnt: 72, ((T([2, 12, 14, 192, 64], f16), [2, 12, 14, 192, 64], 1, 0, 9223372036854775807, 1), {}) +cnt: 72, ((T([2, 12, 14, 192, 64], f16), [2, 12, 14, 192, 64], 0, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 64], f16), [2, 12, 12, 64, 512], 4, -64, 9223372036854775807, 1), {}) +cnt: 48, ((T([2, 12, 12, 64, 512], f16), [2, 12, 12, 64, 512], 3, 0, 9223372036854775807, 1), {}) +cnt: 48, ((T([2, 12, 12, 64, 512], f16), [2, 12, 12, 64, 512], 2, 0, 9223372036854775807, 1), {}) +cnt: 48, ((T([2, 12, 12, 64, 512], f16), [2, 12, 12, 64, 512], 1, 0, 9223372036854775807, 1), {}) +cnt: 48, ((T([2, 12, 12, 64, 512], f16), [2, 12, 12, 64, 512], 0, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 64], f16), [2, 12, 12, 64, 512], 4, 0, 64, 1), {}) +cnt: 12, ((T([2, 12, 12, 192, 64], f16), [2, 12, 14, 192, 64], 2, 1, -1, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 192], f16), [2, 12, 12, 64, 512], 4, 256, -64, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 192], f16), [2, 12, 12, 64, 512], 4, 64, 256, 1), {}) +cnt: 12, ((T([2, 12, 12, 192, 64], f16, stride=(1769472, 147456, 12288, 1, 192)), [2, 12, 14, 192, 64], 2, 1, -1, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 64], f16), [2, 12, 16, 64, 64], 2, 2, -2, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 64, 1)), [2, 12, 16, 64, 64], 2, 3, -1, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 64, 1)), [2, 12, 16, 64, 64], 2, 2, -2, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 
64, 1)), [2, 12, 16, 64, 64], 2, 1, -3, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 1, 192)), [2, 12, 16, 64, 64], 2, 3, -1, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 1, 192)), [2, 12, 16, 64, 64], 2, 2, -2, 1), {}) +cnt: 12, ((T([2, 12, 12, 64, 64], f16, stride=(1769472, 147456, 12288, 1, 192)), [2, 12, 16, 64, 64], 2, 1, -3, 1), {}) +Operator: aten.stack.default +cnt: 12, (([T([504, 64], f32), T([504, 64], f32)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([2048, 50358], f16, stride=(0, 0)), [0], True), {}) +cnt: 61, ((T([2048, 768], f16), [0], True), {}) +cnt: 12, ((T([2048, 3072], f16), [0], True), {}) +cnt: 1, ((T([2, 1024, 768], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([2, 1024, 50358], f16),), {}) +Operator: aten.tanh.default +cnt: 12, ((T([2, 1024, 3072], f16),), {}) +cnt: 1, ((T([2, 768], f16),), {}) +cnt: 1, ((T([2, 1024, 768], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 1, ((T([2, 1024, 768], f16), T([2, 1024, 768], f16)), {}) +cnt: 12, ((T([2, 1024, 3072], f16), T([2, 1024, 3072], f16)), {}) +Operator: aten.unbind.int +cnt: 12, ((T([2, 16, 64], f32),), {}) +cnt: 12, ((T([2, 12, 14, 3], i64),), {}) +Operator: aten.unsqueeze_.default +cnt: 1, ((T([2, 12, 64, 192], f32), 1), {}) +cnt: 12, ((T([12, 14, 3], i64), 0), {}) +cnt: 48, ((T([2, 12, 64, 64], f16), 2), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_DistilBert_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_DistilBert_training.txt new file mode 100644 index 000000000..225446dad --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_DistilBert_training.txt @@ -0,0 +1,73 @@ +Operator: aten._softmax.default +cnt: 6, ((T([8, 12, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 6, ((T([8, 12, 512, 512], f16), T([8, 12, 512, 512], f16), -1, f16), {}) 
+Operator: aten._unsafe_view.default +cnt: 18, ((T([8, 12, 512, 64], f16), [96, 512, 64]), {}) +cnt: 6, ((T([8, 12, 64, 512], f16), [96, 64, 512]), {}) +cnt: 6, ((T([96, 512, 512], f16), [8, 12, 512, 512]), {}) +cnt: 6, ((T([96, 512, 64], f16), [8, 12, 512, 64]), {}) +cnt: 12, ((T([8, 512, 12, 64], f16), [8, 512, 768]), {}) +cnt: 6, ((T([8, 512, 768], f16), [4096, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([8, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 36, ((T([8, 512, 768], f16), T([8, 512, 768], f16)), {}) +cnt: 1, ((T([30522, 768], f16), T([30522, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 25, ((T([768], f16), T([4096, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 6, ((T([3072], f16), T([4096, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 6, ((T([768], f16), T([4096, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([30522], f16), T([4096, 768], f16), T([768, 30522], f16, stride=(1, 768))), {}) +Operator: aten.bmm.default +cnt: 6, ((T([96, 512, 64], f16), T([96, 64, 512], f16)), {}) +cnt: 6, ((T([96, 512, 512], f16), T([96, 512, 64], f16)), {}) +cnt: 6, ((T([96, 512, 512], f16, stride=(262144, 1, 512)), T([96, 512, 64], f16)), {}) +cnt: 6, ((T([96, 512, 64], f16), T([96, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 6, ((T([96, 64, 512], f16, stride=(32768, 1, 64)), T([96, 512, 512], f16)), {}) +cnt: 6, ((T([96, 512, 512], f16), T([96, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.clone.default +cnt: 1, ((T([8, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([8, 512], i64), T([8, 512], i64)), {}) +Operator: aten.div.Tensor +cnt: 6, ((T([8, 12, 512, 64], f16, stride=(393216, 64, 768, 1)), 8.0), {}) +cnt: 2, ((T([], f16), 125018112), {}) +cnt: 6, ((T([8, 12, 512, 64], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([30522, 768], f16), T([8, 512], i64), 0), {}) +cnt: 1, ((T([512, 768], f16), T([1, 512], i64)), {}) +Operator: 
aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 512, -1, False), {}) +cnt: 1, ((T([8, 512, 768], f16), T([8, 512], i64), 30522, 0, False), {}) +Operator: aten.eq.Scalar +cnt: 6, ((T([8, 512], f32), 0), {}) +Operator: aten.gelu.default +cnt: 6, ((T([8, 512, 3072], f16),), {}) +cnt: 1, ((T([8, 512, 768], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([8, 512, 768], f16), T([8, 512, 768], f16)), {}) +cnt: 6, ((T([8, 512, 3072], f16), T([8, 512, 3072], f16)), {}) +Operator: aten.masked_fill.Scalar +cnt: 6, ((T([8, 12, 512, 512], f16), T([8, 12, 512, 512], b8, stride=(512, 0, 0, 1)), 0), {}) +Operator: aten.masked_fill.Tensor +cnt: 6, ((T([8, 12, 512, 512], f16), T([8, 12, 512, 512], b8, stride=(512, 0, 0, 1)), T([], f32)), {}) +Operator: aten.mm.default +cnt: 1, ((T([4096, 30522], f16, stride=(0, 0)), T([30522, 768], f16)), {}) +cnt: 1, ((T([30522, 4096], f16, stride=(0, 0)), T([4096, 768], f16)), {}) +cnt: 25, ((T([4096, 768], f16), T([768, 768], f16)), {}) +cnt: 25, ((T([768, 4096], f16, stride=(1, 768)), T([4096, 768], f16)), {}) +cnt: 6, ((T([4096, 768], f16), T([768, 3072], f16)), {}) +cnt: 6, ((T([768, 4096], f16, stride=(1, 768)), T([4096, 3072], f16)), {}) +cnt: 6, ((T([4096, 3072], f16), T([3072, 768], f16)), {}) +cnt: 6, ((T([3072, 4096], f16, stride=(1, 3072)), T([4096, 768], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 14, ((T([8, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-12), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 14, ((T([8, 512, 768], f16), T([8, 512, 768], f16), [768], T([8, 512, 1], f32), T([8, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([4096, 30522], f16, stride=(0, 0)), [0], True), {}) +cnt: 31, ((T([4096, 768], f16), [0], True), {}) +cnt: 6, ((T([4096, 3072], f16), [0], True), {}) +cnt: 1, ((T([8, 512, 768], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([8, 
512, 30522], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_GPT2_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_GPT2_training.txt new file mode 100644 index 000000000..7a2ca611a --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_GPT2_training.txt @@ -0,0 +1,88 @@ +Operator: aten._softmax.default +cnt: 12, ((T([4, 12, 512, 512], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([4, 12, 512, 512], f16), T([4, 12, 512, 512], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 12, ((T([1, 1, 512, 512], u8, stride=(1048576, 1048576, 1024, 1)),), {'dtype': torch.bool}) +cnt: 12, ((T([], f16),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([4, 12, 512, 64], f16), [48, 512, 64]), {}) +cnt: 12, ((T([4, 12, 64, 512], f16), [48, 64, 512]), {}) +cnt: 12, ((T([48, 512, 512], f16), [4, 12, 512, 512]), {}) +cnt: 12, ((T([48, 512, 64], f16), [4, 12, 512, 64]), {}) +cnt: 1, ((T([2048, 50257], f16), [4, 512, 50257]), {}) +cnt: 24, ((T([4, 512, 12, 64], f16), [4, 512, 768]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([4, 512, 768], f16), T([1, 512, 768], f16)), {}) +cnt: 48, ((T([4, 512, 768], f16), T([4, 512, 768], f16)), {}) +cnt: 36, ((T([4, 512, 3072], f16), T([4, 512, 3072], f16)), {}) +cnt: 12, ((T([4, 512, 3072], f16), 1.0), {}) +cnt: 1, ((T([50257, 768], f16), T([50257, 768], f16)), {}) +Operator: aten.addmm.default +cnt: 12, ((T([2304], f16), T([2048, 768], f16), T([768, 2304], f16)), {}) +cnt: 12, ((T([768], f16), T([2048, 768], f16), T([768, 768], f16)), {}) +cnt: 12, ((T([3072], f16), T([2048, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768], f16), T([2048, 3072], f16), T([3072, 768], f16)), {}) +Operator: aten.bmm.default +cnt: 12, ((T([48, 512, 64], f16), T([48, 64, 512], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16), T([48, 512, 64], 
f16)), {}) +cnt: 12, ((T([48, 512, 512], f16, stride=(262144, 1, 512)), T([48, 512, 64], f16)), {}) +cnt: 12, ((T([48, 512, 64], f16), T([48, 64, 512], f16, stride=(32768, 1, 64))), {}) +cnt: 12, ((T([48, 64, 512], f16, stride=(32768, 1, 64)), T([48, 512, 512], f16)), {}) +cnt: 12, ((T([48, 512, 512], f16), T([48, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.cat.default +cnt: 12, (([T([4, 512, 768], f16), T([4, 512, 768], f16, stride=(393216, 1, 512)), T([4, 512, 768], f16)], 2), {}) +Operator: aten.clone.default +cnt: 1, ((T([4, 512], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([4, 512], i64), T([4, 512], i64)), {}) +Operator: aten.div.Tensor +cnt: 24, ((T([4, 12, 512, 512], f16), T([], f16)), {}) +cnt: 2, ((T([], f16), 102926336), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50257, 768], f16), T([4, 512], i64)), {}) +cnt: 1, ((T([1024, 768], f16), T([1, 512], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([1, 512, 768], f16), T([1, 512], i64), 1024, -1, False), {}) +cnt: 1, ((T([4, 512, 768], f16), T([4, 512], i64), 50257, -1, False), {}) +Operator: aten.mm.default +cnt: 1, ((T([2048, 768], f16), T([768, 50257], f16, stride=(1, 768))), {}) +cnt: 1, ((T([50257, 2048], f16, stride=(0, 0)), T([2048, 768], f16)), {}) +cnt: 1, ((T([2048, 50257], f16, stride=(0, 0)), T([50257, 768], f16)), {}) +cnt: 12, ((T([2048, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([3072, 2048], f16, stride=(1, 3072)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2048, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 12, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 3072], f16)), {}) +cnt: 12, ((T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 768], f16)), {}) +cnt: 12, ((T([2048, 2304], f16), T([2304, 768], f16, stride=(1, 2304))), {}) +cnt: 12, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 2304], f16)), {}) +Operator: 
aten.mul.Scalar +cnt: 12, ((T([4, 512, 3072], f16), 3.0), {}) +Operator: aten.mul.Tensor +cnt: 24, ((T([4, 512, 3072], f16), 0.5), {}) +cnt: 24, ((T([4, 512, 3072], f16), 0.044715), {}) +cnt: 24, ((T([4, 512, 3072], f16), 0.7978845608028654), {}) +cnt: 48, ((T([4, 512, 3072], f16), T([4, 512, 3072], f16)), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([4, 512, 768], f16), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([4, 512, 768], f16), T([4, 512, 768], f16), [768], T([4, 512, 1], f32), T([4, 512, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.pow.Tensor_Scalar +cnt: 12, ((T([4, 512, 3072], f16), 3.0), {}) +cnt: 12, ((T([4, 512, 3072], f16), 2.0), {}) +Operator: aten.split.Tensor +cnt: 12, ((T([4, 512, 2304], f16), 768, 2), {}) +Operator: aten.sum.SymInt +cnt: 24, ((T([2048, 768], f16), [0], True), {}) +cnt: 12, ((T([2048, 3072], f16), [0], True), {}) +cnt: 12, ((T([2048, 2304], f16), [0], True), {}) +cnt: 1, ((T([4, 512, 768], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([4, 512, 50257], f16),), {}) +Operator: aten.tanh.default +cnt: 12, ((T([4, 512, 3072], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 12, ((T([4, 512, 3072], f16), T([4, 512, 3072], f16)), {}) +Operator: aten.where.self +cnt: 24, ((T([1, 1, 512, 512], b8), T([4, 12, 512, 512], f16), T([], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Longformer_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Longformer_training.txt new file mode 100644 index 000000000..23725d8af --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/hf_Longformer_training.txt @@ -0,0 +1,189 @@ +Operator: aten._softmax.default +cnt: 12, ((T([2, 1024, 12, 513], f16, stride=(6303744, 513, 525312, 1)), -1, True), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([2, 
1024, 12, 513], f32), T([2, 1024, 12, 513], f32), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([2, 1, 1, 1024], f32),), {'dtype': f16}) +cnt: 1, ((T([2, 1024], b8),), {'dtype': i32}) +cnt: 1, ((T([2, 1024], i64),), {'dtype': i32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([2, 1024], i32),), {'dtype': i64}) +cnt: 12, ((T([2, 1024, 1, 1], b8),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 12, ((T([2, 1024, 12, 513], f32),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 12, ((T([2, 1024, 12, 513], f16, stride=(6303744, 513, 525312, 1)),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([1024, 2, 768], f16), [2048, 768]), {}) +cnt: 36, ((T([2048, 768], f16), [1024, 2, 768]), {}) +cnt: 12, ((T([24, 3, 512, 64, 1], f16), [72, 512, 64]), {}) +cnt: 12, ((T([24, 3, 64, 512, 1], f16), [72, 64, 512]), {}) +cnt: 12, ((T([2, 12, 1024, 513], f16), [24, 4, 256, 513]), {}) +cnt: 12, ((T([24, 4, 768, 64, 1], f16), [96, 768, 64]), {}) +cnt: 24, ((T([1024, 2, 12, 64], f16), [1024, 2, 768]), {}) +cnt: 12, ((T([2, 1024, 768], f16), [2048, 768]), {}) +cnt: 12, ((T([2048, 768], f16), [2, 1024, 768]), {}) +cnt: 12, ((T([2, 12, 1024, 64], f16), [24, 4, 256, 64]), {}) +cnt: 12, ((T([24, 4, 768, 64], i64), [4718592]), {}) +cnt: 12, ((T([24, 3, 512, 64], f16), [2359296]), {}) +cnt: 24, ((T([24, 3, 512, 64], i64), [2359296]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([2, 1024], i64), 1), {}) +cnt: 38, ((T([2, 1024, 768], f16), T([2, 1024, 768], f16)), {}) +cnt: 36, ((T([1024, 2, 768], f16), T([768], f16)), {}) +cnt: 12, ((T([2, 1024, 768], f16), T([768], f16)), {}) +cnt: 1, ((T([], f16), 0), {}) +cnt: 36, ((T([24, 3, 512, 513], f16), T([24, 3, 512, 513], f16)), {}) +cnt: 24, ((T([1024, 2, 768], f16), T([1024, 2, 768], f16)), {}) +cnt: 12, ((T([2, 1024, 768], f16), T([2, 1024, 768], f16, stride=(768, 1536, 1))), {}) +cnt: 1, ((T([50265, 768], f16), T([50265, 768], 
f16)), {}) +Operator: aten.add_.Tensor +cnt: 12, ((T([2, 1024, 12, 513], f16, stride=(6303744, 513, 525312, 1)), T([2, 1024, 1, 513], f16)), {}) +Operator: aten.addmm.default +cnt: 12, ((T([3072], f16), T([2048, 768], f16), T([768, 3072], f16, stride=(1, 768))), {}) +cnt: 12, ((T([768], f16), T([2048, 3072], f16), T([3072, 768], f16, stride=(1, 3072))), {}) +cnt: 1, ((T([768], f16), T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 1, ((T([50265], f16), T([2048, 768], f16), T([768, 50265], f16, stride=(1, 768))), {}) +Operator: aten.any.default +cnt: 1, ((T([2048], b8),), {}) +Operator: aten.bmm.default +cnt: 12, ((T([72, 512, 64], f16), T([72, 64, 512], f16)), {}) +cnt: 12, ((T([96, 256, 768], f16, stride=(197120, 769, 1)), T([96, 768, 64], f16)), {}) +cnt: 12, ((T([96, 768, 256], f16, stride=(197120, 1, 769)), T([96, 256, 64], f16)), {}) +cnt: 12, ((T([96, 256, 64], f16), T([96, 64, 768], f16, stride=(49152, 1, 64))), {}) +cnt: 12, ((T([72, 64, 512], f16, stride=(32768, 1, 64)), T([72, 512, 512], f16)), {}) +cnt: 12, ((T([72, 512, 512], f16), T([72, 512, 64], f16, stride=(32768, 1, 512))), {}) +Operator: aten.clone.default +cnt: 1, ((T([2, 1024], i64),), {}) +Operator: aten.constant_pad_nd.default +cnt: 12, ((T([24, 3, 512, 512], f16), [0, 0, 0, 1], 0.0), {}) +cnt: 12, ((T([2, 3, 512, 512], f16), [0, 0, 0, 1], 0.0), {}) +cnt: 12, ((T([24, 1024, 64], f16, stride=(64, 1536, 1)), [0, 0, 256, 256], -1.0), {}) +cnt: 12, ((T([24, 4, 256, 513], f16), [0, 257], 0.0), {}) +cnt: 12, ((T([24, 4, 256, 770], f16), [0, -257]), {}) +cnt: 12, ((T([24, 1536, 64], f16), [0, 0, -256, -256]), {}) +cnt: 12, ((T([24, 3, 513, 512], f16), [0, 0, 0, -1]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([2, 1024], i64), T([2, 1024], i64)), {}) +cnt: 12, ((T([24, 3, 256, 257], f16, stride=(525312, 131328, 513, 1)), T([24, 3, 256, 257], f16, stride=(787968, 262656, 513, 1))), {}) +cnt: 12, ((T([24, 256, 257], f16, stride=(525312, 513, 1)), T([24, 256, 257], f16, 
stride=(787968, 513, 1))), {}) +cnt: 12, ((T([24, 3, 256, 256], f16, stride=(525312, 131328, 513, 1)), T([24, 3, 256, 256], f16, stride=(787968, 262656, 513, 1))), {}) +cnt: 12, ((T([24, 255, 255], f16, stride=(525312, 513, 1)), T([24, 255, 255], f16, stride=(787968, 513, 1))), {}) +cnt: 12, ((T([2, 3, 256, 257], f16, stride=(525312, 131328, 513, 1)), T([2, 3, 256, 257], f16, stride=(787968, 262656, 513, 1))), {}) +cnt: 12, ((T([2, 256, 257], f16, stride=(525312, 513, 1)), T([2, 256, 257], f16, stride=(787968, 513, 1))), {}) +cnt: 12, ((T([2, 3, 256, 256], f16, stride=(525312, 131328, 513, 1)), T([2, 3, 256, 256], f16, stride=(787968, 262656, 513, 1))), {}) +cnt: 12, ((T([2, 255, 255], f16, stride=(525312, 513, 1)), T([2, 255, 255], f16, stride=(787968, 513, 1))), {}) +cnt: 24, ((T([2, 1024, 12, 513], f16, stride=(6303744, 513, 525312, 1)), T([2, 1024, 12, 513], f16)), {}) +cnt: 84, ((T([24, 4, 256, 513], f16), T([24, 4, 256, 513], f16)), {}) +cnt: 24, ((T([2, 256, 12, 257], f16, stride=(6303744, 513, 525312, 1)), T([2, 256, 12, 257], f16)), {}) +cnt: 12, ((T([24, 255, 255], f16, stride=(525312, 513, 1)), T([24, 255, 255], f16)), {}) +cnt: 12, ((T([24, 3, 256, 256], f16, stride=(525312, 131328, 513, 1)), T([24, 3, 256, 256], f16)), {}) +cnt: 12, ((T([24, 256, 257], f16, stride=(525312, 513, 1)), T([24, 256, 257], f16)), {}) +Operator: aten.cumsum.default +cnt: 1, ((T([2, 1024], i32), 1), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 102942720), {}) +cnt: 2, ((T([], f16), 1), {}) +cnt: 12, ((T([1024, 2, 768], f16), 8.0), {}) +Operator: aten.div_.Tensor +cnt: 12, ((T([1024, 2, 768], f16), 8.0), {}) +Operator: aten.embedding.default +cnt: 1, ((T([50265, 768], f16), T([2, 1024], i64), 1), {}) +cnt: 1, ((T([4098, 768], f16), T([2, 1024], i64), 1), {}) +cnt: 1, ((T([1, 768], f16), T([2, 1024], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([2, 1024, 768], f16), T([2, 1024], i64), 1, -1, False), {}) +cnt: 1, ((T([2, 1024, 768], f16), 
T([2, 1024], i64), 4098, 1, False), {}) +cnt: 1, ((T([2, 1024, 768], f16), T([2, 1024], i64), 50265, 1, False), {}) +Operator: aten.eq.Scalar +cnt: 24, ((T([2, 256, 12, 257], f16, stride=(0, 257, 0, 1)), 1), {}) +cnt: 24, ((T([2, 256, 1, 257], f16, stride=(0, 257, 257, 1)), 1), {}) +Operator: aten.flip.default +cnt: 24, ((T([256, 257], f16), [0]), {}) +cnt: 24, ((T([1, 256, 1, 257], f16), [1, 3]), {}) +Operator: aten.gelu.default +cnt: 12, ((T([2, 1024, 3072], f16),), {}) +cnt: 1, ((T([2, 1024, 768], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([2, 1024, 768], f16), T([2, 1024, 768], f16)), {}) +cnt: 12, ((T([2, 1024, 3072], f16), T([2, 1024, 3072], f16)), {}) +Operator: aten.gt.Scalar +cnt: 1, ((T([2, 1024], f16), 0), {}) +Operator: aten.index_add_.default +cnt: 12, ((T([2359296], f16), 0, T([4718592], i64), T([4718592], f16)), {}) +cnt: 24, ((T([1572864], f16), 0, T([2359296], i64), T([2359296], f16)), {}) +Operator: aten.lt.Scalar +cnt: 1, ((T([2, 1024], f16), 0), {}) +Operator: aten.masked_fill.Scalar +cnt: 12, ((T([2, 1024, 1, 1], f16), T([2, 1024, 1, 1], b8), -65504.0), {}) +cnt: 12, ((T([2, 1024, 12, 513], f32), T([2, 1024, 1, 1], b8), 0.0), {}) +cnt: 12, ((T([2, 1024, 12, 513], f32, stride=(6303744, 513, 525312, 1)), T([2, 1024, 1, 1], b8), 0), {}) +cnt: 24, ((T([2, 256, 12, 257], f16), T([2, 256, 12, 257], b8), 0), {}) +Operator: aten.masked_fill_.Scalar +cnt: 24, ((T([2, 256, 12, 257], f16, stride=(6303744, 513, 525312, 1)), T([2, 256, 12, 257], b8), -inf), {}) +cnt: 24, ((T([2, 256, 1, 257], f16, stride=(525312, 513, 525312, 1)), T([2, 256, 1, 257], b8), -inf), {}) +Operator: aten.mm.default +cnt: 48, ((T([2048, 768], f16), T([768, 768], f16, stride=(1, 768))), {}) +cnt: 1, ((T([2048, 50265], f16, stride=(0, 0)), T([50265, 768], f16)), {}) +cnt: 1, ((T([50265, 2048], f16, stride=(0, 0)), T([2048, 768], f16)), {}) +cnt: 49, ((T([2048, 768], f16), T([768, 768], f16)), {}) +cnt: 49, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 768], 
f16)), {}) +cnt: 12, ((T([2048, 768], f16), T([768, 3072], f16)), {}) +cnt: 12, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 3072], f16)), {}) +cnt: 12, ((T([2048, 3072], f16), T([3072, 768], f16)), {}) +cnt: 12, ((T([3072, 2048], f16, stride=(1, 3072)), T([2048, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([2, 1, 1, 1024], f16), -65504.0), {}) +cnt: 1, ((T([2, 1024], i32), T([2, 1024], i32)), {}) +cnt: 12, ((T([2, 3, 512, 1], f16, stride=(1024, 256, 1, 1)), T([2, 3, 1, 512], f16, stride=(1024, 256, 1, 1))), {}) +Operator: aten.native_layer_norm.default +cnt: 26, ((T([2, 1024, 768], f16), [768], T([768], f16), T([768], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 26, ((T([2, 1024, 768], f16), T([2, 1024, 768], f16), [768], T([2, 1024, 1], f32), T([2, 1024, 1], f32), T([768], f16), T([768], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 1, ((T([2, 1024], i64), 1), {}) +cnt: 12, ((T([2, 1024], f16), 0), {}) +Operator: aten.new_empty.default +cnt: 12, ((T([24, 3, 512, 513], f16), [24, 4, 256, 513]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 12, ((T([2, 3, 512, 513], f16), [2, 4, 256, 513]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_empty_strided.default +cnt: 84, ((T([24, 4, 256, 513], f16), [24, 4, 256, 513], [525312, 131328, 513, 1]), {}) +Operator: aten.new_ones.default +cnt: 12, ((T([2, 1024, 12, 513], f16, stride=(6303744, 513, 525312, 1)), [256, 257]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 12, ((T([2, 1024, 1, 1], f16), [2, 1024, 1, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 12, ((T([2, 1024, 1, 513], f16), [256, 257]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_zeros.default +cnt: 12, ((T([24, 4, 768, 64], f16), [2359296]), {}) +cnt: 12, ((T([2, 
1024, 12, 513], f16), [12607488]), {}) +cnt: 12, ((T([24, 3, 512, 64], f16, stride=(98304, 32768, 1, 512)), [1572864]), {}) +cnt: 12, ((T([24, 3, 512, 64], f16), [1572864]), {}) +Operator: aten.rsub.Scalar +cnt: 1, ((T([2, 1, 1, 1024], f16), 1.0), {}) +Operator: aten.select_backward.default +cnt: 12, ((T([24, 512, 513], f16), [24, 3, 512, 513], 1, 0), {}) +cnt: 12, ((T([24, 512, 513], f16), [24, 3, 512, 513], 1, -1), {}) +Operator: aten.slice_backward.default +cnt: 12, ((T([24, 4, 256, 768], f16), [24, 4, 256, 769], 3, 0, -1, 1), {}) +cnt: 12, ((T([24, 4, 256, 769], f16), [24, 4, 256, 769], 2, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([24, 4, 256, 769], f16), [24, 4, 256, 769], 1, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([24, 4, 256, 769], f16), [24, 4, 256, 769], 0, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([24, 4, 196864], f16), [24, 4, 197120], 2, 0, -256, 1), {}) +cnt: 12, ((T([24, 4, 197120], f16), [24, 4, 197120], 1, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([24, 4, 197120], f16), [24, 4, 197120], 0, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([24, 255, 255], f16), [24, 255, 513], 2, -255, 9223372036854775807, 1), {}) +cnt: 12, ((T([24, 255, 513], f16), [24, 512, 513], 1, 0, 255, 1), {}) +cnt: 48, ((T([24, 3, 512, 513], f16), [24, 3, 512, 513], 0, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([24, 3, 256, 256], f16), [24, 3, 256, 513], 3, 257, 9223372036854775807, 1), {}) +cnt: 12, ((T([24, 3, 256, 513], f16), [24, 3, 512, 513], 2, -257, -1, 1), {}) +cnt: 24, ((T([24, 3, 512, 513], f16), [24, 3, 512, 513], 1, 0, 9223372036854775807, 1), {}) +cnt: 12, ((T([24, 256, 257], f16), [24, 256, 513], 2, 0, 257, 1), {}) +cnt: 12, ((T([24, 256, 513], f16), [24, 512, 513], 1, 256, 9223372036854775807, 1), {}) +cnt: 12, ((T([24, 3, 256, 257], f16), [24, 3, 256, 513], 3, 0, 257, 1), {}) +cnt: 12, ((T([24, 3, 256, 513], f16), [24, 3, 512, 513], 2, 0, 256, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([2048, 50265], f16, stride=(0, 0)), [0], True), {}) 
+cnt: 13, ((T([2048, 768], f16), [0], True), {}) +cnt: 12, ((T([2048, 3072], f16), [0], True), {}) +cnt: 12, ((T([2, 1024, 768], f16), [0, 1], True), {}) +cnt: 36, ((T([1024, 2, 768], f16), [0, 1], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([2, 1024, 50265], f16),), {}) +Operator: aten.tril.default +cnt: 24, ((T([256, 257], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/maml_omniglot_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/maml_omniglot_training.txt new file mode 100644 index 000000000..3121d116d --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/maml_omniglot_training.txt @@ -0,0 +1,49 @@ +Operator: aten.addmm.default +cnt: 1, ((T([5], f16), T([5, 64], f16), T([64, 5], f16, stride=(1, 64))), {}) +Operator: aten.clone.default +cnt: 1, ((T([5, 1, 28, 28], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([5, 1, 28, 28], f16), T([64, 1, 3, 3], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([5, 64, 13, 13], f16, stride=(10816, 1, 832, 64)), T([64, 64, 3, 3], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([5, 64, 5, 5], f16, stride=(1600, 1, 320, 64)), T([64, 64, 3, 3], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([5, 64, 3, 3], f16, stride=(576, 1, 192, 64)), T([5, 64, 5, 5], f16, stride=(1600, 1, 320, 64)), T([64, 64, 3, 3], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([5, 64, 11, 11], f16, stride=(7744, 1, 704, 64)), T([5, 64, 13, 13], f16, stride=(10816, 1, 832, 64)), T([64, 64, 3, 3], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([5, 64, 26, 26], f16, stride=(43264, 1, 1664, 64)), T([5, 1, 28, 28], f16), T([64, 1, 3, 3], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [False, True, 
True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([5, 1, 28, 28], f16), T([5, 1, 28, 28], f16)), {}) +cnt: 2, ((T([64, 64, 3, 3], f16), T([64, 64, 3, 3], f16, stride=(576, 1, 192, 64))), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 25), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([5, 64, 26, 26], f16, stride=(43264, 1, 1664, 64)), [2, 2], [2, 2]), {}) +cnt: 1, ((T([5, 64, 11, 11], f16, stride=(7744, 1, 704, 64)), [2, 2], [2, 2]), {}) +cnt: 1, ((T([5, 64, 3, 3], f16, stride=(576, 1, 192, 64)), [2, 2], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([5, 64, 1, 1], f16), T([5, 64, 3, 3], f16, stride=(576, 1, 192, 64)), [2, 2], [2, 2], [0, 0], [1, 1], False, T([5, 64, 1, 1], i64)), {}) +cnt: 1, ((T([5, 64, 5, 5], f16, stride=(1600, 1, 320, 64)), T([5, 64, 11, 11], f16, stride=(7744, 1, 704, 64)), [2, 2], [2, 2], [0, 0], [1, 1], False, T([5, 64, 5, 5], i64, stride=(1600, 1, 320, 64))), {}) +cnt: 1, ((T([5, 64, 13, 13], f16, stride=(10816, 1, 832, 64)), T([5, 64, 26, 26], f16, stride=(43264, 1, 1664, 64)), [2, 2], [2, 2], [0, 0], [1, 1], False, T([5, 64, 13, 13], i64, stride=(10816, 1, 832, 64))), {}) +Operator: aten.mm.default +cnt: 2, ((T([5, 5], f16, stride=(0, 0)), T([5, 64], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([5, 64, 26, 26], f16, stride=(43264, 1, 1664, 64)), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 1.0, 1e-05), {}) +cnt: 1, ((T([5, 64, 11, 11], f16, stride=(7744, 1, 704, 64)), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 1.0, 1e-05), {}) +cnt: 1, ((T([5, 64, 3, 3], f16, stride=(576, 1, 192, 64)), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 1.0, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([5, 64, 3, 3], f16, stride=(576, 1, 192, 64)), T([5, 64, 3, 3], f16, stride=(576, 1, 192, 64)), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, 
True, True]), {}) +cnt: 1, ((T([5, 64, 11, 11], f16, stride=(7744, 1, 704, 64)), T([5, 64, 11, 11], f16, stride=(7744, 1, 704, 64)), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([5, 64, 26, 26], f16, stride=(43264, 1, 1664, 64)), T([5, 64, 26, 26], f16, stride=(43264, 1, 1664, 64)), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 2, ((T([64, 64, 3, 3], f16, stride=(576, 1, 192, 64)), [64, 64, 3, 3], [576, 9, 3, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.relu_.default +cnt: 1, ((T([5, 64, 26, 26], f16, stride=(43264, 1, 1664, 64)),), {}) +cnt: 1, ((T([5, 64, 11, 11], f16, stride=(7744, 1, 704, 64)),), {}) +cnt: 1, ((T([5, 64, 3, 3], f16, stride=(576, 1, 192, 64)),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([5, 5], f16, stride=(0, 0)), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([5, 5], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([5, 64, 3, 3], f16, stride=(576, 1, 192, 64)), T([5, 64, 3, 3], f16, stride=(576, 1, 192, 64)), 0), {}) +cnt: 1, ((T([5, 64, 11, 11], f16, stride=(7744, 1, 704, 64)), T([5, 64, 11, 11], f16, stride=(7744, 1, 704, 64)), 0), {}) +cnt: 1, ((T([5, 64, 26, 26], f16, stride=(43264, 1, 1664, 64)), T([5, 64, 26, 26], f16, stride=(43264, 1, 1664, 64)), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/mnasnet1_0_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/mnasnet1_0_training.txt new file mode 100644 index 000000000..4f81a1146 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/mnasnet1_0_training.txt @@ -0,0 +1,163 @@ +Operator: aten.add.Tensor +cnt: 4, ((T([32, 24, 56, 56], f16), T([32, 24, 56, 56], f16)), {}) +cnt: 4, ((T([32, 40, 28, 28], f16), T([32, 40, 28, 28], f16)), {}) +cnt: 4, 
((T([32, 80, 14, 14], f16), T([32, 80, 14, 14], f16)), {}) +cnt: 2, ((T([32, 96, 14, 14], f16), T([32, 96, 14, 14], f16)), {}) +cnt: 6, ((T([32, 192, 7, 7], f16), T([32, 192, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([48, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 48, 112, 112], f16), T([48, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 48), {}) +cnt: 1, ((T([32, 48, 56, 56], f16), T([24, 48, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 24, 56, 56], f16), T([72, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 72, 56, 56], f16), T([72, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 72), {}) +cnt: 2, ((T([32, 72, 56, 56], f16), T([24, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 72, 56, 56], f16), T([72, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 72), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([40, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 40, 28, 28], f16), T([120, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([120, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 120), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([40, 120, 1, 1], f16), None, [1, 1], [0, 
0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 40, 28, 28], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([240, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 80, 14, 14], f16), T([480, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 480, 14, 14], f16), T([480, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 480), {}) +cnt: 2, ((T([32, 480, 14, 14], f16), T([80, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([480, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 480), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([96, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 96, 14, 14], f16), T([576, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 576, 14, 14], f16), T([576, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 576), {}) +cnt: 1, ((T([32, 576, 14, 14], f16), T([96, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 576, 14, 14], f16), T([576, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 576), {}) +cnt: 1, ((T([32, 576, 7, 7], f16), T([192, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1152), {}) +cnt: 3, ((T([32, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1152, 7, 7], f16), T([1152, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 
0], 1152), {}) +cnt: 1, ((T([32, 1152, 7, 7], f16), T([320, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([32, 1280, 7, 7], f16), T([32, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 320, 7, 7], f16), T([32, 1152, 7, 7], f16), T([320, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1152, 7, 7], f16), T([32, 1152, 7, 7], f16), T([1152, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 4, ((T([32, 1152, 7, 7], f16), T([32, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 192, 7, 7], f16), T([32, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 1152, 7, 7], f16), T([32, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 1, ((T([32, 192, 7, 7], f16), T([32, 576, 7, 7], f16), T([192, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 576, 7, 7], f16), T([32, 576, 14, 14], f16), T([576, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 576, [True, True, False]), {}) +cnt: 2, ((T([32, 576, 14, 14], f16), T([32, 96, 14, 14], f16), T([576, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 96, 14, 14], f16), T([32, 576, 14, 14], f16), T([96, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 576, 14, 14], f16), T([32, 576, 14, 14], f16), T([576, 1, 3, 3], 
f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 576, [True, True, False]), {}) +cnt: 1, ((T([32, 96, 14, 14], f16), T([32, 480, 14, 14], f16), T([96, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16), T([480, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 3, ((T([32, 480, 14, 14], f16), T([32, 80, 14, 14], f16), T([480, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 80, 14, 14], f16), T([32, 480, 14, 14], f16), T([80, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16), T([480, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 1, ((T([32, 80, 14, 14], f16), T([32, 240, 14, 14], f16), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([32, 240, 28, 28], f16), T([240, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([32, 40, 28, 28], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 40, 28, 28], f16), T([32, 120, 28, 28], f16), T([40, 120, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([32, 120, 28, 28], f16), T([120, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([32, 40, 28, 28], f16), T([120, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 40, 28, 28], f16), T([32, 72, 28, 28], f16), T([40, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 
0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([32, 72, 56, 56], f16), T([72, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 3, ((T([32, 72, 56, 56], f16), T([32, 24, 56, 56], f16), T([72, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 24, 56, 56], f16), T([32, 72, 56, 56], f16), T([24, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 72, 56, 56], f16), T([32, 72, 56, 56], f16), T([72, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 1, ((T([32, 24, 56, 56], f16), T([32, 48, 56, 56], f16), T([24, 48, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 48, 56, 56], f16), T([32, 48, 112, 112], f16), T([48, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 48, [True, True, False]), {}) +cnt: 1, ((T([32, 48, 112, 112], f16), T([32, 16, 112, 112], f16), T([48, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 32, 112, 112], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 1280, 7, 7], f16, stride=(1280, 1, 0, 0)), 49), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 32000), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 1280, 7, 7], f16), [2, 3]), {}) +Operator: 
aten.mm.default +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(0, 0)), T([32, 1280], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([32, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 1, ((T([32, 48, 112, 112], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 1, ((T([32, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 3, ((T([32, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 5, ((T([32, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 3, ((T([32, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 4, ((T([32, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 3, ((T([32, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 6, ((T([32, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), False, 0.00029999999999996696, 1e-05), {}) 
+cnt: 2, ((T([32, 96, 14, 14], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 3, ((T([32, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 1, ((T([32, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 4, ((T([32, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 8, ((T([32, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 1, ((T([32, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), False, 0.00029999999999996696, 1e-05), {}) +cnt: 1, ((T([32, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), False, 0.00029999999999996696, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([32, 1280, 7, 7], f16), T([32, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 320, 7, 7], f16), T([32, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), False, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([32, 1152, 7, 7], f16), T([32, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f32), T([1152], f32), False, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 192, 7, 7], f16), T([32, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 576, 7, 7], f16), T([32, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 576, 14, 14], f16), T([32, 576, 14, 14], f16), T([576], 
f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 96, 14, 14], f16), T([32, 96, 14, 14], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), False, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 80, 14, 14], f16), T([32, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([32, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([32, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 40, 28, 28], f16), T([32, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), False, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 120, 28, 28], f16), T([32, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([32, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([32, 72, 56, 56], f16), T([32, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 24, 56, 56], f16), T([32, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 48, 56, 56], f16), T([32, 48, 56, 56], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), False, 1e-05, 
[True, True, True]), {}) +cnt: 1, ((T([32, 48, 112, 112], f16), T([32, 48, 112, 112], f16), T([48], f16), T([48], f16), T([48], f16), T([48], f32), T([48], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.relu_.default +cnt: 2, ((T([32, 32, 112, 112], f16),), {}) +cnt: 1, ((T([32, 48, 112, 112], f16),), {}) +cnt: 1, ((T([32, 48, 56, 56], f16),), {}) +cnt: 5, ((T([32, 72, 56, 56], f16),), {}) +cnt: 1, ((T([32, 72, 28, 28], f16),), {}) +cnt: 4, ((T([32, 120, 28, 28], f16),), {}) +cnt: 1, ((T([32, 240, 28, 28], f16),), {}) +cnt: 1, ((T([32, 240, 14, 14], f16),), {}) +cnt: 6, ((T([32, 480, 14, 14], f16),), {}) +cnt: 3, ((T([32, 576, 14, 14], f16),), {}) +cnt: 1, ((T([32, 576, 7, 7], f16),), {}) +cnt: 8, ((T([32, 1152, 7, 7], f16),), {}) +cnt: 1, ((T([32, 1280, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([32, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([32, 1280, 7, 7], f16), T([32, 1280, 7, 7], f16), 0), {}) +cnt: 8, ((T([32, 1152, 7, 7], f16), T([32, 1152, 7, 7], f16), 0), {}) +cnt: 1, ((T([32, 576, 7, 7], f16), T([32, 576, 7, 7], f16), 0), {}) +cnt: 3, ((T([32, 576, 14, 14], f16), T([32, 576, 14, 14], f16), 0), {}) +cnt: 6, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([32, 240, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([32, 240, 28, 28], f16), 0), {}) +cnt: 4, ((T([32, 120, 28, 28], f16), T([32, 120, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([32, 72, 28, 28], f16), 0), {}) +cnt: 
5, ((T([32, 72, 56, 56], f16), T([32, 72, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 48, 56, 56], f16), T([32, 48, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 48, 112, 112], f16), T([32, 48, 112, 112], f16), 0), {}) +cnt: 2, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/mobilenet_v2_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/mobilenet_v2_training.txt new file mode 100644 index 000000000..185ce981a --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/mobilenet_v2_training.txt @@ -0,0 +1,165 @@ +Operator: aten.add.Tensor +cnt: 2, ((T([96, 24, 56, 56], f16), T([96, 24, 56, 56], f16)), {}) +cnt: 4, ((T([96, 32, 28, 28], f16), T([96, 32, 28, 28], f16)), {}) +cnt: 6, ((T([96, 64, 14, 14], f16), T([96, 64, 14, 14], f16)), {}) +cnt: 4, ((T([96, 96, 14, 14], f16), T([96, 96, 14, 14], f16)), {}) +cnt: 4, ((T([96, 160, 7, 7], f16), T([96, 160, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([96, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, ((T([96, 3, 224, 224], f16),), {}) +cnt: 2, ((T([96, 32, 112, 112], f16),), {}) +cnt: 1, ((T([96, 96, 112, 112], f16),), {}) +cnt: 1, ((T([96, 96, 56, 56], f16),), {}) +cnt: 3, ((T([96, 144, 56, 56], f16),), {}) +cnt: 1, ((T([96, 144, 28, 28], f16),), {}) +cnt: 5, ((T([96, 192, 28, 28], f16),), {}) +cnt: 1, ((T([96, 192, 14, 14], f16),), {}) +cnt: 8, ((T([96, 384, 14, 14], f16),), {}) +cnt: 5, ((T([96, 576, 14, 14], f16),), {}) +cnt: 1, ((T([96, 576, 7, 7], f16),), {}) +cnt: 6, ((T([96, 960, 7, 7], f16),), {}) +cnt: 1, ((T([96, 1280, 7, 7], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([96, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, 
[0, 0], 32), {}) +cnt: 1, ((T([96, 32, 112, 112], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 16, 112, 112], f16), T([96, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 96, 112, 112], f16), T([96, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 96), {}) +cnt: 1, ((T([96, 96, 56, 56], f16), T([24, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([96, 24, 56, 56], f16), T([144, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 144, 56, 56], f16), T([144, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 144), {}) +cnt: 1, ((T([96, 144, 56, 56], f16), T([24, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 144, 56, 56], f16), T([144, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 144), {}) +cnt: 1, ((T([96, 144, 28, 28], f16), T([32, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([96, 32, 28, 28], f16), T([192, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([96, 192, 28, 28], f16), T([192, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 192), {}) +cnt: 2, ((T([96, 192, 28, 28], f16), T([32, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 192, 28, 28], f16), T([192, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 192), {}) +cnt: 1, ((T([96, 192, 14, 14], f16), T([64, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([96, 64, 14, 14], f16), T([384, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([96, 384, 14, 14], f16), T([384, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 384), {}) +cnt: 3, ((T([96, 384, 14, 14], f16), T([64, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, 
((T([96, 384, 14, 14], f16), T([96, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([96, 96, 14, 14], f16), T([576, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([96, 576, 14, 14], f16), T([576, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 576), {}) +cnt: 2, ((T([96, 576, 14, 14], f16), T([96, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 576, 14, 14], f16), T([576, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 576), {}) +cnt: 1, ((T([96, 576, 7, 7], f16), T([160, 576, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([96, 160, 7, 7], f16), T([960, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([96, 960, 7, 7], f16), T([960, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 960), {}) +cnt: 2, ((T([96, 960, 7, 7], f16), T([160, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 960, 7, 7], f16), T([320, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([96, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([96, 1280, 7, 7], f16), T([96, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 320, 7, 7], f16), T([96, 960, 7, 7], f16), T([320, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([96, 960, 7, 7], f16), T([96, 960, 7, 7], f16), T([960, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 960, [True, True, False]), {}) +cnt: 3, ((T([96, 960, 7, 7], f16), T([96, 160, 7, 7], f16), T([960, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([96, 160, 7, 7], f16), T([96, 960, 
7, 7], f16), T([160, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 160, 7, 7], f16), T([96, 576, 7, 7], f16), T([160, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 576, 7, 7], f16), T([96, 576, 14, 14], f16), T([576, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 576, [True, True, False]), {}) +cnt: 3, ((T([96, 576, 14, 14], f16), T([96, 96, 14, 14], f16), T([576, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([96, 96, 14, 14], f16), T([96, 576, 14, 14], f16), T([96, 576, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([96, 576, 14, 14], f16), T([96, 576, 14, 14], f16), T([576, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 576, [True, True, False]), {}) +cnt: 1, ((T([96, 96, 14, 14], f16), T([96, 384, 14, 14], f16), T([96, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([96, 384, 14, 14], f16), T([96, 384, 14, 14], f16), T([384, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 384, [True, True, False]), {}) +cnt: 4, ((T([96, 384, 14, 14], f16), T([96, 64, 14, 14], f16), T([384, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([96, 64, 14, 14], f16), T([96, 384, 14, 14], f16), T([64, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 64, 14, 14], f16), T([96, 192, 14, 14], f16), T([64, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 192, 14, 14], f16), T([96, 192, 28, 28], f16), T([192, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 192, [True, True, False]), {}) +cnt: 3, ((T([96, 192, 28, 28], f16), T([96, 32, 28, 28], f16), T([192, 32, 1, 1], f16), [0], [1, 1], 
[0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([96, 32, 28, 28], f16), T([96, 192, 28, 28], f16), T([32, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([96, 192, 28, 28], f16), T([96, 192, 28, 28], f16), T([192, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 192, [True, True, False]), {}) +cnt: 1, ((T([96, 32, 28, 28], f16), T([96, 144, 28, 28], f16), T([32, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 144, 28, 28], f16), T([96, 144, 56, 56], f16), T([144, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 2, ((T([96, 144, 56, 56], f16), T([96, 24, 56, 56], f16), T([144, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 24, 56, 56], f16), T([96, 144, 56, 56], f16), T([24, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 144, 56, 56], f16), T([96, 144, 56, 56], f16), T([144, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 1, ((T([96, 24, 56, 56], f16), T([96, 96, 56, 56], f16), T([24, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 96, 56, 56], f16), T([96, 96, 112, 112], f16), T([96, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([96, 96, 112, 112], f16), T([96, 16, 112, 112], f16), T([96, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 16, 112, 112], f16), T([96, 32, 112, 112], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([96, 32, 112, 112], f16), T([96, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, 
True, False]), {}) +cnt: 1, ((T([96, 32, 112, 112], f16), T([96, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([96, 3, 224, 224], f16), T([96, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([96, 1280, 7, 7], f16, stride=(1280, 1, 0, 0)), 49), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 96000), {}) +Operator: aten.hardtanh_.default +cnt: 2, ((T([96, 32, 112, 112], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 96, 112, 112], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 96, 56, 56], f16), 0.0, 6.0), {}) +cnt: 3, ((T([96, 144, 56, 56], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 144, 28, 28], f16), 0.0, 6.0), {}) +cnt: 5, ((T([96, 192, 28, 28], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 192, 14, 14], f16), 0.0, 6.0), {}) +cnt: 8, ((T([96, 384, 14, 14], f16), 0.0, 6.0), {}) +cnt: 5, ((T([96, 576, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 576, 7, 7], f16), 0.0, 6.0), {}) +cnt: 6, ((T([96, 960, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 1280, 7, 7], f16), 0.0, 6.0), {}) +Operator: aten.hardtanh_backward.default +cnt: 1, ((T([96, 1280, 7, 7], f16), T([96, 1280, 7, 7], f16), 0.0, 6.0), {}) +cnt: 6, ((T([96, 960, 7, 7], f16), T([96, 960, 7, 7], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 576, 7, 7], f16), T([96, 576, 7, 7], f16), 0.0, 6.0), {}) +cnt: 5, ((T([96, 576, 14, 14], f16), T([96, 576, 14, 14], f16), 0.0, 6.0), {}) +cnt: 8, ((T([96, 384, 14, 14], f16), T([96, 384, 14, 14], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 192, 14, 14], f16), T([96, 192, 14, 14], f16), 0.0, 6.0), {}) +cnt: 5, ((T([96, 192, 28, 28], f16), T([96, 192, 28, 28], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 144, 28, 28], f16), T([96, 144, 28, 28], f16), 0.0, 6.0), {}) +cnt: 3, ((T([96, 144, 56, 56], f16), T([96, 144, 56, 56], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 96, 56, 56], f16), T([96, 96, 56, 56], f16), 0.0, 6.0), {}) +cnt: 1, ((T([96, 96, 112, 112], f16), T([96, 96, 112, 112], 
f16), 0.0, 6.0), {}) +cnt: 2, ((T([96, 32, 112, 112], f16), T([96, 32, 112, 112], f16), 0.0, 6.0), {}) +Operator: aten.mean.dim +cnt: 1, ((T([96, 1280, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([96, 1000], f16, stride=(0, 0)), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 96], f16, stride=(0, 0)), T([96, 1280], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([96, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([96, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([96, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([96, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), False, 0.1, 1e-05), {}) +cnt: 2, ((T([96, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([96, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([96, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([96, 32, 28, 28], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([96, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([96, 192, 14, 14], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), False, 0.1, 1e-05), {}) +cnt: 4, ((T([96, 64, 14, 14], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 8, ((T([96, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([96, 96, 14, 14], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([96, 576, 14, 14], f16), T([576], 
f16), T([576], f16), T([576], f16), T([576], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([96, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([96, 160, 7, 7], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), False, 0.1, 1e-05), {}) +cnt: 6, ((T([96, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([96, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([96, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([96, 1280, 7, 7], f16), T([96, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([96, 320, 7, 7], f16), T([96, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), False, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([96, 960, 7, 7], f16), T([96, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([96, 160, 7, 7], f16), T([96, 160, 7, 7], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([96, 576, 7, 7], f16), T([96, 576, 7, 7], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([96, 576, 14, 14], f16), T([96, 576, 14, 14], f16), T([576], f16), T([576], f16), T([576], f16), T([576], f32), T([576], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([96, 96, 14, 14], f16), T([96, 96, 14, 14], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), False, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([96, 384, 14, 14], 
f16), T([96, 384, 14, 14], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), False, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([96, 64, 14, 14], f16), T([96, 64, 14, 14], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([96, 192, 14, 14], f16), T([96, 192, 14, 14], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([96, 192, 28, 28], f16), T([96, 192, 28, 28], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([96, 32, 28, 28], f16), T([96, 32, 28, 28], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([96, 144, 28, 28], f16), T([96, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([96, 144, 56, 56], f16), T([96, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([96, 24, 56, 56], f16), T([96, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([96, 96, 56, 56], f16), T([96, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([96, 96, 112, 112], f16), T([96, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([96, 16, 112, 112], f16), T([96, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([96, 32, 112, 112], f16), T([96, 32, 112, 112], f16), T([32], f16), T([32], f16), 
T([32], f16), T([32], f32), T([32], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([96, 1000], f16, stride=(0, 0)), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([96, 1000], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/mobilenet_v3_large_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/mobilenet_v3_large_training.txt new file mode 100644 index 000000000..07ba40cf1 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/mobilenet_v3_large_training.txt @@ -0,0 +1,277 @@ +Operator: aten.add.Tensor +cnt: 2, ((T([32, 960, 7, 7], f16), T([32, 960, 7, 7], f16)), {}) +cnt: 2, ((T([32, 160, 7, 7], f16), T([32, 160, 7, 7], f16)), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 7, 7], f16)), {}) +cnt: 1, ((T([32, 672, 14, 14], f16), T([32, 672, 14, 14], f16)), {}) +cnt: 1, ((T([32, 112, 14, 14], f16), T([32, 112, 14, 14], f16)), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16)), {}) +cnt: 3, ((T([32, 80, 14, 14], f16), T([32, 80, 14, 14], f16)), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([32, 120, 28, 28], f16)), {}) +cnt: 2, ((T([32, 40, 28, 28], f16), T([32, 40, 28, 28], f16)), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([32, 72, 28, 28], f16)), {}) +cnt: 1, ((T([32, 24, 56, 56], f16), T([32, 24, 56, 56], f16)), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 16, 112, 112], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 16, 112, 112], f16)), {}) +cnt: 1, ((T([32, 24, 56, 56], f16), T([32, 24, 56, 56], f16)), {}) +cnt: 2, ((T([32, 40, 28, 28], f16), T([32, 40, 28, 28], f16)), {}) +cnt: 3, ((T([32, 80, 14, 14], f16), T([32, 80, 14, 14], f16)), {}) +cnt: 1, ((T([32, 112, 14, 14], f16), T([32, 112, 14, 14], f16)), {}) +cnt: 2, ((T([32, 160, 7, 7], f16), T([32, 160, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1280], f16), T([32, 960], f16), 
T([960, 1280], f16, stride=(1, 960))), {}) +cnt: 1, ((T([1000], f16), T([32, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +cnt: 1, ((T([32, 16, 112, 112], f16),), {}) +cnt: 1, ((T([32, 240, 28, 28], f16),), {}) +cnt: 1, ((T([32, 240, 14, 14], f16),), {}) +cnt: 2, ((T([32, 200, 14, 14], f16),), {}) +cnt: 4, ((T([32, 184, 14, 14], f16),), {}) +cnt: 2, ((T([32, 480, 14, 14], f16),), {}) +cnt: 3, ((T([32, 672, 14, 14], f16),), {}) +cnt: 1, ((T([32, 672, 7, 7], f16),), {}) +cnt: 5, ((T([32, 960, 7, 7], f16),), {}) +cnt: 1, ((T([32, 1280], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([16, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([16, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 16), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([16, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([64, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([64, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 64), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([24, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 24, 56, 56], f16), T([72, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 72, 56, 56], f16), T([72, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 72), {}) +cnt: 1, ((T([32, 72, 56, 56], f16), T([24, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 72, 56, 56], f16), T([72, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 72), {}) +cnt: 1, ((T([32, 72, 1, 1], f16), T([24, 72, 1, 1], f16), T([24], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 24, 1, 1], f16), T([72, 24, 1, 1], 
f16), T([72], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([40, 72, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 40, 28, 28], f16), T([120, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([120, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 120), {}) +cnt: 2, ((T([32, 120, 1, 1], f16), T([32, 120, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 32, 1, 1], f16), T([120, 32, 1, 1], f16), T([120], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([40, 120, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 40, 28, 28], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([240, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 80, 14, 14], f16), T([200, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 200, 14, 14], f16), T([200, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 200), {}) +cnt: 1, ((T([32, 200, 14, 14], f16), T([80, 200, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 80, 14, 14], f16), T([184, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 184, 14, 14], f16), T([184, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 184), {}) +cnt: 2, ((T([32, 184, 14, 14], f16), T([80, 184, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 80, 14, 14], f16), T([480, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([480, 1, 3, 3], f16), None, 
[1, 1], [1, 1], [1, 1], False, [0, 0], 480), {}) +cnt: 1, ((T([32, 480, 1, 1], f16), T([120, 480, 1, 1], f16), T([120], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 120, 1, 1], f16), T([480, 120, 1, 1], f16), T([480], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([112, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 112, 14, 14], f16), T([672, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 672, 14, 14], f16), T([672, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 672), {}) +cnt: 2, ((T([32, 672, 1, 1], f16), T([168, 672, 1, 1], f16), T([168], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 168, 1, 1], f16), T([672, 168, 1, 1], f16), T([672], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 672, 14, 14], f16), T([112, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 672, 14, 14], f16), T([672, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([160, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 160, 7, 7], f16), T([960, 160, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 960, 7, 7], f16), T([960, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 960), {}) +cnt: 2, ((T([32, 960, 1, 1], f16), T([240, 960, 1, 1], f16), T([240], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 240, 1, 1], f16), T([960, 240, 1, 1], f16), T([960], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 960, 7, 7], f16), T([160, 960, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([32, 960, 7, 7], f16), T([32, 160, 7, 7], f16), T([960, 160, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], 
False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 160, 7, 7], f16), T([32, 960, 7, 7], f16), T([160, 960, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 960, 1, 1], f16), T([32, 240, 1, 1], f16), T([960, 240, 1, 1], f16), [960], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 240, 1, 1], f16), T([32, 960, 1, 1], f16), T([240, 960, 1, 1], f16), [240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 960, 7, 7], f16), T([32, 960, 7, 7], f16), T([960, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 960, [True, True, False]), {}) +cnt: 1, ((T([32, 160, 7, 7], f16), T([32, 672, 7, 7], f16), T([160, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 672, 1, 1], f16), T([32, 168, 1, 1], f16), T([672, 168, 1, 1], f16), [672], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 168, 1, 1], f16), T([32, 672, 1, 1], f16), T([168, 672, 1, 1], f16), [168], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 14, 14], f16), T([672, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 2, ((T([32, 672, 14, 14], f16), T([32, 112, 14, 14], f16), T([672, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 112, 14, 14], f16), T([32, 672, 14, 14], f16), T([112, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 672, 14, 14], f16), T([32, 672, 14, 14], f16), T([672, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 1, ((T([32, 112, 14, 14], f16), T([32, 480, 14, 14], f16), T([112, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, 
((T([32, 480, 1, 1], f16), T([32, 120, 1, 1], f16), T([480, 120, 1, 1], f16), [480], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 120, 1, 1], f16), T([32, 480, 1, 1], f16), T([120, 480, 1, 1], f16), [120], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16), T([480, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([32, 80, 14, 14], f16), T([480, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 80, 14, 14], f16), T([32, 184, 14, 14], f16), T([80, 184, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 184, 14, 14], f16), T([32, 184, 14, 14], f16), T([184, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 184, [True, True, False]), {}) +cnt: 2, ((T([32, 184, 14, 14], f16), T([32, 80, 14, 14], f16), T([184, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 80, 14, 14], f16), T([32, 200, 14, 14], f16), T([80, 200, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 200, 14, 14], f16), T([32, 200, 14, 14], f16), T([200, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 200, [True, True, False]), {}) +cnt: 1, ((T([32, 200, 14, 14], f16), T([32, 80, 14, 14], f16), T([200, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 80, 14, 14], f16), T([32, 240, 14, 14], f16), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([32, 240, 28, 28], f16), T([240, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([32, 40, 28, 28], 
f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 40, 28, 28], f16), T([32, 120, 28, 28], f16), T([40, 120, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 120, 1, 1], f16), T([32, 32, 1, 1], f16), T([120, 32, 1, 1], f16), [120], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 32, 1, 1], f16), T([32, 120, 1, 1], f16), T([32, 120, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([32, 120, 28, 28], f16), T([120, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 120, [True, True, False]), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([32, 40, 28, 28], f16), T([120, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 40, 28, 28], f16), T([32, 72, 28, 28], f16), T([40, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 72, 1, 1], f16), T([32, 24, 1, 1], f16), T([72, 24, 1, 1], f16), [72], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 24, 1, 1], f16), T([32, 72, 1, 1], f16), T([24, 72, 1, 1], f16), [24], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([32, 72, 56, 56], f16), T([72, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 72, [True, True, False]), {}) +cnt: 2, ((T([32, 72, 56, 56], f16), T([32, 24, 56, 56], f16), T([72, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 24, 56, 56], f16), T([32, 72, 56, 56], f16), T([24, 72, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 72, 56, 56], f16), T([32, 72, 56, 56], f16), T([72, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 72, 
[True, True, False]), {}) +cnt: 1, ((T([32, 24, 56, 56], f16), T([32, 64, 56, 56], f16), T([24, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 112, 112], f16), T([64, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 64, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 16, 112, 112], f16), T([64, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 16, 112, 112], f16), T([16, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 16, 112, 112], f16), T([16, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 16, [True, True, False]), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 3, 224, 224], f16), T([16, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 3, ((T([32, 960, 7, 7], f16, stride=(960, 1, 0, 0)), 49), {}) +cnt: 1, ((T([32, 672, 7, 7], f16, stride=(672, 1, 0, 0)), 49), {}) +cnt: 1, ((T([32, 672, 14, 14], f16, stride=(672, 1, 0, 0)), 196), {}) +cnt: 1, ((T([32, 480, 14, 14], f16, stride=(480, 1, 0, 0)), 196), {}) +cnt: 2, ((T([32, 120, 28, 28], f16, stride=(120, 1, 0, 0)), 784), {}) +cnt: 1, ((T([32, 72, 28, 28], f16, stride=(72, 1, 0, 0)), 784), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 32000), {}) +Operator: aten.hardsigmoid.default +cnt: 1, ((T([32, 72, 1, 1], f16),), {}) +cnt: 2, ((T([32, 120, 1, 1], f16),), {}) +cnt: 1, ((T([32, 480, 1, 1], f16),), {}) +cnt: 2, ((T([32, 672, 1, 1], f16),), {}) +cnt: 2, ((T([32, 960, 1, 1], f16),), {}) +Operator: aten.hardsigmoid_backward.default +cnt: 2, ((T([32, 960, 1, 1], f16), T([32, 960, 1, 1], f16)), {}) +cnt: 2, ((T([32, 672, 1, 1], 
f16), T([32, 672, 1, 1], f16)), {}) +cnt: 1, ((T([32, 480, 1, 1], f16), T([32, 480, 1, 1], f16)), {}) +cnt: 2, ((T([32, 120, 1, 1], f16), T([32, 120, 1, 1], f16)), {}) +cnt: 1, ((T([32, 72, 1, 1], f16), T([32, 72, 1, 1], f16)), {}) +Operator: aten.hardswish_.default +cnt: 1, ((T([32, 16, 112, 112], f16),), {}) +cnt: 1, ((T([32, 240, 28, 28], f16),), {}) +cnt: 1, ((T([32, 240, 14, 14], f16),), {}) +cnt: 2, ((T([32, 200, 14, 14], f16),), {}) +cnt: 4, ((T([32, 184, 14, 14], f16),), {}) +cnt: 2, ((T([32, 480, 14, 14], f16),), {}) +cnt: 3, ((T([32, 672, 14, 14], f16),), {}) +cnt: 1, ((T([32, 672, 7, 7], f16),), {}) +cnt: 5, ((T([32, 960, 7, 7], f16),), {}) +cnt: 1, ((T([32, 1280], f16),), {}) +Operator: aten.hardswish_backward.default +cnt: 1, ((T([32, 1280], f16), T([32, 1280], f16)), {}) +cnt: 5, ((T([32, 960, 7, 7], f16), T([32, 960, 7, 7], f16)), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 7, 7], f16)), {}) +cnt: 3, ((T([32, 672, 14, 14], f16), T([32, 672, 14, 14], f16)), {}) +cnt: 2, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16)), {}) +cnt: 4, ((T([32, 184, 14, 14], f16), T([32, 184, 14, 14], f16)), {}) +cnt: 2, ((T([32, 200, 14, 14], f16), T([32, 200, 14, 14], f16)), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([32, 240, 14, 14], f16)), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([32, 240, 28, 28], f16)), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 16, 112, 112], f16)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 72, 28, 28], f16), [-1, -2], True), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), [-1, -2], True), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), [-1, -2], True), {}) +cnt: 1, ((T([32, 672, 14, 14], f16), [-1, -2], True), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), [-1, -2], True), {}) +cnt: 3, ((T([32, 960, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(0, 0)), T([32, 1280], f16)), {}) +cnt: 1, ((T([32, 1280], f16), 
T([1280, 960], f16)), {}) +cnt: 1, ((T([1280, 32], f16, stride=(1, 1280)), T([32, 960], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([32, 72, 1, 1], f16), T([32, 72, 28, 28], f16)), {}) +cnt: 2, ((T([32, 120, 1, 1], f16), T([32, 120, 28, 28], f16)), {}) +cnt: 1, ((T([32, 480, 1, 1], f16), T([32, 480, 14, 14], f16)), {}) +cnt: 1, ((T([32, 672, 1, 1], f16), T([32, 672, 14, 14], f16)), {}) +cnt: 1, ((T([32, 672, 1, 1], f16), T([32, 672, 7, 7], f16)), {}) +cnt: 2, ((T([32, 960, 1, 1], f16), T([32, 960, 7, 7], f16)), {}) +cnt: 2, ((T([32, 960, 7, 7], f16), T([32, 960, 1, 1], f16)), {}) +cnt: 2, ((T([32, 960, 7, 7], f16), T([32, 960, 7, 7], f16)), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 1, 1], f16)), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 7, 7], f16)), {}) +cnt: 1, ((T([32, 672, 14, 14], f16), T([32, 672, 1, 1], f16)), {}) +cnt: 1, ((T([32, 672, 14, 14], f16), T([32, 672, 14, 14], f16)), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([32, 480, 1, 1], f16)), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16)), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([32, 120, 1, 1], f16)), {}) +cnt: 2, ((T([32, 120, 28, 28], f16), T([32, 120, 28, 28], f16)), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([32, 72, 1, 1], f16)), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([32, 72, 28, 28], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 3, ((T([32, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), False, 0.01, 0.001), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.01, 0.001), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.01, 0.001), {}) +cnt: 2, ((T([32, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), False, 0.01, 0.001), {}) +cnt: 3, ((T([32, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), False, 0.01, 0.001), {}) +cnt: 1, ((T([32, 72, 
28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f16), False, 0.01, 0.001), {}) +cnt: 3, ((T([32, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), False, 0.01, 0.001), {}) +cnt: 4, ((T([32, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f16), False, 0.01, 0.001), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), False, 0.01, 0.001), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), False, 0.01, 0.001), {}) +cnt: 4, ((T([32, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), False, 0.01, 0.001), {}) +cnt: 2, ((T([32, 200, 14, 14], f16), T([200], f16), T([200], f16), T([200], f16), T([200], f16), False, 0.01, 0.001), {}) +cnt: 4, ((T([32, 184, 14, 14], f16), T([184], f16), T([184], f16), T([184], f16), T([184], f16), False, 0.01, 0.001), {}) +cnt: 2, ((T([32, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), False, 0.01, 0.001), {}) +cnt: 2, ((T([32, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), False, 0.01, 0.001), {}) +cnt: 3, ((T([32, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), False, 0.01, 0.001), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), False, 0.01, 0.001), {}) +cnt: 3, ((T([32, 160, 7, 7], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), False, 0.01, 0.001), {}) +cnt: 5, ((T([32, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f16), False, 0.01, 0.001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 5, ((T([32, 960, 7, 7], f16), T([32, 960, 7, 7], f16), T([960], f16), T([960], f16), T([960], f16), T([960], f32), T([960], f32), False, 0.001, [True, True, True]), {}) +cnt: 3, ((T([32, 160, 7, 7], f16), T([32, 160, 7, 7], f16), T([160], f16), T([160], f16), 
T([160], f16), T([160], f32), T([160], f32), False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), False, 0.001, [True, True, True]), {}) +cnt: 3, ((T([32, 672, 14, 14], f16), T([32, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), False, 0.001, [True, True, True]), {}) +cnt: 2, ((T([32, 112, 14, 14], f16), T([32, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), False, 0.001, [True, True, True]), {}) +cnt: 2, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), False, 0.001, [True, True, True]), {}) +cnt: 4, ((T([32, 80, 14, 14], f16), T([32, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), False, 0.001, [True, True, True]), {}) +cnt: 4, ((T([32, 184, 14, 14], f16), T([32, 184, 14, 14], f16), T([184], f16), T([184], f16), T([184], f16), T([184], f32), T([184], f32), False, 0.001, [True, True, True]), {}) +cnt: 2, ((T([32, 200, 14, 14], f16), T([32, 200, 14, 14], f16), T([200], f16), T([200], f16), T([200], f16), T([200], f32), T([200], f32), False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([32, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([32, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), False, 0.001, [True, True, True]), {}) +cnt: 3, ((T([32, 40, 28, 28], f16), T([32, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), False, 0.001, [True, True, True]), {}) +cnt: 4, ((T([32, 120, 28, 28], f16), T([32, 120, 28, 28], f16), T([120], f16), T([120], f16), T([120], f16), T([120], f32), T([120], f32), 
False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([32, 72, 28, 28], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), False, 0.001, [True, True, True]), {}) +cnt: 3, ((T([32, 72, 56, 56], f16), T([32, 72, 56, 56], f16), T([72], f16), T([72], f16), T([72], f16), T([72], f32), T([72], f32), False, 0.001, [True, True, True]), {}) +cnt: 2, ((T([32, 24, 56, 56], f16), T([32, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 0.001, [True, True, True]), {}) +cnt: 3, ((T([32, 16, 112, 112], f16), T([32, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), False, 0.001, [True, True, True]), {}) +Operator: aten.relu.default +cnt: 1, ((T([32, 24, 1, 1], f16),), {}) +cnt: 2, ((T([32, 32, 1, 1], f16),), {}) +cnt: 1, ((T([32, 120, 1, 1], f16),), {}) +cnt: 2, ((T([32, 168, 1, 1], f16),), {}) +cnt: 2, ((T([32, 240, 1, 1], f16),), {}) +Operator: aten.relu_.default +cnt: 1, ((T([32, 16, 112, 112], f16),), {}) +cnt: 1, ((T([32, 64, 112, 112], f16),), {}) +cnt: 1, ((T([32, 64, 56, 56], f16),), {}) +cnt: 3, ((T([32, 72, 56, 56], f16),), {}) +cnt: 1, ((T([32, 72, 28, 28], f16),), {}) +cnt: 4, ((T([32, 120, 28, 28], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), [0], True), {}) +cnt: 1, ((T([32, 1280], f16), [0], True), {}) +cnt: 2, ((T([32, 960, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 672, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 2, ((T([32, 120, 28, 
28], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), [2, 3], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([32, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([32, 240, 1, 1], f16), T([32, 240, 1, 1], f16), 0), {}) +cnt: 2, ((T([32, 168, 1, 1], f16), T([32, 168, 1, 1], f16), 0), {}) +cnt: 1, ((T([32, 120, 1, 1], f16), T([32, 120, 1, 1], f16), 0), {}) +cnt: 2, ((T([32, 32, 1, 1], f16), T([32, 32, 1, 1], f16), 0), {}) +cnt: 4, ((T([32, 120, 28, 28], f16), T([32, 120, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 24, 1, 1], f16), T([32, 24, 1, 1], f16), 0), {}) +cnt: 1, ((T([32, 72, 28, 28], f16), T([32, 72, 28, 28], f16), 0), {}) +cnt: 3, ((T([32, 72, 56, 56], f16), T([32, 72, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), 0), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 16, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/nvidia_deeprecommender_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/nvidia_deeprecommender_training.txt new file mode 100644 index 000000000..438f22893 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/nvidia_deeprecommender_training.txt @@ -0,0 +1,36 @@ +Operator: aten.addmm.default +cnt: 1, ((T([512], f16), T([256, 197951], f16), T([197951, 512], f16, stride=(1, 197951))), {}) +cnt: 2, ((T([512], f16), T([256, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 1, ((T([1024], f16), T([256, 512], f16), T([512, 1024], f16, stride=(1, 512))), {}) +cnt: 1, ((T([512], f16), T([256, 1024], f16), T([1024, 512], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([197951], f16), T([256, 512], f16), T([512, 197951], f16, stride=(1, 512))), {}) +Operator: aten.clone.default +cnt: 1, ((T([256, 197951], f16),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([256, 197951], 
f16), T([256, 197951], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 50675456), {}) +Operator: aten.elu.default +cnt: 4, ((T([256, 512], f16), 1.6732632423543772, 1.0507009873554805), {}) +cnt: 1, ((T([256, 1024], f16), 1.6732632423543772, 1.0507009873554805), {}) +cnt: 1, ((T([256, 197951], f16), 1.6732632423543772, 1.0507009873554805), {}) +Operator: aten.elu_backward.default +cnt: 1, ((T([256, 197951], f16, stride=(0, 0)), 1.6732632423543772, 1.0507009873554805, 1, False, T([256, 197951], f16)), {}) +cnt: 4, ((T([256, 512], f16), 1.6732632423543772, 1.0507009873554805, 1, False, T([256, 512], f16)), {}) +cnt: 1, ((T([256, 1024], f16), 1.6732632423543772, 1.0507009873554805, 1, False, T([256, 1024], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([256, 197951], f16), T([197951, 512], f16)), {}) +cnt: 1, ((T([197951, 256], f16, stride=(1, 197951)), T([256, 512], f16)), {}) +cnt: 2, ((T([256, 512], f16), T([512, 512], f16)), {}) +cnt: 2, ((T([512, 256], f16, stride=(1, 512)), T([256, 512], f16)), {}) +cnt: 1, ((T([256, 512], f16), T([512, 1024], f16)), {}) +cnt: 1, ((T([512, 256], f16, stride=(1, 512)), T([256, 1024], f16)), {}) +cnt: 1, ((T([256, 1024], f16), T([1024, 512], f16)), {}) +cnt: 1, ((T([1024, 256], f16, stride=(1, 1024)), T([256, 512], f16)), {}) +cnt: 1, ((T([512, 256], f16, stride=(1, 512)), T([256, 197951], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([256, 197951], f16), [0], True), {}) +cnt: 4, ((T([256, 512], f16), [0], True), {}) +cnt: 1, ((T([256, 1024], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([256, 197951], f16),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_CycleGAN_and_pix2pix_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_CycleGAN_and_pix2pix_training.txt new file mode 100644 index 000000000..81c5a051f --- /dev/null +++ 
b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_CycleGAN_and_pix2pix_training.txt @@ -0,0 +1,67 @@ +Operator: aten.add.Tensor +cnt: 18, ((T([1, 256, 64, 64], f16), T([1, 256, 64, 64], f16)), {}) +Operator: aten.clone.default +cnt: 1, ((T([1, 3, 256, 256], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([1, 3, 262, 262], f16), T([64, 3, 7, 7], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 64, 256, 256], f16), T([128, 64, 3, 3], f16), T([128], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 128, 128, 128], f16), T([256, 128, 3, 3], f16), T([256], f16), [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 18, ((T([1, 256, 66, 66], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 256, 64, 64], f16), T([256, 128, 3, 3], f16), T([128], f16), [2, 2], [1, 1], [1, 1], True, [1, 1], 1), {}) +cnt: 1, ((T([1, 128, 128, 128], f16), T([128, 64, 3, 3], f16), T([64], f16), [2, 2], [1, 1], [1, 1], True, [1, 1], 1), {}) +cnt: 1, ((T([1, 64, 262, 262], f16), T([3, 64, 7, 7], f16), T([3], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([1, 3, 256, 256], f16), T([1, 64, 262, 262], f16), T([3, 64, 7, 7], f16), [3], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 64, 256, 256], f16), T([1, 128, 128, 128], f16), T([128, 64, 3, 3], f16), [64], [2, 2], [1, 1], [1, 1], True, [1, 1], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 128, 128, 128], f16), T([1, 256, 64, 64], f16), T([256, 128, 3, 3], f16), [128], [2, 2], [1, 1], [1, 1], True, [1, 1], 1, [True, True, True]), {}) +cnt: 18, ((T([1, 256, 64, 64], f16), T([1, 256, 66, 66], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 256, 64, 64], f16), T([1, 128, 128, 128], f16), T([256, 128, 3, 3], f16), [256], [2, 
2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 128, 128, 128], f16), T([1, 64, 256, 256], f16), T([128, 64, 3, 3], f16), [128], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 64, 256, 256], f16), T([1, 3, 262, 262], f16), T([64, 3, 7, 7], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([1, 3, 256, 256], f16), T([1, 3, 256, 256], f16)), {}) +cnt: 2, ((T([64, 256, 256], f16), T([64, 256, 256], f16)), {}) +cnt: 4, ((T([1, 64, 256, 256], f16), T([1, 64, 256, 256], f16)), {}) +cnt: 2, ((T([128, 128, 128], f16), T([128, 128, 128], f16)), {}) +cnt: 4, ((T([1, 128, 128, 128], f16), T([1, 128, 128, 128], f16)), {}) +cnt: 10, ((T([256, 64, 64], f16), T([256, 64, 64], f16)), {}) +cnt: 20, ((T([1, 256, 64, 64], f16), T([1, 256, 64, 64], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 196608), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([1, 64, 256, 256], f16), None, None, None, None, True, 0.1, 1e-05), {}) +cnt: 2, ((T([1, 128, 128, 128], f16), None, None, None, None, True, 0.1, 1e-05), {}) +cnt: 19, ((T([1, 256, 64, 64], f16), None, None, None, None, True, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 2, ((T([1, 64, 256, 256], f16), T([1, 64, 256, 256], f16), None, None, None, T([64], f32), T([64], f32), True, 1e-05, [True, False, False]), {}) +cnt: 2, ((T([1, 128, 128, 128], f16), T([1, 128, 128, 128], f16), None, None, None, T([128], f32), T([128], f32), True, 1e-05, [True, False, False]), {}) +cnt: 19, ((T([1, 256, 64, 64], f16), T([1, 256, 64, 64], f16), None, None, None, T([256], f32), T([256], f32), True, 1e-05, [True, False, False]), {}) +Operator: aten.new_empty_strided.default +cnt: 2, ((T([1, 64, 256, 256], f16), [1, 64, 256, 256], [4194304, 65536, 256, 1]), {}) +cnt: 2, ((T([1, 128, 128, 128], f16), [1, 128, 128, 128], [2097152, 16384, 128, 1]), {}) +cnt: 10, ((T([1, 
256, 64, 64], f16), [1, 256, 64, 64], [1048576, 4096, 64, 1]), {}) +Operator: aten.new_zeros.default +cnt: 2, ((T([64, 256, 256], f16), [4194304]), {}) +cnt: 2, ((T([128, 128, 128], f16), [2097152]), {}) +cnt: 10, ((T([256, 64, 64], f16), [1048576]), {}) +Operator: aten.reflection_pad2d.default +cnt: 1, ((T([1, 3, 256, 256], f16), [3, 3, 3, 3]), {}) +cnt: 18, ((T([1, 256, 64, 64], f16), [1, 1, 1, 1]), {}) +cnt: 1, ((T([1, 64, 256, 256], f16), [3, 3, 3, 3]), {}) +Operator: aten.reflection_pad2d_backward.default +cnt: 1, ((T([1, 64, 262, 262], f16), T([1, 64, 256, 256], f16), [3, 3, 3, 3]), {}) +cnt: 18, ((T([1, 256, 66, 66], f16), T([1, 256, 64, 64], f16), [1, 1, 1, 1]), {}) +Operator: aten.relu_.default +cnt: 2, ((T([1, 64, 256, 256], f16),), {}) +cnt: 2, ((T([1, 128, 128, 128], f16),), {}) +cnt: 10, ((T([1, 256, 64, 64], f16),), {}) +Operator: aten.sum.default +cnt: 1, ((T([1, 3, 256, 256], f16),), {}) +Operator: aten.tanh.default +cnt: 1, ((T([1, 3, 256, 256], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 1, ((T([1, 3, 256, 256], f16, stride=(0, 0, 0, 0)), T([1, 3, 256, 256], f16)), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([1, 64, 256, 256], f16), T([1, 64, 256, 256], f16), 0), {}) +cnt: 2, ((T([1, 128, 128, 128], f16), T([1, 128, 128, 128], f16), 0), {}) +cnt: 10, ((T([1, 256, 64, 64], f16), T([1, 256, 64, 64], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_stargan_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_stargan_training.txt new file mode 100644 index 000000000..a2969693e --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_stargan_training.txt @@ -0,0 +1,80 @@ +Operator: aten.add.Tensor +cnt: 12, ((T([16, 256, 32, 32], f16), T([16, 256, 32, 32], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([16, 3, 128, 128], f16), T([16, 5, 128, 128], f16)], 1), {}) +Operator: aten.clone.default 
+cnt: 1, ((T([16, 3, 128, 128], f16),), {}) +cnt: 1, ((T([16, 5], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([16, 8, 128, 128], f16), T([64, 8, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 64, 128, 128], f16), T([128, 64, 4, 4], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 128, 64, 64], f16), T([256, 128, 4, 4], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 12, ((T([16, 256, 32, 32], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 256, 32, 32], f16), T([256, 128, 4, 4], f16), None, [2, 2], [1, 1], [1, 1], True, [0, 0], 1), {}) +cnt: 1, ((T([16, 128, 64, 64], f16), T([128, 64, 4, 4], f16), None, [2, 2], [1, 1], [1, 1], True, [0, 0], 1), {}) +cnt: 1, ((T([16, 64, 128, 128], f16), T([3, 64, 7, 7], f16), None, [1, 1], [3, 3], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([16, 3, 128, 128], f16), T([16, 64, 128, 128], f16), T([3, 64, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 64, 128, 128], f16), T([16, 128, 64, 64], f16), T([128, 64, 4, 4], f16), [0], [2, 2], [1, 1], [1, 1], True, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 128, 64, 64], f16), T([16, 256, 32, 32], f16), T([256, 128, 4, 4], f16), [0], [2, 2], [1, 1], [1, 1], True, [0, 0], 1, [True, True, False]), {}) +cnt: 12, ((T([16, 256, 32, 32], f16), T([16, 256, 32, 32], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 256, 32, 32], f16), T([16, 128, 64, 64], f16), T([256, 128, 4, 4], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 128, 64, 64], f16), T([16, 64, 128, 128], f16), T([128, 64, 4, 4], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 64, 128, 128], f16), T([16, 
8, 128, 128], f16), T([64, 8, 7, 7], f16), [0], [1, 1], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([16, 3, 128, 128], f16), T([16, 3, 128, 128], f16)), {}) +cnt: 1, ((T([16, 5], f16), T([16, 5], f16)), {}) +cnt: 4, ((T([64], f16), T([64], f16)), {}) +cnt: 4, ((T([128], f16), T([128], f16)), {}) +cnt: 26, ((T([256], f16), T([256], f16)), {}) +cnt: 4, ((T([16, 64, 128, 128], f16), T([16, 64, 128, 128], f16)), {}) +cnt: 2, ((T([1, 1024, 128, 128], f16), T([1, 1024, 128, 128], f16)), {}) +cnt: 4, ((T([16, 128, 64, 64], f16), T([16, 128, 64, 64], f16)), {}) +cnt: 2, ((T([1, 2048, 64, 64], f16), T([1, 2048, 64, 64], f16)), {}) +cnt: 14, ((T([16, 256, 32, 32], f16), T([16, 256, 32, 32], f16)), {}) +cnt: 7, ((T([1, 4096, 32, 32], f16), T([1, 4096, 32, 32], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 786432), {}) +Operator: aten.mean.dim +cnt: 4, ((T([16, 64], f16), [0]), {}) +cnt: 4, ((T([16, 128], f16), [0]), {}) +cnt: 26, ((T([16, 256], f16), [0]), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([1, 1024, 128, 128], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), False, 0.1, 1e-05), {}) +cnt: 2, ((T([1, 2048, 64, 64], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), False, 0.1, 1e-05), {}) +cnt: 13, ((T([1, 4096, 32, 32], f16), T([4096], f16), T([4096], f16), T([4096], f16), T([4096], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 2, ((T([1, 1024, 128, 128], f16), T([1, 1024, 128, 128], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([1, 2048, 64, 64], f16), T([1, 2048, 64, 64], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), False, 1e-05, [True, True, True]), {}) +cnt: 13, ((T([1, 4096, 32, 32], f16), T([1, 4096, 32, 32], f16), T([4096], f16), T([4096], f16), T([4096], f16), 
T([4096], f32), T([4096], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 2, ((T([1, 1024, 128, 128], f16), [1, 1024, 128, 128], [16777216, 16384, 128, 1]), {}) +cnt: 2, ((T([1, 2048, 64, 64], f16), [1, 2048, 64, 64], [8388608, 4096, 64, 1]), {}) +cnt: 7, ((T([1, 4096, 32, 32], f16), [1, 4096, 32, 32], [4194304, 1024, 32, 1]), {}) +Operator: aten.new_zeros.default +cnt: 2, ((T([16, 64, 128, 128], f16), [16777216]), {}) +cnt: 2, ((T([16, 128, 64, 64], f16), [8388608]), {}) +cnt: 7, ((T([16, 256, 32, 32], f16), [4194304]), {}) +Operator: aten.relu_.default +cnt: 2, ((T([16, 64, 128, 128], f16),), {}) +cnt: 2, ((T([16, 128, 64, 64], f16),), {}) +cnt: 7, ((T([16, 256, 32, 32], f16),), {}) +Operator: aten.repeat.default +cnt: 1, ((T([16, 5, 1, 1], f16), [1, 1, 128, 128]), {}) +cnt: 8, ((T([64], f16), [16]), {}) +cnt: 8, ((T([128], f16), [16]), {}) +cnt: 52, ((T([256], f16), [16]), {}) +Operator: aten.sum.default +cnt: 1, ((T([16, 3, 128, 128], f16),), {}) +Operator: aten.sum.dim_IntList +cnt: 4, ((T([16, 64], f16), [0]), {}) +cnt: 4, ((T([16, 128], f16), [0]), {}) +cnt: 26, ((T([16, 256], f16), [0]), {}) +Operator: aten.tanh.default +cnt: 1, ((T([16, 3, 128, 128], f16),), {}) +Operator: aten.tanh_backward.default +cnt: 1, ((T([16, 3, 128, 128], f16, stride=(0, 0, 0, 0)), T([16, 3, 128, 128], f16)), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([16, 64, 128, 128], f16), T([16, 64, 128, 128], f16), 0), {}) +cnt: 2, ((T([16, 128, 64, 64], f16), T([16, 128, 64, 64], f16), 0), {}) +cnt: 7, ((T([16, 256, 32, 32], f16), T([16, 256, 32, 32], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_struct_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_struct_training.txt new file mode 100644 index 000000000..3512fcd8f --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_struct_training.txt @@ 
-0,0 +1,63 @@ +Operator: aten._log_softmax.default +cnt: 1, ((T([30, 4771], f16, stride=(1, 30)), -1, False), {}) +cnt: 1, ((T([30, 3600], f16), -1, False), {}) +cnt: 1, ((T([30], f16), -1, False), {}) +Operator: aten._log_softmax_backward_data.default +cnt: 1, ((T([30], f16), T([30], f16), -1, f16), {}) +cnt: 1, ((T([30, 3600], f16), T([30, 3600], f16), -1, f16), {}) +cnt: 1, ((T([30, 4771], f16), T([30, 4771], f16), -1, f16), {}) +Operator: aten.add.Tensor +cnt: 4, ((T([30, 256], f16), T([30, 256], f16)), {}) +cnt: 1, ((T([], f16), 0), {}) +cnt: 2, ((T([], f16), T([], f16)), {}) +cnt: 4, ((T([30, 256], f16, stride=(1, 30)), T([30, 256], f16)), {}) +Operator: aten.addmm.default +cnt: 10, ((T([256], f16), T([30, 256], f16), T([256, 256], f16, stride=(1, 256))), {}) +Operator: aten.bmm.default +cnt: 1, ((T([1, 4771, 256], f16), T([1, 256, 30], f16, stride=(256, 1, 256))), {}) +cnt: 1, ((T([1, 30, 256], f16), T([1, 256, 3600], f16, stride=(256, 1, 256))), {}) +cnt: 1, ((T([1, 1, 256], f16), T([1, 256, 30], f16, stride=(256, 1, 256))), {}) +cnt: 1, ((T([1, 256, 1], f16), T([1, 1, 30], f16)), {}) +cnt: 1, ((T([1, 1, 30], f16), T([1, 30, 256], f16)), {}) +cnt: 1, ((T([1, 256, 30], f16, stride=(7680, 1, 256)), T([1, 30, 3600], f16)), {}) +cnt: 1, ((T([1, 30, 3600], f16), T([1, 3600, 256], f16)), {}) +cnt: 1, ((T([1, 256, 4771], f16, stride=(1221376, 1, 256)), T([1, 4771, 30], f16, stride=(4771, 1, 4771))), {}) +cnt: 1, ((T([1, 4771, 30], f16, stride=(4771, 1, 4771)), T([1, 30, 256], f16)), {}) +Operator: aten.clone.default +cnt: 1, ((T([40, 29], i64, stride=(1, 40)),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([40, 29], i64, stride=(1, 40)), T([40, 29], i64, stride=(1, 40))), {}) +cnt: 1, ((T([60, 60, 256], f16), T([60, 60, 256], f16, stride=(60, 1, 3600))), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 34800), {}) +cnt: 2, ((T([], f16), 4320000), {}) +cnt: 2, ((T([], f16), 1200), {}) +cnt: 2, ((T([], f16), 3), {}) +Operator: aten.gather.default +cnt: 1, 
((T([40, 29, 30, 4771], f16, stride=(0, 0, 4771, 1)), 3, T([40, 29, 30, 1], i64, stride=(1, 40, 0, 1))), {}) +Operator: aten.mm.default +cnt: 8, ((T([30, 256], f16), T([256, 256], f16)), {}) +cnt: 8, ((T([256, 30], f16, stride=(1, 256)), T([30, 256], f16)), {}) +cnt: 2, ((T([30, 256], f16, stride=(1, 30)), T([256, 256], f16)), {}) +cnt: 2, ((T([256, 30], f16), T([30, 256], f16)), {}) +Operator: aten.new_empty_strided.default +cnt: 1, ((T([60, 60, 256], f16, stride=(60, 1, 3600)), [60, 60, 256], [15360, 256, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.new_zeros.default +cnt: 1, ((T([40, 29, 30, 1], f16, stride=(0, 0, 0, 1)), [40, 29, 30, 4771]), {}) +Operator: aten.relu.default +cnt: 8, ((T([30, 256], f16),), {}) +Operator: aten.scatter_add.default +cnt: 1, ((T([40, 29, 30, 4771], f16), 3, T([40, 29, 30, 1], i64, stride=(1, 40, 0, 1)), T([40, 29, 30, 1], f16, stride=(0, 0, 0, 1))), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([40, 30], f16, stride=(0, 0)), [0], True), {}) +cnt: 8, ((T([30, 256], f16), [0], True), {}) +cnt: 2, ((T([30, 256], f16, stride=(1, 30)), [0], True), {}) +cnt: 1, ((T([40, 30, 60, 60], f16, stride=(0, 0, 0, 0)), [0], True), {}) +cnt: 1, ((T([40, 29, 30, 4771], f16), [0, 1], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([40, 29, 30], f16),), {}) +cnt: 1, ((T([40, 30, 60, 60], f16, stride=(0, 3600, 60, 1)),), {}) +cnt: 1, ((T([40, 30], f16, stride=(0, 1)),), {}) +Operator: aten.threshold_backward.default +cnt: 4, ((T([30, 256], f16, stride=(1, 30)), T([30, 256], f16), 0), {}) +cnt: 4, ((T([30, 256], f16), T([30, 256], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_unet_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_unet_training.txt new file mode 100644 index 000000000..e2e12ab9b --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/pytorch_unet_training.txt @@ -0,0 +1,119 @@ 
+Operator: aten.add.Tensor +cnt: 1, ((T([1, 512, 80, 119], f16), T([1, 512, 80, 119], f16)), {}) +cnt: 1, ((T([1, 256, 160, 239], f16), T([1, 256, 160, 239], f16)), {}) +cnt: 1, ((T([1, 128, 320, 479], f16), T([1, 128, 320, 479], f16)), {}) +cnt: 1, ((T([1, 64, 640, 959], f16), T([1, 64, 640, 959], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([1, 512, 80, 119], f16), T([1, 512, 80, 119], f16)], 1), {}) +cnt: 1, (([T([1, 256, 160, 239], f16), T([1, 256, 160, 239], f16)], 1), {}) +cnt: 1, (([T([1, 128, 320, 479], f16), T([1, 128, 320, 479], f16)], 1), {}) +cnt: 1, (([T([1, 64, 640, 959], f16), T([1, 64, 640, 959], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([1, 3, 640, 959], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([1, 512, 80, 118], f16), [0, 1, 0, 0], 0.0), {}) +cnt: 1, ((T([1, 256, 160, 238], f16), [0, 1, 0, 0], 0.0), {}) +cnt: 1, ((T([1, 128, 320, 478], f16), [0, 1, 0, 0], 0.0), {}) +cnt: 1, ((T([1, 64, 640, 958], f16), [0, 1, 0, 0], 0.0), {}) +cnt: 1, ((T([1, 64, 640, 959], f16), [0, -1, 0, 0]), {}) +cnt: 1, ((T([1, 128, 320, 479], f16), [0, -1, 0, 0]), {}) +cnt: 1, ((T([1, 256, 160, 239], f16), [0, -1, 0, 0]), {}) +cnt: 1, ((T([1, 512, 80, 119], f16), [0, -1, 0, 0]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([1, 3, 640, 959], f16), T([64, 3, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([1, 64, 640, 959], f16), T([64, 64, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 64, 320, 479], f16), T([128, 64, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 128, 320, 479], f16), T([128, 128, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 128, 160, 239], f16), T([256, 128, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 256, 160, 239], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 
1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 256, 80, 119], f16), T([512, 256, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 512, 80, 119], f16), T([512, 512, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([1, 512, 40, 59], f16), T([512, 512, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 1024, 80, 119], f16), T([512, 1024, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 512, 80, 119], f16), T([256, 512, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 512, 160, 239], f16), T([256, 512, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 256, 160, 239], f16), T([128, 256, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 256, 320, 479], f16), T([128, 256, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 128, 320, 479], f16), T([64, 128, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 128, 640, 959], f16), T([64, 128, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 64, 640, 959], f16), T([2, 64, 1, 1], f16), T([2], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([1, 2, 640, 959], f16, stride=(0, 0, 0, 0)), T([1, 64, 640, 959], f16), T([2, 64, 1, 1], f16), [2], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([1, 64, 640, 959], f16), T([1, 64, 640, 959], f16), T([64, 64, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 64, 640, 959], f16), T([1, 128, 640, 959], f16), T([64, 128, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 64, 320, 479], f16), 
T([1, 128, 320, 479], f16), T([64, 128, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 128, 320, 479], f16), T([1, 256, 320, 479], f16), T([128, 256, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 128, 160, 239], f16), T([1, 256, 160, 239], f16), T([128, 256, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 256, 160, 239], f16), T([1, 512, 160, 239], f16), T([256, 512, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 256, 80, 119], f16), T([1, 512, 80, 119], f16), T([256, 512, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 512, 80, 119], f16), T([1, 1024, 80, 119], f16), T([512, 1024, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([1, 512, 40, 59], f16), T([1, 512, 40, 59], f16), T([512, 512, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 512, 80, 119], f16), T([1, 512, 80, 119], f16), T([512, 512, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 512, 80, 119], f16), T([1, 256, 80, 119], f16), T([512, 256, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 256, 160, 239], f16), T([1, 256, 160, 239], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 256, 160, 239], f16), T([1, 128, 160, 239], f16), T([256, 128, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 128, 320, 479], f16), T([1, 128, 320, 479], f16), T([128, 128, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 128, 320, 479], f16), T([1, 
64, 320, 479], f16), T([128, 64, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 64, 640, 959], f16), T([1, 3, 640, 959], f16), T([64, 3, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([1, 3, 640, 959], f16), T([1, 3, 640, 959], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 1227520), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([1, 64, 640, 959], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([1, 128, 320, 479], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([1, 256, 160, 239], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([1, 512, 80, 119], f16), [2, 2], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([1, 512, 40, 59], f16), T([1, 512, 80, 119], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([1, 512, 40, 59], i64)), {}) +cnt: 1, ((T([1, 256, 80, 119], f16), T([1, 256, 160, 239], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([1, 256, 80, 119], i64)), {}) +cnt: 1, ((T([1, 128, 160, 239], f16), T([1, 128, 320, 479], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([1, 128, 160, 239], i64)), {}) +cnt: 1, ((T([1, 64, 320, 479], f16), T([1, 64, 640, 959], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([1, 64, 320, 479], i64)), {}) +Operator: aten.native_batch_norm.default +cnt: 4, ((T([1, 64, 640, 959], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([1, 128, 320, 479], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([1, 256, 160, 239], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([1, 512, 80, 119], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 2, ((T([1, 512, 40, 59], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([1, 
256, 80, 119], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([1, 128, 160, 239], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([1, 64, 320, 479], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([1, 64, 640, 959], f16), T([1, 64, 640, 959], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([1, 64, 320, 479], f16), T([1, 64, 320, 479], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([1, 128, 320, 479], f16), T([1, 128, 320, 479], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([1, 128, 160, 239], f16), T([1, 128, 160, 239], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([1, 256, 160, 239], f16), T([1, 256, 160, 239], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([1, 256, 80, 119], f16), T([1, 256, 80, 119], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([1, 512, 80, 119], f16), T([1, 512, 80, 119], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([1, 512, 40, 59], f16), T([1, 512, 40, 59], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.relu_.default +cnt: 4, ((T([1, 64, 640, 959], f16),), {}) +cnt: 3, ((T([1, 128, 320, 479], f16),), {}) +cnt: 3, ((T([1, 256, 160, 239], f16),), {}) +cnt: 3, 
((T([1, 512, 80, 119], f16),), {}) +cnt: 2, ((T([1, 512, 40, 59], f16),), {}) +cnt: 1, ((T([1, 256, 80, 119], f16),), {}) +cnt: 1, ((T([1, 128, 160, 239], f16),), {}) +cnt: 1, ((T([1, 64, 320, 479], f16),), {}) +Operator: aten.sum.default +cnt: 1, ((T([1, 2, 640, 959], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 4, ((T([1, 64, 640, 959], f16), T([1, 64, 640, 959], f16), 0), {}) +cnt: 1, ((T([1, 64, 320, 479], f16), T([1, 64, 320, 479], f16), 0), {}) +cnt: 3, ((T([1, 128, 320, 479], f16), T([1, 128, 320, 479], f16), 0), {}) +cnt: 1, ((T([1, 128, 160, 239], f16), T([1, 128, 160, 239], f16), 0), {}) +cnt: 3, ((T([1, 256, 160, 239], f16), T([1, 256, 160, 239], f16), 0), {}) +cnt: 1, ((T([1, 256, 80, 119], f16), T([1, 256, 80, 119], f16), 0), {}) +cnt: 3, ((T([1, 512, 80, 119], f16), T([1, 512, 80, 119], f16), 0), {}) +cnt: 2, ((T([1, 512, 40, 59], f16), T([1, 512, 40, 59], f16), 0), {}) +Operator: aten.upsample_bilinear2d.vec +cnt: 1, ((T([1, 512, 40, 59], f16), None, True, [2.0, 2.0]), {}) +cnt: 1, ((T([1, 256, 80, 119], f16), None, True, [2.0, 2.0]), {}) +cnt: 1, ((T([1, 128, 160, 239], f16), None, True, [2.0, 2.0]), {}) +cnt: 1, ((T([1, 64, 320, 479], f16), None, True, [2.0, 2.0]), {}) +Operator: aten.upsample_bilinear2d_backward.vec +cnt: 1, ((T([1, 64, 640, 958], f16), None, [1, 64, 320, 479], True, [2.0, 2.0]), {}) +cnt: 1, ((T([1, 128, 320, 478], f16), None, [1, 128, 160, 239], True, [2.0, 2.0]), {}) +cnt: 1, ((T([1, 256, 160, 238], f16), None, [1, 256, 80, 119], True, [2.0, 2.0]), {}) +cnt: 1, ((T([1, 512, 80, 118], f16), None, [1, 512, 40, 59], True, [2.0, 2.0]), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/resnet18_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/resnet18_training.txt new file mode 100644 index 000000000..f949353a3 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/resnet18_training.txt @@ -0,0 +1,81 @@ +Operator: 
aten.add.Tensor +cnt: 1, ((T([16, 512, 7, 7], f16), T([16, 512, 7, 7], f16)), {}) +cnt: 2, ((T([16, 256, 14, 14], f16), T([16, 256, 14, 14], f16)), {}) +cnt: 2, ((T([16, 128, 28, 28], f16), T([16, 128, 28, 28], f16)), {}) +cnt: 3, ((T([16, 64, 56, 56], f16), T([16, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 2, ((T([16, 64, 56, 56], f16), T([16, 64, 56, 56], f16)), {}) +cnt: 2, ((T([16, 128, 28, 28], f16), T([16, 128, 28, 28], f16)), {}) +cnt: 2, ((T([16, 256, 14, 14], f16), T([16, 256, 14, 14], f16)), {}) +cnt: 2, ((T([16, 512, 7, 7], f16), T([16, 512, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([16, 512], f16), T([512, 1000], f16, stride=(1, 512))), {}) +Operator: aten.clone.default +cnt: 1, ((T([16, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([16, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([16, 64, 56, 56], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 64, 56, 56], f16), T([128, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([16, 128, 28, 28], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 64, 56, 56], f16), T([128, 64, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 128, 28, 28], f16), T([256, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([16, 256, 14, 14], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 128, 28, 28], f16), T([256, 128, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 256, 14, 14], f16), T([512, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([16, 512, 7, 7], f16), T([512, 512, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([16, 
256, 14, 14], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([16, 512, 7, 7], f16), T([16, 512, 7, 7], f16), T([512, 512, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 512, 7, 7], f16), T([16, 256, 14, 14], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 512, 7, 7], f16), T([16, 256, 14, 14], f16), T([512, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([16, 256, 14, 14], f16), T([16, 256, 14, 14], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 256, 14, 14], f16), T([16, 128, 28, 28], f16), T([256, 128, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 256, 14, 14], f16), T([16, 128, 28, 28], f16), T([256, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([16, 128, 28, 28], f16), T([16, 128, 28, 28], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 128, 28, 28], f16), T([16, 64, 56, 56], f16), T([128, 64, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 128, 28, 28], f16), T([16, 64, 56, 56], f16), T([128, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([16, 64, 56, 56], f16), T([16, 64, 56, 56], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([16, 64, 112, 112], f16), T([16, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([16, 3, 224, 224], f16), 
T([16, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([16, 512, 7, 7], f16, stride=(512, 1, 0, 0)), 49), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 16000), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([16, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([16, 64, 56, 56], f16), T([16, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([16, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([16, 512, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([16, 1000], f16, stride=(0, 0)), T([1000, 512], f16)), {}) +cnt: 1, ((T([1000, 16], f16, stride=(0, 0)), T([16, 512], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([16, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 4, ((T([16, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([16, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([16, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([16, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 5, ((T([16, 512, 7, 7], f16), T([16, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([16, 256, 14, 14], f16), T([16, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([16, 128, 28, 28], f16), T([16, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([16, 64, 56, 
56], f16), T([16, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([16, 64, 112, 112], f16), T([16, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.relu_.default +cnt: 1, ((T([16, 64, 112, 112], f16),), {}) +cnt: 4, ((T([16, 64, 56, 56], f16),), {}) +cnt: 4, ((T([16, 128, 28, 28], f16),), {}) +cnt: 4, ((T([16, 256, 14, 14], f16),), {}) +cnt: 4, ((T([16, 512, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([16, 1000], f16, stride=(0, 0)), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([16, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 4, ((T([16, 512, 7, 7], f16), T([16, 512, 7, 7], f16), 0), {}) +cnt: 4, ((T([16, 256, 14, 14], f16), T([16, 256, 14, 14], f16), 0), {}) +cnt: 4, ((T([16, 128, 28, 28], f16), T([16, 128, 28, 28], f16), 0), {}) +cnt: 4, ((T([16, 64, 56, 56], f16), T([16, 64, 56, 56], f16), 0), {}) +cnt: 1, ((T([16, 64, 112, 112], f16), T([16, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/resnet50_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/resnet50_training.txt new file mode 100644 index 000000000..517a1e3f1 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/resnet50_training.txt @@ -0,0 +1,134 @@ +Operator: aten.add.Tensor +cnt: 2, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16)), {}) +cnt: 6, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 4, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +cnt: 4, ((T([32, 512, 28, 28], 
f16), T([32, 512, 28, 28], f16)), {}) +cnt: 6, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 64, 56, 56], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 64, 56, 56], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 256, 56, 56], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([128, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 128, 28, 28], f16), T([512, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 512, 28, 28], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 128, 28, 28], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([256, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([32, 256, 14, 14], f16), T([1024, 256, 1, 1], f16), None, [1, 1], 
[0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([32, 1024, 14, 14], f16), T([256, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([32, 256, 14, 14], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([512, 512, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 512, 7, 7], f16), T([2048, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 2048, 7, 7], f16), T([512, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 512, 7, 7], f16), T([512, 512, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 512, 7, 7], f16), T([2048, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 512, 7, 7], f16), T([32, 512, 7, 7], f16), T([512, 512, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 512, 7, 7], f16), T([32, 2048, 7, 7], f16), T([512, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 7, 7], f16), T([32, 512, 14, 14], f16), T([512, 512, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([32, 
1024, 14, 14], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([32, 1024, 14, 14], f16), T([32, 256, 14, 14], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([32, 256, 14, 14], f16), T([32, 256, 14, 14], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([32, 256, 14, 14], f16), T([32, 1024, 14, 14], f16), T([256, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 14, 14], f16), T([32, 256, 28, 28], f16), T([256, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([32, 512, 28, 28], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([32, 512, 28, 28], f16), T([32, 128, 28, 28], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 128, 28, 28], f16), T([32, 128, 28, 28], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 128, 28, 28], f16), T([32, 512, 28, 28], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 256, 56, 56], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 28, 28], f16), T([32, 128, 56, 56], f16), T([128, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 256, 56, 56], f16), 
T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), T([32, 64, 56, 56], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 64, 56, 56], f16), T([32, 256, 56, 56], f16), T([64, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 32000), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([32, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([32, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(0, 0)), T([32, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 6, ((T([32, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 
1e-05), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 7, ((T([32, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 11, ((T([32, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 7, ((T([32, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([32, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([32, 512, 7, 7], f16), T([32, 512, 7, 7], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([32, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), False, 1e-05, [True, True, 
True]), {}) +cnt: 11, ((T([32, 256, 14, 14], f16), T([32, 256, 14, 14], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([32, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 7, ((T([32, 128, 28, 28], f16), T([32, 128, 28, 28], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.relu_.default +cnt: 1, ((T([32, 64, 112, 112], f16),), {}) +cnt: 6, ((T([32, 64, 56, 56], f16),), {}) +cnt: 3, ((T([32, 256, 56, 56], f16),), {}) +cnt: 1, ((T([32, 128, 56, 56], f16),), {}) +cnt: 7, ((T([32, 128, 28, 28], f16),), {}) +cnt: 4, ((T([32, 512, 28, 28], f16),), {}) +cnt: 1, ((T([32, 256, 28, 28], f16),), {}) +cnt: 11, ((T([32, 256, 14, 14], f16),), {}) +cnt: 6, ((T([32, 1024, 14, 14], f16),), {}) +cnt: 1, ((T([32, 512, 14, 14], f16),), {}) +cnt: 5, ((T([32, 512, 7, 7], f16),), {}) +cnt: 3, ((T([32, 2048, 7, 7], f16),), {}) 
+Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([32, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 3, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), 0), {}) +cnt: 5, ((T([32, 512, 7, 7], f16), T([32, 512, 7, 7], f16), 0), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([32, 512, 14, 14], f16), 0), {}) +cnt: 6, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), 0), {}) +cnt: 11, ((T([32, 256, 14, 14], f16), T([32, 256, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([32, 256, 28, 28], f16), 0), {}) +cnt: 4, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), 0), {}) +cnt: 7, ((T([32, 128, 28, 28], f16), T([32, 128, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), 0), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), 0), {}) +cnt: 6, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/resnext50_32x4d_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/resnext50_32x4d_training.txt new file mode 100644 index 000000000..256d8ac32 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/resnext50_32x4d_training.txt @@ -0,0 +1,124 @@ +Operator: aten.add.Tensor +cnt: 2, ((T([8, 2048, 7, 7], f16), T([8, 2048, 7, 7], f16)), {}) +cnt: 6, ((T([8, 1024, 14, 14], f16), T([8, 1024, 14, 14], f16)), {}) +cnt: 4, ((T([8, 512, 28, 28], f16), T([8, 512, 28, 28], f16)), {}) +cnt: 3, ((T([8, 256, 56, 56], f16), T([8, 256, 56, 56], f16)), {}) +cnt: 1, ((T([8, 64, 56, 56], f16), T([8, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 3, ((T([8, 256, 56, 56], f16), T([8, 256, 56, 56], f16)), {}) +cnt: 4, ((T([8, 512, 28, 28], f16), T([8, 512, 28, 28], f16)), {}) +cnt: 6, 
((T([8, 1024, 14, 14], f16), T([8, 1024, 14, 14], f16)), {}) +cnt: 3, ((T([8, 2048, 7, 7], f16), T([8, 2048, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([8, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.clone.default +cnt: 1, ((T([8, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([8, 3, 224, 224], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 64, 56, 56], f16), T([128, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([8, 128, 56, 56], f16), T([128, 4, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 3, ((T([8, 128, 56, 56], f16), T([256, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 64, 56, 56], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([8, 256, 56, 56], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 256, 56, 56], f16), T([256, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 256, 56, 56], f16), T([256, 8, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 4, ((T([8, 256, 28, 28], f16), T([512, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 256, 56, 56], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([8, 512, 28, 28], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([8, 256, 28, 28], f16), T([256, 8, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([8, 512, 28, 28], f16), T([512, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 512, 28, 28], f16), T([512, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 6, ((T([8, 512, 14, 
14], f16), T([1024, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([8, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([8, 512, 14, 14], f16), T([512, 16, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([8, 1024, 14, 14], f16), T([1024, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 1024, 14, 14], f16), T([1024, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 3, ((T([8, 1024, 7, 7], f16), T([2048, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([8, 2048, 7, 7], f16), T([1024, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([8, 1024, 7, 7], f16), T([1024, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([8, 2048, 7, 7], f16), T([8, 1024, 7, 7], f16), T([2048, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([8, 1024, 7, 7], f16), T([8, 1024, 7, 7], f16), T([1024, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 2, ((T([8, 1024, 7, 7], f16), T([8, 2048, 7, 7], f16), T([1024, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 2048, 7, 7], f16), T([8, 1024, 14, 14], f16), T([2048, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 1024, 7, 7], f16), T([8, 1024, 14, 14], f16), T([1024, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 32, [True, True, 
False]), {}) +cnt: 1, ((T([8, 1024, 14, 14], f16), T([8, 1024, 14, 14], f16), T([1024, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([8, 1024, 14, 14], f16), T([8, 512, 14, 14], f16), T([1024, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([8, 512, 14, 14], f16), T([8, 512, 14, 14], f16), T([512, 16, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 5, ((T([8, 512, 14, 14], f16), T([8, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 1024, 14, 14], f16), T([8, 512, 28, 28], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 512, 14, 14], f16), T([8, 512, 28, 28], f16), T([512, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([8, 512, 28, 28], f16), T([8, 512, 28, 28], f16), T([512, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([8, 512, 28, 28], f16), T([8, 256, 28, 28], f16), T([512, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([8, 256, 28, 28], f16), T([8, 256, 28, 28], f16), T([256, 8, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 3, ((T([8, 256, 28, 28], f16), T([8, 512, 28, 28], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 512, 28, 28], f16), T([8, 256, 56, 56], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 256, 28, 28], f16), T([8, 256, 56, 56], f16), T([256, 8, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([8, 256, 56, 56], 
f16), T([8, 256, 56, 56], f16), T([256, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([8, 256, 56, 56], f16), T([8, 128, 56, 56], f16), T([256, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([8, 128, 56, 56], f16), T([8, 128, 56, 56], f16), T([128, 4, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 2, ((T([8, 128, 56, 56], f16), T([8, 256, 56, 56], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 256, 56, 56], f16), T([8, 64, 56, 56], f16), T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 128, 56, 56], f16), T([8, 64, 56, 56], f16), T([128, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 64, 112, 112], f16), T([8, 3, 224, 224], f16), T([64, 3, 7, 7], f16), [0], [2, 2], [3, 3], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([8, 3, 224, 224], f16), T([8, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([8, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 8000), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([8, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([8, 64, 56, 56], f16), T([8, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([8, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([8, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([8, 1000], f16, stride=(0, 0)), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 8], f16, stride=(0, 0)), T([8, 2048], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([8, 64, 112, 112], f16), T([64], f16), 
T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 6, ((T([8, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([8, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 7, ((T([8, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 6, ((T([8, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 11, ((T([8, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 8, ((T([8, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([8, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), False, 0.1, 1e-05), {}) +cnt: 4, ((T([8, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 4, ((T([8, 2048, 7, 7], f16), T([8, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([8, 1024, 7, 7], f16), T([8, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), False, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([8, 1024, 14, 14], f16), T([8, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), False, 1e-05, [True, True, True]), {}) +cnt: 11, ((T([8, 512, 14, 14], f16), T([8, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([8, 512, 28, 28], f16), T([8, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) 
+cnt: 7, ((T([8, 256, 28, 28], f16), T([8, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([8, 256, 56, 56], f16), T([8, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([8, 128, 56, 56], f16), T([8, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([8, 64, 112, 112], f16), T([8, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.relu_.default +cnt: 1, ((T([8, 64, 112, 112], f16),), {}) +cnt: 6, ((T([8, 128, 56, 56], f16),), {}) +cnt: 4, ((T([8, 256, 56, 56], f16),), {}) +cnt: 7, ((T([8, 256, 28, 28], f16),), {}) +cnt: 5, ((T([8, 512, 28, 28], f16),), {}) +cnt: 11, ((T([8, 512, 14, 14], f16),), {}) +cnt: 7, ((T([8, 1024, 14, 14], f16),), {}) +cnt: 5, ((T([8, 1024, 7, 7], f16),), {}) +cnt: 3, ((T([8, 2048, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([8, 1000], f16, stride=(0, 0)), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([8, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 3, ((T([8, 2048, 7, 7], f16), T([8, 2048, 7, 7], f16), 0), {}) +cnt: 5, ((T([8, 1024, 7, 7], f16), T([8, 1024, 7, 7], f16), 0), {}) +cnt: 7, ((T([8, 1024, 14, 14], f16), T([8, 1024, 14, 14], f16), 0), {}) +cnt: 11, ((T([8, 512, 14, 14], f16), T([8, 512, 14, 14], f16), 0), {}) +cnt: 5, ((T([8, 512, 28, 28], f16), T([8, 512, 28, 28], f16), 0), {}) +cnt: 7, ((T([8, 256, 28, 28], f16), T([8, 256, 28, 28], f16), 0), {}) +cnt: 4, ((T([8, 256, 56, 56], f16), T([8, 256, 56, 56], f16), 0), {}) +cnt: 6, ((T([8, 128, 56, 56], f16), T([8, 128, 56, 56], f16), 0), {}) +cnt: 1, ((T([8, 64, 112, 112], f16), T([8, 64, 112, 112], f16), 0), {}) diff --git 
a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/shufflenet_v2_x1_0_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/shufflenet_v2_x1_0_training.txt new file mode 100644 index 000000000..9b26d6a7b --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/shufflenet_v2_x1_0_training.txt @@ -0,0 +1,123 @@ +Operator: aten._unsafe_view.default +cnt: 4, ((T([128, 2, 232, 7, 7], f16), [128, 464, 7, 7]), {}) +cnt: 8, ((T([128, 2, 116, 14, 14], f16), [128, 232, 14, 14]), {}) +cnt: 4, ((T([128, 2, 58, 28, 28], f16), [128, 116, 28, 28]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([128, 232, 14, 14], f16), T([128, 232, 14, 14], f16)), {}) +cnt: 1, ((T([128, 116, 28, 28], f16), T([128, 116, 28, 28], f16)), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 24, 56, 56], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {}) +Operator: aten.cat.default +cnt: 1, (([T([128, 58, 28, 28], f16), T([128, 58, 28, 28], f16)], 1), {}) +cnt: 6, (([T([128, 58, 28, 28], f16, stride=(90944, 784, 28, 1)), T([128, 58, 28, 28], f16)], 1), {}) +cnt: 1, (([T([128, 116, 14, 14], f16), T([128, 116, 14, 14], f16)], 1), {}) +cnt: 14, (([T([128, 116, 14, 14], f16, stride=(45472, 196, 14, 1)), T([128, 116, 14, 14], f16)], 1), {}) +cnt: 1, (([T([128, 232, 7, 7], f16), T([128, 232, 7, 7], f16)], 1), {}) +cnt: 6, (([T([128, 232, 7, 7], f16, stride=(22736, 49, 7, 1)), T([128, 232, 7, 7], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([24, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 24, 56, 56], f16), T([24, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 24), {}) +cnt: 1, ((T([128, 24, 28, 28], f16), T([58, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) 
+cnt: 1, ((T([128, 24, 56, 56], f16), T([58, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 58, 56, 56], f16), T([58, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 58), {}) +cnt: 4, ((T([128, 58, 28, 28], f16), T([58, 58, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 58, 28, 28], f16, stride=(90944, 784, 28, 1)), T([58, 58, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 58, 28, 28], f16), T([58, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 58), {}) +cnt: 2, ((T([128, 116, 28, 28], f16), T([116, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 116), {}) +cnt: 9, ((T([128, 116, 14, 14], f16), T([116, 116, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 116, 28, 28], f16), T([116, 116, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 116, 14, 14], f16, stride=(45472, 196, 14, 1)), T([116, 116, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([128, 116, 14, 14], f16), T([116, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 116), {}) +cnt: 2, ((T([128, 232, 14, 14], f16), T([232, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 232), {}) +cnt: 5, ((T([128, 232, 7, 7], f16), T([232, 232, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 232, 14, 14], f16), T([232, 232, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 232, 7, 7], f16, stride=(22736, 49, 7, 1)), T([232, 232, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 232, 7, 7], f16), T([232, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 232), {}) +cnt: 1, ((T([128, 464, 7, 7], f16), T([1024, 464, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 
1, ((T([128, 1024, 7, 7], f16), T([128, 464, 7, 7], f16), T([1024, 464, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([128, 232, 7, 7], f16), T([128, 232, 7, 7], f16), T([232, 232, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([128, 232, 7, 7], f16), T([128, 232, 7, 7], f16), T([232, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 232, [True, True, False]), {}) +cnt: 3, ((T([128, 232, 7, 7], f16), T([128, 232, 7, 7], f16, stride=(22736, 49, 7, 1)), T([232, 232, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 232, 7, 7], f16), T([128, 232, 14, 14], f16), T([232, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 232, [True, True, False]), {}) +cnt: 1, ((T([128, 232, 14, 14], f16), T([128, 232, 14, 14], f16), T([232, 232, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 9, ((T([128, 116, 14, 14], f16), T([128, 116, 14, 14], f16), T([116, 116, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 7, ((T([128, 116, 14, 14], f16), T([128, 116, 14, 14], f16), T([116, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 116, [True, True, False]), {}) +cnt: 7, ((T([128, 116, 14, 14], f16), T([128, 116, 14, 14], f16, stride=(45472, 196, 14, 1)), T([116, 116, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([128, 116, 14, 14], f16), T([128, 116, 28, 28], f16), T([116, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 116, [True, True, False]), {}) +cnt: 1, ((T([128, 116, 28, 28], f16), T([128, 116, 28, 28], f16), T([116, 116, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([128, 58, 28, 28], f16), T([128, 58, 28, 28], f16), T([58, 58, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, 
[True, True, False]), {}) +cnt: 3, ((T([128, 58, 28, 28], f16), T([128, 58, 28, 28], f16), T([58, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 58, [True, True, False]), {}) +cnt: 3, ((T([128, 58, 28, 28], f16), T([128, 58, 28, 28], f16, stride=(90944, 784, 28, 1)), T([58, 58, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 58, 28, 28], f16), T([128, 58, 56, 56], f16), T([58, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 58, [True, True, False]), {}) +cnt: 1, ((T([128, 58, 56, 56], f16), T([128, 24, 56, 56], f16), T([58, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 58, 28, 28], f16), T([128, 24, 28, 28], f16), T([58, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 28, 28], f16), T([128, 24, 56, 56], f16), T([24, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 24, [True, True, False]), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([128, 3, 224, 224], f16), T([24, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 1024, 7, 7], f16, stride=(1024, 1, 0, 0)), 49), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 128000), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([128, 24, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([128, 24, 56, 56], f16), T([128, 24, 112, 112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([128, 24, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 1024, 7, 7], f16), [2, 3]), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16, stride=(0, 0)), T([1000, 1024], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(0, 0)), T([128, 
1024], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([128, 24, 112, 112], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 24, 28, 28], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), False, 0.1, 1e-05), {}) +cnt: 12, ((T([128, 58, 28, 28], f16), T([58], f16), T([58], f16), T([58], f16), T([58], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 58, 56, 56], f16), T([58], f16), T([58], f16), T([58], f16), T([58], f16), False, 0.1, 1e-05), {}) +cnt: 25, ((T([128, 116, 14, 14], f16), T([116], f16), T([116], f16), T([116], f16), T([116], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 116, 28, 28], f16), T([116], f16), T([116], f16), T([116], f16), T([116], f16), False, 0.1, 1e-05), {}) +cnt: 13, ((T([128, 232, 7, 7], f16), T([232], f16), T([232], f16), T([232], f16), T([232], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 232, 14, 14], f16), T([232], f16), T([232], f16), T([232], f16), T([232], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), False, 1e-05, [True, True, True]), {}) +cnt: 13, ((T([128, 232, 7, 7], f16), T([128, 232, 7, 7], f16), T([232], f16), T([232], f16), T([232], f16), T([232], f32), T([232], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 232, 14, 14], f16), T([128, 232, 14, 14], f16), T([232], f16), T([232], f16), T([232], f16), T([232], f32), T([232], f32), False, 1e-05, [True, True, True]), {}) +cnt: 25, ((T([128, 116, 14, 14], f16), T([128, 116, 14, 14], f16), T([116], f16), T([116], f16), T([116], f16), T([116], f32), T([116], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 116, 28, 28], f16), T([128, 116, 28, 28], f16), 
T([116], f16), T([116], f16), T([116], f16), T([116], f32), T([116], f32), False, 1e-05, [True, True, True]), {}) +cnt: 12, ((T([128, 58, 28, 28], f16), T([128, 58, 28, 28], f16), T([58], f16), T([58], f16), T([58], f16), T([58], f32), T([58], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 58, 56, 56], f16), T([128, 58, 56, 56], f16), T([58], f16), T([58], f16), T([58], f16), T([58], f32), T([58], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 28, 28], f16), T([128, 24, 28, 28], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([128, 24, 112, 112], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 24, 112, 112], f16),), {}) +cnt: 8, ((T([128, 58, 28, 28], f16),), {}) +cnt: 1, ((T([128, 58, 56, 56], f16),), {}) +cnt: 16, ((T([128, 116, 14, 14], f16),), {}) +cnt: 1, ((T([128, 116, 28, 28], f16),), {}) +cnt: 8, ((T([128, 232, 7, 7], f16),), {}) +cnt: 1, ((T([128, 232, 14, 14], f16),), {}) +cnt: 1, ((T([128, 1024, 7, 7], f16),), {}) +Operator: aten.split.Tensor +cnt: 3, ((T([128, 116, 28, 28], f16), 58, 1), {}) +cnt: 7, ((T([128, 232, 14, 14], f16), 116, 1), {}) +cnt: 3, ((T([128, 464, 7, 7], f16), 232, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16, stride=(0, 0)), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([128, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([128, 1024, 7, 7], f16), T([128, 1024, 7, 7], f16), 0), {}) +cnt: 5, ((T([128, 232, 7, 7], f16, stride=(22736, 49, 7, 1)), T([128, 232, 7, 7], f16), 0), {}) +cnt: 3, ((T([128, 232, 7, 7], f16), T([128, 232, 7, 7], f16), 0), {}) +cnt: 1, ((T([128, 232, 14, 14], f16), T([128, 232, 14, 14], f16), 0), {}) +cnt: 9, ((T([128, 116, 14, 14], f16, stride=(45472, 196, 14, 1)), T([128, 116, 14, 14], f16), 0), 
{}) +cnt: 7, ((T([128, 116, 14, 14], f16), T([128, 116, 14, 14], f16), 0), {}) +cnt: 1, ((T([128, 116, 28, 28], f16), T([128, 116, 28, 28], f16), 0), {}) +cnt: 5, ((T([128, 58, 28, 28], f16, stride=(90944, 784, 28, 1)), T([128, 58, 28, 28], f16), 0), {}) +cnt: 3, ((T([128, 58, 28, 28], f16), T([128, 58, 28, 28], f16), 0), {}) +cnt: 1, ((T([128, 58, 56, 56], f16), T([128, 58, 56, 56], f16), 0), {}) +cnt: 1, ((T([128, 24, 112, 112], f16), T([128, 24, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/speech_transformer_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/speech_transformer_training.txt new file mode 100644 index 000000000..8431f307e --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/speech_transformer_training.txt @@ -0,0 +1,178 @@ +Operator: aten._softmax.default +cnt: 6, ((T([80, 204, 204], f16), 2, False), {}) +cnt: 6, ((T([80, 22, 22], f16), 2, False), {}) +cnt: 6, ((T([80, 22, 204], f16), 2, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 6, ((T([80, 22, 204], f16), T([80, 22, 204], f16), 2, f16), {}) +cnt: 6, ((T([80, 22, 22], f16), T([80, 22, 22], f16), 2, f16), {}) +cnt: 6, ((T([80, 204, 204], f16), T([80, 204, 204], f16), 2, f16), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([10, 22], b8),), {'dtype': f32}) +cnt: 1, ((T([], f32),), {'dtype': f16}) +cnt: 18, ((T([10, 22, 512], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 1, ((T([220, 1014], f16), [10, 22, 1014]), {}) +cnt: 12, ((T([8, 10, 22, 64], f16), [80, 22, 64]), {}) +cnt: 30, ((T([10, 204, 8, 64], f16), [10, 204, 512]), {}) +cnt: 24, ((T([10, 22, 8, 64], f16), [10, 22, 512]), {}) +cnt: 6, ((T([8, 10, 204, 64], f16), [80, 204, 64]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([10, 204, 512], f16), T([1, 204, 512], f16)), {}) +cnt: 47, ((T([10, 204, 512], f16), T([10, 204, 512], f16)), {}) +cnt: 1, ((T([10, 22, 22], b8, stride=(22, 
0, 1)), T([10, 22, 22], u8, stride=(0, 22, 1))), {}) +cnt: 1, ((T([10, 22, 512], f16), T([1, 22, 512], f16)), {}) +cnt: 48, ((T([10, 22, 512], f16), T([10, 22, 512], f16)), {}) +cnt: 1, ((T([], f16), 0), {}) +cnt: 1, ((T([], f16), T([], f32)), {}) +cnt: 1, ((T([1014, 512], f16), T([1014, 512], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([512], f16), T([2040, 320], f16), T([320, 512], f16, stride=(1, 320))), {}) +cnt: 36, ((T([512], f16), T([2040, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 6, ((T([2048], f16), T([2040, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 6, ((T([512], f16), T([2040, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +cnt: 36, ((T([512], f16), T([220, 512], f16), T([512, 512], f16, stride=(1, 512))), {}) +cnt: 6, ((T([2048], f16), T([220, 512], f16), T([512, 2048], f16, stride=(1, 512))), {}) +cnt: 6, ((T([512], f16), T([220, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {}) +Operator: aten.bmm.default +cnt: 12, ((T([80, 204, 64], f16), T([80, 64, 204], f16, stride=(13056, 1, 64))), {}) +cnt: 12, ((T([80, 204, 204], f16), T([80, 204, 64], f16)), {}) +cnt: 12, ((T([80, 22, 64], f16), T([80, 64, 22], f16, stride=(1408, 1, 64))), {}) +cnt: 12, ((T([80, 22, 22], f16), T([80, 22, 64], f16)), {}) +cnt: 12, ((T([80, 22, 64], f16), T([80, 64, 204], f16, stride=(13056, 1, 64))), {}) +cnt: 12, ((T([80, 22, 204], f16), T([80, 204, 64], f16)), {}) +cnt: 6, ((T([80, 204, 22], f16, stride=(4488, 1, 204)), T([80, 22, 64], f16)), {}) +cnt: 6, ((T([80, 64, 22], f16, stride=(1408, 1, 64)), T([80, 22, 204], f16)), {}) +cnt: 6, ((T([80, 22, 22], f16, stride=(484, 1, 22)), T([80, 22, 64], f16)), {}) +cnt: 6, ((T([80, 64, 22], f16, stride=(1408, 1, 64)), T([80, 22, 22], f16)), {}) +cnt: 6, ((T([80, 204, 204], f16, stride=(41616, 1, 204)), T([80, 204, 64], f16)), {}) +cnt: 6, ((T([80, 64, 204], f16, stride=(13056, 1, 64)), T([80, 204, 204], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([1], i64), 
T([17], i64)],), {}) +cnt: 1, (([T([1], i64), T([15], i64)],), {}) +cnt: 1, (([T([1], i64), T([21], i64)],), {}) +cnt: 1, (([T([1], i64), T([18], i64)],), {}) +cnt: 3, (([T([1], i64), T([9], i64)],), {}) +cnt: 1, (([T([1], i64), T([12], i64)],), {}) +cnt: 1, (([T([1], i64), T([11], i64)],), {}) +cnt: 1, (([T([1], i64), T([10], i64)],), {}) +cnt: 1, (([T([17], i64), T([1], i64)],), {}) +cnt: 1, (([T([15], i64), T([1], i64)],), {}) +cnt: 1, (([T([21], i64), T([1], i64)],), {}) +cnt: 1, (([T([18], i64), T([1], i64)],), {}) +cnt: 3, (([T([9], i64), T([1], i64)],), {}) +cnt: 1, (([T([12], i64), T([1], i64)],), {}) +cnt: 1, (([T([11], i64), T([1], i64)],), {}) +cnt: 1, (([T([10], i64), T([1], i64)],), {}) +Operator: aten.clone.default +cnt: 1, ((T([10, 204, 320], f16),), {}) +cnt: 1, ((T([10], i64),), {}) +cnt: 1, ((T([10, 21], i64),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([10, 204, 320], f16), T([10, 204, 320], f16)), {}) +cnt: 7, ((T([10], i64), T([10], i64)), {}) +cnt: 1, ((T([10, 21], i64), T([10, 21], i64)), {}) +cnt: 2, ((T([18], i64), T([18], i64)), {}) +cnt: 2, ((T([16], i64), T([16], i64)), {}) +cnt: 2, ((T([22], i64), T([22], i64)), {}) +cnt: 2, ((T([19], i64), T([19], i64)), {}) +cnt: 2, ((T([13], i64), T([13], i64)), {}) +cnt: 2, ((T([12], i64), T([12], i64)), {}) +cnt: 2, ((T([11], i64), T([11], i64)), {}) +Operator: aten.div.Tensor +cnt: 12, ((T([80, 204, 204], f16), 8.0), {}) +cnt: 12, ((T([80, 22, 22], f16), 8.0), {}) +cnt: 12, ((T([80, 22, 204], f16), 8.0), {}) +cnt: 2, ((T([], f16), 223080), {}) +cnt: 1, ((T([], i64), 220), {}) +cnt: 2, ((T([], f32), 2), {}) +Operator: aten.embedding.default +cnt: 1, ((T([1014, 512], f16), T([10, 22], i64)), {}) +Operator: aten.embedding_dense_backward.default +cnt: 1, ((T([10, 22, 512], f16), T([10, 22], i64), 1014, -1, False), {}) +Operator: aten.eq.Scalar +cnt: 1, ((T([10, 22], i64), 2), {}) +Operator: aten.fill_.Scalar +cnt: 1, ((T([10, 22], i64), 2), {}) +cnt: 1, ((T([10, 22], i64), -1), {}) +Operator: 
aten.fill_.Tensor +cnt: 3, ((T([0], f16), T([], f16)), {}) +cnt: 3, ((T([4], f16), T([], f16)), {}) +cnt: 3, ((T([8], f16), T([], f16)), {}) +cnt: 3, ((T([24], f16), T([], f16)), {}) +cnt: 3, ((T([57], f16), T([], f16)), {}) +cnt: 3, ((T([67], f16), T([], f16)), {}) +cnt: 3, ((T([75], f16), T([], f16)), {}) +cnt: 3, ((T([91], f16), T([], f16)), {}) +cnt: 3, ((T([99], f16), T([], f16)), {}) +cnt: 3, ((T([118], f16), T([], f16)), {}) +Operator: aten.gt.Scalar +cnt: 1, ((T([10, 22, 22], u8), 0), {}) +Operator: aten.index.Tensor +cnt: 10, ((T([21], i64), [T([21], b8)]), {}) +Operator: aten.lt.Scalar +cnt: 2, ((T([10, 204], f16), 1), {}) +Operator: aten.masked_fill.Scalar +cnt: 6, ((T([80, 204, 204], f16), T([80, 204, 204], b8), -inf), {}) +cnt: 6, ((T([80, 22, 22], f16), T([80, 22, 22], b8), -inf), {}) +cnt: 6, ((T([80, 22, 204], f16), T([80, 22, 204], b8), -inf), {}) +cnt: 6, ((T([80, 22, 204], f16), T([80, 22, 204], b8), 0), {}) +cnt: 6, ((T([80, 22, 22], f16), T([80, 22, 22], b8), 0), {}) +cnt: 6, ((T([80, 204, 204], f16), T([80, 204, 204], b8), 0), {}) +Operator: aten.mm.default +cnt: 1, ((T([220, 512], f16), T([512, 1014], f16, stride=(1, 512))), {}) +cnt: 1, ((T([1014, 220], f16, stride=(0, 0)), T([220, 512], f16)), {}) +cnt: 1, ((T([220, 1014], f16, stride=(0, 0)), T([1014, 512], f16)), {}) +cnt: 6, ((T([220, 512], f16), T([512, 2048], f16)), {}) +cnt: 6, ((T([512, 220], f16, stride=(1, 512)), T([220, 2048], f16)), {}) +cnt: 6, ((T([220, 2048], f16), T([2048, 512], f16)), {}) +cnt: 6, ((T([2048, 220], f16, stride=(1, 2048)), T([220, 512], f16)), {}) +cnt: 36, ((T([220, 512], f16), T([512, 512], f16)), {}) +cnt: 36, ((T([512, 220], f16, stride=(1, 512)), T([220, 512], f16)), {}) +cnt: 36, ((T([2040, 512], f16), T([512, 512], f16)), {}) +cnt: 36, ((T([512, 2040], f16, stride=(1, 512)), T([2040, 512], f16)), {}) +cnt: 6, ((T([2040, 512], f16), T([512, 2048], f16)), {}) +cnt: 6, ((T([512, 2040], f16, stride=(1, 512)), T([2040, 2048], f16)), {}) +cnt: 6, ((T([2040, 
2048], f16), T([2048, 512], f16)), {}) +cnt: 6, ((T([2048, 2040], f16, stride=(1, 2048)), T([2040, 512], f16)), {}) +cnt: 1, ((T([512, 2040], f16, stride=(1, 512)), T([2040, 320], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([10, 22, 512], f16), 22.627416997969522), {}) +cnt: 18, ((T([10, 22, 512], f16), T([10, 22, 1], f32)), {}) +cnt: 12, ((T([10, 204, 512], f16), T([10, 204, 1], f16)), {}) +Operator: aten.mul_.Tensor +cnt: 12, ((T([10, 204, 512], f16), T([10, 204, 1], f16)), {}) +cnt: 18, ((T([10, 22, 512], f16), T([10, 22, 1], f32)), {}) +Operator: aten.native_layer_norm.default +cnt: 13, ((T([10, 204, 512], f16), [512], T([512], f16), T([512], f16), 1e-05), {}) +cnt: 18, ((T([10, 22, 512], f16), [512], T([512], f16), T([512], f16), 1e-05), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 18, ((T([10, 22, 512], f16), T([10, 22, 512], f16), [512], T([10, 22, 1], f32), T([10, 22, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +cnt: 13, ((T([10, 204, 512], f16), T([10, 204, 512], f16), [512], T([10, 204, 1], f32), T([10, 204, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {}) +Operator: aten.ne.Scalar +cnt: 10, ((T([21], i64), -1), {}) +cnt: 1, ((T([10, 22], i64), 2), {}) +Operator: aten.new_ones.default +cnt: 2, ((T([10, 204, 320], f16), [10, 204]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 1, ((T([10, 204, 512], f16), [10, 204]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.relu.default +cnt: 6, ((T([10, 204, 2048], f16),), {}) +cnt: 6, ((T([10, 22, 2048], f16),), {}) +Operator: aten.repeat.default +cnt: 6, ((T([10, 204, 204], b8, stride=(204, 0, 1)), [8, 1, 1]), {}) +cnt: 6, ((T([10, 22, 22], b8), [8, 1, 1]), {}) +cnt: 6, ((T([10, 22, 204], b8, stride=(204, 0, 1)), [8, 1, 1]), {}) +Operator: aten.sum.SymInt +cnt: 42, ((T([220, 512], f16), [0], True), {}) +cnt: 6, ((T([220, 2048], f16), [0], True), {}) +cnt: 43, 
((T([2040, 512], f16), [0], True), {}) +cnt: 6, ((T([2040, 2048], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([10, 22, 1014], f16),), {}) +cnt: 1, ((T([10, 22], i64),), {}) +Operator: aten.threshold_backward.default +cnt: 6, ((T([10, 22, 2048], f16), T([10, 22, 2048], f16), 0), {}) +cnt: 6, ((T([10, 204, 2048], f16), T([10, 204, 2048], f16), 0), {}) +Operator: aten.triu.default +cnt: 1, ((T([22, 22], u8), 1), {}) +Operator: aten.unbind.int +cnt: 1, ((T([10, 21], i64),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/squeezenet1_1_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/squeezenet1_1_training.txt new file mode 100644 index 000000000..4e4da308b --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/squeezenet1_1_training.txt @@ -0,0 +1,90 @@ +Operator: aten.add.Tensor +cnt: 2, ((T([32, 64, 13, 13], f16), T([32, 64, 13, 13], f16)), {}) +cnt: 2, ((T([32, 48, 13, 13], f16), T([32, 48, 13, 13], f16)), {}) +cnt: 2, ((T([32, 32, 27, 27], f16), T([32, 32, 27, 27], f16)), {}) +cnt: 2, ((T([32, 16, 55, 55], f16), T([32, 16, 55, 55], f16)), {}) +Operator: aten.cat.default +cnt: 2, (([T([32, 64, 55, 55], f16), T([32, 64, 55, 55], f16)], 1), {}) +cnt: 2, (([T([32, 128, 27, 27], f16), T([32, 128, 27, 27], f16)], 1), {}) +cnt: 2, (([T([32, 192, 13, 13], f16), T([32, 192, 13, 13], f16)], 1), {}) +cnt: 2, (([T([32, 256, 13, 13], f16), T([32, 256, 13, 13], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([64, 3, 3, 3], f16), T([64], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 55, 55], f16), T([16, 64, 1, 1], f16), T([16], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 16, 55, 55], f16), T([64, 16, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 16, 
55, 55], f16), T([64, 16, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 55, 55], f16), T([16, 128, 1, 1], f16), T([16], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 27, 27], f16), T([32, 128, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 32, 27, 27], f16), T([128, 32, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 32, 27, 27], f16), T([128, 32, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 27, 27], f16), T([32, 256, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 13, 13], f16), T([48, 256, 1, 1], f16), T([48], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 48, 13, 13], f16), T([192, 48, 1, 1], f16), T([192], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 48, 13, 13], f16), T([192, 48, 3, 3], f16), T([192], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 384, 13, 13], f16), T([48, 384, 1, 1], f16), T([48], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 384, 13, 13], f16), T([64, 384, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 64, 13, 13], f16), T([256, 64, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 64, 13, 13], f16), T([256, 64, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 13, 13], f16), T([64, 512, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 13, 13], f16), T([1000, 512, 1, 1], f16), T([1000], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([32, 1000, 13, 13], f16), T([32, 512, 13, 13], f16), T([1000, 512, 1, 1], f16), [1000], [1, 1], [0, 
0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 256, 13, 13], f16), T([32, 64, 13, 13], f16), T([256, 64, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 256, 13, 13], f16), T([32, 64, 13, 13], f16), T([256, 64, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 13, 13], f16), T([32, 512, 13, 13], f16), T([64, 512, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 13, 13], f16), T([32, 384, 13, 13], f16), T([64, 384, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 192, 13, 13], f16), T([32, 48, 13, 13], f16), T([192, 48, 3, 3], f16), [192], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 192, 13, 13], f16), T([32, 48, 13, 13], f16), T([192, 48, 1, 1], f16), [192], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 48, 13, 13], f16), T([32, 384, 13, 13], f16), T([48, 384, 1, 1], f16), [48], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 48, 13, 13], f16), T([32, 256, 13, 13], f16), T([48, 256, 1, 1], f16), [48], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 128, 27, 27], f16), T([32, 32, 27, 27], f16), T([128, 32, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 128, 27, 27], f16), T([32, 32, 27, 27], f16), T([128, 32, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 32, 27, 27], f16), T([32, 256, 27, 27], f16), T([32, 256, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 32, 27, 27], f16), T([32, 128, 27, 27], f16), T([32, 128, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, 
True]), {}) +cnt: 2, ((T([32, 64, 55, 55], f16), T([32, 16, 55, 55], f16), T([64, 16, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 64, 55, 55], f16), T([32, 16, 55, 55], f16), T([64, 16, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 16, 55, 55], f16), T([32, 128, 55, 55], f16), T([16, 128, 1, 1], f16), [16], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 16, 55, 55], f16), T([32, 64, 55, 55], f16), T([16, 64, 1, 1], f16), [16], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 111, 111], f16), T([32, 3, 224, 224], f16), T([64, 3, 3, 3], f16), [64], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 1000, 13, 13], f16, stride=(0, 0, 0, 0)), 169), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 32000), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([32, 64, 111, 111], f16), [3, 3], [2, 2], [0, 0], [1, 1], True), {}) +cnt: 1, ((T([32, 128, 55, 55], f16), [3, 3], [2, 2], [0, 0], [1, 1], True), {}) +cnt: 1, ((T([32, 256, 27, 27], f16), [3, 3], [2, 2], [0, 0], [1, 1], True), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([32, 256, 13, 13], f16), T([32, 256, 27, 27], f16), [3, 3], [2, 2], [0, 0], [1, 1], True, T([32, 256, 13, 13], i64)), {}) +cnt: 1, ((T([32, 128, 27, 27], f16), T([32, 128, 55, 55], f16), [3, 3], [2, 2], [0, 0], [1, 1], True, T([32, 128, 27, 27], i64)), {}) +cnt: 1, ((T([32, 64, 55, 55], f16), T([32, 64, 111, 111], f16), [3, 3], [2, 2], [0, 0], [1, 1], True, T([32, 64, 55, 55], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 1000, 13, 13], f16), [-1, -2], True), {}) +Operator: aten.relu_.default +cnt: 1, ((T([32, 64, 111, 111], f16),), {}) +cnt: 2, 
((T([32, 16, 55, 55], f16),), {}) +cnt: 4, ((T([32, 64, 55, 55], f16),), {}) +cnt: 2, ((T([32, 32, 27, 27], f16),), {}) +cnt: 4, ((T([32, 128, 27, 27], f16),), {}) +cnt: 2, ((T([32, 48, 13, 13], f16),), {}) +cnt: 4, ((T([32, 192, 13, 13], f16),), {}) +cnt: 2, ((T([32, 64, 13, 13], f16),), {}) +cnt: 4, ((T([32, 256, 13, 13], f16),), {}) +cnt: 1, ((T([32, 1000, 13, 13], f16),), {}) +Operator: aten.sum.default +cnt: 1, ((T([32, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([32, 1000, 13, 13], f16), T([32, 1000, 13, 13], f16), 0), {}) +cnt: 4, ((T([32, 256, 13, 13], f16, stride=(86528, 169, 13, 1)), T([32, 256, 13, 13], f16), 0), {}) +cnt: 2, ((T([32, 64, 13, 13], f16), T([32, 64, 13, 13], f16), 0), {}) +cnt: 4, ((T([32, 192, 13, 13], f16, stride=(64896, 169, 13, 1)), T([32, 192, 13, 13], f16), 0), {}) +cnt: 2, ((T([32, 48, 13, 13], f16), T([32, 48, 13, 13], f16), 0), {}) +cnt: 4, ((T([32, 128, 27, 27], f16, stride=(186624, 729, 27, 1)), T([32, 128, 27, 27], f16), 0), {}) +cnt: 2, ((T([32, 32, 27, 27], f16), T([32, 32, 27, 27], f16), 0), {}) +cnt: 4, ((T([32, 64, 55, 55], f16, stride=(387200, 3025, 55, 1)), T([32, 64, 55, 55], f16), 0), {}) +cnt: 2, ((T([32, 16, 55, 55], f16), T([32, 16, 55, 55], f16), 0), {}) +cnt: 1, ((T([32, 64, 111, 111], f16), T([32, 64, 111, 111], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_efficientdet_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_efficientdet_training.txt new file mode 100644 index 000000000..873f03659 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_efficientdet_training.txt @@ -0,0 +1,623 @@ +Operator: aten._index_put_impl_.default +cnt: 1, ((T([5000, 1], f32), [T([100], i64)], T([100, 1], f32, stride=(0, 0)), True, True), {}) +cnt: 1, ((T([5000, 4], f32), [T([100], i64)], T([100, 4], f32), True, True), {}) +Operator: aten._to_copy.default +cnt: 1, ((T([5000, 4], 
f16),), {'dtype': f32}) +cnt: 1, ((T([5000], f16),), {'dtype': f32}) +cnt: 1, ((T([5000], i64),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 1, ((T([], i64),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 1, ((T([100, 1], i64),), {'dtype': f32}) +cnt: 1, ((T([5000], f32),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([5000, 4], f32),), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten._unsafe_view.default +cnt: 1, ((T([1, 80, 80, 810], f16), [1, 57600, 90]), {}) +cnt: 1, ((T([1, 40, 40, 810], f16), [1, 14400, 90]), {}) +cnt: 1, ((T([1, 20, 20, 810], f16), [1, 3600, 90]), {}) +cnt: 1, ((T([1, 10, 10, 810], f16), [1, 900, 90]), {}) +cnt: 1, ((T([1, 5, 5, 810], f16), [1, 225, 90]), {}) +cnt: 1, ((T([1, 80, 80, 36], f16), [1, 57600, 4]), {}) +cnt: 1, ((T([1, 40, 40, 36], f16), [1, 14400, 4]), {}) +cnt: 1, ((T([1, 20, 20, 36], f16), [1, 3600, 4]), {}) +cnt: 1, ((T([1, 10, 10, 36], f16), [1, 900, 4]), {}) +cnt: 1, ((T([1, 5, 5, 36], f16), [1, 225, 4]), {}) +Operator: aten.add.Scalar +cnt: 1, ((T([100, 1], i64), 1), {}) +Operator: aten.add.Tensor +cnt: 3, ((T([1, 16, 320, 320], f16), T([1, 16, 320, 320], f16)), {}) +cnt: 4, ((T([1, 24, 160, 160], f16), T([1, 24, 160, 160], f16)), {}) +cnt: 5, ((T([1, 40, 80, 80], f16), T([1, 40, 80, 80], f16)), {}) +cnt: 6, ((T([1, 80, 40, 40], f16), T([1, 80, 40, 40], f16)), {}) +cnt: 8, ((T([1, 112, 40, 40], f16), T([1, 112, 40, 40], f16)), {}) +cnt: 8, ((T([1, 192, 20, 20], f16), T([1, 192, 20, 20], f16)), {}) +cnt: 4, ((T([1, 320, 20, 20], f16), T([1, 320, 20, 20], f16)), {}) +cnt: 76, ((T([], f16), 0.0001), {}) +cnt: 2, ((T([5000], f16, stride=(4,)), T([5000], f16, stride=(4,))), {}) +cnt: 2, ((T([5000], f32), T([5000], f16)), {}) +cnt: 2, ((T([5000], f32), T([5000], f32)), {}) +cnt: 1, ((T([], f32), T([], f32)), {}) +cnt: 1, ((T([5000, 4], f32), T([5000, 1], f32)), {}) +cnt: 2, ((T([5000], f32, 
stride=(4,)), T([5000], f32, stride=(4,))), {}) +cnt: 2, ((T([5000], f32, stride=(4,)), T([5000], f32)), {}) +cnt: 4, ((T([36, 88, 1, 1], f16), T([36, 88, 1, 1], f16)), {}) +cnt: 4, ((T([36], f16), T([36], f16)), {}) +cnt: 32, ((T([88, 1, 3, 3], f16), T([88, 1, 3, 3], f16)), {}) +cnt: 24, ((T([88, 88, 1, 1], f16), T([88, 88, 1, 1], f16)), {}) +cnt: 24, ((T([88], f16), T([88], f16)), {}) +cnt: 5, ((T([1, 88, 5, 5], f16), T([1, 88, 5, 5], f16)), {}) +cnt: 4, ((T([810, 88, 1, 1], f16), T([810, 88, 1, 1], f16)), {}) +cnt: 4, ((T([810], f16), T([810], f16)), {}) +cnt: 14, ((T([1, 88, 10, 10], f16), T([1, 88, 10, 10], f16)), {}) +cnt: 12, ((T([1, 88, 20, 20], f16), T([1, 88, 20, 20], f16)), {}) +cnt: 12, ((T([1, 88, 40, 40], f16), T([1, 88, 40, 40], f16)), {}) +cnt: 5, ((T([1, 88, 80, 80], f16), T([1, 88, 80, 80], f16)), {}) +cnt: 44, ((T([], f16), T([], f16)), {}) +cnt: 20, ((T([2], f16), T([2], f16)), {}) +cnt: 20, ((T([2], f16), T([2], f16, stride=(0,))), {}) +cnt: 24, ((T([3], f16), T([3], f16)), {}) +cnt: 12, ((T([3], f16), T([3], f16, stride=(0,))), {}) +cnt: 1, ((T([1, 1920, 20, 20], f16), T([1, 1920, 20, 20], f16)), {}) +cnt: 5, ((T([1, 1152, 20, 20], f16), T([1, 1152, 20, 20], f16)), {}) +cnt: 1, ((T([1, 672, 20, 20], f16), T([1, 672, 20, 20], f16)), {}) +cnt: 3, ((T([1, 672, 40, 40], f16), T([1, 672, 40, 40], f16)), {}) +cnt: 4, ((T([1, 480, 40, 40], f16), T([1, 480, 40, 40], f16)), {}) +cnt: 1, ((T([1, 240, 40, 40], f16), T([1, 240, 40, 40], f16)), {}) +cnt: 2, ((T([1, 240, 80, 80], f16), T([1, 240, 80, 80], f16)), {}) +cnt: 1, ((T([1, 144, 80, 80], f16), T([1, 144, 80, 80], f16)), {}) +cnt: 2, ((T([1, 144, 160, 160], f16), T([1, 144, 160, 160], f16)), {}) +cnt: 1, ((T([1, 96, 160, 160], f16), T([1, 96, 160, 160], f16)), {}) +cnt: 1, ((T([1, 32, 320, 320], f16), T([1, 32, 320, 320], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([1, 57600, 90], f16), T([1, 14400, 90], f16), T([1, 3600, 90], f16), T([1, 900, 90], f16), T([1, 225, 90], f16)], 1), {}) +cnt: 
1, (([T([1, 57600, 4], f16), T([1, 14400, 4], f16), T([1, 3600, 4], f16), T([1, 900, 4], f16), T([1, 225, 4], f16)], 1), {}) +cnt: 1, (([T([2], f16), T([2], f16)],), {}) +cnt: 1, (([T([100, 4], f32), T([100, 1], f32), T([100, 1], f32)], 1), {}) +Operator: aten.clamp.default +cnt: 1, ((T([5000, 4], f32), 0), {}) +Operator: aten.clone.default +cnt: 1, ((T([1, 3, 640, 640], f16),), {}) +cnt: 2, ((T([1, 32, 320, 320], f16),), {}) +cnt: 1, ((T([1, 8, 1, 1], f16),), {}) +cnt: 1, ((T([1, 16, 320, 320], f16),), {}) +cnt: 2, ((T([1, 4, 1, 1], f16),), {}) +cnt: 1, ((T([1, 96, 320, 320], f16),), {}) +cnt: 1, ((T([1, 96, 160, 160], f16),), {}) +cnt: 5, ((T([1, 144, 160, 160], f16),), {}) +cnt: 3, ((T([1, 6, 1, 1], f16),), {}) +cnt: 1, ((T([1, 144, 80, 80], f16),), {}) +cnt: 5, ((T([1, 240, 80, 80], f16),), {}) +cnt: 3, ((T([1, 10, 1, 1], f16),), {}) +cnt: 1, ((T([1, 240, 40, 40], f16),), {}) +cnt: 8, ((T([1, 480, 40, 40], f16),), {}) +cnt: 4, ((T([1, 20, 1, 1], f16),), {}) +cnt: 7, ((T([1, 672, 40, 40], f16),), {}) +cnt: 4, ((T([1, 28, 1, 1], f16),), {}) +cnt: 1, ((T([1, 672, 20, 20], f16),), {}) +cnt: 10, ((T([1, 1152, 20, 20], f16),), {}) +cnt: 5, ((T([1, 48, 1, 1], f16),), {}) +cnt: 2, ((T([1, 1920, 20, 20], f16),), {}) +cnt: 1, ((T([1, 80, 1, 1], f16),), {}) +cnt: 14, ((T([1, 88, 10, 10], f16),), {}) +cnt: 14, ((T([1, 88, 20, 20], f16),), {}) +cnt: 14, ((T([1, 88, 40, 40], f16),), {}) +cnt: 10, ((T([1, 88, 80, 80], f16),), {}) +cnt: 10, ((T([1, 88, 5, 5], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([1, 3, 640, 640], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([1, 96, 320, 320], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([1, 144, 160, 160], f16), [1, 2, 1, 2], 0.0), {}) +cnt: 1, ((T([1, 240, 80, 80], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([1, 672, 40, 40], f16), [1, 2, 1, 2], 0.0), {}) +cnt: 5, ((T([1, 88, 20, 20], f16), [0, 1, 0, 1], -inf), {}) +cnt: 5, ((T([1, 88, 10, 10], f16), [0, 1, 0, 1], -inf), {}) +cnt: 4, ((T([1, 88, 80, 80], f16), [0, 1, 0, 
1], -inf), {}) +cnt: 4, ((T([1, 88, 40, 40], f16), [0, 1, 0, 1], -inf), {}) +cnt: 5, ((T([1, 88, 11, 11], f16), [0, -1, 0, -1]), {}) +cnt: 5, ((T([1, 88, 21, 21], f16), [0, -1, 0, -1]), {}) +cnt: 4, ((T([1, 88, 41, 41], f16), [0, -1, 0, -1]), {}) +cnt: 4, ((T([1, 88, 81, 81], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([1, 672, 43, 43], f16), [-1, -2, -1, -2]), {}) +cnt: 1, ((T([1, 240, 81, 81], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([1, 144, 163, 163], f16), [-1, -2, -1, -2]), {}) +cnt: 1, ((T([1, 96, 321, 321], f16), [0, -1, 0, -1]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([1, 3, 641, 641], f16), T([32, 3, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 32, 320, 320], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([1, 32, 1, 1], f16), T([8, 32, 1, 1], f16), T([8], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 8, 1, 1], f16), T([32, 8, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 32, 320, 320], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 16, 320, 320], f16), T([16, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 16), {}) +cnt: 1, ((T([1, 16, 1, 1], f16), T([4, 16, 1, 1], f16), T([4], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 4, 1, 1], f16), T([16, 4, 1, 1], f16), T([16], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 16, 320, 320], f16), T([16, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 16, 320, 320], f16), T([96, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 96, 321, 321], f16), T([96, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 96), {}) +cnt: 1, ((T([1, 96, 1, 1], f16), T([4, 96, 1, 1], f16), T([4], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 4, 1, 1], f16), 
T([96, 4, 1, 1], f16), T([96], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 96, 160, 160], f16), T([24, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([1, 24, 160, 160], f16), T([144, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([1, 144, 160, 160], f16), T([144, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 144), {}) +cnt: 3, ((T([1, 144, 1, 1], f16), T([6, 144, 1, 1], f16), T([6], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([1, 6, 1, 1], f16), T([144, 6, 1, 1], f16), T([144], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([1, 144, 160, 160], f16), T([24, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 144, 163, 163], f16), T([144, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 144), {}) +cnt: 1, ((T([1, 144, 80, 80], f16), T([40, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([1, 40, 80, 80], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([1, 240, 80, 80], f16), T([240, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 240), {}) +cnt: 3, ((T([1, 240, 1, 1], f16), T([10, 240, 1, 1], f16), T([10], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([1, 10, 1, 1], f16), T([240, 10, 1, 1], f16), T([240], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([1, 240, 80, 80], f16), T([40, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 240, 81, 81], f16), T([240, 1, 3, 3], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([1, 240, 40, 40], f16), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([1, 80, 40, 40], f16), T([480, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([1, 480, 40, 40], f16), T([480, 
1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 480), {}) +cnt: 4, ((T([1, 480, 1, 1], f16), T([20, 480, 1, 1], f16), T([20], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([1, 20, 1, 1], f16), T([480, 20, 1, 1], f16), T([480], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([1, 480, 40, 40], f16), T([80, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 480, 40, 40], f16), T([480, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 480), {}) +cnt: 1, ((T([1, 480, 40, 40], f16), T([112, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([1, 112, 40, 40], f16), T([672, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([1, 672, 40, 40], f16), T([672, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 4, ((T([1, 672, 1, 1], f16), T([28, 672, 1, 1], f16), T([28], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([1, 28, 1, 1], f16), T([672, 28, 1, 1], f16), T([672], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([1, 672, 40, 40], f16), T([112, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 672, 43, 43], f16), T([672, 1, 5, 5], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 672), {}) +cnt: 1, ((T([1, 672, 20, 20], f16), T([192, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([1, 192, 20, 20], f16), T([1152, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([1, 1152, 20, 20], f16), T([1152, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1152), {}) +cnt: 5, ((T([1, 1152, 1, 1], f16), T([48, 1152, 1, 1], f16), T([48], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([1, 48, 1, 1], f16), T([1152, 48, 1, 1], f16), T([1152], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([1, 1152, 20, 20], 
f16), T([192, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 1152, 20, 20], f16), T([1152, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1152), {}) +cnt: 1, ((T([1, 1152, 20, 20], f16), T([320, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 320, 20, 20], f16), T([1920, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 1920, 20, 20], f16), T([1920, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1920), {}) +cnt: 1, ((T([1, 1920, 1, 1], f16), T([80, 1920, 1, 1], f16), T([80], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 80, 1, 1], f16), T([1920, 80, 1, 1], f16), T([1920], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 1920, 20, 20], f16), T([320, 1920, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([1, 320, 20, 20], f16), T([88, 320, 1, 1], f16), T([88], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 16, ((T([1, 88, 10, 10], f16), T([88, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 88), {}) +cnt: 14, ((T([1, 88, 10, 10], f16), T([88, 88, 1, 1], f16), T([88], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 16, ((T([1, 88, 20, 20], f16), T([88, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 88), {}) +cnt: 14, ((T([1, 88, 20, 20], f16), T([88, 88, 1, 1], f16), T([88], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([1, 112, 40, 40], f16), T([88, 112, 1, 1], f16), T([88], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 16, ((T([1, 88, 40, 40], f16), T([88, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 88), {}) +cnt: 14, ((T([1, 88, 40, 40], f16), T([88, 88, 1, 1], f16), T([88], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 40, 80, 80], f16), T([88, 40, 1, 1], f16), T([88], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) 
+cnt: 12, ((T([1, 88, 80, 80], f16), T([88, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 88), {}) +cnt: 10, ((T([1, 88, 80, 80], f16), T([88, 88, 1, 1], f16), T([88], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 12, ((T([1, 88, 5, 5], f16), T([88, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 88), {}) +cnt: 10, ((T([1, 88, 5, 5], f16), T([88, 88, 1, 1], f16), T([88], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 88, 80, 80], f16), T([810, 88, 1, 1], f16), T([810], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 88, 40, 40], f16), T([810, 88, 1, 1], f16), T([810], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 88, 20, 20], f16), T([810, 88, 1, 1], f16), T([810], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 88, 10, 10], f16), T([810, 88, 1, 1], f16), T([810], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 88, 5, 5], f16), T([810, 88, 1, 1], f16), T([810], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 88, 80, 80], f16), T([36, 88, 1, 1], f16), T([36], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 88, 40, 40], f16), T([36, 88, 1, 1], f16), T([36], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 88, 20, 20], f16), T([36, 88, 1, 1], f16), T([36], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 88, 10, 10], f16), T([36, 88, 1, 1], f16), T([36], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([1, 88, 5, 5], f16), T([36, 88, 1, 1], f16), T([36], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([1, 36, 5, 5], f16, stride=(900, 1, 180, 36)), T([1, 88, 5, 5], f16), T([36, 88, 1, 1], f16), [36], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 12, ((T([1, 88, 5, 5], f16), T([1, 88, 5, 5], f16), T([88, 1, 3, 3], f16), [0], [1, 
1], [1, 1], [1, 1], False, [0, 0], 88, [True, True, False]), {}) +cnt: 10, ((T([1, 88, 5, 5], f16), T([1, 88, 5, 5], f16), T([88, 88, 1, 1], f16), [88], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 36, 10, 10], f16, stride=(3600, 1, 360, 36)), T([1, 88, 10, 10], f16), T([36, 88, 1, 1], f16), [36], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 16, ((T([1, 88, 10, 10], f16), T([1, 88, 10, 10], f16), T([88, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 88, [True, True, False]), {}) +cnt: 14, ((T([1, 88, 10, 10], f16), T([1, 88, 10, 10], f16), T([88, 88, 1, 1], f16), [88], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 36, 20, 20], f16, stride=(14400, 1, 720, 36)), T([1, 88, 20, 20], f16), T([36, 88, 1, 1], f16), [36], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 16, ((T([1, 88, 20, 20], f16), T([1, 88, 20, 20], f16), T([88, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 88, [True, True, False]), {}) +cnt: 14, ((T([1, 88, 20, 20], f16), T([1, 88, 20, 20], f16), T([88, 88, 1, 1], f16), [88], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 36, 40, 40], f16, stride=(57600, 1, 1440, 36)), T([1, 88, 40, 40], f16), T([36, 88, 1, 1], f16), [36], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 16, ((T([1, 88, 40, 40], f16), T([1, 88, 40, 40], f16), T([88, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 88, [True, True, False]), {}) +cnt: 14, ((T([1, 88, 40, 40], f16), T([1, 88, 40, 40], f16), T([88, 88, 1, 1], f16), [88], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 36, 80, 80], f16, stride=(230400, 1, 2880, 36)), T([1, 88, 80, 80], f16), T([36, 88, 1, 1], f16), [36], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 12, ((T([1, 88, 80, 80], f16), T([1, 88, 80, 80], f16), T([88, 1, 3, 3], 
f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 88, [True, True, False]), {}) +cnt: 10, ((T([1, 88, 80, 80], f16), T([1, 88, 80, 80], f16), T([88, 88, 1, 1], f16), [88], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 810, 5, 5], f16, stride=(20250, 1, 4050, 810)), T([1, 88, 5, 5], f16), T([810, 88, 1, 1], f16), [810], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 810, 10, 10], f16, stride=(81000, 1, 8100, 810)), T([1, 88, 10, 10], f16), T([810, 88, 1, 1], f16), [810], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 810, 20, 20], f16, stride=(324000, 1, 16200, 810)), T([1, 88, 20, 20], f16), T([810, 88, 1, 1], f16), [810], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 810, 40, 40], f16, stride=(1296000, 1, 32400, 810)), T([1, 88, 40, 40], f16), T([810, 88, 1, 1], f16), [810], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 810, 80, 80], f16, stride=(5184000, 1, 64800, 810)), T([1, 88, 80, 80], f16), T([810, 88, 1, 1], f16), [810], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([1, 88, 20, 20], f16), T([1, 320, 20, 20], f16), T([88, 320, 1, 1], f16), [88], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([1, 88, 40, 40], f16), T([1, 112, 40, 40], f16), T([88, 112, 1, 1], f16), [88], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 88, 80, 80], f16), T([1, 40, 80, 80], f16), T([88, 40, 1, 1], f16), [88], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 320, 20, 20], f16), T([1, 1920, 20, 20], f16), T([320, 1920, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([1, 1920, 1, 1], f16), T([1, 80, 1, 1], f16), T([1920, 80, 1, 1], f16), [1920], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) 
+cnt: 1, ((T([1, 80, 1, 1], f16), T([1, 1920, 1, 1], f16), T([80, 1920, 1, 1], f16), [80], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 1920, 20, 20], f16), T([1, 1920, 20, 20], f16), T([1920, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1920, [True, True, False]), {}) +cnt: 1, ((T([1, 1920, 20, 20], f16), T([1, 320, 20, 20], f16), T([1920, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([1, 320, 20, 20], f16), T([1, 1152, 20, 20], f16), T([320, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([1, 1152, 1, 1], f16), T([1, 48, 1, 1], f16), T([1152, 48, 1, 1], f16), [1152], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 5, ((T([1, 48, 1, 1], f16), T([1, 1152, 1, 1], f16), T([48, 1152, 1, 1], f16), [48], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 1152, 20, 20], f16), T([1, 1152, 20, 20], f16), T([1152, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 5, ((T([1, 1152, 20, 20], f16), T([1, 192, 20, 20], f16), T([1152, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([1, 192, 20, 20], f16), T([1, 1152, 20, 20], f16), T([192, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([1, 1152, 20, 20], f16), T([1, 1152, 20, 20], f16), T([1152, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 1, ((T([1, 192, 20, 20], f16), T([1, 672, 20, 20], f16), T([192, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([1, 672, 1, 1], f16), T([1, 28, 1, 1], f16), T([672, 28, 1, 1], f16), [672], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([1, 28, 1, 1], f16), T([1, 672, 1, 
1], f16), T([28, 672, 1, 1], f16), [28], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 672, 20, 20], f16), T([1, 672, 43, 43], f16), T([672, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 4, ((T([1, 672, 40, 40], f16), T([1, 112, 40, 40], f16), T([672, 112, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([1, 112, 40, 40], f16), T([1, 672, 40, 40], f16), T([112, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([1, 672, 40, 40], f16), T([1, 672, 40, 40], f16), T([672, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 1, ((T([1, 112, 40, 40], f16), T([1, 480, 40, 40], f16), T([112, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([1, 480, 1, 1], f16), T([1, 20, 1, 1], f16), T([480, 20, 1, 1], f16), [480], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([1, 20, 1, 1], f16), T([1, 480, 1, 1], f16), T([20, 480, 1, 1], f16), [20], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 480, 40, 40], f16), T([1, 480, 40, 40], f16), T([480, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 4, ((T([1, 480, 40, 40], f16), T([1, 80, 40, 40], f16), T([480, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([1, 80, 40, 40], f16), T([1, 480, 40, 40], f16), T([80, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([1, 480, 40, 40], f16), T([1, 480, 40, 40], f16), T([480, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 1, ((T([1, 80, 40, 40], f16), T([1, 240, 40, 40], f16), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, 
[0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([1, 240, 1, 1], f16), T([1, 10, 1, 1], f16), T([240, 10, 1, 1], f16), [240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([1, 10, 1, 1], f16), T([1, 240, 1, 1], f16), T([10, 240, 1, 1], f16), [10], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 240, 40, 40], f16), T([1, 240, 81, 81], f16), T([240, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 3, ((T([1, 240, 80, 80], f16), T([1, 40, 80, 80], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([1, 40, 80, 80], f16), T([1, 240, 80, 80], f16), T([40, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([1, 240, 80, 80], f16), T([1, 240, 80, 80], f16), T([240, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([1, 40, 80, 80], f16), T([1, 144, 80, 80], f16), T([40, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([1, 144, 1, 1], f16), T([1, 6, 1, 1], f16), T([144, 6, 1, 1], f16), [144], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([1, 6, 1, 1], f16), T([1, 144, 1, 1], f16), T([6, 144, 1, 1], f16), [6], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 144, 80, 80], f16), T([1, 144, 163, 163], f16), T([144, 1, 5, 5], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 3, ((T([1, 144, 160, 160], f16), T([1, 24, 160, 160], f16), T([144, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([1, 24, 160, 160], f16), T([1, 144, 160, 160], f16), T([24, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([1, 144, 160, 160], f16), T([1, 
144, 160, 160], f16), T([144, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 1, ((T([1, 24, 160, 160], f16), T([1, 96, 160, 160], f16), T([24, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([1, 96, 1, 1], f16), T([1, 4, 1, 1], f16), T([96, 4, 1, 1], f16), [96], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 4, 1, 1], f16), T([1, 96, 1, 1], f16), T([4, 96, 1, 1], f16), [4], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 96, 160, 160], f16), T([1, 96, 321, 321], f16), T([96, 1, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([1, 96, 320, 320], f16), T([1, 16, 320, 320], f16), T([96, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([1, 16, 320, 320], f16), T([1, 16, 320, 320], f16), T([16, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([1, 16, 1, 1], f16), T([1, 4, 1, 1], f16), T([16, 4, 1, 1], f16), [16], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 4, 1, 1], f16), T([1, 16, 1, 1], f16), T([4, 16, 1, 1], f16), [4], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 16, 320, 320], f16), T([1, 16, 320, 320], f16), T([16, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 16, [True, True, False]), {}) +cnt: 1, ((T([1, 16, 320, 320], f16), T([1, 32, 320, 320], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([1, 32, 1, 1], f16), T([1, 8, 1, 1], f16), T([32, 8, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([1, 8, 1, 1], f16), T([1, 32, 1, 1], f16), T([8, 32, 1, 1], f16), [8], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), 
{}) +cnt: 1, ((T([1, 32, 320, 320], f16), T([1, 32, 320, 320], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([1, 32, 320, 320], f16), T([1, 3, 641, 641], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([1, 3, 640, 640], f16), T([1, 3, 640, 640], f16)), {}) +Operator: aten.div.Scalar +cnt: 2, ((T([5000], f16), 2), {}) +cnt: 2, ((T([5000], f32), 2.0), {}) +cnt: 1, ((T([5000, 4], f32), 2), {}) +cnt: 1, ((T([1, 1920, 20, 20], f16, stride=(1920, 1, 0, 0)), 400), {}) +cnt: 5, ((T([1, 1152, 20, 20], f16, stride=(1152, 1, 0, 0)), 400), {}) +cnt: 1, ((T([1, 672, 20, 20], f16, stride=(672, 1, 0, 0)), 400), {}) +cnt: 3, ((T([1, 672, 40, 40], f16, stride=(672, 1, 0, 0)), 1600), {}) +cnt: 4, ((T([1, 480, 40, 40], f16, stride=(480, 1, 0, 0)), 1600), {}) +cnt: 1, ((T([1, 240, 40, 40], f16, stride=(240, 1, 0, 0)), 1600), {}) +cnt: 2, ((T([1, 240, 80, 80], f16, stride=(240, 1, 0, 0)), 6400), {}) +cnt: 1, ((T([1, 144, 80, 80], f16, stride=(144, 1, 0, 0)), 6400), {}) +cnt: 2, ((T([1, 144, 160, 160], f16, stride=(144, 1, 0, 0)), 25600), {}) +cnt: 1, ((T([1, 96, 160, 160], f16, stride=(96, 1, 0, 0)), 25600), {}) +cnt: 1, ((T([1, 16, 320, 320], f16, stride=(16, 1, 0, 0)), 102400), {}) +cnt: 1, ((T([1, 32, 320, 320], f16, stride=(32, 1, 0, 0)), 102400), {}) +Operator: aten.div.Tensor +cnt: 80, ((T([1, 88, 10, 10], f16), T([], f16)), {}) +cnt: 80, ((T([1, 88, 20, 20], f16), T([], f16)), {}) +cnt: 80, ((T([1, 88, 40, 40], f16), T([], f16)), {}) +cnt: 32, ((T([1, 88, 80, 80], f16), T([], f16)), {}) +cnt: 32, ((T([1, 88, 5, 5], f16), T([], f16)), {}) +cnt: 1, ((T([2], i32), T([], f16)), {}) +cnt: 2, ((T([], f32), 600), {}) +cnt: 2, ((T([5000], f32), T([], f64)), {}) +Operator: aten.eq.Tensor +cnt: 1, ((T([5000, 4], f32), T([4], f16)), {}) +Operator: aten.exp.default +cnt: 2, ((T([5000], f32, stride=(4,)),), {}) +Operator: 
aten.floor_divide.default +cnt: 1, ((T([1, 5000], i64), 90), {}) +Operator: aten.gather.default +cnt: 1, ((T([1, 76725, 4], f16), 1, T([1, 5000, 4], i64, stride=(5000, 1, 0))), {}) +cnt: 1, ((T([1, 76725, 90], f16), 1, T([1, 5000, 90], i64, stride=(5000, 1, 0))), {}) +cnt: 1, ((T([1, 5000, 90], f16), 2, T([1, 5000, 1], i64)), {}) +Operator: aten.ge.Scalar +cnt: 1, ((T([5000, 4], f32), 0), {}) +Operator: aten.gt.Tensor +cnt: 1, ((T([5000, 4], f32), T([4], f16)), {}) +Operator: aten.index.Tensor +cnt: 1, ((T([76725, 4], f16, stride=(1, 76725)), [T([5000], i64)]), {}) +cnt: 1, ((T([5000, 4], f32), [T([100], i64)]), {}) +cnt: 1, ((T([5000, 1], f32), [T([100], i64)]), {}) +cnt: 1, ((T([5000, 1], i64), [T([100], i64)]), {}) +Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([5000, 4], f32), T([5000, 4], b8), 0), {}) +Operator: aten.max.default +cnt: 1, ((T([5000, 4], f32),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 5, ((T([1, 88, 21, 21], f16), [3, 3], [2, 2]), {}) +cnt: 5, ((T([1, 88, 11, 11], f16), [3, 3], [2, 2]), {}) +cnt: 4, ((T([1, 88, 81, 81], f16), [3, 3], [2, 2]), {}) +cnt: 4, ((T([1, 88, 41, 41], f16), [3, 3], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 5, ((T([1, 88, 5, 5], f16), T([1, 88, 11, 11], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([1, 88, 5, 5], i64)), {}) +cnt: 5, ((T([1, 88, 10, 10], f16), T([1, 88, 21, 21], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([1, 88, 10, 10], i64)), {}) +cnt: 4, ((T([1, 88, 20, 20], f16), T([1, 88, 41, 41], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([1, 88, 20, 20], i64)), {}) +cnt: 4, ((T([1, 88, 40, 40], f16), T([1, 88, 81, 81], f16), [3, 3], [2, 2], [0, 0], [1, 1], False, T([1, 88, 40, 40], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([1, 32, 320, 320], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 16, 320, 320], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 96, 160, 160], f16), [2, 3], True), {}) +cnt: 2, ((T([1, 144, 160, 160], f16), [2, 3], True), {}) +cnt: 1, 
((T([1, 144, 80, 80], f16), [2, 3], True), {}) +cnt: 2, ((T([1, 240, 80, 80], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 240, 40, 40], f16), [2, 3], True), {}) +cnt: 4, ((T([1, 480, 40, 40], f16), [2, 3], True), {}) +cnt: 3, ((T([1, 672, 40, 40], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 672, 20, 20], f16), [2, 3], True), {}) +cnt: 5, ((T([1, 1152, 20, 20], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 1920, 20, 20], f16), [2, 3], True), {}) +Operator: aten.minimum.default +cnt: 1, ((T([5000, 4], f32), T([4], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([1, 32, 320, 320], f16), T([1, 32, 1, 1], f16)), {}) +cnt: 2, ((T([1, 16, 320, 320], f16), T([1, 16, 1, 1], f16)), {}) +cnt: 2, ((T([1, 96, 160, 160], f16), T([1, 96, 1, 1], f16)), {}) +cnt: 4, ((T([1, 144, 160, 160], f16), T([1, 144, 1, 1], f16)), {}) +cnt: 2, ((T([1, 144, 80, 80], f16), T([1, 144, 1, 1], f16)), {}) +cnt: 4, ((T([1, 240, 80, 80], f16), T([1, 240, 1, 1], f16)), {}) +cnt: 2, ((T([1, 240, 40, 40], f16), T([1, 240, 1, 1], f16)), {}) +cnt: 8, ((T([1, 480, 40, 40], f16), T([1, 480, 1, 1], f16)), {}) +cnt: 6, ((T([1, 672, 40, 40], f16), T([1, 672, 1, 1], f16)), {}) +cnt: 2, ((T([1, 672, 20, 20], f16), T([1, 672, 1, 1], f16)), {}) +cnt: 10, ((T([1, 1152, 20, 20], f16), T([1, 1152, 1, 1], f16)), {}) +cnt: 2, ((T([1, 1920, 20, 20], f16), T([1, 1920, 1, 1], f16)), {}) +cnt: 40, ((T([1, 88, 10, 10], f16), T([], f16)), {}) +cnt: 40, ((T([1, 88, 20, 20], f16), T([], f16)), {}) +cnt: 40, ((T([1, 88, 40, 40], f16), T([], f16)), {}) +cnt: 16, ((T([1, 88, 80, 80], f16), T([], f16)), {}) +cnt: 16, ((T([1, 88, 5, 5], f16), T([], f16)), {}) +cnt: 6, ((T([5000], f32), T([5000], f16)), {}) +cnt: 2, ((T([5000], f32, stride=(4,)), T([5000], f16)), {}) +cnt: 1, ((T([5000], f32), T([], f32)), {}) +cnt: 1, ((T([100, 4], f32), T([], f16)), {}) +cnt: 1, ((T([100, 4], f32, stride=(0, 0)), T([], f16)), {}) +cnt: 2, ((T([5000], f32), T([5000], f32)), {}) +cnt: 16, ((T([1, 88, 5, 5], f16), T([1, 88, 5, 5], f16)), {}) +cnt: 40, 
((T([1, 88, 10, 10], f16), T([1, 88, 10, 10], f16)), {}) +cnt: 40, ((T([1, 88, 20, 20], f16), T([1, 88, 20, 20], f16)), {}) +cnt: 40, ((T([1, 88, 40, 40], f16), T([1, 88, 40, 40], f16)), {}) +cnt: 16, ((T([1, 88, 80, 80], f16), T([1, 88, 80, 80], f16)), {}) +cnt: 1, ((T([1, 1920, 20, 20], f16), T([1, 1920, 20, 20], f16)), {}) +cnt: 5, ((T([1, 1152, 20, 20], f16), T([1, 1152, 20, 20], f16)), {}) +cnt: 1, ((T([1, 672, 20, 20], f16), T([1, 672, 20, 20], f16)), {}) +cnt: 3, ((T([1, 672, 40, 40], f16), T([1, 672, 40, 40], f16)), {}) +cnt: 4, ((T([1, 480, 40, 40], f16), T([1, 480, 40, 40], f16)), {}) +cnt: 1, ((T([1, 240, 40, 40], f16), T([1, 240, 40, 40], f16)), {}) +cnt: 2, ((T([1, 240, 80, 80], f16), T([1, 240, 80, 80], f16)), {}) +cnt: 1, ((T([1, 144, 80, 80], f16), T([1, 144, 80, 80], f16)), {}) +cnt: 2, ((T([1, 144, 160, 160], f16), T([1, 144, 160, 160], f16)), {}) +cnt: 1, ((T([1, 96, 160, 160], f16), T([1, 96, 160, 160], f16)), {}) +cnt: 1, ((T([1, 16, 320, 320], f16), T([1, 16, 320, 320], f16)), {}) +cnt: 1, ((T([1, 32, 320, 320], f16), T([1, 32, 320, 320], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([1, 32, 320, 320], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), False, 0.1, 0.001), {}) +cnt: 3, ((T([1, 16, 320, 320], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), False, 0.1, 0.001), {}) +cnt: 1, ((T([1, 96, 320, 320], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), False, 0.1, 0.001), {}) +cnt: 1, ((T([1, 96, 160, 160], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), False, 0.1, 0.001), {}) +cnt: 3, ((T([1, 24, 160, 160], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), False, 0.1, 0.001), {}) +cnt: 5, ((T([1, 144, 160, 160], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), False, 0.1, 0.001), {}) +cnt: 1, ((T([1, 144, 80, 80], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), False, 0.1, 0.001), {}) +cnt: 3, ((T([1, 40, 80, 80], f16), 
T([40], f16), T([40], f16), T([40], f16), T([40], f16), False, 0.1, 0.001), {}) +cnt: 5, ((T([1, 240, 80, 80], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), False, 0.1, 0.001), {}) +cnt: 1, ((T([1, 240, 40, 40], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), False, 0.1, 0.001), {}) +cnt: 4, ((T([1, 80, 40, 40], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), False, 0.1, 0.001), {}) +cnt: 8, ((T([1, 480, 40, 40], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f16), False, 0.1, 0.001), {}) +cnt: 4, ((T([1, 112, 40, 40], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), False, 0.1, 0.001), {}) +cnt: 7, ((T([1, 672, 40, 40], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), False, 0.1, 0.001), {}) +cnt: 1, ((T([1, 672, 20, 20], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), False, 0.1, 0.001), {}) +cnt: 5, ((T([1, 192, 20, 20], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), False, 0.1, 0.001), {}) +cnt: 10, ((T([1, 1152, 20, 20], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f16), False, 0.1, 0.001), {}) +cnt: 2, ((T([1, 320, 20, 20], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), False, 0.1, 0.001), {}) +cnt: 2, ((T([1, 1920, 20, 20], f16), T([1920], f16), T([1920], f16), T([1920], f16), T([1920], f16), False, 0.1, 0.001), {}) +cnt: 17, ((T([1, 88, 20, 20], f16), T([88], f16), T([88], f16), T([88], f16), T([88], f16), False, 0.01, 0.001), {}) +cnt: 14, ((T([1, 88, 10, 10], f16), T([88], f16), T([88], f16), T([88], f16), T([88], f16), False, 0.01, 0.001), {}) +cnt: 16, ((T([1, 88, 40, 40], f16), T([88], f16), T([88], f16), T([88], f16), T([88], f16), False, 0.01, 0.001), {}) +cnt: 11, ((T([1, 88, 80, 80], f16), T([88], f16), T([88], f16), T([88], f16), T([88], f16), False, 0.01, 0.001), {}) +cnt: 10, ((T([1, 88, 5, 5], f16), T([88], f16), T([88], f16), T([88], f16), T([88], f16), False, 0.01, 
0.001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 10, ((T([1, 88, 5, 5], f16), T([1, 88, 5, 5], f16), T([88], f16), T([88], f16), T([88], f16), T([88], f32), T([88], f32), False, 0.001, [True, True, True]), {}) +cnt: 14, ((T([1, 88, 10, 10], f16), T([1, 88, 10, 10], f16), T([88], f16), T([88], f16), T([88], f16), T([88], f32), T([88], f32), False, 0.001, [True, True, True]), {}) +cnt: 17, ((T([1, 88, 20, 20], f16), T([1, 88, 20, 20], f16), T([88], f16), T([88], f16), T([88], f16), T([88], f32), T([88], f32), False, 0.001, [True, True, True]), {}) +cnt: 16, ((T([1, 88, 40, 40], f16), T([1, 88, 40, 40], f16), T([88], f16), T([88], f16), T([88], f16), T([88], f32), T([88], f32), False, 0.001, [True, True, True]), {}) +cnt: 11, ((T([1, 88, 80, 80], f16), T([1, 88, 80, 80], f16), T([88], f16), T([88], f16), T([88], f16), T([88], f32), T([88], f32), False, 0.001, [True, True, True]), {}) +cnt: 2, ((T([1, 320, 20, 20], f16), T([1, 320, 20, 20], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), False, 0.001, [True, True, True]), {}) +cnt: 2, ((T([1, 1920, 20, 20], f16), T([1, 1920, 20, 20], f16), T([1920], f16), T([1920], f16), T([1920], f16), T([1920], f32), T([1920], f32), False, 0.001, [True, True, True]), {}) +cnt: 10, ((T([1, 1152, 20, 20], f16), T([1, 1152, 20, 20], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f32), T([1152], f32), False, 0.001, [True, True, True]), {}) +cnt: 5, ((T([1, 192, 20, 20], f16), T([1, 192, 20, 20], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([1, 672, 20, 20], f16), T([1, 672, 20, 20], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), False, 0.001, [True, True, True]), {}) +cnt: 7, ((T([1, 672, 40, 40], f16), T([1, 672, 40, 40], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), False, 0.001, [True, True, True]), {}) +cnt: 4, 
((T([1, 112, 40, 40], f16), T([1, 112, 40, 40], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), False, 0.001, [True, True, True]), {}) +cnt: 8, ((T([1, 480, 40, 40], f16), T([1, 480, 40, 40], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), False, 0.001, [True, True, True]), {}) +cnt: 4, ((T([1, 80, 40, 40], f16), T([1, 80, 40, 40], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([1, 240, 40, 40], f16), T([1, 240, 40, 40], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), False, 0.001, [True, True, True]), {}) +cnt: 5, ((T([1, 240, 80, 80], f16), T([1, 240, 80, 80], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), False, 0.001, [True, True, True]), {}) +cnt: 3, ((T([1, 40, 80, 80], f16), T([1, 40, 80, 80], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([1, 144, 80, 80], f16), T([1, 144, 80, 80], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), False, 0.001, [True, True, True]), {}) +cnt: 5, ((T([1, 144, 160, 160], f16), T([1, 144, 160, 160], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), False, 0.001, [True, True, True]), {}) +cnt: 3, ((T([1, 24, 160, 160], f16), T([1, 24, 160, 160], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([1, 96, 160, 160], f16), T([1, 96, 160, 160], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), False, 0.001, [True, True, True]), {}) +cnt: 1, ((T([1, 96, 320, 320], f16), T([1, 96, 320, 320], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), False, 0.001, [True, True, True]), {}) +cnt: 3, ((T([1, 16, 320, 320], f16), T([1, 16, 320, 320], f16), T([16], f16), 
T([16], f16), T([16], f16), T([16], f32), T([16], f32), False, 0.001, [True, True, True]), {}) +cnt: 2, ((T([1, 32, 320, 320], f16), T([1, 32, 320, 320], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), False, 0.001, [True, True, True]), {}) +Operator: aten.neg.default +cnt: 2, ((T([5000], f32, stride=(4,)),), {}) +cnt: 8, ((T([1, 88, 5, 5], f16),), {}) +cnt: 20, ((T([1, 88, 10, 10], f16),), {}) +cnt: 20, ((T([1, 88, 20, 20], f16),), {}) +cnt: 20, ((T([1, 88, 40, 40], f16),), {}) +cnt: 8, ((T([1, 88, 80, 80], f16),), {}) +Operator: aten.new_zeros.default +cnt: 1, ((T([100, 1], f32, stride=(0, 0)), [5000, 1]), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([100, 4], f32), [5000, 4]), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([1, 5000, 1], f16), [1, 5000, 90]), {}) +cnt: 1, ((T([1, 5000, 90], f16), [1, 76725, 90]), {}) +cnt: 1, ((T([1, 5000, 4], f16), [1, 76725, 4]), {}) +Operator: aten.relu.default +cnt: 20, ((T([2], f16),), {}) +cnt: 12, ((T([3], f16),), {}) +Operator: aten.remainder.Scalar +cnt: 1, ((T([1, 5000], i64), 90), {}) +Operator: aten.scatter_add_.default +cnt: 1, ((T([1, 5000, 90], f16), 2, T([1, 5000, 1], i64), T([1, 5000, 1], f16)), {}) +cnt: 1, ((T([1, 76725, 90], f16), 1, T([1, 5000, 90], i64, stride=(5000, 1, 0)), T([1, 5000, 90], f16)), {}) +cnt: 1, ((T([1, 76725, 4], f16), 1, T([1, 5000, 4], i64, stride=(5000, 1, 0)), T([1, 5000, 4], f16)), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([5000, 4], f16), [1, 5000, 4], 0, 0), {}) +cnt: 1, ((T([5000, 1], f16), [1, 5000, 1], 0, 0), {}) +cnt: 20, ((T([], f16), [2], 0, 1), {}) +cnt: 20, ((T([], f16), [2], 0, 0), {}) +cnt: 12, ((T([], f16), [3], 0, 2), {}) +cnt: 12, ((T([], f16), [3], 0, 1), {}) +cnt: 12, ((T([], f16), [3], 0, 0), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([1, 32, 1, 1], f16),), {}) +cnt: 1, ((T([1, 16, 1, 1], f16),), {}) +cnt: 1, ((T([1, 96, 1, 1], f16),), {}) +cnt: 3, ((T([1, 144, 1, 1], 
f16),), {}) +cnt: 3, ((T([1, 240, 1, 1], f16),), {}) +cnt: 4, ((T([1, 480, 1, 1], f16),), {}) +cnt: 4, ((T([1, 672, 1, 1], f16),), {}) +cnt: 5, ((T([1, 1152, 1, 1], f16),), {}) +cnt: 1, ((T([1, 1920, 1, 1], f16),), {}) +cnt: 1, ((T([5000, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 1, ((T([5000, 1], f16), T([5000, 1], f16)), {}) +cnt: 1, ((T([1, 1920, 1, 1], f16), T([1, 1920, 1, 1], f16)), {}) +cnt: 5, ((T([1, 1152, 1, 1], f16), T([1, 1152, 1, 1], f16)), {}) +cnt: 4, ((T([1, 672, 1, 1], f16), T([1, 672, 1, 1], f16)), {}) +cnt: 4, ((T([1, 480, 1, 1], f16), T([1, 480, 1, 1], f16)), {}) +cnt: 3, ((T([1, 240, 1, 1], f16), T([1, 240, 1, 1], f16)), {}) +cnt: 3, ((T([1, 144, 1, 1], f16), T([1, 144, 1, 1], f16)), {}) +cnt: 1, ((T([1, 96, 1, 1], f16), T([1, 96, 1, 1], f16)), {}) +cnt: 1, ((T([1, 16, 1, 1], f16), T([1, 16, 1, 1], f16)), {}) +cnt: 1, ((T([1, 32, 1, 1], f16), T([1, 32, 1, 1], f16)), {}) +Operator: aten.silu_.default +cnt: 2, ((T([1, 32, 320, 320], f16),), {}) +cnt: 1, ((T([1, 8, 1, 1], f16),), {}) +cnt: 1, ((T([1, 16, 320, 320], f16),), {}) +cnt: 2, ((T([1, 4, 1, 1], f16),), {}) +cnt: 1, ((T([1, 96, 320, 320], f16),), {}) +cnt: 1, ((T([1, 96, 160, 160], f16),), {}) +cnt: 5, ((T([1, 144, 160, 160], f16),), {}) +cnt: 3, ((T([1, 6, 1, 1], f16),), {}) +cnt: 1, ((T([1, 144, 80, 80], f16),), {}) +cnt: 5, ((T([1, 240, 80, 80], f16),), {}) +cnt: 3, ((T([1, 10, 1, 1], f16),), {}) +cnt: 1, ((T([1, 240, 40, 40], f16),), {}) +cnt: 8, ((T([1, 480, 40, 40], f16),), {}) +cnt: 4, ((T([1, 20, 1, 1], f16),), {}) +cnt: 7, ((T([1, 672, 40, 40], f16),), {}) +cnt: 4, ((T([1, 28, 1, 1], f16),), {}) +cnt: 1, ((T([1, 672, 20, 20], f16),), {}) +cnt: 10, ((T([1, 1152, 20, 20], f16),), {}) +cnt: 5, ((T([1, 48, 1, 1], f16),), {}) +cnt: 2, ((T([1, 1920, 20, 20], f16),), {}) +cnt: 1, ((T([1, 80, 1, 1], f16),), {}) +cnt: 14, ((T([1, 88, 10, 10], f16),), {}) +cnt: 14, ((T([1, 88, 20, 20], f16),), {}) +cnt: 14, ((T([1, 88, 40, 40], f16),), {}) +cnt: 10, ((T([1, 88, 80, 80], 
f16),), {}) +cnt: 10, ((T([1, 88, 5, 5], f16),), {}) +Operator: aten.silu_backward.default +cnt: 10, ((T([1, 88, 5, 5], f16), T([1, 88, 5, 5], f16)), {}) +cnt: 14, ((T([1, 88, 10, 10], f16), T([1, 88, 10, 10], f16)), {}) +cnt: 14, ((T([1, 88, 20, 20], f16), T([1, 88, 20, 20], f16)), {}) +cnt: 14, ((T([1, 88, 40, 40], f16), T([1, 88, 40, 40], f16)), {}) +cnt: 10, ((T([1, 88, 80, 80], f16), T([1, 88, 80, 80], f16)), {}) +cnt: 1, ((T([1, 80, 1, 1], f16), T([1, 80, 1, 1], f16)), {}) +cnt: 2, ((T([1, 1920, 20, 20], f16), T([1, 1920, 20, 20], f16)), {}) +cnt: 5, ((T([1, 48, 1, 1], f16), T([1, 48, 1, 1], f16)), {}) +cnt: 10, ((T([1, 1152, 20, 20], f16), T([1, 1152, 20, 20], f16)), {}) +cnt: 4, ((T([1, 28, 1, 1], f16), T([1, 28, 1, 1], f16)), {}) +cnt: 1, ((T([1, 672, 20, 20], f16), T([1, 672, 20, 20], f16)), {}) +cnt: 7, ((T([1, 672, 40, 40], f16), T([1, 672, 40, 40], f16)), {}) +cnt: 4, ((T([1, 20, 1, 1], f16), T([1, 20, 1, 1], f16)), {}) +cnt: 8, ((T([1, 480, 40, 40], f16), T([1, 480, 40, 40], f16)), {}) +cnt: 3, ((T([1, 10, 1, 1], f16), T([1, 10, 1, 1], f16)), {}) +cnt: 1, ((T([1, 240, 40, 40], f16), T([1, 240, 40, 40], f16)), {}) +cnt: 5, ((T([1, 240, 80, 80], f16), T([1, 240, 80, 80], f16)), {}) +cnt: 3, ((T([1, 6, 1, 1], f16), T([1, 6, 1, 1], f16)), {}) +cnt: 1, ((T([1, 144, 80, 80], f16), T([1, 144, 80, 80], f16)), {}) +cnt: 5, ((T([1, 144, 160, 160], f16), T([1, 144, 160, 160], f16)), {}) +cnt: 2, ((T([1, 4, 1, 1], f16), T([1, 4, 1, 1], f16)), {}) +cnt: 1, ((T([1, 96, 160, 160], f16), T([1, 96, 160, 160], f16)), {}) +cnt: 1, ((T([1, 96, 320, 320], f16), T([1, 96, 320, 320], f16)), {}) +cnt: 1, ((T([1, 16, 320, 320], f16), T([1, 16, 320, 320], f16)), {}) +cnt: 1, ((T([1, 8, 1, 1], f16), T([1, 8, 1, 1], f16)), {}) +cnt: 2, ((T([1, 32, 320, 320], f16), T([1, 32, 320, 320], f16)), {}) +Operator: aten.stack.default +cnt: 4, (([T([1, 88, 10, 10], f16), T([1, 88, 10, 10], f16)], -1), {}) +cnt: 4, (([T([1, 88, 20, 20], f16), T([1, 88, 20, 20], f16)], -1), {}) +cnt: 4, 
(([T([1, 88, 40, 40], f16), T([1, 88, 40, 40], f16)], -1), {}) +cnt: 4, (([T([1, 88, 80, 80], f16), T([1, 88, 80, 80], f16)], -1), {}) +cnt: 4, (([T([1, 88, 40, 40], f16), T([1, 88, 40, 40], f16), T([1, 88, 40, 40], f16)], -1), {}) +cnt: 4, (([T([1, 88, 20, 20], f16), T([1, 88, 20, 20], f16), T([1, 88, 20, 20], f16)], -1), {}) +cnt: 4, (([T([1, 88, 10, 10], f16), T([1, 88, 10, 10], f16), T([1, 88, 10, 10], f16)], -1), {}) +cnt: 4, (([T([1, 88, 5, 5], f16), T([1, 88, 5, 5], f16)], -1), {}) +cnt: 2, (([T([5000], f32), T([5000], f32), T([5000], f32), T([5000], f32)], 1), {}) +cnt: 1, (([T([100, 6], f32)],), {}) +Operator: aten.sub.Tensor +cnt: 2, ((T([5000], f16, stride=(4,)), T([5000], f16, stride=(4,))), {}) +cnt: 2, ((T([5000], f32), T([5000], f32)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([1, 1920, 20, 20], f16), [2, 3], True), {}) +cnt: 5, ((T([1, 1152, 20, 20], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 672, 20, 20], f16), [2, 3], True), {}) +cnt: 3, ((T([1, 672, 40, 40], f16), [2, 3], True), {}) +cnt: 4, ((T([1, 480, 40, 40], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 240, 40, 40], f16), [2, 3], True), {}) +cnt: 2, ((T([1, 240, 80, 80], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 144, 80, 80], f16), [2, 3], True), {}) +cnt: 2, ((T([1, 144, 160, 160], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 96, 160, 160], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 16, 320, 320], f16), [2, 3], True), {}) +cnt: 1, ((T([1, 32, 320, 320], f16), [2, 3], True), {}) +Operator: aten.sum.default +cnt: 20, ((T([2], f16),), {}) +cnt: 12, ((T([3], f16),), {}) +cnt: 1, ((T([1, 100, 6], f32),), {}) +cnt: 16, ((T([1, 88, 5, 5], f16),), {}) +cnt: 40, ((T([1, 88, 10, 10], f16),), {}) +cnt: 40, ((T([1, 88, 20, 20], f16),), {}) +cnt: 40, ((T([1, 88, 40, 40], f16),), {}) +cnt: 16, ((T([1, 88, 80, 80], f16),), {}) +Operator: aten.sum.dim_IntList +cnt: 4, ((T([1, 88, 10, 10, 2], f16), [-1]), {}) +cnt: 4, ((T([1, 88, 20, 20, 2], f16), [-1]), {}) +cnt: 4, ((T([1, 88, 40, 40, 2], f16), [-1]), {}) +cnt: 
4, ((T([1, 88, 80, 80, 2], f16), [-1]), {}) +cnt: 4, ((T([1, 88, 40, 40, 3], f16), [-1]), {}) +cnt: 4, ((T([1, 88, 20, 20, 3], f16), [-1]), {}) +cnt: 4, ((T([1, 88, 10, 10, 3], f16), [-1]), {}) +cnt: 4, ((T([1, 88, 5, 5, 2], f16), [-1]), {}) +Operator: aten.threshold_backward.default +cnt: 20, ((T([2], f16), T([2], f16), 0), {}) +cnt: 12, ((T([3], f16), T([3], f16), 0), {}) +Operator: aten.topk.default +cnt: 1, ((T([1, 6905250], f16), 5000, 1), {}) +Operator: aten.unbind.int +cnt: 2, ((T([5000, 4], f32), 1), {}) +cnt: 1, ((T([1, 100, 6], f32, stride=(0, 0, 0)),), {}) +cnt: 4, ((T([1, 88, 5, 5, 2], f16, stride=(2200, 25, 5, 1, 0)), -1), {}) +cnt: 4, ((T([1, 88, 10, 10, 3], f16, stride=(8800, 100, 10, 1, 0)), -1), {}) +cnt: 4, ((T([1, 88, 20, 20, 3], f16, stride=(35200, 400, 20, 1, 0)), -1), {}) +cnt: 4, ((T([1, 88, 40, 40, 3], f16, stride=(140800, 1600, 40, 1, 0)), -1), {}) +cnt: 4, ((T([1, 88, 80, 80, 2], f16, stride=(563200, 6400, 80, 1, 0)), -1), {}) +cnt: 4, ((T([1, 88, 40, 40, 2], f16, stride=(140800, 1600, 40, 1, 0)), -1), {}) +cnt: 4, ((T([1, 88, 20, 20, 2], f16, stride=(35200, 400, 20, 1, 0)), -1), {}) +cnt: 4, ((T([1, 88, 10, 10, 2], f16, stride=(8800, 100, 10, 1, 0)), -1), {}) +Operator: aten.upsample_nearest2d.vec +cnt: 4, ((T([1, 88, 5, 5], f16), [10, 10], None), {}) +cnt: 4, ((T([1, 88, 10, 10], f16), [20, 20], None), {}) +cnt: 4, ((T([1, 88, 20, 20], f16), [40, 40], None), {}) +cnt: 4, ((T([1, 88, 40, 40], f16), [80, 80], None), {}) +Operator: aten.upsample_nearest2d_backward.vec +cnt: 4, ((T([1, 88, 80, 80], f16), [80, 80], [1, 88, 40, 40], None), {}) +cnt: 4, ((T([1, 88, 40, 40], f16), [40, 40], [1, 88, 20, 20], None), {}) +cnt: 4, ((T([1, 88, 20, 20], f16), [20, 20], [1, 88, 10, 10], None), {}) +cnt: 4, ((T([1, 88, 10, 10], f16), [10, 10], [1, 88, 5, 5], None), {}) +Operator: aten.where.self +cnt: 1, ((T([5000, 4], b8), T([5000, 4], f32), T([5000, 4], f32)), {}) +cnt: 1, ((T([5000, 4], b8), T([5000, 4], f32), T([], f32)), {}) diff --git 
a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_efficientnet_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_efficientnet_training.txt new file mode 100644 index 000000000..1f004ded9 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_efficientnet_training.txt @@ -0,0 +1,295 @@ +Operator: aten.add.Tensor +cnt: 2, ((T([32, 24, 56, 56], f16), T([32, 24, 56, 56], f16)), {}) +cnt: 2, ((T([32, 40, 28, 28], f16), T([32, 40, 28, 28], f16)), {}) +cnt: 4, ((T([32, 80, 14, 14], f16), T([32, 80, 14, 14], f16)), {}) +cnt: 4, ((T([32, 112, 14, 14], f16), T([32, 112, 14, 14], f16)), {}) +cnt: 6, ((T([32, 192, 7, 7], f16), T([32, 192, 7, 7], f16)), {}) +cnt: 4, ((T([32, 1152, 7, 7], f16), T([32, 1152, 7, 7], f16)), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 7, 7], f16)), {}) +cnt: 2, ((T([32, 672, 14, 14], f16), T([32, 672, 14, 14], f16)), {}) +cnt: 3, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16)), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([32, 240, 14, 14], f16)), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([32, 240, 28, 28], f16)), {}) +cnt: 1, ((T([32, 144, 28, 28], f16), T([32, 144, 28, 28], f16)), {}) +cnt: 1, ((T([32, 144, 56, 56], f16), T([32, 144, 56, 56], f16)), {}) +cnt: 1, ((T([32, 96, 56, 56], f16), T([32, 96, 56, 56], f16)), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 1280], f16), T([1280, 1000], f16, stride=(1, 1280))), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +cnt: 2, ((T([32, 32, 112, 112], f16),), {}) +cnt: 1, ((T([32, 8, 1, 1], f16),), {}) +cnt: 1, ((T([32, 96, 112, 112], f16),), {}) +cnt: 1, ((T([32, 96, 56, 56], f16),), {}) +cnt: 1, ((T([32, 4, 1, 1], f16),), {}) +cnt: 3, ((T([32, 144, 56, 56], f16),), {}) +cnt: 2, ((T([32, 6, 1, 1], f16),), {}) +cnt: 1, ((T([32, 144, 28, 28], f16),), {}) +cnt: 3, ((T([32, 
240, 28, 28], f16),), {}) +cnt: 2, ((T([32, 10, 1, 1], f16),), {}) +cnt: 1, ((T([32, 240, 14, 14], f16),), {}) +cnt: 6, ((T([32, 480, 14, 14], f16),), {}) +cnt: 3, ((T([32, 20, 1, 1], f16),), {}) +cnt: 5, ((T([32, 672, 14, 14], f16),), {}) +cnt: 3, ((T([32, 28, 1, 1], f16),), {}) +cnt: 1, ((T([32, 672, 7, 7], f16),), {}) +cnt: 8, ((T([32, 1152, 7, 7], f16),), {}) +cnt: 4, ((T([32, 48, 1, 1], f16),), {}) +cnt: 1, ((T([32, 1280, 7, 7], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 32), {}) +cnt: 1, ((T([32, 32, 1, 1], f16), T([8, 32, 1, 1], f16), T([8], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 8, 1, 1], f16), T([32, 8, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([16, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([96, 16, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 96, 112, 112], f16), T([96, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 96), {}) +cnt: 1, ((T([32, 96, 1, 1], f16), T([4, 96, 1, 1], f16), T([4], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 4, 1, 1], f16), T([96, 4, 1, 1], f16), T([96], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 96, 56, 56], f16), T([24, 96, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 24, 56, 56], f16), T([144, 24, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 144, 56, 56], f16), T([144, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 144), {}) +cnt: 2, ((T([32, 144, 1, 1], f16), T([6, 144, 1, 1], f16), T([6], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), 
{}) +cnt: 2, ((T([32, 6, 1, 1], f16), T([144, 6, 1, 1], f16), T([144], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 144, 56, 56], f16), T([24, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 144, 56, 56], f16), T([144, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 144), {}) +cnt: 1, ((T([32, 144, 28, 28], f16), T([40, 144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 40, 28, 28], f16), T([240, 40, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([240, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 240), {}) +cnt: 2, ((T([32, 240, 1, 1], f16), T([10, 240, 1, 1], f16), T([10], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 10, 1, 1], f16), T([240, 10, 1, 1], f16), T([240], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([40, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([240, 1, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 240), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([80, 240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 80, 14, 14], f16), T([480, 80, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 480, 14, 14], f16), T([480, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 480), {}) +cnt: 3, ((T([32, 480, 1, 1], f16), T([20, 480, 1, 1], f16), T([20], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 20, 1, 1], f16), T([480, 20, 1, 1], f16), T([480], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 480, 14, 14], f16), T([80, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([480, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 
480), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([112, 480, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 112, 14, 14], f16), T([672, 112, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 672, 14, 14], f16), T([672, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 3, ((T([32, 672, 1, 1], f16), T([28, 672, 1, 1], f16), T([28], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 28, 1, 1], f16), T([672, 28, 1, 1], f16), T([672], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 672, 14, 14], f16), T([112, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 672, 14, 14], f16), T([672, 1, 5, 5], f16), None, [2, 2], [2, 2], [1, 1], False, [0, 0], 672), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([192, 672, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), None, [1, 1], [2, 2], [1, 1], False, [0, 0], 1152), {}) +cnt: 4, ((T([32, 1152, 1, 1], f16), T([48, 1152, 1, 1], f16), T([48], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 48, 1, 1], f16), T([1152, 48, 1, 1], f16), T([1152], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1152, 7, 7], f16), T([1152, 1, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1152), {}) +cnt: 1, ((T([32, 1152, 7, 7], f16), T([320, 1152, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 320, 7, 7], f16), T([1280, 320, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([32, 1280, 7, 7], f16), T([32, 320, 
7, 7], f16), T([1280, 320, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 320, 7, 7], f16), T([32, 1152, 7, 7], f16), T([320, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([32, 1152, 1, 1], f16), T([32, 48, 1, 1], f16), T([1152, 48, 1, 1], f16), [1152], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([32, 48, 1, 1], f16), T([32, 1152, 1, 1], f16), T([48, 1152, 1, 1], f16), [48], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 1152, 7, 7], f16), T([32, 1152, 7, 7], f16), T([1152, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 4, ((T([32, 1152, 7, 7], f16), T([32, 192, 7, 7], f16), T([1152, 192, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 192, 7, 7], f16), T([32, 1152, 7, 7], f16), T([192, 1152, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 1152, 7, 7], f16), T([32, 1152, 7, 7], f16), T([1152, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 1152, [True, True, False]), {}) +cnt: 1, ((T([32, 192, 7, 7], f16), T([32, 672, 7, 7], f16), T([192, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 672, 1, 1], f16), T([32, 28, 1, 1], f16), T([672, 28, 1, 1], f16), [672], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 28, 1, 1], f16), T([32, 672, 1, 1], f16), T([28, 672, 1, 1], f16), [28], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 14, 14], f16), T([672, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 3, ((T([32, 672, 14, 14], f16), T([32, 112, 14, 14], f16), T([672, 112, 1, 1], f16), [0], [1, 1], [0, 0], 
[1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 112, 14, 14], f16), T([32, 672, 14, 14], f16), T([112, 672, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 672, 14, 14], f16), T([32, 672, 14, 14], f16), T([672, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 672, [True, True, False]), {}) +cnt: 1, ((T([32, 112, 14, 14], f16), T([32, 480, 14, 14], f16), T([112, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 480, 1, 1], f16), T([32, 20, 1, 1], f16), T([480, 20, 1, 1], f16), [480], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([32, 20, 1, 1], f16), T([32, 480, 1, 1], f16), T([20, 480, 1, 1], f16), [20], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16), T([480, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 3, ((T([32, 480, 14, 14], f16), T([32, 80, 14, 14], f16), T([480, 80, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 80, 14, 14], f16), T([32, 480, 14, 14], f16), T([80, 480, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16), T([480, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 480, [True, True, False]), {}) +cnt: 1, ((T([32, 80, 14, 14], f16), T([32, 240, 14, 14], f16), T([80, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 240, 1, 1], f16), T([32, 10, 1, 1], f16), T([240, 10, 1, 1], f16), [240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 10, 1, 1], f16), T([32, 240, 1, 1], f16), T([10, 240, 1, 1], f16), [10], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 
1, ((T([32, 240, 14, 14], f16), T([32, 240, 28, 28], f16), T([240, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 2, ((T([32, 240, 28, 28], f16), T([32, 40, 28, 28], f16), T([240, 40, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 40, 28, 28], f16), T([32, 240, 28, 28], f16), T([40, 240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([32, 240, 28, 28], f16), T([240, 1, 5, 5], f16), [0], [1, 1], [2, 2], [1, 1], False, [0, 0], 240, [True, True, False]), {}) +cnt: 1, ((T([32, 40, 28, 28], f16), T([32, 144, 28, 28], f16), T([40, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 144, 1, 1], f16), T([32, 6, 1, 1], f16), T([144, 6, 1, 1], f16), [144], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([32, 6, 1, 1], f16), T([32, 144, 1, 1], f16), T([6, 144, 1, 1], f16), [6], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 144, 28, 28], f16), T([32, 144, 56, 56], f16), T([144, 1, 5, 5], f16), [0], [2, 2], [2, 2], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 2, ((T([32, 144, 56, 56], f16), T([32, 24, 56, 56], f16), T([144, 24, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 24, 56, 56], f16), T([32, 144, 56, 56], f16), T([24, 144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 144, 56, 56], f16), T([32, 144, 56, 56], f16), T([144, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 144, [True, True, False]), {}) +cnt: 1, ((T([32, 24, 56, 56], f16), T([32, 96, 56, 56], f16), T([24, 96, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 96, 1, 1], f16), T([32, 4, 1, 1], f16), T([96, 4, 
1, 1], f16), [96], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 4, 1, 1], f16), T([32, 96, 1, 1], f16), T([4, 96, 1, 1], f16), [4], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 96, 56, 56], f16), T([32, 96, 112, 112], f16), T([96, 1, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 96, [True, True, False]), {}) +cnt: 1, ((T([32, 96, 112, 112], f16), T([32, 16, 112, 112], f16), T([96, 16, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 32, 112, 112], f16), T([16, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 32, 1, 1], f16), T([32, 8, 1, 1], f16), T([32, 8, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 8, 1, 1], f16), T([32, 32, 1, 1], f16), T([8, 32, 1, 1], f16), [8], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16), T([32, 1, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 32, [True, True, False]), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 1280, 7, 7], f16, stride=(1280, 1, 0, 0)), 49), {}) +cnt: 4, ((T([32, 1152, 7, 7], f16, stride=(1152, 1, 0, 0)), 49), {}) +cnt: 1, ((T([32, 672, 7, 7], f16, stride=(672, 1, 0, 0)), 49), {}) +cnt: 2, ((T([32, 672, 14, 14], f16, stride=(672, 1, 0, 0)), 196), {}) +cnt: 3, ((T([32, 480, 14, 14], f16, stride=(480, 1, 0, 0)), 196), {}) +cnt: 1, ((T([32, 240, 14, 14], f16, stride=(240, 1, 0, 0)), 196), {}) +cnt: 1, ((T([32, 240, 28, 28], f16, stride=(240, 1, 0, 0)), 784), {}) +cnt: 1, ((T([32, 
144, 28, 28], f16, stride=(144, 1, 0, 0)), 784), {}) +cnt: 1, ((T([32, 144, 56, 56], f16, stride=(144, 1, 0, 0)), 3136), {}) +cnt: 1, ((T([32, 96, 56, 56], f16, stride=(96, 1, 0, 0)), 3136), {}) +cnt: 1, ((T([32, 32, 112, 112], f16, stride=(32, 1, 0, 0)), 12544), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 32000), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 32, 112, 112], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 96, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 144, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 144, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([32, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 2, ((T([32, 672, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 4, ((T([32, 1152, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 1280, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), T([1000, 1280], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(0, 0)), T([32, 1280], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([32, 32, 112, 112], f16), T([32, 32, 1, 1], f16)), {}) +cnt: 2, ((T([32, 96, 56, 56], f16), T([32, 96, 1, 1], f16)), {}) +cnt: 2, ((T([32, 144, 56, 56], f16), T([32, 144, 1, 1], f16)), {}) +cnt: 2, ((T([32, 144, 28, 28], f16), T([32, 144, 1, 1], f16)), {}) +cnt: 2, ((T([32, 240, 28, 28], f16), T([32, 240, 1, 1], f16)), {}) +cnt: 2, ((T([32, 240, 14, 14], f16), T([32, 240, 1, 1], f16)), {}) +cnt: 6, ((T([32, 480, 14, 14], f16), T([32, 480, 1, 1], f16)), {}) +cnt: 4, ((T([32, 672, 14, 14], f16), T([32, 672, 1, 1], f16)), {}) +cnt: 2, ((T([32, 672, 7, 7], f16), T([32, 672, 1, 1], f16)), {}) +cnt: 8, ((T([32, 1152, 7, 7], f16), T([32, 1152, 1, 1], f16)), {}) +cnt: 4, ((T([32, 1152, 7, 7], f16), T([32, 1152, 7, 7], f16)), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 7, 7], f16)), {}) +cnt: 2, 
((T([32, 672, 14, 14], f16), T([32, 672, 14, 14], f16)), {}) +cnt: 3, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16)), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([32, 240, 14, 14], f16)), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), T([32, 240, 28, 28], f16)), {}) +cnt: 1, ((T([32, 144, 28, 28], f16), T([32, 144, 28, 28], f16)), {}) +cnt: 1, ((T([32, 144, 56, 56], f16), T([32, 144, 56, 56], f16)), {}) +cnt: 1, ((T([32, 96, 56, 56], f16), T([32, 96, 56, 56], f16)), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([32, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f16), False, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f16), False, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f16), False, 0.1, 1e-05), {}) +cnt: 6, ((T([32, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], 
f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([32, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f16), False, 0.1, 1e-05), {}) +cnt: 4, ((T([32, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), False, 0.1, 1e-05), {}) +cnt: 8, ((T([32, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([32, 1280, 7, 7], f16), T([32, 1280, 7, 7], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 320, 7, 7], f16), T([32, 320, 7, 7], f16), T([320], f16), T([320], f16), T([320], f16), T([320], f32), T([320], f32), False, 1e-05, [True, True, True]), {}) +cnt: 8, ((T([32, 1152, 7, 7], f16), T([32, 1152, 7, 7], f16), T([1152], f16), T([1152], f16), T([1152], f16), T([1152], f32), T([1152], f32), False, 1e-05, [True, True, True]), {}) +cnt: 4, ((T([32, 192, 7, 7], f16), T([32, 192, 7, 7], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 7, 7], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([32, 672, 14, 14], f16), T([32, 672, 14, 14], f16), T([672], f16), T([672], f16), T([672], f16), T([672], f32), T([672], f32), False, 1e-05, [True, True, True]), 
{}) +cnt: 3, ((T([32, 112, 14, 14], f16), T([32, 112, 14, 14], f16), T([112], f16), T([112], f16), T([112], f16), T([112], f32), T([112], f32), False, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16), T([480], f16), T([480], f16), T([480], f16), T([480], f32), T([480], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 80, 14, 14], f16), T([32, 80, 14, 14], f16), T([80], f16), T([80], f16), T([80], f16), T([80], f32), T([80], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([32, 240, 14, 14], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 240, 28, 28], f16), T([32, 240, 28, 28], f16), T([240], f16), T([240], f16), T([240], f16), T([240], f32), T([240], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 40, 28, 28], f16), T([32, 40, 28, 28], f16), T([40], f16), T([40], f16), T([40], f16), T([40], f32), T([40], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 144, 28, 28], f16), T([32, 144, 28, 28], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 144, 56, 56], f16), T([32, 144, 56, 56], f16), T([144], f16), T([144], f16), T([144], f16), T([144], f32), T([144], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 24, 56, 56], f16), T([32, 24, 56, 56], f16), T([24], f16), T([24], f16), T([24], f16), T([24], f32), T([24], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 96, 56, 56], f16), T([32, 96, 56, 56], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 96, 112, 112], f16), T([32, 96, 112, 112], f16), T([96], f16), T([96], f16), T([96], f16), T([96], f32), T([96], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 16, 112, 112], f16), T([32, 16, 112, 
112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([32, 32, 1, 1], f16),), {}) +cnt: 1, ((T([32, 96, 1, 1], f16),), {}) +cnt: 2, ((T([32, 144, 1, 1], f16),), {}) +cnt: 2, ((T([32, 240, 1, 1], f16),), {}) +cnt: 3, ((T([32, 480, 1, 1], f16),), {}) +cnt: 3, ((T([32, 672, 1, 1], f16),), {}) +cnt: 4, ((T([32, 1152, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 4, ((T([32, 1152, 1, 1], f16), T([32, 1152, 1, 1], f16)), {}) +cnt: 3, ((T([32, 672, 1, 1], f16), T([32, 672, 1, 1], f16)), {}) +cnt: 3, ((T([32, 480, 1, 1], f16), T([32, 480, 1, 1], f16)), {}) +cnt: 2, ((T([32, 240, 1, 1], f16), T([32, 240, 1, 1], f16)), {}) +cnt: 2, ((T([32, 144, 1, 1], f16), T([32, 144, 1, 1], f16)), {}) +cnt: 1, ((T([32, 96, 1, 1], f16), T([32, 96, 1, 1], f16)), {}) +cnt: 1, ((T([32, 32, 1, 1], f16), T([32, 32, 1, 1], f16)), {}) +Operator: aten.silu_.default +cnt: 2, ((T([32, 32, 112, 112], f16),), {}) +cnt: 1, ((T([32, 8, 1, 1], f16),), {}) +cnt: 1, ((T([32, 96, 112, 112], f16),), {}) +cnt: 1, ((T([32, 96, 56, 56], f16),), {}) +cnt: 1, ((T([32, 4, 1, 1], f16),), {}) +cnt: 3, ((T([32, 144, 56, 56], f16),), {}) +cnt: 2, ((T([32, 6, 1, 1], f16),), {}) +cnt: 1, ((T([32, 144, 28, 28], f16),), {}) +cnt: 3, ((T([32, 240, 28, 28], f16),), {}) +cnt: 2, ((T([32, 10, 1, 1], f16),), {}) +cnt: 1, ((T([32, 240, 14, 14], f16),), {}) +cnt: 6, ((T([32, 480, 14, 14], f16),), {}) +cnt: 3, ((T([32, 20, 1, 1], f16),), {}) +cnt: 5, ((T([32, 672, 14, 14], f16),), {}) +cnt: 3, ((T([32, 28, 1, 1], f16),), {}) +cnt: 1, ((T([32, 672, 7, 7], f16),), {}) +cnt: 8, ((T([32, 1152, 7, 7], f16),), {}) +cnt: 4, ((T([32, 48, 1, 1], f16),), {}) +cnt: 1, ((T([32, 1280, 7, 7], f16),), {}) +Operator: aten.silu_backward.default 
+cnt: 1, ((T([32, 1280, 7, 7], f16), T([32, 1280, 7, 7], f16)), {}) +cnt: 4, ((T([32, 48, 1, 1], f16), T([32, 48, 1, 1], f16)), {}) +cnt: 8, ((T([32, 1152, 7, 7], f16), T([32, 1152, 7, 7], f16)), {}) +cnt: 3, ((T([32, 28, 1, 1], f16), T([32, 28, 1, 1], f16)), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), T([32, 672, 7, 7], f16)), {}) +cnt: 5, ((T([32, 672, 14, 14], f16), T([32, 672, 14, 14], f16)), {}) +cnt: 3, ((T([32, 20, 1, 1], f16), T([32, 20, 1, 1], f16)), {}) +cnt: 6, ((T([32, 480, 14, 14], f16), T([32, 480, 14, 14], f16)), {}) +cnt: 2, ((T([32, 10, 1, 1], f16), T([32, 10, 1, 1], f16)), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), T([32, 240, 14, 14], f16)), {}) +cnt: 3, ((T([32, 240, 28, 28], f16), T([32, 240, 28, 28], f16)), {}) +cnt: 2, ((T([32, 6, 1, 1], f16), T([32, 6, 1, 1], f16)), {}) +cnt: 1, ((T([32, 144, 28, 28], f16), T([32, 144, 28, 28], f16)), {}) +cnt: 3, ((T([32, 144, 56, 56], f16), T([32, 144, 56, 56], f16)), {}) +cnt: 1, ((T([32, 4, 1, 1], f16), T([32, 4, 1, 1], f16)), {}) +cnt: 1, ((T([32, 96, 56, 56], f16), T([32, 96, 56, 56], f16)), {}) +cnt: 1, ((T([32, 96, 112, 112], f16), T([32, 96, 112, 112], f16)), {}) +cnt: 1, ((T([32, 8, 1, 1], f16), T([32, 8, 1, 1], f16)), {}) +cnt: 2, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), [0], True), {}) +cnt: 4, ((T([32, 1152, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 672, 7, 7], f16), [2, 3], True), {}) +cnt: 2, ((T([32, 672, 14, 14], f16), [2, 3], True), {}) +cnt: 3, ((T([32, 480, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 240, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 240, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 144, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 144, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 96, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), [2, 3], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([32, 1000], f16),), {}) 
diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_nfnet_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_nfnet_training.txt new file mode 100644 index 000000000..c94aacd7f --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_nfnet_training.txt @@ -0,0 +1,289 @@ +Operator: aten.add.Tensor +cnt: 3, ((T([128, 256, 48, 48], f16), T([128, 256, 48, 48], f16)), {}) +cnt: 6, ((T([128, 512, 24, 24], f16), T([128, 512, 24, 24], f16)), {}) +cnt: 18, ((T([128, 1536, 12, 12], f16), T([128, 1536, 12, 12], f16)), {}) +cnt: 8, ((T([128, 1536, 6, 6], f16), T([128, 1536, 6, 6], f16)), {}) +cnt: 1, ((T([128, 128, 48, 48], f16), T([128, 128, 48, 48], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([128, 3072], f16), T([3072, 1000], f16, stride=(1, 3072))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([128, 256, 48, 48], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([128, 512, 24, 24], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([128, 1536, 12, 12], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([128, 1536, 6, 6], f16), T([128, 1536, 12, 12], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([128, 512, 12, 12], f16), T([128, 512, 24, 24], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([128, 256, 24, 24], f16), T([128, 256, 48, 48], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +Operator: aten.clone.default +cnt: 1, ((T([128, 3, 192, 192], f16),), {}) +cnt: 1, ((T([128, 256, 48, 48], f16),), {}) +cnt: 2, ((T([128, 512, 24, 24], f16),), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16),), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16),), {}) +Operator: aten.constant_pad_nd.default +cnt: 1, ((T([128, 3, 192, 192], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 64, 96, 96], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 256, 48, 
48], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 768, 24, 24], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 768, 12, 12], f16), [0, 1, 0, 1], 0.0), {}) +cnt: 1, ((T([128, 768, 13, 13], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([128, 768, 25, 25], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([128, 256, 49, 49], f16), [0, -1, 0, -1]), {}) +cnt: 1, ((T([128, 64, 97, 97], f16), [0, -1, 0, -1]), {}) +Operator: aten.convolution.default +cnt: 1, ((T([128, 3, 193, 193], f16), T([16, 3, 3, 3], f16), T([16], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), T([32, 16, 3, 3], f16), T([32], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([64, 32, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 64, 97, 97], f16), T([128, 64, 3, 3], f16), T([128], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 48, 48], f16), T([256, 128, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 48, 48], f16), T([128, 128, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 128, 48, 48], f16), T([128, 128, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 128, 1, 1], f16), T([256, 128, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([128, 256, 24, 24], f16), T([512, 256, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), T([256, 256, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 256, 49, 49], f16), T([256, 128, 3, 3], f16), T([256], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 2), {}) +cnt: 3, ((T([128, 256, 24, 24], f16), T([256, 128, 3, 
3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 2, ((T([128, 512, 1, 1], f16), T([256, 512, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 256, 1, 1], f16), T([512, 256, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 24, 24], f16), T([256, 512, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 12, 12], f16), T([1536, 512, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 512, 24, 24], f16), T([768, 512, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 768, 25, 25], f16), T([768, 128, 3, 3], f16), T([768], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 6), {}) +cnt: 11, ((T([128, 768, 12, 12], f16), T([768, 128, 3, 3], f16), T([768], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 6), {}) +cnt: 6, ((T([128, 768, 12, 12], f16), T([1536, 768, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([128, 1536, 1, 1], f16), T([768, 1536, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([128, 768, 1, 1], f16), T([1536, 768, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), T([768, 1536, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1536, 6, 6], f16), T([1536, 1536, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 768, 13, 13], f16), T([768, 128, 3, 3], f16), T([768], f16), [2, 2], [0, 0], [1, 1], False, [0, 0], 6), {}) +cnt: 5, ((T([128, 768, 6, 6], f16), T([768, 128, 3, 3], f16), T([768], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 6), {}) +cnt: 3, ((T([128, 768, 6, 6], f16), T([1536, 768, 1, 1], f16), T([1536], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([128, 
1536, 6, 6], f16), T([768, 1536, 1, 1], f16), T([768], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([128, 1536, 6, 6], f16), T([3072, 1536, 1, 1], f16), T([3072], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([128, 3072, 6, 6], f16), T([128, 1536, 6, 6], f16), T([3072, 1536, 1, 1], f16), [3072], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 9, ((T([128, 1536, 1, 1], f16), T([128, 768, 1, 1], f16), T([1536, 768, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 9, ((T([128, 768, 1, 1], f16), T([128, 1536, 1, 1], f16), T([768, 1536, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16), T([128, 768, 6, 6], f16), T([1536, 768, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 5, ((T([128, 768, 6, 6], f16), T([128, 768, 6, 6], f16), T([768, 128, 3, 3], f16), [768], [1, 1], [1, 1], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 2, ((T([128, 768, 6, 6], f16), T([128, 1536, 6, 6], f16), T([768, 1536, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 6, 6], f16), T([128, 768, 13, 13], f16), T([768, 128, 3, 3], f16), [768], [2, 2], [0, 0], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 6, ((T([128, 768, 12, 12], f16), T([128, 1536, 12, 12], f16), T([768, 1536, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1536, 6, 6], f16), T([128, 1536, 6, 6], f16), T([1536, 1536, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), T([128, 768, 12, 12], f16), T([1536, 768, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 11, ((T([128, 768, 12, 12], f16), T([128, 768, 
12, 12], f16), T([768, 128, 3, 3], f16), [768], [1, 1], [1, 1], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 12, 12], f16), T([128, 768, 25, 25], f16), T([768, 128, 3, 3], f16), [768], [2, 2], [0, 0], [1, 1], False, [0, 0], 6, [True, True, True]), {}) +cnt: 1, ((T([128, 768, 24, 24], f16), T([128, 512, 24, 24], f16), T([768, 512, 1, 1], f16), [768], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 1536, 12, 12], f16), T([128, 512, 12, 12], f16), T([1536, 512, 1, 1], f16), [1536], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 512, 1, 1], f16), T([128, 256, 1, 1], f16), T([512, 256, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 256, 1, 1], f16), T([128, 512, 1, 1], f16), T([256, 512, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 512, 24, 24], f16), T([128, 256, 24, 24], f16), T([512, 256, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([128, 256, 24, 24], f16), T([128, 256, 24, 24], f16), T([256, 128, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 24, 24], f16), T([128, 512, 24, 24], f16), T([256, 512, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 24, 24], f16), T([128, 256, 49, 49], f16), T([256, 128, 3, 3], f16), [256], [2, 2], [0, 0], [1, 1], False, [0, 0], 2, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), T([128, 256, 48, 48], f16), T([256, 256, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 128, 1, 1], f16), T([256, 128, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 1, 1], f16), T([128, 256, 1, 1], 
f16), T([128, 256, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), T([128, 128, 48, 48], f16), T([256, 128, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([128, 128, 48, 48], f16), T([128, 128, 48, 48], f16), T([128, 128, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 48, 48], f16), T([128, 128, 48, 48], f16), T([128, 128, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 128, 48, 48], f16), T([128, 64, 97, 97], f16), T([128, 64, 3, 3], f16), [128], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 64, 96, 96], f16), T([128, 32, 96, 96], f16), T([64, 32, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([128, 16, 96, 96], f16), T([32, 16, 3, 3], f16), [32], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), T([128, 3, 193, 193], f16), T([16, 3, 3, 3], f16), [16], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([128, 3, 192, 192], f16), T([128, 3, 192, 192], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([128, 3072, 6, 6], f16, stride=(3072, 1, 0, 0)), 36), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16, stride=(1536, 1, 0, 0)), 36), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16, stride=(1536, 1, 0, 0)), 144), {}) +cnt: 2, ((T([128, 512, 24, 24], f16, stride=(512, 1, 0, 0)), 576), {}) +cnt: 1, ((T([128, 256, 48, 48], f16, stride=(256, 1, 0, 0)), 2304), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 128000), {}) +Operator: aten.gelu.default +cnt: 1, ((T([128, 16, 96, 96], f16),), {}) +cnt: 1, ((T([128, 32, 96, 96], f16),), {}) +cnt: 1, ((T([128, 64, 96, 96], f16),), {}) +cnt: 4, ((T([128, 128, 
48, 48], f16),), {}) +cnt: 2, ((T([128, 256, 48, 48], f16),), {}) +cnt: 5, ((T([128, 256, 24, 24], f16),), {}) +cnt: 2, ((T([128, 512, 24, 24], f16),), {}) +cnt: 1, ((T([128, 768, 24, 24], f16),), {}) +cnt: 18, ((T([128, 768, 12, 12], f16),), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16),), {}) +cnt: 8, ((T([128, 768, 6, 6], f16),), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16),), {}) +cnt: 1, ((T([128, 3072, 6, 6], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 1, ((T([128, 3072, 6, 6], f16), T([128, 3072, 6, 6], f16)), {}) +cnt: 8, ((T([128, 768, 6, 6], f16), T([128, 768, 6, 6], f16)), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), T([128, 1536, 6, 6], f16)), {}) +cnt: 18, ((T([128, 768, 12, 12], f16), T([128, 768, 12, 12], f16)), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), T([128, 1536, 12, 12], f16)), {}) +cnt: 1, ((T([128, 768, 24, 24], f16), T([128, 768, 24, 24], f16)), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), T([128, 512, 24, 24], f16)), {}) +cnt: 5, ((T([128, 256, 24, 24], f16), T([128, 256, 24, 24], f16)), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), T([128, 256, 48, 48], f16)), {}) +cnt: 4, ((T([128, 128, 48, 48], f16), T([128, 128, 48, 48], f16)), {}) +cnt: 1, ((T([128, 64, 96, 96], f16), T([128, 64, 96, 96], f16)), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), T([128, 32, 96, 96], f16)), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), T([128, 16, 96, 96], f16)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([128, 256, 48, 48], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), [2, 3], True), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), [2, 3], True), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 3072, 6, 6], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([128, 1000], f16, stride=(0, 0)), T([1000, 3072], f16)), {}) +cnt: 1, ((T([1000, 128], f16, stride=(0, 0)), T([128, 3072], f16)), {}) +Operator: aten.mul.Tensor +cnt: 2, ((T([16, 1, 1, 1], f16), 0.19245008972987526), {}) +cnt: 2, ((T([32, 1, 1, 1], f16), 
0.08333333333333333), {}) +cnt: 2, ((T([64, 1, 1, 1], f16), 0.05892556509887896), {}) +cnt: 2, ((T([128, 1, 1, 1], f16), 0.041666666666666664), {}) +cnt: 2, ((T([128, 128, 48, 48], f16), 1.0), {}) +cnt: 4, ((T([256, 1, 1, 1], f16), 0.08838834764831845), {}) +cnt: 2, ((T([128, 1, 1, 1], f16), 0.08838834764831845), {}) +cnt: 4, ((T([128, 1, 1, 1], f16), 0.02946278254943948), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), T([128, 256, 1, 1], f16)), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), 2.0), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), 0.2), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), 0.9805806756909201), {}) +cnt: 6, ((T([512, 1, 1, 1], f16), 0.0625), {}) +cnt: 2, ((T([256, 1, 1, 1], f16), 0.0625), {}) +cnt: 8, ((T([256, 1, 1, 1], f16), 0.02946278254943948), {}) +cnt: 4, ((T([128, 512, 24, 24], f16), T([128, 512, 1, 1], f16)), {}) +cnt: 4, ((T([128, 512, 24, 24], f16), 2.0), {}) +cnt: 4, ((T([128, 512, 24, 24], f16), 0.2), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), 0.9805806756909201), {}) +cnt: 2, ((T([256, 1, 1, 1], f16), 0.04419417382415922), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), 0.9622504486493761), {}) +cnt: 2, ((T([1536, 1, 1, 1], f16), 0.04419417382415922), {}) +cnt: 2, ((T([768, 1, 1, 1], f16), 0.04419417382415922), {}) +cnt: 36, ((T([768, 1, 1, 1], f16), 0.02946278254943948), {}) +cnt: 18, ((T([1536, 1, 1, 1], f16), 0.03608439182435161), {}) +cnt: 12, ((T([128, 1536, 12, 12], f16), T([128, 1536, 1, 1], f16)), {}) +cnt: 12, ((T([128, 1536, 12, 12], f16), 2.0), {}) +cnt: 12, ((T([128, 1536, 12, 12], f16), 0.2), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.9805806756909201), {}) +cnt: 16, ((T([768, 1, 1, 1], f16), 0.02551551815399144), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.9622504486493761), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.9449111825230679), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.9284766908852592), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.9128709291752768), {}) +cnt: 2, ((T([128, 1536, 12, 12], f16), 0.8980265101338745), 
{}) +cnt: 2, ((T([1536, 1, 1, 1], f16), 0.02551551815399144), {}) +cnt: 6, ((T([128, 1536, 6, 6], f16), T([128, 1536, 1, 1], f16)), {}) +cnt: 6, ((T([128, 1536, 6, 6], f16), 2.0), {}) +cnt: 6, ((T([128, 1536, 6, 6], f16), 0.2), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), 0.9805806756909201), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), 0.9622504486493761), {}) +cnt: 2, ((T([3072, 1, 1, 1], f16), 0.02551551815399144), {}) +cnt: 1, ((T([128, 3072, 6, 6], f16), 1.7015043497085571), {}) +cnt: 6, ((T([128, 1536, 6, 6], f16), T([128, 1536, 6, 6], f16)), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16), T([], f16)), {}) +cnt: 8, ((T([128, 768, 6, 6], f16), 1.7015043497085571), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), 1.7015043497085571), {}) +cnt: 18, ((T([128, 768, 12, 12], f16), 1.7015043497085571), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), 1.7015043497085571), {}) +cnt: 12, ((T([128, 1536, 12, 12], f16), T([128, 1536, 12, 12], f16)), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), T([], f16)), {}) +cnt: 1, ((T([128, 768, 24, 24], f16), 1.7015043497085571), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), 1.7015043497085571), {}) +cnt: 4, ((T([128, 512, 24, 24], f16), T([128, 512, 24, 24], f16)), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), T([], f16)), {}) +cnt: 5, ((T([128, 256, 24, 24], f16), 1.7015043497085571), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), 1.7015043497085571), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), T([128, 256, 48, 48], f16)), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), T([], f16)), {}) +cnt: 4, ((T([128, 128, 48, 48], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 64, 96, 96], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 16, 96, 96], f16), 1.7015043497085571), {}) +Operator: aten.mul_.Tensor +cnt: 1, ((T([128, 16, 96, 96], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 32, 96, 96], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 64, 96, 96], f16), 1.7015043497085571), {}) +cnt: 4, ((T([128, 
128, 48, 48], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), T([], f16)), {}) +cnt: 2, ((T([128, 256, 48, 48], f16), 1.7015043497085571), {}) +cnt: 5, ((T([128, 256, 24, 24], f16), 1.7015043497085571), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), T([], f16)), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 768, 24, 24], f16), 1.7015043497085571), {}) +cnt: 18, ((T([128, 768, 12, 12], f16), 1.7015043497085571), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), T([], f16)), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), 1.7015043497085571), {}) +cnt: 8, ((T([128, 768, 6, 6], f16), 1.7015043497085571), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16), T([], f16)), {}) +cnt: 2, ((T([128, 1536, 6, 6], f16), 1.7015043497085571), {}) +cnt: 1, ((T([128, 3072, 6, 6], f16), 1.7015043497085571), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([1, 16, 27], f16), T([16], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 32, 144], f16), T([32], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 64, 288], f16), T([64], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 128, 576], f16), T([128], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 2, ((T([1, 256, 128], f16), T([256], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 128, 128], f16), T([128], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 2, ((T([1, 128, 1152], f16), T([128], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 3, ((T([1, 512, 256], f16), T([512], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 256, 256], f16), T([256], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 4, ((T([1, 256, 1152], f16), T([256], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 256, 512], f16), T([256], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 1536, 512], f16), T([1536], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 768, 512], f16), 
T([768], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 18, ((T([1, 768, 1152], f16), T([768], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 9, ((T([1, 1536, 768], f16), T([1536], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 8, ((T([1, 768, 1536], f16), T([768], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 1536, 1536], f16), T([1536], f16), None, None, None, True, 0.0, 1e-05), {}) +cnt: 1, ((T([1, 3072, 1536], f16), T([3072], f16), None, None, None, True, 0.0, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 1, ((T([1, 3072, 1536], f16), T([1, 3072, 1536], f16), T([3072], f16), None, None, T([3072], f32), T([3072], f32), True, 1e-05, [True, True, False]), {}) +cnt: 9, ((T([1, 1536, 768], f16), T([1, 1536, 768], f16), T([1536], f16), None, None, T([1536], f32), T([1536], f32), True, 1e-05, [True, True, False]), {}) +cnt: 18, ((T([1, 768, 1152], f16), T([1, 768, 1152], f16), T([768], f16), None, None, T([768], f32), T([768], f32), True, 1e-05, [True, True, False]), {}) +cnt: 8, ((T([1, 768, 1536], f16), T([1, 768, 1536], f16), T([768], f16), None, None, T([768], f32), T([768], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 1536, 1536], f16), T([1, 1536, 1536], f16), T([1536], f16), None, None, T([1536], f32), T([1536], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 768, 512], f16), T([1, 768, 512], f16), T([768], f16), None, None, T([768], f32), T([768], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 1536, 512], f16), T([1, 1536, 512], f16), T([1536], f16), None, None, T([1536], f32), T([1536], f32), True, 1e-05, [True, True, False]), {}) +cnt: 3, ((T([1, 512, 256], f16), T([1, 512, 256], f16), T([512], f16), None, None, T([512], f32), T([512], f32), True, 1e-05, [True, True, False]), {}) +cnt: 4, ((T([1, 256, 1152], f16), T([1, 256, 1152], f16), T([256], f16), None, None, T([256], f32), T([256], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 256, 
512], f16), T([1, 256, 512], f16), T([256], f16), None, None, T([256], f32), T([256], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 256, 256], f16), T([1, 256, 256], f16), T([256], f16), None, None, T([256], f32), T([256], f32), True, 1e-05, [True, True, False]), {}) +cnt: 2, ((T([1, 256, 128], f16), T([1, 256, 128], f16), T([256], f16), None, None, T([256], f32), T([256], f32), True, 1e-05, [True, True, False]), {}) +cnt: 2, ((T([1, 128, 1152], f16), T([1, 128, 1152], f16), T([128], f16), None, None, T([128], f32), T([128], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 128, 128], f16), T([1, 128, 128], f16), T([128], f16), None, None, T([128], f32), T([128], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 128, 576], f16), T([1, 128, 576], f16), T([128], f16), None, None, T([128], f32), T([128], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 64, 288], f16), T([1, 64, 288], f16), T([64], f16), None, None, T([64], f32), T([64], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 32, 144], f16), T([1, 32, 144], f16), T([32], f16), None, None, T([32], f32), T([32], f32), True, 1e-05, [True, True, False]), {}) +cnt: 1, ((T([1, 16, 27], f16), T([1, 16, 27], f16), T([16], f16), None, None, T([16], f32), T([16], f32), True, 1e-05, [True, True, False]), {}) +Operator: aten.relu_.default +cnt: 1, ((T([128, 128, 1, 1], f16),), {}) +cnt: 2, ((T([128, 256, 1, 1], f16),), {}) +cnt: 9, ((T([128, 768, 1, 1], f16),), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([128, 256, 1, 1], f16),), {}) +cnt: 2, ((T([128, 512, 1, 1], f16),), {}) +cnt: 9, ((T([128, 1536, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 9, ((T([128, 1536, 1, 1], f16), T([128, 1536, 1, 1], f16)), {}) +cnt: 2, ((T([128, 512, 1, 1], f16), T([128, 512, 1, 1], f16)), {}) +cnt: 1, ((T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([128, 1000], f16, stride=(0, 0)), [0], True), {}) 
+cnt: 3, ((T([128, 1536, 6, 6], f16), [2, 3], True), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16), [2, 3], True), {}) +cnt: 2, ((T([128, 512, 24, 24], f16), [2, 3], True), {}) +cnt: 1, ((T([128, 256, 48, 48], f16), [2, 3], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([128, 1000], f16),), {}) +cnt: 3, ((T([128, 1536, 6, 6], f16),), {}) +cnt: 6, ((T([128, 1536, 12, 12], f16),), {}) +cnt: 2, ((T([128, 512, 24, 24], f16),), {}) +cnt: 1, ((T([128, 256, 48, 48], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 9, ((T([128, 768, 1, 1], f16), T([128, 768, 1, 1], f16), 0), {}) +cnt: 2, ((T([128, 256, 1, 1], f16), T([128, 256, 1, 1], f16), 0), {}) +cnt: 1, ((T([128, 128, 1, 1], f16), T([128, 128, 1, 1], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_regnet_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_regnet_training.txt new file mode 100644 index 000000000..e67c9e94a --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_regnet_training.txt @@ -0,0 +1,178 @@ +Operator: aten.add.Tensor +cnt: 6, ((T([32, 224, 56, 56], f16), T([32, 224, 56, 56], f16)), {}) +cnt: 15, ((T([32, 448, 28, 28], f16), T([32, 448, 28, 28], f16)), {}) +cnt: 33, ((T([32, 896, 14, 14], f16), T([32, 896, 14, 14], f16)), {}) +cnt: 2, ((T([32, 2240, 7, 7], f16), T([32, 2240, 7, 7], f16)), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 2240], f16), T([2240, 1000], f16, stride=(1, 2240))), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([224, 32, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 224, 112, 112], f16), T([224, 
112, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 1, ((T([32, 224, 1, 1], f16), T([8, 224, 1, 1], f16), T([8], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 8, 1, 1], f16), T([224, 8, 1, 1], f16), T([224], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([32, 224, 56, 56], f16), T([224, 224, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([224, 32, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 224, 56, 56], f16), T([224, 112, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 1, ((T([32, 224, 1, 1], f16), T([56, 224, 1, 1], f16), T([56], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 56, 1, 1], f16), T([224, 56, 1, 1], f16), T([224], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 224, 56, 56], f16), T([448, 224, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 448, 56, 56], f16), T([448, 112, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 4), {}) +cnt: 1, ((T([32, 448, 1, 1], f16), T([56, 448, 1, 1], f16), T([56], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 56, 1, 1], f16), T([448, 56, 1, 1], f16), T([448], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 9, ((T([32, 448, 28, 28], f16), T([448, 448, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 224, 56, 56], f16), T([448, 224, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 448, 28, 28], f16), T([448, 112, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 4), {}) +cnt: 4, ((T([32, 448, 1, 1], f16), T([112, 448, 1, 1], f16), T([112], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 112, 1, 1], f16), T([448, 112, 1, 1], f16), T([448], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, 
((T([32, 448, 28, 28], f16), T([896, 448, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 896, 28, 28], f16), T([896, 112, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 1, ((T([32, 896, 1, 1], f16), T([112, 896, 1, 1], f16), T([112], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 112, 1, 1], f16), T([896, 112, 1, 1], f16), T([896], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 21, ((T([32, 896, 14, 14], f16), T([896, 896, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 448, 28, 28], f16), T([896, 448, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 10, ((T([32, 896, 14, 14], f16), T([896, 112, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 8), {}) +cnt: 10, ((T([32, 896, 1, 1], f16), T([224, 896, 1, 1], f16), T([224], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 10, ((T([32, 224, 1, 1], f16), T([896, 224, 1, 1], f16), T([896], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 896, 14, 14], f16), T([2240, 896, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16), T([2240, 112, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 20), {}) +cnt: 1, ((T([32, 2240, 1, 1], f16), T([224, 2240, 1, 1], f16), T([224], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 224, 1, 1], f16), T([2240, 224, 1, 1], f16), T([2240], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2240, 7, 7], f16), T([2240, 2240, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 896, 14, 14], f16), T([2240, 896, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([32, 2240, 7, 7], f16), T([32, 896, 14, 14], f16), T([2240, 896, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, 
False]), {}) +cnt: 1, ((T([32, 2240, 7, 7], f16), T([32, 2240, 7, 7], f16), T([2240, 2240, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2240, 1, 1], f16), T([32, 224, 1, 1], f16), T([2240, 224, 1, 1], f16), [2240], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 224, 1, 1], f16), T([32, 2240, 1, 1], f16), T([224, 2240, 1, 1], f16), [224], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 2240, 7, 7], f16), T([32, 2240, 14, 14], f16), T([2240, 112, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 20, [True, True, False]), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16), T([32, 896, 14, 14], f16), T([2240, 896, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 21, ((T([32, 896, 14, 14], f16), T([32, 896, 14, 14], f16), T([896, 896, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 10, ((T([32, 896, 1, 1], f16), T([32, 224, 1, 1], f16), T([896, 224, 1, 1], f16), [896], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 10, ((T([32, 224, 1, 1], f16), T([32, 896, 1, 1], f16), T([224, 896, 1, 1], f16), [224], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 10, ((T([32, 896, 14, 14], f16), T([32, 896, 14, 14], f16), T([896, 112, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 1, ((T([32, 896, 14, 14], f16), T([32, 448, 28, 28], f16), T([896, 448, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 896, 1, 1], f16), T([32, 112, 1, 1], f16), T([896, 112, 1, 1], f16), [896], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 112, 1, 1], f16), T([32, 896, 1, 1], f16), T([112, 896, 1, 1], f16), [112], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 896, 
14, 14], f16), T([32, 896, 28, 28], f16), T([896, 112, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 8, [True, True, False]), {}) +cnt: 1, ((T([32, 896, 28, 28], f16), T([32, 448, 28, 28], f16), T([896, 448, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 9, ((T([32, 448, 28, 28], f16), T([32, 448, 28, 28], f16), T([448, 448, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([32, 448, 1, 1], f16), T([32, 112, 1, 1], f16), T([448, 112, 1, 1], f16), [448], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([32, 112, 1, 1], f16), T([32, 448, 1, 1], f16), T([112, 448, 1, 1], f16), [112], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 4, ((T([32, 448, 28, 28], f16), T([32, 448, 28, 28], f16), T([448, 112, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 4, [True, True, False]), {}) +cnt: 1, ((T([32, 448, 28, 28], f16), T([32, 224, 56, 56], f16), T([448, 224, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 448, 1, 1], f16), T([32, 56, 1, 1], f16), T([448, 56, 1, 1], f16), [448], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 56, 1, 1], f16), T([32, 448, 1, 1], f16), T([56, 448, 1, 1], f16), [56], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 448, 28, 28], f16), T([32, 448, 56, 56], f16), T([448, 112, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 4, [True, True, False]), {}) +cnt: 1, ((T([32, 448, 56, 56], f16), T([32, 224, 56, 56], f16), T([448, 224, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([32, 224, 56, 56], f16), T([32, 224, 56, 56], f16), T([224, 224, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 224, 1, 1], f16), T([32, 56, 1, 1], f16), T([224, 56, 
1, 1], f16), [224], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 56, 1, 1], f16), T([32, 224, 1, 1], f16), T([56, 224, 1, 1], f16), [56], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 224, 56, 56], f16), T([32, 224, 56, 56], f16), T([224, 112, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 1, ((T([32, 224, 56, 56], f16), T([32, 32, 112, 112], f16), T([224, 32, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 224, 1, 1], f16), T([32, 8, 1, 1], f16), T([224, 8, 1, 1], f16), [224], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 8, 1, 1], f16), T([32, 224, 1, 1], f16), T([8, 224, 1, 1], f16), [8], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 224, 56, 56], f16), T([32, 224, 112, 112], f16), T([224, 112, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 1, ((T([32, 224, 112, 112], f16), T([32, 32, 112, 112], f16), T([224, 32, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 2, ((T([32, 2240, 7, 7], f16, stride=(2240, 1, 0, 0)), 49), {}) +cnt: 11, ((T([32, 896, 14, 14], f16, stride=(896, 1, 0, 0)), 196), {}) +cnt: 5, ((T([32, 448, 28, 28], f16, stride=(448, 1, 0, 0)), 784), {}) +cnt: 2, ((T([32, 224, 56, 56], f16, stride=(224, 1, 0, 0)), 3136), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 32000), {}) +Operator: aten.mean.dim +cnt: 2, ((T([32, 224, 56, 56], f16), [2, 3], True), {}) +cnt: 5, ((T([32, 448, 28, 28], f16), [2, 3], True), {}) +cnt: 
11, ((T([32, 896, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 2240, 7, 7], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 2240, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), T([1000, 2240], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(0, 0)), T([32, 2240], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([32, 224, 56, 56], f16), T([32, 224, 1, 1], f16)), {}) +cnt: 10, ((T([32, 448, 28, 28], f16), T([32, 448, 1, 1], f16)), {}) +cnt: 22, ((T([32, 896, 14, 14], f16), T([32, 896, 1, 1], f16)), {}) +cnt: 2, ((T([32, 2240, 7, 7], f16), T([32, 2240, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2240, 7, 7], f16), T([32, 2240, 7, 7], f16)), {}) +cnt: 11, ((T([32, 896, 14, 14], f16), T([32, 896, 14, 14], f16)), {}) +cnt: 5, ((T([32, 448, 28, 28], f16), T([32, 448, 28, 28], f16)), {}) +cnt: 2, ((T([32, 224, 56, 56], f16), T([32, 224, 56, 56], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([32, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 224, 112, 112], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), False, 0.1, 1e-05), {}) +cnt: 6, ((T([32, 224, 56, 56], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 448, 56, 56], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), False, 0.1, 1e-05), {}) +cnt: 15, ((T([32, 448, 28, 28], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 896, 28, 28], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f16), False, 0.1, 1e-05), {}) +cnt: 33, ((T([32, 896, 14, 14], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16), T([2240], f16), T([2240], f16), T([2240], f16), T([2240], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 2240, 7, 7], f16), T([2240], f16), 
T([2240], f16), T([2240], f16), T([2240], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 3, ((T([32, 2240, 7, 7], f16), T([32, 2240, 7, 7], f16), T([2240], f16), T([2240], f16), T([2240], f16), T([2240], f32), T([2240], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16), T([32, 2240, 14, 14], f16), T([2240], f16), T([2240], f16), T([2240], f16), T([2240], f32), T([2240], f32), False, 1e-05, [True, True, True]), {}) +cnt: 33, ((T([32, 896, 14, 14], f16), T([32, 896, 14, 14], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f32), T([896], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 896, 28, 28], f16), T([32, 896, 28, 28], f16), T([896], f16), T([896], f16), T([896], f16), T([896], f32), T([896], f32), False, 1e-05, [True, True, True]), {}) +cnt: 15, ((T([32, 448, 28, 28], f16), T([32, 448, 28, 28], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 448, 56, 56], f16), T([32, 448, 56, 56], f16), T([448], f16), T([448], f16), T([448], f16), T([448], f32), T([448], f32), False, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([32, 224, 56, 56], f16), T([32, 224, 56, 56], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 224, 112, 112], f16), T([32, 224, 112, 112], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.relu.default +cnt: 2, ((T([32, 224, 56, 56], f16),), {}) +cnt: 5, ((T([32, 448, 28, 28], f16),), {}) +cnt: 11, ((T([32, 896, 14, 14], f16),), {}) +cnt: 1, ((T([32, 2240, 7, 7], f16),), {}) +Operator: aten.relu_.default +cnt: 1, ((T([32, 
32, 112, 112], f16),), {}) +cnt: 1, ((T([32, 224, 112, 112], f16),), {}) +cnt: 3, ((T([32, 224, 56, 56], f16),), {}) +cnt: 1, ((T([32, 8, 1, 1], f16),), {}) +cnt: 2, ((T([32, 56, 1, 1], f16),), {}) +cnt: 1, ((T([32, 448, 56, 56], f16),), {}) +cnt: 9, ((T([32, 448, 28, 28], f16),), {}) +cnt: 5, ((T([32, 112, 1, 1], f16),), {}) +cnt: 1, ((T([32, 896, 28, 28], f16),), {}) +cnt: 21, ((T([32, 896, 14, 14], f16),), {}) +cnt: 11, ((T([32, 224, 1, 1], f16),), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16),), {}) +cnt: 1, ((T([32, 2240, 7, 7], f16),), {}) +Operator: aten.sigmoid.default +cnt: 2, ((T([32, 224, 1, 1], f16),), {}) +cnt: 5, ((T([32, 448, 1, 1], f16),), {}) +cnt: 11, ((T([32, 896, 1, 1], f16),), {}) +cnt: 1, ((T([32, 2240, 1, 1], f16),), {}) +Operator: aten.sigmoid_backward.default +cnt: 1, ((T([32, 2240, 1, 1], f16), T([32, 2240, 1, 1], f16)), {}) +cnt: 11, ((T([32, 896, 1, 1], f16), T([32, 896, 1, 1], f16)), {}) +cnt: 5, ((T([32, 448, 1, 1], f16), T([32, 448, 1, 1], f16)), {}) +cnt: 2, ((T([32, 224, 1, 1], f16), T([32, 224, 1, 1], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), [0], True), {}) +cnt: 1, ((T([32, 2240, 7, 7], f16), [2, 3], True), {}) +cnt: 11, ((T([32, 896, 14, 14], f16), [2, 3], True), {}) +cnt: 5, ((T([32, 448, 28, 28], f16), [2, 3], True), {}) +cnt: 2, ((T([32, 224, 56, 56], f16), [2, 3], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([32, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([32, 2240, 7, 7], f16), T([32, 2240, 7, 7], f16), 0), {}) +cnt: 11, ((T([32, 224, 1, 1], f16), T([32, 224, 1, 1], f16), 0), {}) +cnt: 1, ((T([32, 2240, 14, 14], f16), T([32, 2240, 14, 14], f16), 0), {}) +cnt: 32, ((T([32, 896, 14, 14], f16), T([32, 896, 14, 14], f16), 0), {}) +cnt: 5, ((T([32, 112, 1, 1], f16), T([32, 112, 1, 1], f16), 0), {}) +cnt: 1, ((T([32, 896, 28, 28], f16), T([32, 896, 28, 28], f16), 0), {}) +cnt: 14, ((T([32, 448, 28, 28], f16), T([32, 448, 28, 28], f16), 0), {}) +cnt: 2, 
((T([32, 56, 1, 1], f16), T([32, 56, 1, 1], f16), 0), {}) +cnt: 1, ((T([32, 448, 56, 56], f16), T([32, 448, 56, 56], f16), 0), {}) +cnt: 5, ((T([32, 224, 56, 56], f16), T([32, 224, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 8, 1, 1], f16), T([32, 8, 1, 1], f16), 0), {}) +cnt: 1, ((T([32, 224, 112, 112], f16), T([32, 224, 112, 112], f16), 0), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_resnest_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_resnest_training.txt new file mode 100644 index 000000000..31d5de6bf --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_resnest_training.txt @@ -0,0 +1,205 @@ +Operator: aten._softmax.default +cnt: 1, ((T([32, 2, 1, 64], f16), 1, False), {}) +cnt: 1, ((T([32, 2, 1, 128], f16), 1, False), {}) +cnt: 1, ((T([32, 2, 1, 256], f16), 1, False), {}) +cnt: 1, ((T([32, 2, 1, 512], f16), 1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 1, ((T([32, 2, 1, 512], f16), T([32, 2, 1, 512], f16), 1, f16), {}) +cnt: 1, ((T([32, 2, 1, 256], f16), T([32, 2, 1, 256], f16), 1, f16), {}) +cnt: 1, ((T([32, 2, 1, 128], f16), T([32, 2, 1, 128], f16), 1, f16), {}) +cnt: 1, ((T([32, 2, 1, 64], f16), T([32, 2, 1, 64], f16), 1, f16), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([32, 2, 512, 14, 14], f16), T([32, 2, 512, 14, 14], f16, stride=(100352, 0, 196, 14, 1))), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 1, ((T([32, 2, 256, 28, 28], f16), T([32, 2, 256, 28, 28], f16, stride=(200704, 0, 784, 28, 1))), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)), {}) +cnt: 1, ((T([32, 2, 128, 56, 56], f16), T([32, 2, 128, 56, 56], f16, stride=(401408, 0, 3136, 56, 1))), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +cnt: 1, ((T([32, 2, 64, 56, 56], f16), T([32, 2, 64, 56, 
56], f16, stride=(200704, 0, 3136, 56, 1))), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16)), {}) +Operator: aten.add_.Tensor +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16)), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16)), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16)), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 2048], f16), T([2048, 1000], f16, stride=(1, 2048))), {}) +Operator: aten.avg_pool2d.default +cnt: 1, ((T([32, 128, 56, 56], f16), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), [2, 2], [2, 2], [0, 0], True, False), {}) +Operator: aten.avg_pool2d_backward.default +cnt: 1, ((T([32, 1024, 7, 7], f16), T([32, 1024, 14, 14], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([32, 512, 7, 7], f16), T([32, 512, 14, 14], f16), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([32, 512, 28, 28], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([32, 256, 14, 14], f16), T([32, 256, 28, 28], f16), [3, 3], [2, 2], [1, 1], False, True, None), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([32, 256, 56, 56], f16), [2, 2], [2, 2], [0, 0], True, False, None), {}) +cnt: 1, ((T([32, 128, 28, 28], f16), T([32, 128, 56, 56], f16), [3, 3], [2, 2], [1, 1], False, True, None), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 32, 112, 
112], f16), T([32, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([128, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 1, ((T([32, 64, 1, 1], f16), T([32, 64, 1, 1], f16), T([32], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 32, 1, 1], f16), T([128, 32, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([32, 64, 56, 56], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([256, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 1, ((T([32, 128, 1, 1], f16), T([64, 128, 1, 1], f16), T([64], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 1, 1], f16), T([256, 64, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 28, 28], f16), T([512, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([512, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([512, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 1, ((T([32, 256, 1, 1], f16), T([128, 256, 1, 1], f16), T([128], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 128, 1, 1], f16), T([512, 128, 1, 1], f16), T([512], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, 
((T([32, 256, 14, 14], f16), T([1024, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([1024, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([1024, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 2), {}) +cnt: 1, ((T([32, 512, 1, 1], f16), T([256, 512, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 1, 1], f16), T([1024, 256, 1, 1], f16), T([1024], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 7, 7], f16), T([2048, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 7, 7], f16), T([2048, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 1024, 7, 7], f16), T([2048, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 512, 7, 7], f16), T([2048, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 1, 1], f16), T([32, 256, 1, 1], f16), T([1024, 256, 1, 1], f16), [1024], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 256, 1, 1], f16), T([32, 512, 1, 1], f16), T([256, 512, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 512, 14, 14], f16), T([1024, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([32, 1024, 14, 14], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 
14, 14], f16), T([32, 512, 14, 14], f16), T([1024, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 14, 14], f16), T([32, 256, 14, 14], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 1, 1], f16), T([32, 128, 1, 1], f16), T([512, 128, 1, 1], f16), [512], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 1, 1], f16), T([32, 256, 1, 1], f16), T([128, 256, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 256, 28, 28], f16), T([512, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([32, 512, 28, 28], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 256, 28, 28], f16), T([512, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 128, 28, 28], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 1, 1], f16), T([32, 64, 1, 1], f16), T([256, 64, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 1, 1], f16), T([32, 128, 1, 1], f16), T([64, 128, 1, 1], f16), [64], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 128, 56, 56], f16), T([256, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 256, 56, 56], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([32, 256, 56, 56], f16), T([32, 64, 56, 56], f16), 
T([256, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 1, 1], f16), T([32, 32, 1, 1], f16), T([128, 32, 1, 1], f16), [128], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 32, 1, 1], f16), T([32, 64, 1, 1], f16), T([32, 64, 1, 1], f16), [32], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 64, 56, 56], f16), T([128, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 2, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16), T([64, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 32, 112, 112], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16), T([32, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 32, 112, 112], f16), T([32, 3, 224, 224], f16), T([32, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 2048, 7, 7], f16, stride=(2048, 1, 0, 0)), 49), {}) +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(512, 1, 0, 0)), 196), {}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(256, 1, 0, 0)), 784), {}) +cnt: 1, ((T([32, 128, 56, 56], f16, stride=(128, 1, 0, 0)), 3136), {}) +cnt: 1, ((T([32, 64, 56, 56], f16, stride=(64, 1, 0, 0)), 3136), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 32000), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([32, 64, 112, 112], f16), [3, 3], [2, 2], [1, 1]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 112, 
112], f16), [3, 3], [2, 2], [1, 1], [1, 1], False, T([32, 64, 56, 56], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 64, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), [2, 3], True), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), T([1000, 2048], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(0, 0)), T([32, 2048], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([32, 2, 64, 56, 56], f16), T([32, 2, 64, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 128, 56, 56], f16), T([32, 2, 128, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 256, 28, 28], f16), T([32, 2, 256, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 512, 14, 14], f16), T([32, 2, 512, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 512, 14, 14], f16, stride=(100352, 0, 196, 14, 1)), T([32, 2, 512, 14, 14], f16)), {}) +cnt: 1, ((T([32, 2, 512, 14, 14], f16, stride=(100352, 0, 196, 14, 1)), T([32, 2, 512, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 256, 28, 28], f16, stride=(200704, 0, 784, 28, 1)), T([32, 2, 256, 28, 28], f16)), {}) +cnt: 1, ((T([32, 2, 256, 28, 28], f16, stride=(200704, 0, 784, 28, 1)), T([32, 2, 256, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 128, 56, 56], f16, stride=(401408, 0, 3136, 56, 1)), T([32, 2, 128, 56, 56], f16)), {}) +cnt: 1, ((T([32, 2, 128, 56, 56], f16, stride=(401408, 0, 3136, 56, 1)), T([32, 2, 128, 1, 1], f16)), {}) +cnt: 1, ((T([32, 2, 64, 56, 56], f16, stride=(200704, 0, 3136, 56, 1)), T([32, 2, 64, 56, 56], f16)), {}) +cnt: 1, ((T([32, 2, 64, 56, 56], f16, stride=(200704, 0, 3136, 56, 1)), T([32, 2, 64, 1, 1], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([32, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), 
False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 32, 1, 1], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 64, 1, 1], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 128, 1, 1], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 3, ((T([32, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 256, 1, 1], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 2, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), T([2048], f16), T([2048], f16), T([2048], f16), T([2048], f32), T([2048], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 256, 1, 1], f16), T([32, 256, 1, 1], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), T([1024], f16), T([1024], f16), T([1024], f16), 
T([1024], f32), T([1024], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([32, 512, 14, 14], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 128, 1, 1], f16), T([32, 128, 1, 1], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([32, 256, 28, 28], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 1, 1], f16), T([32, 64, 1, 1], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 3, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 32, 1, 1], f16), T([32, 32, 1, 1], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), False, 1e-05, [True, True, True]), 
{}) +Operator: aten.relu_.default +cnt: 2, ((T([32, 32, 112, 112], f16),), {}) +cnt: 1, ((T([32, 64, 112, 112], f16),), {}) +cnt: 1, ((T([32, 64, 56, 56], f16),), {}) +cnt: 2, ((T([32, 128, 56, 56], f16),), {}) +cnt: 1, ((T([32, 32, 1, 1], f16),), {}) +cnt: 2, ((T([32, 256, 56, 56], f16),), {}) +cnt: 1, ((T([32, 64, 1, 1], f16),), {}) +cnt: 2, ((T([32, 512, 28, 28], f16),), {}) +cnt: 1, ((T([32, 256, 28, 28], f16),), {}) +cnt: 1, ((T([32, 128, 1, 1], f16),), {}) +cnt: 2, ((T([32, 1024, 14, 14], f16),), {}) +cnt: 1, ((T([32, 512, 14, 14], f16),), {}) +cnt: 1, ((T([32, 256, 1, 1], f16),), {}) +cnt: 1, ((T([32, 2048, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), [0], True), {}) +cnt: 1, ((T([32, 2, 512, 14, 14], f16), [3, 4], True), {}) +cnt: 1, ((T([32, 2, 256, 28, 28], f16), [3, 4], True), {}) +cnt: 1, ((T([32, 2, 128, 56, 56], f16), [3, 4], True), {}) +cnt: 1, ((T([32, 2, 64, 56, 56], f16), [3, 4], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([32, 1000], f16),), {}) +Operator: aten.sum.dim_IntList +cnt: 2, ((T([32, 2, 64, 56, 56], f16), [1]), {}) +cnt: 2, ((T([32, 2, 128, 56, 56], f16), [1]), {}) +cnt: 2, ((T([32, 2, 256, 28, 28], f16), [1]), {}) +cnt: 2, ((T([32, 2, 512, 14, 14], f16), [1]), {}) +Operator: aten.threshold_backward.default +cnt: 1, ((T([32, 2048, 7, 7], f16), T([32, 2048, 7, 7], f16), 0), {}) +cnt: 1, ((T([32, 256, 1, 1], f16), T([32, 256, 1, 1], f16), 0), {}) +cnt: 2, ((T([32, 1024, 14, 14], f16), T([32, 1024, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([32, 512, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 128, 1, 1], f16), T([32, 128, 1, 1], f16), 0), {}) +cnt: 2, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([32, 256, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 64, 1, 1], f16), T([32, 64, 1, 1], f16), 0), {}) +cnt: 2, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), 0), {}) +cnt: 2, ((T([32, 128, 56, 56], f16), T([32, 
128, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 32, 1, 1], f16), T([32, 32, 1, 1], f16), 0), {}) +cnt: 1, ((T([32, 64, 56, 56], f16), T([32, 64, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), 0), {}) +cnt: 2, ((T([32, 32, 112, 112], f16), T([32, 32, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_vision_transformer_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_vision_transformer_training.txt new file mode 100644 index 000000000..ed9e7bf69 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_vision_transformer_training.txt @@ -0,0 +1,77 @@ +Operator: aten._softmax.default +cnt: 12, ((T([8, 6, 197, 197], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 12, ((T([8, 6, 197, 197], f16), T([8, 6, 197, 197], f16), -1, f16), {}) +Operator: aten._unsafe_view.default +cnt: 36, ((T([8, 6, 197, 64], f16), [48, 197, 64]), {}) +cnt: 12, ((T([8, 6, 64, 197], f16), [48, 64, 197]), {}) +cnt: 12, ((T([48, 197, 197], f16), [8, 6, 197, 197]), {}) +cnt: 12, ((T([48, 197, 64], f16), [8, 6, 197, 64]), {}) +cnt: 12, ((T([8, 197, 6, 64], f16), [8, 197, 384]), {}) +cnt: 12, ((T([8, 197, 3, 6, 64], f16), [8, 197, 1152]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([8, 197, 384], f16), T([1, 197, 384], f16)), {}) +cnt: 48, ((T([8, 197, 384], f16), T([8, 197, 384], f16)), {}) +Operator: aten.addmm.default +cnt: 12, ((T([1152], f16), T([1576, 384], f16), T([384, 1152], f16, stride=(1, 384))), {}) +cnt: 12, ((T([384], f16), T([1576, 384], f16), T([384, 384], f16, stride=(1, 384))), {}) +cnt: 12, ((T([1536], f16), T([1576, 384], f16), T([384, 1536], f16, stride=(1, 384))), {}) +cnt: 12, ((T([384], f16), T([1576, 1536], f16), T([1536, 384], f16, stride=(1, 1536))), {}) +cnt: 1, ((T([1000], f16), T([8, 384], f16, stride=(75648, 1)), T([384, 1000], f16, stride=(1, 384))), {}) +Operator: 
aten.bmm.default +cnt: 12, ((T([48, 197, 64], f16), T([48, 64, 197], f16)), {}) +cnt: 12, ((T([48, 197, 197], f16), T([48, 197, 64], f16)), {}) +cnt: 12, ((T([48, 197, 197], f16, stride=(38809, 1, 197)), T([48, 197, 64], f16)), {}) +cnt: 12, ((T([48, 197, 64], f16), T([48, 64, 197], f16, stride=(12608, 1, 64))), {}) +cnt: 12, ((T([48, 64, 197], f16, stride=(12608, 1, 64)), T([48, 197, 197], f16)), {}) +cnt: 12, ((T([48, 197, 197], f16), T([48, 197, 64], f16, stride=(12608, 1, 197))), {}) +Operator: aten.cat.default +cnt: 1, (([T([8, 1, 384], f16, stride=(0, 384, 1)), T([8, 196, 384], f16, stride=(75264, 1, 196))], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([8, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([8, 3, 224, 224], f16), T([384, 3, 16, 16], f16), T([384], f16), [16, 16], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([8, 384, 14, 14], f16, stride=(75648, 1, 5376, 384)), T([8, 3, 224, 224], f16), T([384, 3, 16, 16], f16), [384], [16, 16], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([8, 3, 224, 224], f16), T([8, 3, 224, 224], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 8000), {}) +Operator: aten.gelu.default +cnt: 12, ((T([8, 197, 1536], f16),), {}) +Operator: aten.gelu_backward.default +cnt: 12, ((T([8, 197, 1536], f16), T([8, 197, 1536], f16)), {}) +Operator: aten.mm.default +cnt: 1, ((T([8, 1000], f16, stride=(0, 0)), T([1000, 384], f16)), {}) +cnt: 1, ((T([1000, 8], f16, stride=(0, 0)), T([8, 384], f16, stride=(75648, 1))), {}) +cnt: 12, ((T([1576, 384], f16), T([384, 1536], f16)), {}) +cnt: 12, ((T([384, 1576], f16, stride=(1, 384)), T([1576, 1536], f16)), {}) +cnt: 12, ((T([1576, 1536], f16), T([1536, 384], f16)), {}) +cnt: 12, ((T([1536, 1576], f16, stride=(1, 1536)), T([1576, 384], f16)), {}) +cnt: 12, ((T([1576, 384], f16), T([384, 384], f16)), {}) +cnt: 12, ((T([384, 1576], f16, stride=(1, 
384)), T([1576, 384], f16)), {}) +cnt: 12, ((T([1576, 1152], f16), T([1152, 384], f16)), {}) +cnt: 12, ((T([1152, 1576], f16, stride=(1, 1152)), T([1576, 384], f16)), {}) +Operator: aten.mul.Tensor +cnt: 24, ((T([8, 6, 197, 197], f16), 0.125), {}) +Operator: aten.native_layer_norm.default +cnt: 25, ((T([8, 197, 384], f16), [384], T([384], f16), T([384], f16), 1e-06), {}) +Operator: aten.native_layer_norm_backward.default +cnt: 25, ((T([8, 197, 384], f16), T([8, 197, 384], f16), [384], T([8, 197, 1], f32), T([8, 197, 1], f32), T([384], f16), T([384], f16), [True, True, True]), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([8, 384], f16), [8, 197, 384], 1, 0), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([8, 197, 384], f16), [8, 197, 384], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.stack.default +cnt: 12, (([T([8, 6, 197, 64], f16), T([8, 6, 197, 64], f16, stride=(75648, 12608, 1, 197)), T([8, 6, 197, 64], f16)],), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([8, 1000], f16, stride=(0, 0)), [0], True), {}) +cnt: 24, ((T([1576, 384], f16), [0], True), {}) +cnt: 12, ((T([1576, 1536], f16), [0], True), {}) +cnt: 12, ((T([1576, 1152], f16), [0], True), {}) +cnt: 1, ((T([8, 197, 384], f16), [0], True), {}) +cnt: 1, ((T([8, 1, 384], f16, stride=(75648, 384, 1)), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([8, 1000], f16),), {}) +Operator: aten.unbind.int +cnt: 12, ((T([3, 8, 6, 197, 64], f16, stride=(384, 226944, 64, 1152, 1)),), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_vovnet_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_vovnet_training.txt new file mode 100644 index 000000000..0ff92b240 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/timm_vovnet_training.txt @@ -0,0 +1,130 @@ +Operator: aten.add.Tensor +cnt: 4, ((T([32, 224, 7, 7], f16, stride=(105056, 49, 7, 1)), T([32, 224, 7, 7], f16)), {}) +cnt: 1, 
((T([32, 1024, 7, 7], f16, stride=(105056, 49, 7, 1)), T([32, 1024, 7, 7], f16)), {}) +cnt: 4, ((T([32, 224, 7, 7], f16, stride=(92512, 49, 7, 1)), T([32, 224, 7, 7], f16)), {}) +cnt: 1, ((T([32, 768, 7, 7], f16, stride=(92512, 49, 7, 1)), T([32, 768, 7, 7], f16)), {}) +cnt: 4, ((T([32, 192, 14, 14], f16, stride=(338688, 196, 14, 1)), T([32, 192, 14, 14], f16)), {}) +cnt: 1, ((T([32, 768, 14, 14], f16, stride=(338688, 196, 14, 1)), T([32, 768, 14, 14], f16)), {}) +cnt: 4, ((T([32, 192, 14, 14], f16, stride=(288512, 196, 14, 1)), T([32, 192, 14, 14], f16)), {}) +cnt: 1, ((T([32, 512, 14, 14], f16, stride=(288512, 196, 14, 1)), T([32, 512, 14, 14], f16)), {}) +cnt: 4, ((T([32, 160, 28, 28], f16, stride=(827904, 784, 28, 1)), T([32, 160, 28, 28], f16)), {}) +cnt: 1, ((T([32, 256, 28, 28], f16, stride=(827904, 784, 28, 1)), T([32, 256, 28, 28], f16)), {}) +cnt: 5, ((T([32, 128, 56, 56], f16, stride=(2408448, 3136, 56, 1)), T([32, 128, 56, 56], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1000], f16), T([32, 1024], f16), T([1024, 1000], f16, stride=(1, 1024))), {}) +Operator: aten.cat.default +cnt: 1, (([T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16)], 1), {}) +cnt: 1, (([T([32, 256, 28, 28], f16), T([32, 160, 28, 28], f16), T([32, 160, 28, 28], f16), T([32, 160, 28, 28], f16), T([32, 160, 28, 28], f16), T([32, 160, 28, 28], f16)], 1), {}) +cnt: 1, (([T([32, 512, 14, 14], f16), T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 768, 14, 14], f16), T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16)], 1), {}) +cnt: 1, (([T([32, 768, 7, 7], f16), T([32, 224, 7, 7], f16), T([32, 224, 7, 7], f16), T([32, 224, 7, 7], f16), T([32, 224, 7, 7], f16), T([32, 224, 7, 
7], f16)], 1), {}) +cnt: 1, (([T([32, 1024, 7, 7], f16), T([32, 224, 7, 7], f16), T([32, 224, 7, 7], f16), T([32, 224, 7, 7], f16), T([32, 224, 7, 7], f16), T([32, 224, 7, 7], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([32, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([64, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([128, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([32, 128, 56, 56], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 768, 56, 56], f16), T([256, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([160, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([32, 160, 28, 28], f16), T([160, 160, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1056, 28, 28], f16), T([512, 1056, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([192, 512, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([32, 192, 14, 14], f16), T([192, 192, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1472, 14, 14], f16), T([768, 1472, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 768, 14, 14], f16), T([192, 768, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1728, 14, 14], f16), T([768, 1728, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 768, 7, 7], f16), T([224, 768, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 8, ((T([32, 224, 7, 7], f16), T([224, 224, 3, 3], 
f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1888, 7, 7], f16), T([1024, 1888, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 1024, 7, 7], f16), T([224, 1024, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([32, 2144, 7, 7], f16), T([1024, 2144, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([32, 1024, 7, 7], f16), T([32, 2144, 7, 7], f16), T([1024, 2144, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([32, 224, 7, 7], f16), T([32, 224, 7, 7], f16), T([224, 224, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 224, 7, 7], f16), T([32, 1024, 7, 7], f16), T([224, 1024, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 1024, 7, 7], f16), T([32, 1888, 7, 7], f16), T([1024, 1888, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 224, 7, 7], f16), T([32, 768, 7, 7], f16), T([224, 768, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 768, 14, 14], f16), T([32, 1728, 14, 14], f16), T([768, 1728, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 8, ((T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16), T([192, 192, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 192, 14, 14], f16), T([32, 768, 14, 14], f16), T([192, 768, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 768, 14, 14], f16), T([32, 1472, 14, 14], f16), T([768, 1472, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 192, 14, 14], f16), T([32, 512, 14, 14], f16), T([192, 
512, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 1056, 28, 28], f16), T([512, 1056, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([32, 160, 28, 28], f16), T([32, 160, 28, 28], f16), T([160, 160, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 160, 28, 28], f16), T([32, 256, 28, 28], f16), T([160, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 768, 56, 56], f16), T([256, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([128, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 128, 56, 56], f16), T([32, 64, 112, 112], f16), T([128, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), T([64, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([32, 64, 112, 112], f16), T([32, 3, 224, 224], f16), T([64, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([32, 3, 224, 224], f16), T([32, 3, 224, 224], f16)), {}) +Operator: aten.div.Scalar +cnt: 1, ((T([32, 1024, 7, 7], f16, stride=(1024, 1, 0, 0)), 49), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 32000), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([32, 256, 56, 56], f16), [3, 3], [2, 2], [0, 0], [1, 1], True), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), [3, 3], [2, 2], [0, 0], [1, 1], True), {}) +cnt: 1, ((T([32, 768, 14, 14], f16), [3, 3], [2, 2], [0, 0], [1, 1], True), {}) +Operator: 
aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([32, 768, 7, 7], f16), T([32, 768, 14, 14], f16), [3, 3], [2, 2], [0, 0], [1, 1], True, T([32, 768, 7, 7], i64)), {}) +cnt: 1, ((T([32, 512, 14, 14], f16), T([32, 512, 28, 28], f16), [3, 3], [2, 2], [0, 0], [1, 1], True, T([32, 512, 14, 14], i64)), {}) +cnt: 1, ((T([32, 256, 28, 28], f16), T([32, 256, 56, 56], f16), [3, 3], [2, 2], [0, 0], [1, 1], True, T([32, 256, 28, 28], i64)), {}) +Operator: aten.mean.dim +cnt: 1, ((T([32, 1024, 7, 7], f16), [-1, -2], True), {}) +Operator: aten.mm.default +cnt: 1, ((T([32, 1000], f16, stride=(0, 0)), T([1000, 1024], f16)), {}) +cnt: 1, ((T([1000, 32], f16, stride=(0, 0)), T([32, 1024], f16)), {}) +Operator: aten.native_batch_norm.default +cnt: 2, ((T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.1, 1e-05), {}) +cnt: 6, ((T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.1, 1e-05), {}) +cnt: 5, ((T([32, 160, 28, 28], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f16), False, 0.1, 1e-05), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.1, 1e-05), {}) +cnt: 10, ((T([32, 192, 14, 14], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f16), False, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 768, 14, 14], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), False, 0.1, 1e-05), {}) +cnt: 10, ((T([32, 224, 7, 7], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f16), False, 0.1, 1e-05), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), False, 0.1, 1e-05), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 2, ((T([32, 1024, 7, 7], f16), T([32, 1024, 7, 7], f16), T([1024], f16), T([1024], f16), T([1024], 
f16), T([1024], f32), T([1024], f32), False, 1e-05, [True, True, True]), {}) +cnt: 10, ((T([32, 224, 7, 7], f16), T([32, 224, 7, 7], f16), T([224], f16), T([224], f16), T([224], f16), T([224], f32), T([224], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 768, 14, 14], f16), T([32, 768, 14, 14], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), False, 1e-05, [True, True, True]), {}) +cnt: 10, ((T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16), T([192], f16), T([192], f16), T([192], f16), T([192], f32), T([192], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 1e-05, [True, True, True]), {}) +cnt: 5, ((T([32, 160, 28, 28], f16), T([32, 160, 28, 28], f16), T([160], f16), T([160], f16), T([160], f16), T([160], f32), T([160], f32), False, 1e-05, [True, True, True]), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 1e-05, [True, True, True]), {}) +cnt: 6, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 1e-05, [True, True, True]), {}) +cnt: 2, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 1e-05, [True, True, True]), {}) +Operator: aten.relu_.default +cnt: 2, ((T([32, 64, 112, 112], f16),), {}) +cnt: 6, ((T([32, 128, 56, 56], f16),), {}) +cnt: 1, ((T([32, 256, 56, 56], f16),), {}) +cnt: 5, ((T([32, 160, 28, 28], f16),), {}) +cnt: 1, ((T([32, 512, 28, 28], f16),), {}) +cnt: 10, ((T([32, 192, 14, 14], f16),), {}) +cnt: 2, ((T([32, 768, 14, 14], f16),), {}) +cnt: 10, ((T([32, 224, 7, 7], f16),), {}) +cnt: 2, ((T([32, 1024, 7, 7], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([32, 1000], f16, stride=(0, 
0)), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([32, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([32, 1024, 7, 7], f16), T([32, 1024, 7, 7], f16), 0), {}) +cnt: 1, ((T([32, 224, 7, 7], f16, stride=(105056, 49, 7, 1)), T([32, 224, 7, 7], f16), 0), {}) +cnt: 8, ((T([32, 224, 7, 7], f16), T([32, 224, 7, 7], f16), 0), {}) +cnt: 1, ((T([32, 224, 7, 7], f16, stride=(92512, 49, 7, 1)), T([32, 224, 7, 7], f16), 0), {}) +cnt: 2, ((T([32, 768, 14, 14], f16), T([32, 768, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 192, 14, 14], f16, stride=(338688, 196, 14, 1)), T([32, 192, 14, 14], f16), 0), {}) +cnt: 8, ((T([32, 192, 14, 14], f16), T([32, 192, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 192, 14, 14], f16, stride=(288512, 196, 14, 1)), T([32, 192, 14, 14], f16), 0), {}) +cnt: 1, ((T([32, 512, 28, 28], f16), T([32, 512, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 160, 28, 28], f16, stride=(827904, 784, 28, 1)), T([32, 160, 28, 28], f16), 0), {}) +cnt: 4, ((T([32, 160, 28, 28], f16), T([32, 160, 28, 28], f16), 0), {}) +cnt: 1, ((T([32, 256, 56, 56], f16), T([32, 256, 56, 56], f16), 0), {}) +cnt: 1, ((T([32, 128, 56, 56], f16, stride=(2408448, 3136, 56, 1)), T([32, 128, 56, 56], f16), 0), {}) +cnt: 5, ((T([32, 128, 56, 56], f16), T([32, 128, 56, 56], f16), 0), {}) +cnt: 2, ((T([32, 64, 112, 112], f16), T([32, 64, 112, 112], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/tts_angular_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/tts_angular_training.txt new file mode 100644 index 000000000..847934aa9 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/tts_angular_training.txt @@ -0,0 +1,51 @@ +Operator: aten._cudnn_rnn.default +cnt: 1, ((T([64, 50, 40], f16), [T([3072, 40], f16), T([3072, 768], f16), T([3072], f16), T([3072], f16)], 4, None, T([1, 64, 768], f16), T([1, 64, 768], f16), 2, 768, 0, 1, True, 0.0, True, False, [], 
None), {}) +cnt: 2, ((T([64, 50, 256], f16), [T([3072, 256], f16), T([3072, 768], f16), T([3072], f16), T([3072], f16)], 4, None, T([1, 64, 768], f16), T([1, 64, 768], f16), 2, 768, 0, 1, True, 0.0, True, False, [], None), {}) +Operator: aten._cudnn_rnn_backward.default +cnt: 2, ((T([64, 50, 256], f16), [T([3072, 256], f16), T([3072, 768], f16), T([3072], f16), T([3072], f16)], 4, T([3151872], f16), T([1, 64, 768], f16), T([1, 64, 768], f16), T([64, 50, 768], f16, stride=(768, 49152, 1)), T([64, 50, 768], f16), None, None, 2, 768, 0, 1, True, 0.0, True, False, [], None, T([24576016], u8), [True, False, False, True]), {}) +cnt: 1, ((T([64, 50, 40], f16), [T([3072, 40], f16), T([3072, 768], f16), T([3072], f16), T([3072], f16)], 4, T([2488320], f16), T([1, 64, 768], f16), T([1, 64, 768], f16), T([64, 50, 768], f16, stride=(768, 49152, 1)), T([64, 50, 768], f16), None, None, 2, 768, 0, 1, True, 0.0, True, False, [], None, T([24576016], u8), [False, False, False, True]), {}) +Operator: aten._unsafe_view.default +cnt: 3, ((T([64, 50, 768], f16), [3200, 768]), {}) +cnt: 3, ((T([3200, 256], f16), [64, 50, 256]), {}) +cnt: 2, ((T([64, 50, 256], f16), [3200, 256]), {}) +Operator: aten.add.Tensor +cnt: 1, ((T([64, 256], f16), T([64, 256], f16)), {}) +Operator: aten.clamp_min.default +cnt: 1, ((T([64, 1], f16), 1e-12), {}) +Operator: aten.clone.default +cnt: 1, ((T([64, 50, 40], f16),), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 50, 40], f16), T([64, 50, 40], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([64, 256], f16, stride=(12800, 1)), T([64, 256], f16, stride=(1, 0))), {}) +cnt: 2, ((T([], f16), 16384), {}) +cnt: 1, ((T([64, 256], f16), T([64, 256], f16, stride=(1, 0))), {}) +cnt: 1, ((T([64, 256], f16, stride=(0, 0)), T([64, 256], f16, stride=(1, 0))), {}) +cnt: 1, ((T([64, 256], f16, stride=(12800, 1)), T([64, 1], f16)), {}) +Operator: aten.eq.Scalar +cnt: 1, ((T([64, 1], f16), 0), {}) +Operator: aten.ge.Scalar +cnt: 1, ((T([64, 1], f16), 1e-12), {}) 
+Operator: aten.masked_fill_.Scalar +cnt: 1, ((T([64, 256], f16), T([64, 1], b8), 0), {}) +Operator: aten.mm.default +cnt: 3, ((T([3200, 768], f16), T([768, 256], f16, stride=(1, 768))), {}) +cnt: 3, ((T([256, 3200], f16, stride=(1, 256)), T([3200, 768], f16)), {}) +cnt: 3, ((T([3200, 256], f16), T([256, 768], f16)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([64, 256], f16), T([64, 256], f16)), {}) +cnt: 1, ((T([64, 1], f16), T([64, 256], f16)), {}) +Operator: aten.neg.default +cnt: 1, ((T([64, 256], f16, stride=(0, 0)),), {}) +Operator: aten.norm.ScalarOpt_dim +cnt: 1, ((T([64, 256], f16, stride=(12800, 1)), 2, [1], True), {}) +Operator: aten.select_backward.default +cnt: 1, ((T([64, 256], f16), [64, 50, 256], 1, -1), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([64, 50, 256], f16), [64, 50, 256], 0, 0, 9223372036854775807, 1), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 256], f16), [1], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([64, 256], f16),), {}) +Operator: aten.where.self +cnt: 1, ((T([64, 1], b8), T([64, 1], f16), T([], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/vgg16_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/vgg16_training.txt new file mode 100644 index 000000000..cc96188bb --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/vgg16_training.txt @@ -0,0 +1,72 @@ +Operator: aten._adaptive_avg_pool2d.default +cnt: 1, ((T([64, 512, 7, 7], f16), [7, 7]), {}) +Operator: aten._adaptive_avg_pool2d_backward.default +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 7, 7], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([4096], f16), T([64, 25088], f16), T([25088, 4096], f16, stride=(1, 25088))), {}) +cnt: 1, ((T([4096], f16), T([64, 4096], f16), T([4096, 4096], f16, stride=(1, 4096))), {}) +cnt: 1, ((T([1000], f16), T([64, 4096], f16), T([4096, 1000], f16, stride=(1, 4096))), {}) +Operator: aten.clone.default +cnt: 1, 
((T([64, 3, 224, 224], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 224, 224], f16), T([64, 64, 3, 3], f16), T([64], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([128, 64, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 112, 112], f16), T([128, 128, 3, 3], f16), T([128], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([256, 128, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 256, 56, 56], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([512, 256, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([64, 512, 28, 28], f16), T([512, 512, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([64, 512, 14, 14], f16), T([512, 512, 3, 3], f16), T([512], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 3, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), T([512, 512, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16), T([512, 512, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), T([64, 256, 28, 28], f16), T([512, 256, 3, 3], f16), [512], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 2, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), T([64, 128, 56, 56], f16), T([256, 128, 3, 3], f16), 
[256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 128, 112, 112], f16), T([64, 128, 112, 112], f16), T([128, 128, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 128, 112, 112], f16), T([64, 64, 112, 112], f16), T([128, 64, 3, 3], f16), [128], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 64, 224, 224], f16), T([64, 64, 224, 224], f16), T([64, 64, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([64, 64, 224, 224], f16), T([64, 3, 224, 224], f16), T([64, 3, 3, 3], f16), [64], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([64, 3, 224, 224], f16), T([64, 3, 224, 224], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 64000), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([64, 64, 224, 224], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([64, 128, 112, 112], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([64, 256, 56, 56], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([64, 512, 28, 28], f16), [2, 2], [2, 2]), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), [2, 2], [2, 2]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([64, 512, 7, 7], f16), T([64, 512, 14, 14], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 512, 7, 7], i64)), {}) +cnt: 1, ((T([64, 512, 14, 14], f16), T([64, 512, 28, 28], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 512, 14, 14], i64)), {}) +cnt: 1, ((T([64, 256, 28, 28], f16), T([64, 256, 56, 56], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 256, 28, 28], i64)), {}) +cnt: 1, ((T([64, 128, 56, 56], f16), T([64, 128, 112, 112], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 128, 56, 56], i64)), {}) +cnt: 1, ((T([64, 64, 112, 112], f16), T([64, 64, 224, 224], f16), [2, 2], [2, 2], [0, 0], [1, 1], False, T([64, 64, 112, 112], i64)), {}) +Operator: 
aten.mm.default +cnt: 1, ((T([64, 1000], f16, stride=(0, 0)), T([1000, 4096], f16)), {}) +cnt: 1, ((T([1000, 64], f16, stride=(0, 0)), T([64, 4096], f16)), {}) +cnt: 1, ((T([64, 4096], f16), T([4096, 4096], f16)), {}) +cnt: 1, ((T([4096, 64], f16, stride=(1, 4096)), T([64, 4096], f16)), {}) +cnt: 1, ((T([64, 4096], f16), T([4096, 25088], f16)), {}) +cnt: 1, ((T([4096, 64], f16, stride=(1, 4096)), T([64, 25088], f16)), {}) +Operator: aten.relu_.default +cnt: 2, ((T([64, 64, 224, 224], f16),), {}) +cnt: 2, ((T([64, 128, 112, 112], f16),), {}) +cnt: 3, ((T([64, 256, 56, 56], f16),), {}) +cnt: 3, ((T([64, 512, 28, 28], f16),), {}) +cnt: 3, ((T([64, 512, 14, 14], f16),), {}) +cnt: 2, ((T([64, 4096], f16),), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([64, 1000], f16, stride=(0, 0)), [0], True), {}) +cnt: 2, ((T([64, 4096], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 1, ((T([64, 1000], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([64, 4096], f16), T([64, 4096], f16), 0), {}) +cnt: 3, ((T([64, 512, 14, 14], f16), T([64, 512, 14, 14], f16), 0), {}) +cnt: 3, ((T([64, 512, 28, 28], f16), T([64, 512, 28, 28], f16), 0), {}) +cnt: 3, ((T([64, 256, 56, 56], f16), T([64, 256, 56, 56], f16), 0), {}) +cnt: 2, ((T([64, 128, 112, 112], f16), T([64, 128, 112, 112], f16), 0), {}) +cnt: 2, ((T([64, 64, 224, 224], f16), T([64, 64, 224, 224], f16), 0), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/vision_maskrcnn_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/vision_maskrcnn_training.txt new file mode 100644 index 000000000..a88dbc3ae --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/vision_maskrcnn_training.txt @@ -0,0 +1,477 @@ +Operator: aten._index_put_impl_.default +cnt: 12, ((T([0], f16), [T([0], i64)], T([0], f16), True, True), {}) +cnt: 12, ((T([0, 4], f16), [T([0], i64)], T([0, 4], f16), True, True), {}) +Operator: aten._softmax.default 
+cnt: 1, ((T([0, 91], f16), -1, False), {}) +Operator: aten._softmax_backward_data.default +cnt: 1, ((T([0, 91], f16), T([0, 91], f16), -1, f16), {}) +Operator: aten._to_copy.default +cnt: 8, ((T([], i64),), {'dtype': f32}) +cnt: 5, ((T([3, 4], f32),), {'dtype': f16, 'device': 'cuda'}) +cnt: 8, ((T([0, 4], f16),), {'dtype': f32}) +cnt: 2, ((T([0], f32),), {'dtype': i64}) +cnt: 4, ((T([0, 4], f16),), {'dtype': i64}) +cnt: 8, ((T([], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 2, ((T([296, 304], i32), [89984]), {}) +cnt: 2, ((T([148, 152], i32), [22496]), {}) +cnt: 2, ((T([74, 76], i32), [5624]), {}) +cnt: 2, ((T([37, 38], i32), [1406]), {}) +cnt: 2, ((T([19, 19], i32), [361]), {}) +cnt: 1, ((T([4, 296, 304, 3, 1], f16), [4, 269952, 1]), {}) +cnt: 1, ((T([4, 296, 304, 3, 4], f16), [4, 269952, 4]), {}) +cnt: 1, ((T([4, 148, 152, 3, 1], f16), [4, 67488, 1]), {}) +cnt: 1, ((T([4, 148, 152, 3, 4], f16), [4, 67488, 4]), {}) +cnt: 1, ((T([4, 74, 76, 3, 1], f16), [4, 16872, 1]), {}) +cnt: 1, ((T([4, 74, 76, 3, 4], f16), [4, 16872, 4]), {}) +cnt: 1, ((T([4, 37, 38, 3, 1], f16), [4, 4218, 1]), {}) +cnt: 1, ((T([4, 37, 38, 3, 4], f16), [4, 4218, 4]), {}) +cnt: 1, ((T([4, 19, 19, 3, 1], f16), [4, 1083, 1]), {}) +cnt: 1, ((T([4, 19, 19, 3, 4], f16), [4, 1083, 4]), {}) +Operator: aten.add.Tensor +cnt: 7, ((T([1, 64, 1, 1], f16), 0.0), {}) +cnt: 1, ((T([4, 64, 592, 608], f16), T([1, 64, 1, 1], f16)), {}) +cnt: 6, ((T([4, 64, 296, 304], f16), T([1, 64, 1, 1], f16)), {}) +cnt: 16, ((T([1, 256, 1, 1], f16), 0.0), {}) +cnt: 4, ((T([4, 256, 296, 304], f16), T([1, 256, 1, 1], f16)), {}) +cnt: 8, ((T([1, 128, 1, 1], f16), 0.0), {}) +cnt: 1, ((T([4, 128, 296, 304], f16), T([1, 128, 1, 1], f16)), {}) +cnt: 7, ((T([4, 128, 148, 152], f16), T([1, 128, 1, 1], f16)), {}) +cnt: 11, ((T([1, 512, 1, 1], f16), 0.0), {}) +cnt: 5, ((T([4, 512, 148, 152], f16), T([1, 512, 1, 1], f16)), {}) +cnt: 1, ((T([4, 256, 148, 152], f16), T([1, 256, 1, 1], f16)), {}) +cnt: 11, ((T([4, 
256, 74, 76], f16), T([1, 256, 1, 1], f16)), {}) +cnt: 7, ((T([1, 1024, 1, 1], f16), 0.0), {}) +cnt: 7, ((T([4, 1024, 74, 76], f16), T([1, 1024, 1, 1], f16)), {}) +cnt: 1, ((T([4, 512, 74, 76], f16), T([1, 512, 1, 1], f16)), {}) +cnt: 5, ((T([4, 512, 37, 38], f16), T([1, 512, 1, 1], f16)), {}) +cnt: 4, ((T([1, 2048, 1, 1], f16), 0.0), {}) +cnt: 4, ((T([4, 2048, 37, 38], f16), T([1, 2048, 1, 1], f16)), {}) +cnt: 2, ((T([4, 256, 74, 76], f16), T([4, 256, 74, 76], f16)), {}) +cnt: 2, ((T([4, 256, 148, 152], f16), T([4, 256, 148, 152], f16)), {}) +cnt: 1, ((T([4, 256, 296, 304], f16), T([4, 256, 296, 304], f16)), {}) +cnt: 1, ((T([89984, 1, 4], i32), T([1, 3, 4], f16)), {}) +cnt: 1, ((T([22496, 1, 4], i32), T([1, 3, 4], f16)), {}) +cnt: 1, ((T([5624, 1, 4], i32), T([1, 3, 4], f16)), {}) +cnt: 1, ((T([1406, 1, 4], i32), T([1, 3, 4], f16)), {}) +cnt: 1, ((T([361, 1, 4], i32), T([1, 3, 4], f16)), {}) +cnt: 2, ((T([1438452], f16, stride=(4,)), T([1438452], f16)), {}) +cnt: 4, ((T([1438452, 1], f16), T([1438452, 1], f16)), {}) +cnt: 1, ((T([4, 1000], i64), 0), {}) +cnt: 1, ((T([4, 1000], i64), 269952), {}) +cnt: 1, ((T([4, 1000], i64), 337440), {}) +cnt: 1, ((T([4, 1000], i64), 354312), {}) +cnt: 1, ((T([4, 1000], i64), 358530), {}) +cnt: 2, ((T([0], f32), 4), {}) +cnt: 2, ((T([0], f32), T([], f32)), {}) +cnt: 18, ((T([0], f16), T([0], f16)), {}) +cnt: 2, ((T([0, 91], f16), T([0, 1], f16)), {}) +cnt: 6, ((T([0, 91], f16), T([0, 91], f16)), {}) +cnt: 4, ((T([], f16), 0), {}) +cnt: 4, ((T([], f16), T([], f32)), {}) +cnt: 8, ((T([], f32), T([], f16)), {}) +cnt: 1, ((T([], f32), 0), {}) +cnt: 3, ((T([], f32), T([], f32)), {}) +cnt: 7, ((T([0, 364], f16), T([0, 364], f16)), {}) +cnt: 1, ((T([0, 1024], f16), T([0, 1024], f16)), {}) +cnt: 1, ((T([4, 256, 37, 38], f16), T([4, 256, 37, 38], f16)), {}) +cnt: 2, ((T([4, 2048, 37, 38], f16), T([4, 2048, 37, 38], f16)), {}) +cnt: 7, ((T([4, 1024, 74, 76], f16), T([4, 1024, 74, 76], f16)), {}) +cnt: 5, ((T([4, 512, 148, 152], f16), T([4, 
512, 148, 152], f16)), {}) +Operator: aten.add_.Tensor +cnt: 3, ((T([4, 256, 296, 304], f16), T([4, 256, 296, 304], f16)), {}) +cnt: 4, ((T([4, 512, 148, 152], f16), T([4, 512, 148, 152], f16)), {}) +cnt: 6, ((T([4, 1024, 74, 76], f16), T([4, 1024, 74, 76], f16)), {}) +cnt: 3, ((T([4, 2048, 37, 38], f16), T([4, 2048, 37, 38], f16)), {}) +Operator: aten.addmm.default +cnt: 1, ((T([1024], f16), T([0, 12544], f16), T([12544, 1024], f16, stride=(1, 12544))), {}) +cnt: 1, ((T([1024], f16), T([0, 1024], f16), T([1024, 1024], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([91], f16), T([0, 1024], f16), T([1024, 91], f16, stride=(1, 1024))), {}) +cnt: 1, ((T([364], f16), T([0, 1024], f16), T([1024, 364], f16, stride=(1, 1024))), {}) +Operator: aten.bitwise_and.Tensor +cnt: 4, ((T([5000], b8), T([5000], b8)), {}) +cnt: 4, ((T([0], b8), T([0], b8)), {}) +Operator: aten.cat.default +cnt: 4, (([T([269952, 4], f16), T([67488, 4], f16), T([16872, 4], f16), T([4218, 4], f16), T([1083, 4], f16)],), {}) +cnt: 1, (([T([4, 269952, 1], f16), T([4, 67488, 1], f16), T([4, 16872, 1], f16), T([4, 4218, 1], f16), T([4, 1083, 1], f16)], 1), {}) +cnt: 1, (([T([4, 269952, 4], f16), T([4, 67488, 4], f16), T([4, 16872, 4], f16), T([4, 4218, 4], f16), T([4, 1083, 4], f16)], 1), {}) +cnt: 1, (([T([359613, 4], f16), T([359613, 4], f16), T([359613, 4], f16), T([359613, 4], f16)],), {}) +cnt: 1, (([T([269952], i64), T([67488], i64), T([16872], i64), T([4218], i64), T([1083], i64)],), {}) +cnt: 1, (([T([4, 1000], i64), T([4, 1000], i64), T([4, 1000], i64), T([4, 1000], i64), T([4, 1000], i64)], 1), {}) +cnt: 3, (([T([0, 4], f16), T([0, 4], f16), T([0, 4], f16), T([0, 4], f16)],), {}) +cnt: 2, (([T([0, 1], f16), T([0, 1], f16), T([0, 1], f16), T([0, 1], f16)],), {}) +cnt: 2, (([T([0, 1], f16), T([0, 4], f16)], 1), {}) +cnt: 2, (([T([0], f32), T([0], f32), T([0], f32), T([0], f32)],), {}) +cnt: 1, (([T([0], i64), T([0], i64), T([0], i64), T([0], i64)],), {}) +cnt: 1, (([T([0, 91], f16), T([0, 91], f16), 
T([0, 91], f16), T([0, 91], f16)],), {}) +cnt: 1, (([T([0, 364], f16), T([0, 364], f16), T([0, 364], f16), T([0, 364], f16)],), {}) +Operator: aten.clamp.default +cnt: 2, ((T([1438452, 1], f16), None, 4.135166556742356), {}) +cnt: 1, ((T([5000, 2], f16, stride=(4, 2)), 0, 1199), {}) +cnt: 2, ((T([5000, 2], f16, stride=(4, 2)), 0, 799), {}) +cnt: 3, ((T([5000, 2], f16, stride=(4, 2)), 0, 800), {}) +cnt: 1, ((T([5000, 2], f16, stride=(4, 2)), 0, 1155), {}) +cnt: 1, ((T([5000, 2], f16, stride=(4, 2)), 0, 1115), {}) +cnt: 2, ((T([0], f32), 2, 5), {}) +cnt: 2, ((T([0, 91], f16), None, 4.135166556742356), {}) +cnt: 1, ((T([0, 182], f16), 0, 1199), {}) +cnt: 2, ((T([0, 182], f16), 0, 799), {}) +cnt: 3, ((T([0, 182], f16), 0, 800), {}) +cnt: 1, ((T([0, 182], f16), 0, 1155), {}) +cnt: 1, ((T([0, 182], f16), 0, 1115), {}) +Operator: aten.constant_pad_nd.default +cnt: 4, ((T([0, 1, 28, 28], f16), [1, 1, 1, 1], 0.0), {}) +Operator: aten.convolution.default +cnt: 1, ((T([4, 3, 1184, 1216], f16), T([64, 3, 7, 7], f16), None, [2, 2], [3, 3], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 64, 296, 304], f16), T([64, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([4, 64, 296, 304], f16), T([64, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([4, 64, 296, 304], f16), T([256, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([4, 256, 296, 304], f16), T([64, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 296, 304], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 128, 296, 304], f16), T([128, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 4, ((T([4, 128, 148, 152], f16), T([512, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 296, 304], f16), T([512, 256, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 
0], 1), {}) +cnt: 3, ((T([4, 512, 148, 152], f16), T([128, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([4, 128, 148, 152], f16), T([128, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 512, 148, 152], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 148, 152], f16), T([256, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 6, ((T([4, 256, 74, 76], f16), T([1024, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 512, 148, 152], f16), T([1024, 512, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([4, 1024, 74, 76], f16), T([256, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 5, ((T([4, 256, 74, 76], f16), T([256, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 1024, 74, 76], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 512, 74, 76], f16), T([512, 512, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 3, ((T([4, 512, 37, 38], f16), T([2048, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 1024, 74, 76], f16), T([2048, 1024, 1, 1], f16), None, [2, 2], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([4, 2048, 37, 38], f16), T([512, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([4, 512, 37, 38], f16), T([512, 512, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 2048, 37, 38], f16), T([256, 2048, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([4, 256, 37, 38], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 1024, 74, 76], f16), T([256, 1024, 1, 1], f16), T([256], f16), [1, 1], 
[0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([4, 256, 74, 76], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 512, 148, 152], f16), T([256, 512, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([4, 256, 148, 152], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 296, 304], f16), T([256, 256, 1, 1], f16), T([256], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([4, 256, 296, 304], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 296, 304], f16), T([3, 256, 1, 1], f16), T([3], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 296, 304], f16), T([12, 256, 1, 1], f16), T([12], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 148, 152], f16), T([3, 256, 1, 1], f16), T([3], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 148, 152], f16), T([12, 256, 1, 1], f16), T([12], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 74, 76], f16), T([3, 256, 1, 1], f16), T([3], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 74, 76], f16), T([12, 256, 1, 1], f16), T([12], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 37, 38], f16), T([3, 256, 1, 1], f16), T([3], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 37, 38], f16), T([12, 256, 1, 1], f16), T([12], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 19, 19], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 19, 19], f16), T([3, 256, 1, 1], f16), T([3], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([4, 256, 19, 19], f16), T([12, 256, 1, 1], f16), T([12], f16), [1, 1], [0, 0], [1, 
1], False, [0, 0], 1), {}) +cnt: 4, ((T([0, 256, 14, 14], f16), T([256, 256, 3, 3], f16), T([256], f16), [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([0, 256, 14, 14], f16), T([256, 256, 2, 2], f16), T([256], f16), [2, 2], [0, 0], [1, 1], True, [0, 0], 1), {}) +cnt: 1, ((T([0, 256, 28, 28], f16), T([91, 256, 1, 1], f16), T([91], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([4, 256, 296, 304], f16), T([4, 256, 296, 304], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([4, 256, 296, 304], f16), T([4, 256, 296, 304], f16), T([256, 256, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [False, True, True]), {}) +cnt: 1, ((T([4, 256, 148, 152], f16), T([4, 256, 148, 152], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([4, 256, 148, 152], f16), T([4, 512, 148, 152], f16), T([256, 512, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([4, 256, 74, 76], f16), T([4, 256, 74, 76], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([4, 256, 74, 76], f16), T([4, 1024, 74, 76], f16), T([256, 1024, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([4, 256, 37, 38], f16), T([4, 256, 37, 38], f16), T([256, 256, 3, 3], f16), [256], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 1, ((T([4, 256, 37, 38], f16), T([4, 2048, 37, 38], f16), T([256, 2048, 1, 1], f16), [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 3, ((T([4, 2048, 37, 38], f16), T([4, 512, 37, 38], f16), T([2048, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([4, 512, 37, 38], f16), T([4, 512, 37, 38], f16), 
T([512, 512, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([4, 512, 37, 38], f16), T([4, 2048, 37, 38], f16), T([512, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 2048, 37, 38], f16), T([4, 1024, 74, 76], f16), T([2048, 1024, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 512, 37, 38], f16), T([4, 512, 74, 76], f16), T([512, 512, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 512, 74, 76], f16), T([4, 1024, 74, 76], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 6, ((T([4, 1024, 74, 76], f16), T([4, 256, 74, 76], f16), T([1024, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([4, 256, 74, 76], f16), T([4, 256, 74, 76], f16), T([256, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 5, ((T([4, 256, 74, 76], f16), T([4, 1024, 74, 76], f16), T([256, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 1024, 74, 76], f16), T([4, 512, 148, 152], f16), T([1024, 512, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 256, 74, 76], f16), T([4, 256, 148, 152], f16), T([256, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 256, 148, 152], f16), T([4, 512, 148, 152], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 4, ((T([4, 512, 148, 152], f16), T([4, 128, 148, 152], f16), T([512, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([4, 128, 148, 152], f16), T([4, 128, 148, 152], f16), T([128, 128, 3, 3], f16), 
[0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 3, ((T([4, 128, 148, 152], f16), T([4, 512, 148, 152], f16), T([128, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 512, 148, 152], f16), T([4, 256, 296, 304], f16), T([512, 256, 1, 1], f16), [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +cnt: 1, ((T([4, 128, 148, 152], f16), T([4, 128, 296, 304], f16), T([128, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([4, 128, 296, 304], f16), T([4, 256, 296, 304], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default +cnt: 1, ((T([3, 799, 1199], f16, stride=(1439744, 1216, 1)), T([3, 799, 1199], f16)), {}) +cnt: 1, ((T([3, 800, 800], f16, stride=(1439744, 1216, 1)), T([3, 800, 800], f16)), {}) +cnt: 1, ((T([3, 1155, 800], f16, stride=(1439744, 1216, 1)), T([3, 1155, 800], f16)), {}) +cnt: 1, ((T([3, 799, 1115], f16, stride=(1439744, 1216, 1)), T([3, 799, 1115], f16)), {}) +cnt: 16, ((T([0], f16), T([0], f16)), {}) +Operator: aten.div.Tensor +cnt: 1, ((T([3, 427, 640], f16, stride=(1, 1920, 3)), T([3, 1, 1], f16)), {}) +cnt: 1, ((T([3, 612, 612], f16, stride=(1, 1836, 3)), T([3, 1, 1], f16)), {}) +cnt: 1, ((T([3, 640, 443], f16, stride=(1, 1329, 3)), T([3, 1, 1], f16)), {}) +cnt: 1, ((T([3, 459, 640], f16, stride=(1, 1920, 3)), T([3, 1, 1], f16)), {}) +cnt: 4, ((T([1438452, 1], f16, stride=(4, 4)), 1.0), {}) +cnt: 2, ((T([0], f32), 224), {}) +cnt: 4, ((T([0, 91], f16), 10.0), {}) +cnt: 4, ((T([0, 91], f16), 5.0), {}) +cnt: 8, ((T([], f32), T([], f32)), {}) +cnt: 20, ((T([], f16), 0), {}) +cnt: 4, ((T([], i64), 0), {}) +cnt: 10, ((T([], f32), 4), {}) +Operator: aten.eq.Scalar +cnt: 2, ((T([0], i64), 0), {}) +cnt: 2, ((T([0], i64), 1), {}) +cnt: 2, ((T([0], i64), 2), {}) +cnt: 2, ((T([0], i64), 3), {}) +Operator: 
aten.exp.default +cnt: 2, ((T([1438452, 1], f16),), {}) +cnt: 2, ((T([0, 91], f16),), {}) +Operator: aten.fill_.Scalar +cnt: 2, ((T([], i64), 4), {}) +cnt: 2, ((T([], i64), 8), {}) +cnt: 2, ((T([], i64), 16), {}) +cnt: 2, ((T([], i64), 32), {}) +cnt: 1, ((T([], i64), 62), {}) +cnt: 1, ((T([], i64), 64), {}) +Operator: aten.floor.default +cnt: 2, ((T([0], f32),), {}) +Operator: aten.ge.Scalar +cnt: 8, ((T([5000], f16), 0.001), {}) +cnt: 4, ((T([0], f16), 0.0), {}) +cnt: 8, ((T([0], f16), 0.01), {}) +cnt: 8, ((T([0, 182], f16), 0), {}) +Operator: aten.gt.Scalar +cnt: 4, ((T([0], f16), 0.05), {}) +Operator: aten.index.Tensor +cnt: 1, ((T([4, 359613], f16), [T([4, 1], i64), T([4, 5000], i64)]), {}) +cnt: 1, ((T([4, 359613], i64, stride=(0, 1)), [T([4, 1], i64), T([4, 5000], i64)]), {}) +cnt: 1, ((T([4, 359613, 4], f16), [T([4, 1], i64), T([4, 5000], i64)]), {}) +cnt: 4, ((T([5000, 4], f16), [T([0], i64)]), {}) +cnt: 4, ((T([5000], f16), [T([0], i64)]), {}) +cnt: 4, ((T([5000], i64), [T([0], i64)]), {}) +cnt: 20, ((T([0, 4], f16), [T([0], i64)]), {}) +cnt: 20, ((T([0], f16), [T([0], i64)]), {}) +cnt: 16, ((T([0], i64), [T([0], i64)]), {}) +cnt: 8, ((T([0, 5], f16), [T([0], i64)]), {}) +cnt: 1, ((T([0, 91, 28, 28], f16), [T([0], i64), T([0], i64)]), {}) +cnt: 4, ((T([0, 256, 7, 7], f16), [T([0], i64)]), {}) +Operator: aten.index_put.default +cnt: 3, ((T([0, 256, 7, 7], f16), [T([0], i64)], T([0, 256, 7, 7], f16)), {}) +Operator: aten.index_put_.default +cnt: 4, ((T([0, 256, 7, 7], f16), [T([0], i64)], T([0, 256, 7, 7], f16)), {}) +cnt: 4, ((T([0, 256, 14, 14], f16), [T([0], i64)], T([0, 256, 14, 14], f16)), {}) +Operator: aten.le.Scalar +cnt: 2, ((T([0, 182], f16), 799), {}) +cnt: 1, ((T([0, 182], f16), 1115), {}) +cnt: 1, ((T([0, 182], f16), 1155), {}) +cnt: 3, ((T([0, 182], f16), 800), {}) +cnt: 1, ((T([0, 182], f16), 1199), {}) +cnt: 2, ((T([0, 91], f16), 4.135166556742356), {}) +Operator: aten.log2.default +cnt: 20, ((T([], f32),), {}) +cnt: 2, ((T([0], f32),), {}) 
+Operator: aten.logical_and_.default +cnt: 8, ((T([0, 182], b8), T([0, 182], b8)), {}) +Operator: aten.max.default +cnt: 4, ((T([2], i64),), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([4, 64, 592, 608], f16), [3, 3], [2, 2], [1, 1]), {}) +cnt: 1, ((T([4, 256, 37, 38], f16), [1, 1], [2, 2]), {}) +Operator: aten.min.default +cnt: 4, ((T([2], i64),), {}) +Operator: aten.minimum.default +cnt: 4, ((T([], f32), T([], f32)), {}) +Operator: aten.mm.default +cnt: 1, ((T([0, 364], f16), T([364, 1024], f16)), {}) +cnt: 1, ((T([364, 0], f16), T([0, 1024], f16)), {}) +cnt: 1, ((T([0, 91], f16), T([91, 1024], f16)), {}) +cnt: 1, ((T([91, 0], f16), T([0, 1024], f16)), {}) +cnt: 1, ((T([0, 1024], f16), T([1024, 1024], f16)), {}) +cnt: 1, ((T([1024, 0], f16), T([0, 1024], f16)), {}) +cnt: 1, ((T([0, 1024], f16), T([1024, 12544], f16)), {}) +cnt: 1, ((T([1024, 0], f16), T([0, 12544], f16)), {}) +Operator: aten.mul.Tensor +cnt: 4, ((T([], f32), 800.0), {}) +cnt: 4, ((T([], f32), 1333.0), {}) +cnt: 14, ((T([1, 64, 1, 1], f16), T([1, 64, 1, 1], f16)), {}) +cnt: 1, ((T([4, 64, 592, 608], f16), T([1, 64, 1, 1], f16)), {}) +cnt: 6, ((T([4, 64, 296, 304], f16), T([1, 64, 1, 1], f16)), {}) +cnt: 32, ((T([1, 256, 1, 1], f16), T([1, 256, 1, 1], f16)), {}) +cnt: 4, ((T([4, 256, 296, 304], f16), T([1, 256, 1, 1], f16)), {}) +cnt: 16, ((T([1, 128, 1, 1], f16), T([1, 128, 1, 1], f16)), {}) +cnt: 2, ((T([4, 128, 296, 304], f16), T([1, 128, 1, 1], f16)), {}) +cnt: 14, ((T([4, 128, 148, 152], f16), T([1, 128, 1, 1], f16)), {}) +cnt: 22, ((T([1, 512, 1, 1], f16), T([1, 512, 1, 1], f16)), {}) +cnt: 10, ((T([4, 512, 148, 152], f16), T([1, 512, 1, 1], f16)), {}) +cnt: 2, ((T([4, 256, 148, 152], f16), T([1, 256, 1, 1], f16)), {}) +cnt: 22, ((T([4, 256, 74, 76], f16), T([1, 256, 1, 1], f16)), {}) +cnt: 14, ((T([1, 1024, 1, 1], f16), T([1, 1024, 1, 1], f16)), {}) +cnt: 14, ((T([4, 1024, 74, 76], f16), T([1, 1024, 1, 1], f16)), {}) +cnt: 2, ((T([4, 512, 74, 76], f16), T([1, 512, 1, 1], 
f16)), {}) +cnt: 10, ((T([4, 512, 37, 38], f16), T([1, 512, 1, 1], f16)), {}) +cnt: 8, ((T([1, 2048, 1, 1], f16), T([1, 2048, 1, 1], f16)), {}) +cnt: 8, ((T([4, 2048, 37, 38], f16), T([1, 2048, 1, 1], f16)), {}) +cnt: 1, ((T([304], i32), T([], i64)), {}) +cnt: 1, ((T([296], i32), T([], i64)), {}) +cnt: 1, ((T([152], i32), T([], i64)), {}) +cnt: 1, ((T([148], i32), T([], i64)), {}) +cnt: 1, ((T([76], i32), T([], i64)), {}) +cnt: 1, ((T([74], i32), T([], i64)), {}) +cnt: 1, ((T([38], i32), T([], i64)), {}) +cnt: 1, ((T([37], i32), T([], i64)), {}) +cnt: 2, ((T([19], i32), T([], i64)), {}) +cnt: 2, ((T([1438452], f16), 0.5), {}) +cnt: 4, ((T([1438452, 1], f16), T([1438452, 1], f16)), {}) +cnt: 2, ((T([], f16), T([1438452, 1], f16)), {}) +cnt: 8, ((T([0], f32), T([0], f32)), {}) +cnt: 18, ((T([0], f16), 0.5), {}) +cnt: 8, ((T([0, 91], f16), T([0, 1], f16)), {}) +cnt: 2, ((T([], f16), T([0, 91], f16)), {}) +cnt: 32, ((T([0], f16), T([], f32)), {}) +cnt: 2, ((T([0, 91], f16), T([], f16)), {}) +cnt: 2, ((T([0, 91], f16), T([0, 91], f16)), {}) +Operator: aten.mul_.Tensor +cnt: 8, ((T([0], f16), 1.0714285714285714), {}) +Operator: aten.neg.default +cnt: 2, ((T([0, 91], f16),), {}) +Operator: aten.new_empty.default +cnt: 1, ((T([0, 1, 30, 30], f16), [0, 1, 427, 640]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 1, ((T([0, 1, 30, 30], f16), [0, 1, 612, 612]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 1, ((T([0, 1, 30, 30], f16), [0, 1, 640, 443]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +cnt: 1, ((T([0, 1, 30, 30], f16), [0, 1, 459, 640]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_full.default +cnt: 1, ((T([3, 799, 1199], f16), [4, 3, 1184, 1216], 0), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False}) +Operator: aten.new_zeros.default +cnt: 12, ((T([0], f16), [0]), 
{'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 12, ((T([0, 4], f16), [0, 4]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'}) +Operator: aten.nonzero.default +cnt: 4, ((T([5000], b8),), {}) +cnt: 20, ((T([0], b8),), {}) +Operator: aten.reciprocal.default +cnt: 8, ((T([], f32),), {}) +Operator: aten.relu.default +cnt: 2, ((T([0, 1024], f16),), {}) +Operator: aten.relu_.default +cnt: 1, ((T([4, 64, 592, 608], f16),), {}) +cnt: 6, ((T([4, 64, 296, 304], f16),), {}) +cnt: 4, ((T([4, 256, 296, 304], f16),), {}) +cnt: 1, ((T([4, 128, 296, 304], f16),), {}) +cnt: 7, ((T([4, 128, 148, 152], f16),), {}) +cnt: 4, ((T([4, 512, 148, 152], f16),), {}) +cnt: 2, ((T([4, 256, 148, 152], f16),), {}) +cnt: 12, ((T([4, 256, 74, 76], f16),), {}) +cnt: 6, ((T([4, 1024, 74, 76], f16),), {}) +cnt: 1, ((T([4, 512, 74, 76], f16),), {}) +cnt: 5, ((T([4, 512, 37, 38], f16),), {}) +cnt: 3, ((T([4, 2048, 37, 38], f16),), {}) +cnt: 1, ((T([4, 256, 37, 38], f16),), {}) +cnt: 1, ((T([4, 256, 19, 19], f16),), {}) +cnt: 4, ((T([0, 256, 14, 14], f16),), {}) +cnt: 1, ((T([0, 256, 28, 28], f16),), {}) +Operator: aten.round.default +cnt: 16, ((T([], f32),), {}) +Operator: aten.rsqrt.default +cnt: 7, ((T([1, 64, 1, 1], f16),), {}) +cnt: 16, ((T([1, 256, 1, 1], f16),), {}) +cnt: 8, ((T([1, 128, 1, 1], f16),), {}) +cnt: 11, ((T([1, 512, 1, 1], f16),), {}) +cnt: 7, ((T([1, 1024, 1, 1], f16),), {}) +cnt: 4, ((T([1, 2048, 1, 1], f16),), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([4, 5000], f16),), {}) +cnt: 1, ((T([0, 91, 28, 28], f16),), {}) +Operator: aten.slice_backward.default +cnt: 4, ((T([0, 90], f16), [0, 91], 1, 1, 9223372036854775807, 1), {}) +cnt: 4, ((T([0, 91], f16), [0, 91], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([0, 363], f16), [0, 364], 1, 1, 9223372036854775807, 1), {}) +cnt: 8, ((T([0, 364], f16), [0, 364], 0, 0, 9223372036854775807, 1), {}) +cnt: 4, ((T([0, 182], f16), [0, 364], 1, 1, 9223372036854775807, 2), {}) +cnt: 4, ((T([0, 182], f16), [0, 
364], 1, 0, 9223372036854775807, 2), {}) +cnt: 1, ((T([0, 91], f16), [0, 364], 1, 3, 9223372036854775807, 4), {}) +cnt: 1, ((T([0, 91], f16), [0, 364], 1, 2, 9223372036854775807, 4), {}) +cnt: 1, ((T([0, 91], f16), [0, 364], 1, 1, 9223372036854775807, 4), {}) +cnt: 1, ((T([0, 91], f16), [0, 364], 1, 0, 9223372036854775807, 4), {}) +Operator: aten.split_with_sizes.default +cnt: 1, ((T([4, 359613], f16), [269952, 67488, 16872, 4218, 1083], 1), {}) +cnt: 1, ((T([0, 364], f16), [0, 0, 0, 0]), {}) +cnt: 1, ((T([0, 91], f16), [0, 0, 0, 0]), {}) +cnt: 1, ((T([0, 1, 28, 28], f16), [0, 0, 0, 0]), {}) +Operator: aten.sqrt.default +cnt: 2, ((T([0], f32),), {}) +Operator: aten.stack.default +cnt: 1, (([T([89984], i32), T([89984], i32), T([89984], i32), T([89984], i32)], 1), {}) +cnt: 1, (([T([22496], i32), T([22496], i32), T([22496], i32), T([22496], i32)], 1), {}) +cnt: 1, (([T([5624], i32), T([5624], i32), T([5624], i32), T([5624], i32)], 1), {}) +cnt: 1, (([T([1406], i32), T([1406], i32), T([1406], i32), T([1406], i32)], 1), {}) +cnt: 1, (([T([361], i32), T([361], i32), T([361], i32), T([361], i32)], 1), {}) +cnt: 1, (([T([1438452, 1], f16), T([1438452, 1], f16), T([1438452, 1], f16), T([1438452, 1], f16)], 2), {}) +cnt: 4, (([T([5000, 2], f16), T([5000, 2], f16)], 2), {}) +cnt: 1, (([T([0, 91], f16), T([0, 91], f16), T([0, 91], f16), T([0, 91], f16)], 2), {}) +cnt: 4, (([T([0, 182], f16), T([0, 182], f16)], 2), {}) +cnt: 8, (([T([0], f16), T([0], f16), T([0], f16), T([0], f16)], 1), {}) +Operator: aten.sub.Tensor +cnt: 1, ((T([3, 427, 640], f16, stride=(1, 1920, 3)), T([3, 1, 1], f16)), {}) +cnt: 1, ((T([3, 612, 612], f16, stride=(1, 1836, 3)), T([3, 1, 1], f16)), {}) +cnt: 1, ((T([3, 640, 443], f16, stride=(1, 1329, 3)), T([3, 1, 1], f16)), {}) +cnt: 1, ((T([3, 459, 640], f16, stride=(1, 1920, 3)), T([3, 1, 1], f16)), {}) +cnt: 7, ((T([1, 64, 1, 1], f16), T([1, 64, 1, 1], f16)), {}) +cnt: 16, ((T([1, 256, 1, 1], f16), T([1, 256, 1, 1], f16)), {}) +cnt: 8, ((T([1, 128, 1, 
1], f16), T([1, 128, 1, 1], f16)), {}) +cnt: 11, ((T([1, 512, 1, 1], f16), T([1, 512, 1, 1], f16)), {}) +cnt: 7, ((T([1, 1024, 1, 1], f16), T([1, 1024, 1, 1], f16)), {}) +cnt: 4, ((T([1, 2048, 1, 1], f16), T([1, 2048, 1, 1], f16)), {}) +cnt: 2, ((T([1438452], f16, stride=(4,)), T([1438452], f16, stride=(4,))), {}) +cnt: 2, ((T([1438452, 1], f16), T([1438452, 1], f16)), {}) +cnt: 8, ((T([5000], f16, stride=(4,)), T([5000], f16, stride=(4,))), {}) +cnt: 16, ((T([0], f32), T([0], f32)), {}) +cnt: 2, ((T([0], i64), 2), {}) +cnt: 26, ((T([0], f16), T([0], f16)), {}) +cnt: 2, ((T([0, 91], f16), T([0, 91], f16)), {}) +Operator: aten.sum.SymInt +cnt: 1, ((T([0, 364], f16), [0], True), {}) +cnt: 1, ((T([0, 91], f16), [0], True), {}) +cnt: 2, ((T([0, 1024], f16), [0], True), {}) +Operator: aten.sum.default +cnt: 4, ((T([0, 4], f16),), {}) +cnt: 4, ((T([0], i64),), {}) +cnt: 4, ((T([0], f16),), {}) +cnt: 1, ((T([0, 1, 427, 640], f16),), {}) +cnt: 1, ((T([0, 1, 612, 612], f16),), {}) +cnt: 1, ((T([0, 1, 640, 443], f16),), {}) +cnt: 1, ((T([0, 1, 459, 640], f16),), {}) +Operator: aten.threshold_backward.default +cnt: 2, ((T([0, 1024], f16), T([0, 1024], f16), 0), {}) +cnt: 3, ((T([4, 2048, 37, 38], f16), T([4, 2048, 37, 38], f16), 0), {}) +cnt: 5, ((T([4, 512, 37, 38], f16), T([4, 512, 37, 38], f16), 0), {}) +cnt: 1, ((T([4, 512, 74, 76], f16), T([4, 512, 74, 76], f16), 0), {}) +cnt: 6, ((T([4, 1024, 74, 76], f16), T([4, 1024, 74, 76], f16), 0), {}) +cnt: 11, ((T([4, 256, 74, 76], f16), T([4, 256, 74, 76], f16), 0), {}) +cnt: 1, ((T([4, 256, 148, 152], f16), T([4, 256, 148, 152], f16), 0), {}) +cnt: 4, ((T([4, 512, 148, 152], f16), T([4, 512, 148, 152], f16), 0), {}) +cnt: 7, ((T([4, 128, 148, 152], f16), T([4, 128, 148, 152], f16), 0), {}) +cnt: 1, ((T([4, 128, 296, 304], f16), T([4, 128, 296, 304], f16), 0), {}) +Operator: aten.topk.default +cnt: 1, ((T([4, 269952], f16, stride=(359613, 1)), 1000, 1), {}) +cnt: 1, ((T([4, 67488], f16, stride=(359613, 1)), 1000, 1), {}) +cnt: 
1, ((T([4, 16872], f16, stride=(359613, 1)), 1000, 1), {}) +cnt: 1, ((T([4, 4218], f16, stride=(359613, 1)), 1000, 1), {}) +cnt: 1, ((T([4, 1083], f16, stride=(359613, 1)), 1000, 1), {}) +Operator: aten.unbind.int +cnt: 1, ((T([4, 5000, 4], f16),), {}) +cnt: 1, ((T([4, 5000], f16),), {}) +cnt: 1, ((T([4, 5000], i64),), {}) +cnt: 24, ((T([0, 1], i64), 1), {}) +cnt: 8, ((T([0, 4], f16), 1), {}) +cnt: 4, ((T([0, 182, 2], f16), 2), {}) +cnt: 1, ((T([0, 91, 4], f16), 2), {}) +Operator: aten.upsample_bilinear2d.vec +cnt: 1, ((T([1, 3, 427, 640], f16, stride=(3, 1, 1920, 3)), [799, 1199], False, None), {}) +cnt: 1, ((T([1, 3, 612, 612], f16, stride=(3, 1, 1836, 3)), [800, 800], False, None), {}) +cnt: 1, ((T([1, 3, 640, 443], f16, stride=(3, 1, 1329, 3)), [1155, 800], False, None), {}) +cnt: 1, ((T([1, 3, 459, 640], f16, stride=(3, 1, 1920, 3)), [799, 1115], False, None), {}) +Operator: aten.upsample_nearest2d.vec +cnt: 1, ((T([4, 256, 37, 38], f16), [74, 76], None), {}) +cnt: 1, ((T([4, 256, 74, 76], f16), [148, 152], None), {}) +cnt: 1, ((T([4, 256, 148, 152], f16), [296, 304], None), {}) +Operator: aten.upsample_nearest2d_backward.vec +cnt: 1, ((T([4, 256, 296, 304], f16), [296, 304], [4, 256, 148, 152], None), {}) +cnt: 1, ((T([4, 256, 148, 152], f16), [148, 152], [4, 256, 74, 76], None), {}) +cnt: 1, ((T([4, 256, 74, 76], f16), [74, 76], [4, 256, 37, 38], None), {}) +Operator: aten.where.self +cnt: 8, ((T([0, 182], b8), T([0, 182], f16), T([], f16)), {}) +cnt: 2, ((T([0, 91], b8), T([0, 91], f16), T([], f16)), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/yolov3_training.txt b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/yolov3_training.txt new file mode 100644 index 000000000..c8ad36838 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_logs/torchbench_train/yolov3_training.txt @@ -0,0 +1,261 @@ +Operator: aten._to_copy.default +cnt: 1, ((T([1, 1, 12, 16, 2], i64),), {'dtype': f32}) +cnt: 3, 
((T([3, 2], f32),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 3, ((T([1, 3, 1, 1, 2], f32),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 1, ((T([1, 1, 24, 32, 2], i64),), {'dtype': f32}) +cnt: 1, ((T([1, 1, 48, 64, 2], i64),), {'dtype': f32}) +cnt: 2, ((T([8, 3, 48, 64, 2], f16),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 2, ((T([8, 3, 48, 64, 2], f32),), {'dtype': f16}) +cnt: 2, ((T([8, 3, 24, 32, 2], f16),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 2, ((T([8, 3, 24, 32, 2], f32),), {'dtype': f16}) +cnt: 2, ((T([8, 3, 12, 16, 2], f16),), {'dtype': f32, 'layout': torch.strided, 'device': 'cuda'}) +cnt: 2, ((T([8, 3, 12, 16, 2], f32),), {'dtype': f16}) +Operator: aten._unsafe_view.default +cnt: 1, ((T([8, 3, 85, 48, 64], f16), [8, 255, 48, 64]), {}) +cnt: 1, ((T([8, 3, 85, 24, 32], f16), [8, 255, 24, 32]), {}) +cnt: 1, ((T([8, 3, 85, 12, 16], f16), [8, 255, 12, 16]), {}) +Operator: aten.add.Tensor +cnt: 2, ((T([8, 64, 192, 256], f16), T([8, 64, 192, 256], f16)), {}) +cnt: 4, ((T([8, 128, 96, 128], f16), T([8, 128, 96, 128], f16)), {}) +cnt: 16, ((T([8, 256, 48, 64], f16), T([8, 256, 48, 64], f16)), {}) +cnt: 16, ((T([8, 512, 24, 32], f16), T([8, 512, 24, 32], f16)), {}) +cnt: 8, ((T([8, 1024, 12, 16], f16), T([8, 1024, 12, 16], f16)), {}) +cnt: 1, ((T([8, 3, 12, 16, 2], f16), T([1, 1, 12, 16, 2], f32)), {}) +cnt: 1, ((T([8, 3, 24, 32, 2], f16), T([1, 1, 24, 32, 2], f32)), {}) +cnt: 1, ((T([8, 3, 48, 64, 2], f16), T([1, 1, 48, 64, 2], f32)), {}) +cnt: 2, ((T([], f16), 0), {}) +cnt: 3, ((T([], f16), T([], f16)), {}) +cnt: 3, ((T([8, 3, 48, 64, 85], f16), T([8, 3, 48, 64, 85], f16)), {}) +cnt: 1, ((T([8, 3, 48, 64, 85], f16, stride=(0, 0, 0, 0, 0)), T([8, 3, 48, 64, 85], f16)), {}) +cnt: 3, ((T([8, 3, 24, 32, 85], f16), T([8, 3, 24, 32, 85], f16)), {}) +cnt: 1, ((T([8, 3, 24, 32, 85], f16, stride=(0, 0, 0, 0, 0)), T([8, 3, 24, 32, 85], f16)), {}) +cnt: 1, ((T([8, 256, 24, 32], 
f16), T([8, 256, 24, 32], f16)), {}) +cnt: 3, ((T([8, 3, 12, 16, 85], f16), T([8, 3, 12, 16, 85], f16)), {}) +cnt: 1, ((T([8, 3, 12, 16, 85], f16, stride=(0, 0, 0, 0, 0)), T([8, 3, 12, 16, 85], f16)), {}) +cnt: 3, ((T([8, 512, 12, 16], f16), T([8, 512, 12, 16], f16)), {}) +cnt: 1, ((T([8, 512, 12, 16], f16, stride=(393216, 192, 16, 1)), T([8, 512, 12, 16], f16)), {}) +cnt: 1, ((T([8, 512, 24, 32], f16, stride=(589824, 768, 32, 1)), T([8, 512, 24, 32], f16)), {}) +cnt: 1, ((T([8, 256, 48, 64], f16, stride=(1179648, 3072, 64, 1)), T([8, 256, 48, 64], f16)), {}) +Operator: aten.cat.default +cnt: 1, (([T([8, 512, 12, 16], f16), T([8, 512, 12, 16], f16), T([8, 512, 12, 16], f16), T([8, 512, 12, 16], f16)], 1), {}) +cnt: 1, (([T([8, 256, 24, 32], f16), T([8, 512, 24, 32], f16)], 1), {}) +cnt: 1, (([T([8, 128, 48, 64], f16), T([8, 256, 48, 64], f16)], 1), {}) +cnt: 1, (([T([8, 576, 85], f16), T([8, 2304, 85], f16), T([8, 9216, 85], f16)], 1), {}) +Operator: aten.clone.default +cnt: 1, ((T([8, 3, 384, 512], f16),), {}) +cnt: 1, ((T([8, 3, 12, 16, 85], f16),), {}) +cnt: 1, ((T([8, 3, 24, 32, 85], f16),), {}) +cnt: 1, ((T([8, 3, 48, 64, 85], f16),), {}) +Operator: aten.convolution.default +cnt: 1, ((T([8, 3, 384, 512], f16), T([32, 3, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 32, 384, 512], f16), T([64, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 64, 192, 256], f16), T([32, 64, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 32, 192, 256], f16), T([64, 32, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 64, 192, 256], f16), T([128, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([8, 128, 96, 128], f16), T([64, 128, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 2, ((T([8, 64, 96, 128], f16), T([128, 64, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), 
{}) +cnt: 1, ((T([8, 128, 96, 128], f16), T([256, 128, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 10, ((T([8, 256, 48, 64], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 11, ((T([8, 128, 48, 64], f16), T([256, 128, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 256, 48, 64], f16), T([512, 256, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 10, ((T([8, 512, 24, 32], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 11, ((T([8, 256, 24, 32], f16), T([512, 256, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 512, 24, 32], f16), T([1024, 512, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([8, 1024, 12, 16], f16), T([512, 1024, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 7, ((T([8, 512, 12, 16], f16), T([1024, 512, 3, 3], f16), None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 2048, 12, 16], f16), T([512, 2048, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 1024, 12, 16], f16), T([255, 1024, 1, 1], f16), T([255], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 512, 12, 16], f16), T([256, 512, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 768, 24, 32], f16), T([256, 768, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 512, 24, 32], f16), T([255, 512, 1, 1], f16), T([255], f16), [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 256, 24, 32], f16), T([128, 256, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 384, 48, 64], f16), T([128, 384, 1, 1], f16), None, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), {}) +cnt: 1, ((T([8, 256, 48, 64], f16), T([255, 256, 1, 1], f16), T([255], f16), [1, 1], [0, 0], [1, 1], False, 
[0, 0], 1), {}) +Operator: aten.convolution_backward.default +cnt: 1, ((T([8, 255, 48, 64], f16), T([8, 256, 48, 64], f16), T([255, 256, 1, 1], f16), [255], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 11, ((T([8, 256, 48, 64], f16), T([8, 128, 48, 64], f16), T([256, 128, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 10, ((T([8, 128, 48, 64], f16), T([8, 256, 48, 64], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 128, 48, 64], f16), T([8, 384, 48, 64], f16), T([128, 384, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 128, 24, 32], f16), T([8, 256, 24, 32], f16), T([128, 256, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 255, 24, 32], f16), T([8, 512, 24, 32], f16), T([255, 512, 1, 1], f16), [255], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 11, ((T([8, 512, 24, 32], f16), T([8, 256, 24, 32], f16), T([512, 256, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 10, ((T([8, 256, 24, 32], f16), T([8, 512, 24, 32], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 256, 24, 32], f16), T([8, 768, 24, 32], f16), T([256, 768, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 256, 12, 16], f16), T([8, 512, 12, 16], f16), T([256, 512, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 255, 12, 16], f16), T([8, 1024, 12, 16], f16), T([255, 1024, 1, 1], f16), [255], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, True]), {}) +cnt: 7, ((T([8, 1024, 12, 16], f16), T([8, 512, 12, 16], f16), T([1024, 512, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, 
True, False]), {}) +cnt: 7, ((T([8, 512, 12, 16], f16), T([8, 1024, 12, 16], f16), T([512, 1024, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 512, 12, 16], f16), T([8, 2048, 12, 16], f16), T([512, 2048, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 1024, 12, 16], f16), T([8, 512, 24, 32], f16), T([1024, 512, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 512, 24, 32], f16), T([8, 256, 48, 64], f16), T([512, 256, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 256, 48, 64], f16), T([8, 128, 96, 128], f16), T([256, 128, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([8, 128, 96, 128], f16), T([8, 64, 96, 128], f16), T([128, 64, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 2, ((T([8, 64, 96, 128], f16), T([8, 128, 96, 128], f16), T([64, 128, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 128, 96, 128], f16), T([8, 64, 192, 256], f16), T([128, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 64, 192, 256], f16), T([8, 32, 192, 256], f16), T([64, 32, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 32, 192, 256], f16), T([8, 64, 192, 256], f16), T([32, 64, 1, 1], f16), [0], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 64, 192, 256], f16), T([8, 32, 384, 512], f16), T([64, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {}) +cnt: 1, ((T([8, 32, 384, 512], f16), T([8, 3, 384, 512], f16), T([32, 3, 3, 3], f16), [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {}) +Operator: aten.copy_.default 
+cnt: 1, ((T([8, 3, 384, 512], f16), T([8, 3, 384, 512], f16)), {}) +cnt: 2, ((T([8, 3, 12, 16, 2], f16, stride=(48960, 16320, 1360, 85, 1)), T([8, 3, 12, 16, 2], f32)), {}) +cnt: 1, ((T([8, 3, 12, 16, 4], f16, stride=(48960, 16320, 1360, 85, 1)), T([8, 3, 12, 16, 4], f16, stride=(48960, 16320, 1360, 85, 1))), {}) +cnt: 2, ((T([8, 3, 24, 32, 2], f16, stride=(195840, 65280, 2720, 85, 1)), T([8, 3, 24, 32, 2], f32)), {}) +cnt: 1, ((T([8, 3, 24, 32, 4], f16, stride=(195840, 65280, 2720, 85, 1)), T([8, 3, 24, 32, 4], f16, stride=(195840, 65280, 2720, 85, 1))), {}) +cnt: 2, ((T([8, 3, 48, 64, 2], f16, stride=(783360, 261120, 5440, 85, 1)), T([8, 3, 48, 64, 2], f32)), {}) +cnt: 1, ((T([8, 3, 48, 64, 4], f16, stride=(783360, 261120, 5440, 85, 1)), T([8, 3, 48, 64, 4], f16, stride=(783360, 261120, 5440, 85, 1))), {}) +cnt: 1, ((T([8, 3, 48, 64, 85], f16), T([8, 3, 48, 64, 85], f16, stride=(0, 0, 0, 0, 0))), {}) +cnt: 1, ((T([8, 3, 48, 64, 81], f16, stride=(783360, 261120, 5440, 85, 1)), T([8, 3, 48, 64, 81], f16)), {}) +cnt: 4, ((T([8, 3, 48, 64, 85], f16), T([8, 3, 48, 64, 85], f16)), {}) +cnt: 3, ((T([8, 3, 48, 64, 4], f16, stride=(783360, 261120, 5440, 85, 1)), T([8, 3, 48, 64, 4], f16)), {}) +cnt: 2, ((T([8, 3, 48, 64, 2], f16, stride=(783360, 261120, 5440, 85, 1)), T([8, 3, 48, 64, 2], f16)), {}) +cnt: 1, ((T([8, 3, 24, 32, 85], f16), T([8, 3, 24, 32, 85], f16, stride=(0, 0, 0, 0, 0))), {}) +cnt: 1, ((T([8, 3, 24, 32, 81], f16, stride=(195840, 65280, 2720, 85, 1)), T([8, 3, 24, 32, 81], f16)), {}) +cnt: 4, ((T([8, 3, 24, 32, 85], f16), T([8, 3, 24, 32, 85], f16)), {}) +cnt: 3, ((T([8, 3, 24, 32, 4], f16, stride=(195840, 65280, 2720, 85, 1)), T([8, 3, 24, 32, 4], f16)), {}) +cnt: 2, ((T([8, 3, 24, 32, 2], f16, stride=(195840, 65280, 2720, 85, 1)), T([8, 3, 24, 32, 2], f16)), {}) +cnt: 1, ((T([8, 3, 12, 16, 85], f16), T([8, 3, 12, 16, 85], f16, stride=(0, 0, 0, 0, 0))), {}) +cnt: 1, ((T([8, 3, 12, 16, 81], f16, stride=(48960, 16320, 1360, 85, 1)), T([8, 3, 12, 16, 81], 
f16)), {}) +cnt: 4, ((T([8, 3, 12, 16, 85], f16), T([8, 3, 12, 16, 85], f16)), {}) +cnt: 3, ((T([8, 3, 12, 16, 4], f16, stride=(48960, 16320, 1360, 85, 1)), T([8, 3, 12, 16, 4], f16)), {}) +cnt: 2, ((T([8, 3, 12, 16, 2], f16, stride=(48960, 16320, 1360, 85, 1)), T([8, 3, 12, 16, 2], f16)), {}) +Operator: aten.div.Tensor +cnt: 2, ((T([], f16), 8225280), {}) +cnt: 2, ((T([], f16), 391680), {}) +cnt: 2, ((T([], f16), 1566720), {}) +cnt: 2, ((T([], f16), 6266880), {}) +cnt: 2, ((T([], f16), 3), {}) +cnt: 2, ((T([], f16), 2), {}) +Operator: aten.exp.default +cnt: 1, ((T([8, 3, 12, 16, 2], f16, stride=(48960, 16320, 1360, 85, 1)),), {}) +cnt: 1, ((T([8, 3, 24, 32, 2], f16, stride=(195840, 65280, 2720, 85, 1)),), {}) +cnt: 1, ((T([8, 3, 48, 64, 2], f16, stride=(783360, 261120, 5440, 85, 1)),), {}) +Operator: aten.leaky_relu_.default +cnt: 1, ((T([8, 32, 384, 512], f16), 0.1), {}) +cnt: 2, ((T([8, 64, 192, 256], f16), 0.1), {}) +cnt: 1, ((T([8, 32, 192, 256], f16), 0.1), {}) +cnt: 3, ((T([8, 128, 96, 128], f16), 0.1), {}) +cnt: 2, ((T([8, 64, 96, 128], f16), 0.1), {}) +cnt: 12, ((T([8, 256, 48, 64], f16), 0.1), {}) +cnt: 11, ((T([8, 128, 48, 64], f16), 0.1), {}) +cnt: 12, ((T([8, 512, 24, 32], f16), 0.1), {}) +cnt: 11, ((T([8, 256, 24, 32], f16), 0.1), {}) +cnt: 8, ((T([8, 1024, 12, 16], f16), 0.1), {}) +cnt: 8, ((T([8, 512, 12, 16], f16), 0.1), {}) +cnt: 1, ((T([8, 256, 12, 16], f16), 0.1), {}) +cnt: 1, ((T([8, 128, 24, 32], f16), 0.1), {}) +Operator: aten.leaky_relu_backward.default +cnt: 12, ((T([8, 256, 48, 64], f16), T([8, 256, 48, 64], f16), 0.1, True), {}) +cnt: 11, ((T([8, 128, 48, 64], f16), T([8, 128, 48, 64], f16), 0.1, True), {}) +cnt: 1, ((T([8, 128, 24, 32], f16), T([8, 128, 24, 32], f16), 0.1, True), {}) +cnt: 12, ((T([8, 512, 24, 32], f16), T([8, 512, 24, 32], f16), 0.1, True), {}) +cnt: 11, ((T([8, 256, 24, 32], f16), T([8, 256, 24, 32], f16), 0.1, True), {}) +cnt: 1, ((T([8, 256, 12, 16], f16), T([8, 256, 12, 16], f16), 0.1, True), {}) +cnt: 8, ((T([8, 
1024, 12, 16], f16), T([8, 1024, 12, 16], f16), 0.1, True), {}) +cnt: 8, ((T([8, 512, 12, 16], f16), T([8, 512, 12, 16], f16), 0.1, True), {}) +cnt: 3, ((T([8, 128, 96, 128], f16), T([8, 128, 96, 128], f16), 0.1, True), {}) +cnt: 2, ((T([8, 64, 96, 128], f16), T([8, 64, 96, 128], f16), 0.1, True), {}) +cnt: 2, ((T([8, 64, 192, 256], f16), T([8, 64, 192, 256], f16), 0.1, True), {}) +cnt: 1, ((T([8, 32, 192, 256], f16), T([8, 32, 192, 256], f16), 0.1, True), {}) +cnt: 1, ((T([8, 32, 384, 512], f16), T([8, 32, 384, 512], f16), 0.1, True), {}) +Operator: aten.max_pool2d_with_indices.default +cnt: 1, ((T([8, 512, 12, 16], f16), [5, 5], [1, 1], [2, 2]), {}) +cnt: 1, ((T([8, 512, 12, 16], f16), [9, 9], [1, 1], [4, 4]), {}) +cnt: 1, ((T([8, 512, 12, 16], f16), [13, 13], [1, 1], [6, 6]), {}) +Operator: aten.max_pool2d_with_indices_backward.default +cnt: 1, ((T([8, 512, 12, 16], f16, stride=(393216, 192, 16, 1)), T([8, 512, 12, 16], f16), [13, 13], [1, 1], [6, 6], [1, 1], False, T([8, 512, 12, 16], i64)), {}) +cnt: 1, ((T([8, 512, 12, 16], f16, stride=(393216, 192, 16, 1)), T([8, 512, 12, 16], f16), [9, 9], [1, 1], [4, 4], [1, 1], False, T([8, 512, 12, 16], i64)), {}) +cnt: 1, ((T([8, 512, 12, 16], f16, stride=(393216, 192, 16, 1)), T([8, 512, 12, 16], f16), [5, 5], [1, 1], [2, 2], [1, 1], False, T([8, 512, 12, 16], i64)), {}) +Operator: aten.mul.Tensor +cnt: 1, ((T([8, 3, 12, 16, 2], f16), T([1, 3, 1, 1, 2], f32)), {}) +cnt: 1, ((T([8, 3, 24, 32, 2], f16), T([1, 3, 1, 1, 2], f32)), {}) +cnt: 1, ((T([8, 3, 48, 64, 2], f16), T([1, 3, 1, 1, 2], f32)), {}) +cnt: 1, ((T([8, 3, 48, 64, 4], f16), 8), {}) +cnt: 1, ((T([8, 3, 48, 64, 2], f32), T([1, 3, 1, 1, 2], f32)), {}) +cnt: 1, ((T([8, 3, 48, 64, 2], f16), T([8, 3, 48, 64, 2], f16)), {}) +cnt: 1, ((T([8, 3, 24, 32, 4], f16), 16), {}) +cnt: 1, ((T([8, 3, 24, 32, 2], f32), T([1, 3, 1, 1, 2], f32)), {}) +cnt: 1, ((T([8, 3, 24, 32, 2], f16), T([8, 3, 24, 32, 2], f16)), {}) +cnt: 1, ((T([8, 3, 12, 16, 4], f16), 32), {}) +cnt: 1, 
((T([8, 3, 12, 16, 2], f32), T([1, 3, 1, 1, 2], f32)), {}) +cnt: 1, ((T([8, 3, 12, 16, 2], f16), T([8, 3, 12, 16, 2], f16)), {}) +Operator: aten.mul_.Tensor +cnt: 1, ((T([8, 3, 12, 16, 4], f16, stride=(48960, 16320, 1360, 85, 1)), 32), {}) +cnt: 1, ((T([8, 3, 24, 32, 4], f16, stride=(195840, 65280, 2720, 85, 1)), 16), {}) +cnt: 1, ((T([8, 3, 48, 64, 4], f16, stride=(783360, 261120, 5440, 85, 1)), 8), {}) +Operator: aten.native_batch_norm.default +cnt: 1, ((T([8, 32, 384, 512], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), False, 0.03, 0.0001), {}) +cnt: 2, ((T([8, 64, 192, 256], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.03, 0.0001), {}) +cnt: 1, ((T([8, 32, 192, 256], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), False, 0.03, 0.0001), {}) +cnt: 3, ((T([8, 128, 96, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.03, 0.0001), {}) +cnt: 2, ((T([8, 64, 96, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), False, 0.03, 0.0001), {}) +cnt: 12, ((T([8, 256, 48, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.03, 0.0001), {}) +cnt: 11, ((T([8, 128, 48, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), False, 0.03, 0.0001), {}) +cnt: 12, ((T([8, 512, 24, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.03, 0.0001), {}) +cnt: 11, ((T([8, 256, 24, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.03, 0.0001), {}) +cnt: 8, ((T([8, 1024, 12, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f16), False, 0.03, 0.0001), {}) +cnt: 8, ((T([8, 512, 12, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), False, 0.03, 0.0001), {}) +cnt: 1, ((T([8, 256, 12, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), False, 0.03, 0.0001), {}) +cnt: 1, ((T([8, 128, 24, 32], f16), T([128], f16), T([128], f16), T([128], 
f16), T([128], f16), False, 0.03, 0.0001), {}) +Operator: aten.native_batch_norm_backward.default +cnt: 12, ((T([8, 256, 48, 64], f16), T([8, 256, 48, 64], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 0.0001, [True, True, True]), {}) +cnt: 11, ((T([8, 128, 48, 64], f16), T([8, 128, 48, 64], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 0.0001, [True, True, True]), {}) +cnt: 1, ((T([8, 128, 24, 32], f16), T([8, 128, 24, 32], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 0.0001, [True, True, True]), {}) +cnt: 12, ((T([8, 512, 24, 32], f16), T([8, 512, 24, 32], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 0.0001, [True, True, True]), {}) +cnt: 11, ((T([8, 256, 24, 32], f16), T([8, 256, 24, 32], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 0.0001, [True, True, True]), {}) +cnt: 1, ((T([8, 256, 12, 16], f16), T([8, 256, 12, 16], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), False, 0.0001, [True, True, True]), {}) +cnt: 8, ((T([8, 1024, 12, 16], f16), T([8, 1024, 12, 16], f16), T([1024], f16), T([1024], f16), T([1024], f16), T([1024], f32), T([1024], f32), False, 0.0001, [True, True, True]), {}) +cnt: 8, ((T([8, 512, 12, 16], f16), T([8, 512, 12, 16], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), False, 0.0001, [True, True, True]), {}) +cnt: 3, ((T([8, 128, 96, 128], f16), T([8, 128, 96, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), False, 0.0001, [True, True, True]), {}) +cnt: 2, ((T([8, 64, 96, 128], f16), T([8, 64, 96, 128], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), False, 0.0001, [True, True, True]), {}) +cnt: 2, ((T([8, 64, 192, 256], f16), T([8, 64, 192, 256], f16), T([64], f16), T([64], f16), T([64], f16), 
T([64], f32), T([64], f32), False, 0.0001, [True, True, True]), {}) +cnt: 1, ((T([8, 32, 192, 256], f16), T([8, 32, 192, 256], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), False, 0.0001, [True, True, True]), {}) +cnt: 1, ((T([8, 32, 384, 512], f16), T([8, 32, 384, 512], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), False, 0.0001, [True, True, True]), {}) +Operator: aten.new_empty_strided.default +cnt: 1, ((T([8, 3, 48, 64, 85], f16, stride=(0, 0, 0, 0, 0)), [8, 3, 48, 64, 85], [783360, 261120, 5440, 85, 1]), {}) +cnt: 4, ((T([8, 3, 48, 64, 85], f16), [8, 3, 48, 64, 85], [783360, 261120, 5440, 85, 1]), {}) +cnt: 1, ((T([8, 3, 24, 32, 85], f16, stride=(0, 0, 0, 0, 0)), [8, 3, 24, 32, 85], [195840, 65280, 2720, 85, 1]), {}) +cnt: 4, ((T([8, 3, 24, 32, 85], f16), [8, 3, 24, 32, 85], [195840, 65280, 2720, 85, 1]), {}) +cnt: 1, ((T([8, 3, 12, 16, 85], f16, stride=(0, 0, 0, 0, 0)), [8, 3, 12, 16, 85], [48960, 16320, 1360, 85, 1]), {}) +cnt: 4, ((T([8, 3, 12, 16, 85], f16), [8, 3, 12, 16, 85], [48960, 16320, 1360, 85, 1]), {}) +Operator: aten.new_zeros.default +cnt: 1, ((T([8, 3, 48, 64, 4], f16), [6266880]), {}) +cnt: 1, ((T([8, 3, 24, 32, 4], f16), [1566720]), {}) +cnt: 1, ((T([8, 3, 12, 16, 4], f16), [391680]), {}) +Operator: aten.sigmoid.default +cnt: 1, ((T([8, 3, 12, 16, 2], f16, stride=(48960, 16320, 1360, 85, 1)),), {}) +cnt: 1, ((T([8, 3, 24, 32, 2], f16, stride=(195840, 65280, 2720, 85, 1)),), {}) +cnt: 1, ((T([8, 3, 48, 64, 2], f16, stride=(783360, 261120, 5440, 85, 1)),), {}) +Operator: aten.sigmoid_.default +cnt: 1, ((T([8, 3, 12, 16, 81], f16, stride=(48960, 16320, 1360, 85, 1)),), {}) +cnt: 1, ((T([8, 3, 24, 32, 81], f16, stride=(195840, 65280, 2720, 85, 1)),), {}) +cnt: 1, ((T([8, 3, 48, 64, 81], f16, stride=(783360, 261120, 5440, 85, 1)),), {}) +Operator: aten.sigmoid_backward.default +cnt: 1, ((T([8, 3, 48, 64, 81], f16), T([8, 3, 48, 64, 81], f16, stride=(783360, 261120, 5440, 85, 1))), {}) 
+cnt: 1, ((T([8, 3, 48, 64, 2], f16), T([8, 3, 48, 64, 2], f16)), {}) +cnt: 1, ((T([8, 3, 24, 32, 81], f16), T([8, 3, 24, 32, 81], f16, stride=(195840, 65280, 2720, 85, 1))), {}) +cnt: 1, ((T([8, 3, 24, 32, 2], f16), T([8, 3, 24, 32, 2], f16)), {}) +cnt: 1, ((T([8, 3, 12, 16, 81], f16), T([8, 3, 12, 16, 81], f16, stride=(48960, 16320, 1360, 85, 1))), {}) +cnt: 1, ((T([8, 3, 12, 16, 2], f16), T([8, 3, 12, 16, 2], f16)), {}) +Operator: aten.slice_backward.default +cnt: 1, ((T([8, 3, 48, 64, 2], f16), [8, 3, 48, 64, 85], 4, 2, 4, 1), {}) +cnt: 1, ((T([8, 3, 48, 64, 2], f16), [8, 3, 48, 64, 85], 4, 0, 2, 1), {}) +cnt: 1, ((T([8, 3, 24, 32, 2], f16), [8, 3, 24, 32, 85], 4, 2, 4, 1), {}) +cnt: 1, ((T([8, 3, 24, 32, 2], f16), [8, 3, 24, 32, 85], 4, 0, 2, 1), {}) +cnt: 1, ((T([8, 3, 12, 16, 2], f16), [8, 3, 12, 16, 85], 4, 2, 4, 1), {}) +cnt: 1, ((T([8, 3, 12, 16, 2], f16), [8, 3, 12, 16, 85], 4, 0, 2, 1), {}) +Operator: aten.stack.default +cnt: 1, (([T([12, 16], i64, stride=(0, 1)), T([12, 16], i64, stride=(1, 0))], 2), {}) +cnt: 1, (([T([24, 32], i64, stride=(0, 1)), T([24, 32], i64, stride=(1, 0))], 2), {}) +cnt: 1, (([T([48, 64], i64, stride=(0, 1)), T([48, 64], i64, stride=(1, 0))], 2), {}) +Operator: aten.sum.default +cnt: 1, ((T([8, 12096, 85], f16),), {}) +cnt: 1, ((T([8, 3, 12, 16, 85], f16),), {}) +cnt: 1, ((T([8, 3, 24, 32, 85], f16),), {}) +cnt: 1, ((T([8, 3, 48, 64, 85], f16),), {}) +Operator: aten.upsample_nearest2d.vec +cnt: 1, ((T([8, 256, 12, 16], f16), None, [2.0, 2.0]), {}) +cnt: 1, ((T([8, 128, 24, 32], f16), None, [2.0, 2.0]), {}) +Operator: aten.upsample_nearest2d_backward.vec +cnt: 1, ((T([8, 128, 48, 64], f16, stride=(1179648, 3072, 64, 1)), None, [8, 128, 24, 32], [2.0, 2.0]), {}) +cnt: 1, ((T([8, 256, 24, 32], f16, stride=(589824, 768, 32, 1)), None, [8, 256, 12, 16], [2.0, 2.0]), {}) diff --git a/torchbenchmark/operator_loader/operator_inp_utils.py b/torchbenchmark/operator_loader/operator_inp_utils.py new file mode 100644 index 
000000000..d0b7621c7 --- /dev/null +++ b/torchbenchmark/operator_loader/operator_inp_utils.py @@ -0,0 +1,346 @@ +import functools +import logging +import math +import os +from collections import Counter, defaultdict +from functools import partial +from typing import Any, Dict, Generator, Iterable, Tuple + +import torch +from torch.testing import make_tensor +from torch.utils import _pytree as pytree +from torch.utils._python_dispatch import TorchDispatchMode +from torch.utils._pytree import tree_map + + +log = logging.getLogger(__name__) + +OP_INP_DIRECTORY = os.path.join(os.path.dirname(__file__), "operator_inp_logs") + +TIMM_DIR = os.path.join(OP_INP_DIRECTORY, "timm_train") +HF_DIR = os.path.join(OP_INP_DIRECTORY, "hf_train") +TORCHBENCH_DIR = os.path.join(OP_INP_DIRECTORY, "torchbench_train") + +aten = torch.ops.aten +tensor_type = torch._C.TensorType.get() + +dtype_abbrs = { + torch.bfloat16: "bf16", + torch.float64: "f64", + torch.float32: "f32", + torch.float16: "f16", + torch.complex32: "c32", + torch.complex64: "c64", + torch.complex128: "c128", + torch.int8: "i8", + torch.int16: "i16", + torch.int32: "i32", + torch.int64: "i64", + torch.bool: "b8", + torch.uint8: "u8", +} + +dtype_abbrs_parsing = {value: key for key, value in dtype_abbrs.items()} + + +def truncate_inp(arg): + if arg in dtype_abbrs: + return dtype_abbrs[arg] + elif isinstance(arg, torch.device): + return arg.type + else: + return arg + + +# Serialize Function Call +class FuncCallWrapper: + def __init__(self, call, *args, **kwargs): + self.call = call + self.args = tree_map(truncate_inp, args) + self.kwargs = tree_map(truncate_inp, kwargs) if kwargs is not None else {} + + def __repr__(self): + args = ", ".join([repr(arg) for arg in self.args]) + kwargs = "".join( + [f", {str(key)}={value}" for key, value in self.kwargs.items()] + ) + out = f"{self.call}({args}{kwargs})".strip('"') + # f strings introduce quotations we dont want + for key in dtype_abbrs_parsing: + out = 
out.replace(f"'{key}'", key) + return out + + +def serialize_sparse_tensor(e): + if isinstance(e, torch._subclasses.FakeTensor): + return FuncCallWrapper("ST", list(e.shape), e.dtype, e.layout, e.is_coalesced()) + else: + return FuncCallWrapper( + "ST", list(e.shape), e.dtype, e.layout, e.is_coalesced(), e._nnz() + ) + + +def deserialize_sparse_tensor(size, dtype, layout, is_coalesced, nnz=None): + raise NotImplementedError + + +def deserialize_tensor(size, dtype, stride=None): + if stride is not None: + out = torch.empty_strided(size, stride, dtype=dtype) + else: + out = torch.empty(size, dtype=dtype) + try: + out.copy_(make_tensor(size, dtype=dtype, device="cpu")) + except Exception as e: + print(e) + return out + return out + + +def serialize_tensor(e): + if not e.is_contiguous(): + return FuncCallWrapper("T", list(e.shape), e.dtype, stride=e.stride()) + else: + return FuncCallWrapper("T", list(e.shape), e.dtype) + + +def serialize_torch_args(e): + if isinstance(e, torch.Tensor): + if e.is_sparse: + return serialize_sparse_tensor(e) + return serialize_tensor(e) + else: + return truncate_inp(e) + + +def contains_tensor(elems): + for elem in pytree.tree_leaves(elems): + if isinstance(elem, torch.Tensor): + return True + return False + + +def skip_args(elems): + for i in pytree.tree_leaves(elems): + # only shows up in constructors and ops like that + if isinstance(i, (torch.memory_format, torch.storage.UntypedStorage)): + return True + return False + + +def contains_tensor_types(type): + return type.isSubtypeOf(tensor_type) or any( + contains_tensor_types(e) for e in type.containedTypes() + ) + + +@functools.lru_cache(None) +def non_compute_operator(op): + schema = op._schema + + # skip constructors + if not any(contains_tensor_types(arg.type) for arg in schema.arguments): + return True + if "_like" in op.name(): + return True + + # allow in place writes + if schema.is_mutable: + return False + + tensor_inps = [arg for arg in schema.arguments if arg.type is 
tensor_type] + tensor_outputs = [ret for ret in schema.returns if ret.type is tensor_type] + + # skip aliasing unless there are multiple outputs + if len(tensor_outputs) != 1: + return False + + for inp in tensor_inps: + if inp.alias_info and tensor_outputs[0].alias_info: + if inp.alias_info.before_set.intersection( + tensor_outputs[0].alias_info.after_set + ): + return True + + return False + + +class OperatorInputsMode(TorchDispatchMode): + def __init__(self, func_db=None): + self.func_db = defaultdict(Counter) if func_db is None else func_db + + def __torch_dispatch__(self, func_overload, types, args=(), kwargs=None): + kwargs = kwargs if kwargs else {} + arg_meta, kwarg_meta = tree_map(serialize_torch_args, (args, kwargs)) + + out = func_overload(*args, **kwargs) + + inps = (args, kwargs) + if contains_tensor(inps) and not skip_args(inps) and contains_tensor(out): + serialized_str = repr((arg_meta, kwarg_meta)) + self.func_db[str(func_overload)][serialized_str] += 1 + + return out + + def log_to_file(self, output_filename, *, skip_non_compute_operators=True): + sorted_operators = sorted(self.func_db.keys()) + with open(output_filename, "w") as f: + for operator in sorted_operators: + if skip_non_compute_operators and non_compute_operator(eval(operator)): + continue + f.write(f"Operator: {operator}\n") + operator_inputs = self.func_db[operator] + for inps, count in operator_inputs.items(): + f.write(f"cnt: {count}, ") + # repr will add quotation marks around the dtype strings + for dtype_abbr in dtype_abbrs.values(): + inps = inps.replace("'" + dtype_abbr + "'", dtype_abbr) + f.write(inps) + f.write("\n") + + +def map_to_device(e, device): + if isinstance(e, torch.Tensor): + return e.to(device) + elif isinstance(e, torch.device): + return device + elif isinstance(e, str): + if e == "cuda" or e == "cpu": + return device.type + else: + return e + + +def map_to_dtype(e, dtype): + if isinstance(e, torch.Tensor) and e.is_floating_point(): + return e.to(dtype) + elif 
isinstance(e, torch.dtype): + return dtype + else: + return e + + +def deserialize_args(inps): + inps = inps.strip().strip("'") + global_vals = { + "T": deserialize_tensor, + "ST": deserialize_sparse_tensor, + "th": torch, + "inf": math.inf, + "torch": torch, + **dtype_abbrs_parsing, + } + # f strings introduce quotations we dont want + for key in dtype_abbrs_parsing: + inps = inps.replace(f"'{key}'", key) + return eval(inps.strip().strip("'").strip('"'), global_vals) + + +class OperatorInputsLoader: + def __init__(self, json_file_path): + self.operator_db = defaultdict(Counter) + + with open(json_file_path) as f: + lines = f.readlines() + + i = 0 + while i < len(lines): + op_line = lines[i].strip("\n") + assert "Operator: " in op_line, op_line + operator = op_line[len("Operator: ") :] + operator = ( + operator if operator != "aten.sum.SymInt" else "aten.sum.dim_IntList" + ) + op_inps = Counter() + i += 1 + while i < len(lines) and "Operator: " not in lines[i]: + line = lines[i] + cnt = eval(line[len("cnt: ") : line.find(",")]) + inps = line[line.find(",") + 2 :].strip("'") + op_inps[inps] += cnt + i += 1 + self.operator_db[operator] = op_inps + + def get_inputs_for_operator( + self, operator, dtype=None, device="cuda" + ) -> Generator[Tuple[Iterable[Any], Dict[str, Any]], None, None]: + assert ( + str(operator) in self.operator_db + ), f"Could not find {operator}, must provide overload" + + if "embedding" in str(operator): + log.warning("Embedding inputs NYI, input data cannot be randomized") + yield + return + + # line[1] represents number of times these inputs occured, ignored for now + for line in self.operator_db[str(operator)].items(): + inps = line[0] + + args, kwargs = deserialize_args(inps) + + # Backwards require some inputs to be float16 and some to be float32 + # So we record on half and upcast to float when specified + if dtype and dtype != torch.float16: + to_dtype = partial(map_to_dtype, dtype=dtype) + args, kwargs = tree_map(to_dtype, (args, 
kwargs)) + + if device: + to_device = partial(map_to_device, device=torch.device(device)) + args, kwargs = tree_map(to_device, (args, kwargs)) + + yield args, kwargs + + def get_all_ops(self): + for key in self.operator_db.keys(): + try: + op = eval(key) + except AttributeError as ae: + log.warning("Evaluating an op name into an OpOverload: %s", ae) + continue + yield op + + def get_call_frequency(self, op): + assert ( + str(op) in self.operator_db + ), f"Could not find {op}, must provide overload" + + count = 0 + for counter in self.operator_db[str(op)].values(): + count += counter + return count + + def merge(self, other): + for operator, counter_dict in other.operator_db.items(): + for inps, cnt in counter_dict.items(): + self.operator_db[operator][inps] += cnt + + @staticmethod + def get_timm_loader(): + return OperatorInputsLoader._load_directory(TIMM_DIR) + + @staticmethod + def get_huggingface_loader(): + return OperatorInputsLoader._load_directory(HF_DIR) + + @staticmethod + def get_torchbench_loader(): + return OperatorInputsLoader._load_directory(TORCHBENCH_DIR) + + @staticmethod + def _load_directory(inp_dir): + assert os.path.isdir(inp_dir), inp_dir + union = None + for inp in os.listdir(inp_dir): + if inp[-4:] != ".txt": + continue + path = os.path.join(inp_dir, inp) + if union is None: + union = OperatorInputsLoader(path) + else: + union.merge(OperatorInputsLoader(path)) + return union + + +def to_channels_last(ten): + return ten if ten.ndim != 4 else ten.to(memory_format=torch.channels_last) diff --git a/torchbenchmark/operator_loader/operatorbench.py b/torchbenchmark/operator_loader/operatorbench.py new file mode 100644 index 000000000..f92409362 --- /dev/null +++ b/torchbenchmark/operator_loader/operatorbench.py @@ -0,0 +1,380 @@ +#!/usr/bin/env python3 +import csv +import itertools +import sys +import time +import warnings +from contextlib import nullcontext + +import click +import numpy as np + +import torch +from operator_inp_utils import 
def compute_speedups(
    operator, models, example_inputs, repeats, accuracy_checking=False, device="cuda"
):
    """Time every callable in ``models`` on ``example_inputs``.

    Args:
        operator: operator being benchmarked; only used in diagnostic output.
        models: sequence of callables; ``models[0]`` is the reference whose
            output the others are compared against.
        example_inputs: positional arguments passed to every model.
        repeats: number of timing rounds.
        accuracy_checking: when true, compare the output of every non-reference
            model with the reference output and print a diagnostic on mismatch.
        device: ``"cuda"`` times with ``benchmarker.benchmark_gpu``; any other
            value times with ``torch._inductor.utils.timed``.

    Returns:
        A float64 array with one entry per model: the median over ``repeats``
        of the measured times (in whatever unit the timing helper reports).
    """
    expected = models[0](*example_inputs)
    if accuracy_checking:
        for model in models[1:]:
            actual = model(*example_inputs)
            # change to assert later
            # same() reports a mismatch through its boolean result; it only
            # raises AssertionError from its own internal sanity checks.  Both
            # outcomes must count as a failed accuracy check.
            try:
                matches = same(actual, expected, cos_similarity=True, equal_nan=True)
            except AssertionError as e:
                print(e)
                matches = False
            if not matches:
                print(f"Accuracy check failed: {operator}")
                print((expected[0] - actual[0]).abs().max())

    timings = np.zeros((repeats, len(models)), np.float64)
    for rep in range(repeats):
        with maybe_record_function(f"rep_{rep}"):
            # interleave the runs to handle frequency scaling and load changes
            for m, model in enumerate(models):
                with maybe_record_function(f"model_{m}"):
                    if device == "cuda":
                        # Untimed call before the measurement.
                        model(*example_inputs)

                        # benchmarker.benchmark_gpu() clears L2 cache to hide the latency of CPU launch time
                        # along with cuda synchronization
                        timings[rep, m] = benchmarker.benchmark_gpu(
                            lambda: model(*example_inputs)
                        )
                    else:
                        from torch._inductor.utils import timed

                        timings[rep, m] = timed(model, example_inputs)
    return np.median(timings, axis=0)
failures + print(f"Skipping {operator}, input generator nyi") + return True + + # not covered by other non-compute operator heuristics + if operator == torch.ops.aten._unsafe_view.default: + print(f"Skipping {operator}, non compute operator") + return True + + # some of inductor registered to the OpOverload, some registered to OpOverloadPacket + op_impls = [operator] + if isinstance(operator, torch._ops.OpOverload): + op_impls.append(operator.overloadpacket) + + # TODO - skip benchmarking fallbacks. for some ops we have both lowerings and fallbacks + # so its not clear just from operator what will be lowered. + + if all(op not in decompositions and op not in lowerings for op in op_impls): + print(f"Skipping {operator}, no inductor impl") + return True + + if "convolution" in str(operator): + return True + + return False + + +@click.command() +@click.option( + "--suite", + help="suite to load inps from: options: timm, huggingface, torchbench", + default="torchbench", +) +@click.option("--op", help="operator overload to benchmark", default="all") +@click.option("--dtype", help="dtype to benchmark", default="float32") +@click.option("--max-samples", help="max samples per op", default=15) +@click.option("--accuracy-checking", help="check accuracy", default=False) +@click.option( + "--repeats", help="how many times to repeat for perf measurement", default=3 +) +@click.option( + "--inductor-config", + multiple=True, + help="Custom inductor config, options: " + ", ".join(inductor_config_options), +) +@click.option( + "--measure-nvfuser/--no-measure-nvfuser", + help="default we only measure inductor", + default=False, +) +@click.option("--device", help="cpu or cuda", default="cuda") +@click.option("--inp-file", help="use custom input file instead of suite", default=None) +@click.option("--start-idx", help="specify start index of samples", default=0) +@click.option( + "--channels-last", help="force inputs to channels last", is_flag=True, default=False +) 
+@click.option("--profile", help="profile the benchmark", is_flag=True, default=False) +def benchmark( + suite, + op, + dtype, + max_samples, + accuracy_checking, + repeats, + inductor_config, + measure_nvfuser, + device, + inp_file, + start_idx, + channels_last, + profile, +): + warnings.filterwarnings("ignore", module="torch.jit._check") + torch.set_float32_matmul_precision("high") + global profile_enabled + + if inp_file is not None: + loader = OperatorInputsLoader(inp_file) + else: + assert suite in ("timm", "huggingface", "torchbench"), f"got {suite}" + if suite == "timm": + loader = OperatorInputsLoader.get_timm_loader() + elif suite == "huggingface": + loader = OperatorInputsLoader.get_huggingface_loader() + else: + loader = OperatorInputsLoader.get_torchbench_loader() + + assert dtype in ("float16", "float32"), f"got {dtype}" + + inductor_configs = [{}] + backend_names = ["inductor"] + for name in inductor_config or (): + backend_names.append(name) + inductor_configs.append(inductor_config_options[name]) + if measure_nvfuser: + backend_names.append("nvfuser") + + compare2 = len(backend_names) == 2 + if compare2: + a, b = backend_names + backend_names.append(f"{a}/{b}") + + output_fd = None + output_csv = None + if op == "all": + filename = f"operatorbench_{suite}_{dtype}.csv" + output_fd = open(filename, "w") + output_csv = csv.writer(output_fd) + output_csv.writerow( + [ + "operator", + *[ + f"{a} {b}" + for a, b in itertools.product( + backend_names, + [f"{x * 100:.0f}th" for x in quantiles_thresholds], + ) + ], + "elapsed", + *map("{} abs".format, ["eager", *backend_names]), + ] + ) + + dtype = torch.float16 if dtype == "float16" else torch.float32 + + if op == "all": + ops = loader.get_all_ops() + else: + ops = [eval(op)] + + max_samples = max_samples + start_idx + profile_enabled = profile + + for operator in ops: + if skip_operator(operator): + continue + start = time.perf_counter() + inp_gen = loader.get_inputs_for_operator(operator, dtype=dtype, 
device=device) + timings = [] + inputs_list = [] + for _ in range(min(max_samples, 1000000)): + try: + inps = next(inp_gen) + inputs_list.append(inps) + except StopIteration: + break + + profiler_context = ( + torch.profiler.profile( + activities=[ + torch.profiler.ProfilerActivity.CPU, + torch.profiler.ProfilerActivity.CUDA, + ], + record_shapes=False, + profile_memory=False, + on_trace_ready=torch.profiler.tensorboard_trace_handler( + f"./log/operator_{operator}", use_gzip=True + ), + ) + if profile_enabled + else nullcontext() + ) + with profiler_context: + for i, inps in enumerate(tqdm(inputs_list[start_idx:], desc=str(operator))): + if inps is None: + break + args, kwargs = inps + if channels_last: + args, kwargs = tree_map_only( + torch.Tensor, to_channels_last, (args, kwargs) + ) + try: + with maybe_record_function(f"iter_{i}"): + # aten, nvfuser, inductor + timings.append( + microbenchmark( + operator, + args, + kwargs, + accuracy_checking, + repeats, + inductor_configs, + measure_nvfuser, + device, + ) + ) + except Exception as e: + print(f"error {operator} input {i}: {type(e).__name__}: {e}") + # comment out this line to avoid blocking other tests + # raise e + + if not timings: + continue + + timings = np.stack(timings) + speedups = [ + quantiles(timings[:, 0] / timings[:, x]) for x in range(1, timings.shape[1]) + ] + if compare2: + speedups.append(quantiles(timings[:, 1] / timings[:, 2])) + assert len(backend_names) == len(speedups) + + row = [f"{operator}"] + sys.stdout.write(f"{operator}: ") + for backend, (low, mid, high) in zip(backend_names, speedups): + sys.stdout.write(f"{backend}={mid:.4f}x ({low:.4f}-{high:.4f}) ") + row.extend(map("{:.6f}".format, [low, mid, high])) + elapsed = time.perf_counter() - start + row.append(f"{elapsed:1f}") + row.extend(map("{:.8f}".format, np.mean(timings, axis=0).tolist())) + sys.stdout.write(f"took {elapsed:.0f}s\n") + sys.stdout.flush() + if output_csv: + output_csv.writerow(row) + output_fd.flush() + + if 
def register_benchmark_mannually(
    operator_name: str,
    func_name: str,
    baseline: bool = False,
    enabled: bool = True,
    label: Optional[str] = None,
):
    """
    Manually register a benchmark function for a given operator.

    Args:
        operator_name (str): The name of the operator for which the benchmark is being registered.
        func_name (str): The name of the benchmark function to register. eager or
        inductor for aten op benchmark.
        baseline (bool, optional): If True, this benchmark function is considered the baseline. Defaults to False.
        enabled (bool, optional): If True, this benchmark function is enabled. Defaults to True.
        label (Optional[str], optional): An optional label for the benchmark function. Defaults to None.

    This function updates the global dictionaries REGISTERED_BENCHMARKS, BASELINE_BENCHMARKS,
    and ENABLED_BENCHMARKS to include the new benchmark function. If the operator or function
    is already registered, it updates the existing entries; registering the same function
    again does not add a second entry to ENABLED_BENCHMARKS.

    We need this manual registration function because the decorator doesn't work for
    dynamically created classes (operator_loader/__init__.py).
    """
    if operator_name not in REGISTERED_BENCHMARKS:
        REGISTERED_BENCHMARKS[operator_name] = OrderedDict()
    # The function name doubles as the label when no (non-empty) label is given.
    REGISTERED_BENCHMARKS[operator_name][func_name] = label if label else func_name
    if baseline:
        BASELINE_BENCHMARKS[operator_name] = func_name
    if enabled:
        enabled_funcs = ENABLED_BENCHMARKS.setdefault(operator_name, [])
        # Keep the list free of duplicates so that a repeated registration
        # does not make the same benchmark run more than once.
        if func_name not in enabled_funcs:
            enabled_funcs.append(func_name)
Opbench = load_opbench_by_name(args.op) if args.fwd_bwd: args.mode = "fwd_bwd" if args.bwd: