Commit

Merge branch 'trt_8' into qat
peri044 authored Jul 23, 2021
2 parents 2c234f7 + 1d611b7 commit 715120f
Showing 133 changed files with 9,779 additions and 2,193 deletions.
30 changes: 17 additions & 13 deletions README.md
@@ -18,9 +18,13 @@ More Information / System Architecture:
#include "trtorch/trtorch.h"

...
auto compile_settings = trtorch::CompileSpec(dims);
// Set input datatypes. Allowed options torch::{kFloat, kHalf, kChar, kInt32, kBool}
// Size of input_dtypes should match number of inputs to the network.
// If input_dtypes is not set, default precision follows traditional PyT / TRT rules
auto input = trtorch::CompileSpec::Input(dims, torch::kHalf);
auto compile_settings = trtorch::CompileSpec({input});
// FP16 execution
compile_settings.op_precision = torch::kFloat;
compile_settings.enabled_precisions = {torch::kHalf};
// Compile module
auto trt_mod = trtorch::CompileGraph(ts_mod, compile_settings);
// Run like normal
@@ -36,14 +40,14 @@ import trtorch
...
compile_settings = {
"input_shapes": [
{
"min": [1, 3, 224, 224],
"opt": [1, 3, 512, 512],
"max": [1, 3, 1024, 1024]
}, # For static size [1, 3, 224, 224]
],
"op_precision": torch.half # Run with FP16
"inputs": [trtorch.Input(
min_shape=[1, 3, 224, 224],
opt_shape=[1, 3, 512, 512],
max_shape=[1, 3, 1024, 1024],
# For static size shape=[1, 3, 224, 224]
dtype=torch.half, # Datatype of input tensor. Allowed options torch.(float|half|int8|int32|bool)
)],
"enabled_precision": {torch.half}, # Run with FP16
}
trt_ts_module = trtorch.compile(torch_script_module, compile_settings)
@@ -54,9 +58,9 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts")
```

> Notes on running in lower precisions:
> - Set precision with compile_spec.op_precision
> - Enable lower precisions with compile_spec.enabled_precisions
> - The module should be left in FP32 before compilation (FP16 can support half tensor models)
> - In FP16 only input tensors should be converted to FP16, other precisions use FP32
> - In FP16, only the input tensors should be FP16 by default; other precisions use FP32. This can be overridden by setting Input::dtype (see the sketch below)
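
A minimal sketch (not part of this diff) of how the new pieces fit together, assuming a loaded TorchScript module `ts_mod` and a static `{1, 3, 224, 224}` input shape; the mixed FP32/FP16 precision set is likewise illustrative:

```c++
#include "trtorch/trtorch.h"

...
// Declare the input shape and override its dtype to FP16 via Input::dtype
auto input = trtorch::CompileSpec::Input({1, 3, 224, 224}, torch::kHalf);
auto compile_settings = trtorch::CompileSpec({input});
// Allow both FP32 and FP16 kernels when building the engine
compile_settings.enabled_precisions = {torch::kFloat, torch::kHalf};
auto trt_mod = trtorch::CompileGraph(ts_mod, compile_settings);
```
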
## Platform Support

@@ -77,7 +81,7 @@ These are the following dependencies used to verify the testcases. TRTorch can w
- Libtorch 1.8.1 (built with CUDA 11.1)
- CUDA 11.1 (10.2 on Jetson)
- cuDNN 8.1
- TensorRT 7.2.3
- TensorRT 8.0.1.6

## Prebuilt Binaries and Wheel files

2 changes: 1 addition & 1 deletion WORKSPACE
@@ -81,7 +81,7 @@ http_archive(
name = "tensorrt",
build_file = "@//third_party/tensorrt/archive:BUILD",
sha256 = "def6a5ee50bed25a68a9c9e22ec671a8f29ee5414bde47c5767bd279e5596f88",
strip_prefix = "TensorRT-7.2.3.4",
strip_prefix = "TensorRT-8.0.1.6",
urls = [
"https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.0.1/tars/tensorrt-8.0.1.6.linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz",
],
10 changes: 5 additions & 5 deletions core/compiler.cpp
@@ -196,7 +196,7 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
LOG_INFO(*g << "(LoweringGraph)\n");

// segment the graph and convert segmented TensorRT block
auto segmented_blocks = partitioning::Partition(g, convert_cfg.input_ranges, cfg.partition_info);
auto segmented_blocks = partitioning::Partition(g, convert_cfg.inputs, cfg.partition_info);
if (segmented_blocks.size() == 1 && segmented_blocks[0].target() == partitioning::SegmentedBlock::kTorch) {
LOG_WARNING("Didn't generate any TensorRT engines, the compiler did nothing\n");
return mod;
@@ -210,16 +210,16 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
for (auto& seg_block : segmented_blocks) {
std::string cur_block_target =
seg_block.target() == partitioning::SegmentedBlock::kTensorRT ? "TensorRT" : "Torch";
LOG_INFO(*seg_block.g() << "(MiniGraphIn" << cur_block_target << "Block)\n");
LOG_INFO(*seg_block.g() << "(Sub Graph" << cur_block_target << "Block)\n");
std::ostringstream trt_engine_id;
trt_engine_id << reinterpret_cast<const int*>(&seg_block);
if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) {
std::vector<ir::InputRange> input_ranges;
std::vector<ir::Input> inputs;
for (auto& shape : seg_block.in_shape()) {
input_ranges.push_back(ir::InputRange(shape));
inputs.push_back(ir::Input(shape));
}
// update the input specs for each segment
convert_cfg.input_ranges = input_ranges;
convert_cfg.inputs = inputs;
auto engine = conversion::ConvertBlockToEngine(seg_block.block(), convert_cfg, named_params);
auto temp_g = std::make_shared<torch::jit::Graph>();
auto device_spec = convert_cfg.engine_settings.device;
2 changes: 1 addition & 1 deletion core/compiler.h
@@ -12,7 +12,7 @@ namespace trtorch {
namespace core {

struct CompileSpec {
CompileSpec(std::vector<ir::InputRange> input_ranges) : convert_info(std::move(input_ranges)) {}
CompileSpec(std::vector<ir::Input> inputs) : convert_info(std::move(inputs)) {}
conversion::ConversionInfo convert_info;
partitioning::PartitionInfo partition_info;
};
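
For orientation, a rough sketch (assumed usage, not part of this commit) of how the internal spec is now assembled from `ir::Input`; the include paths, shape, and precision values are illustrative:

```c++
#include <vector>

#include "NvInfer.h"
#include "core/compiler.h" // include path assumed from the repo layout

// Build a core::CompileSpec from ir::Input specs instead of ir::InputRange
trtorch::core::CompileSpec MakeSpec() {
  std::vector<trtorch::core::ir::Input> inputs = {
      trtorch::core::ir::Input({1, 3, 224, 224})}; // static shape; dtype/format left at defaults
  trtorch::core::CompileSpec cfg(inputs);
  cfg.convert_info.engine_settings.enabled_precisions = {nvinfer1::DataType::kFLOAT,
                                                         nvinfer1::DataType::kHALF};
  return cfg;
}
```
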
63 changes: 43 additions & 20 deletions core/conversion/conversion.cpp
@@ -125,10 +125,7 @@ void AddLayer(ConversionCtx* ctx, const torch::jit::Node* n) {
<< "please report this error to https://www.github.com/NVIDIA/TRTorch/issues");
}

void AddInputs(
ConversionCtx* ctx,
at::ArrayRef<const torch::jit::Value*> inputs,
std::vector<ir::InputRange>& input_dims) {
void AddInputs(ConversionCtx* ctx, at::ArrayRef<const torch::jit::Value*> inputs, std::vector<ir::Input>& input_specs) {
std::vector<const torch::jit::Value*> input_tensors;
for (auto in : inputs) {
// Disregarding inputs that are not tensors
@@ -142,29 +139,40 @@ void AddInputs(
}
}

std::stringstream ss;
ss << "Input Dimension Specs: [\n";
for (auto i : input_specs) {
ss << " " << i << ",";
}
ss << ']';
LOG_DEBUG(ctx->logger, ss.str());

TRTORCH_CHECK(
input_tensors.size() == input_dims.size(),
input_tensors.size() == input_specs.size(),
"Expected dimension specifications for all input tensors"
<< ", but found " << input_tensors.size() << " input tensors and " << input_dims.size()
<< ", but found " << input_tensors.size() << " input tensors and " << input_specs.size()
<< " dimension specs (conversion.AddInputs)");

auto profile = ctx->builder->createOptimizationProfile();

for (size_t i = 0; i < input_tensors.size(); i++) {
auto in = input_tensors[i];
auto dims = input_dims[i];
auto spec = input_specs[i];
std::string name = std::string("input_") + std::to_string(ctx->num_inputs);
LOG_INFO(
ctx->logger, "Adding Input " << in->debugName() << " named " << name << " in engine (conversion.AddInputs)");
LOG_DEBUG(ctx->logger, "Input shape set to " << dims.input_shape);
auto trt_in = ctx->net->addInput(name.c_str(), ctx->input_type, dims.input_shape);
ctx->logger,
"Adding Input " << in->debugName() << " (named: " << name << "): " << spec
<< " in engine (conversion.AddInputs)");

auto trt_in = ctx->net->addInput(name.c_str(), spec.dtype, spec.input_shape);
TRTORCH_CHECK(trt_in, "Failed to add input node: " << in->debugName() << " (conversion.AddInputs)");
trt_in->setAllowedFormats(1U << static_cast<int>(spec.format));

profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kMIN, dims.min);
profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kOPT, dims.opt);
profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kMAX, dims.max);
profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kMIN, spec.min);
profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kOPT, spec.opt);
profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kMAX, spec.max);

if (dims.input_is_dynamic) {
if (spec.input_is_dynamic) {
ctx->input_is_dynamic = true;
}

@@ -178,7 +186,7 @@

ctx->cfg->addOptimizationProfile(profile);
#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)
if (ctx->op_precision == nvinfer1::DataType::kINT8) {
if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) {
ctx->cfg->setCalibrationProfile(profile);
}
#endif
@@ -350,7 +358,7 @@ void ConvertBlockToNetDef(

auto inputs = b->inputs();
AddParamsToCtxValueMap(ctx, static_params);
AddInputs(ctx, inputs, build_info.input_ranges);
AddInputs(ctx, inputs, build_info.inputs);

auto nodes = b->nodes();

@@ -428,8 +436,8 @@ std::string ConvertBlockToEngine(const torch::jit::Block* b, ConversionInfo buil
return engine;
}

std::set<std::string> GetUnsupportedOpsInBlock(const torch::jit::Block* b) {
std::set<std::string> unsupported_ops;
std::unordered_map<c10::OperatorName, std::string> GetUnsupportedOpsInBlock(const torch::jit::Block* b) {
std::unordered_map<c10::OperatorName, std::string> unsupported_ops;
for (const auto n : b->nodes()) {
if (n->kind() != torch::jit::prim::Loop && n->kind() != torch::jit::prim::If && !OpSupported(n)) {
auto schema = n->maybeSchema();
@@ -438,7 +446,7 @@ std::set<std::string> GetUnsupportedOpsInBlock(const torch::jit::Block* b) {
"Unable to get schema for Node " << util::node_info(n) << " (conversion.VerifyCoverterSupportForBlock)");
std::stringstream ss;
ss << *schema;
unsupported_ops.insert(ss.str());
unsupported_ops[schema->operator_name()] = ss.str();
}
for (const auto sub_b : n->blocks()) {
auto sub_b_unsupported_ops = GetUnsupportedOpsInBlock(sub_b);
@@ -480,12 +488,27 @@ bool VerifyConverterSupportForBlock(const torch::jit::Block* b) {
unsupported_msg << "Method requested cannot be compiled by TRTorch.\nUnsupported operators listed below:"
<< std::endl;
for (auto s : unsupported_ops) {
unsupported_msg << " - " << s << std::endl;
unsupported_msg << " - " << s.second << std::endl;
}
unsupported_msg << "You can either implement converters for these ops in your application or request implementation"
<< std::endl;
unsupported_msg << "https://www.github.com/nvidia/TRTorch/issues" << std::endl;
unsupported_msg << std::endl << "In Module:" << std::endl;

LOG_ERROR(unsupported_msg.str());

for (const auto n : b->nodes()) {
auto schema = n->maybeSchema();
if (schema) {
for (const auto& x : unsupported_ops) {
if (x.first == schema->operator_name()) {
LOG_ERROR(
"Unsupported operator: " << *schema << std::endl
<< trtorch::core::util::GetPyTorchSourceCode(n) << std::endl);
}
}
}
}
return false;
}

5 changes: 2 additions & 3 deletions core/conversion/conversion.h
@@ -12,10 +12,9 @@ namespace core {
namespace conversion {

struct ConversionInfo {
std::vector<ir::InputRange> input_ranges;
std::vector<ir::Input> inputs;
BuilderSettings engine_settings;
ConversionInfo(std::vector<ir::InputRange> input_ranges)
: input_ranges(std::move(input_ranges)), engine_settings(BuilderSettings()) {}
ConversionInfo(std::vector<ir::Input> inputs) : inputs(std::move(inputs)), engine_settings(BuilderSettings()) {}
};

// TODO: REMOVE GRAPH AND PARAMS AND MOVE FULLY TO INLINED CONSTANTS
63 changes: 37 additions & 26 deletions core/conversion/conversionctx/ConversionCtx.cpp
@@ -10,8 +10,11 @@ namespace conversion {
// clang-format off
std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
os << "Settings requested for TensorRT engine:" \
<< "\n Operating Precision: " << s.op_precision \
<< "\n TF32 Floating Point Computation Enabled: " << !s.disable_tf32 \
<< "\n Enabled Precisions: ";
for (auto p = s.enabled_precisions.begin(); p != s.enabled_precisions.end(); ++p) {
os << *p << ' ';
}
os << "\n TF32 Floating Point Computation Enabled: " << !s.disable_tf32 \
<< "\n Truncate Long and Double: " << s.truncate_long_and_double \
<< "\n Make Refittable Engine: " << s.refit \
<< "\n Debuggable Engine: " << s.debug \
@@ -57,36 +60,42 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
LOG_DEBUG(build_settings);
cfg = builder->createBuilderConfig();

switch (settings.op_precision) {
case nvinfer1::DataType::kHALF:
TRTORCH_CHECK(builder->platformHasFastFp16(), "Requested inference in FP16 but platform does not support FP16");
cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
input_type = nvinfer1::DataType::kHALF;
break;
case nvinfer1::DataType::kINT8:
TRTORCH_CHECK(builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
if (!settings.strict_types) {
for (auto p = settings.enabled_precisions.begin(); p != settings.enabled_precisions.end(); ++p) {
switch (*p) {
case nvinfer1::DataType::kHALF:
TRTORCH_CHECK(builder->platformHasFastFp16(), "Requested inference in FP16 but platform does not support FP16");
cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
}
input_type = nvinfer1::DataType::kFLOAT;
// Networks trained with Quantization aware training approach don't need a calibrator as they have Q/DQ nodes.
if (!settings.calibrator) {
LOG_WARNING(
"Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
}
break;
case nvinfer1::DataType::kFLOAT:
default:
input_type = nvinfer1::DataType::kFLOAT;
break;
break;
case nvinfer1::DataType::kINT8:
TRTORCH_CHECK(builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
if (!settings.calibrator) {
LOG_WARNING(
"Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
} else{
cfg->setInt8Calibrator(settings.calibrator);
}
break;
case nvinfer1::DataType::kFLOAT:
break;
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kBOOL:
default:
TRTORCH_THROW_ERROR(
"Requested kernel precision that is unsupported: " << *p << " options are float, half, int8");
}
}
op_precision = settings.op_precision;

enabled_precisions = settings.enabled_precisions;

if (settings.disable_tf32) {
cfg->clearFlag(nvinfer1::BuilderFlag::kTF32);
}

if (settings.sparse_weights) {
cfg->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS);
}

if (settings.refit) {
cfg->setFlag(nvinfer1::BuilderFlag::kREFIT);
}
@@ -119,7 +128,9 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
static_cast<int>(settings.device.dla_core) < nbDLACores,
"Configured DLA Core ID: " << settings.device.dla_core
<< " not available. Total number of available DLA Cores: " << nbDLACores);
TRTORCH_CHECK(settings.op_precision != nvinfer1::DataType::kFLOAT, "DLA supports only fp16 or int8 precision");
TRTORCH_CHECK(
settings.enabled_precisions.find(nvinfer1::DataType::kFLOAT) == settings.enabled_precisions.end(),
"DLA supports only fp16 or int8 precision");
cfg->setDLACore(settings.device.dla_core);
}
}
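
A short sketch (assumed usage, not part of this commit) of the new set-based precision configuration, including the INT8-without-calibrator path that the warning above describes for quantization-aware-trained (Q/DQ) networks; the include path and precision choices are illustrative:

```c++
#include "core/conversion/conversionctx/ConversionCtx.h" // include path assumed

// Enable FP32, FP16, and INT8 kernels. With no calibrator set, the constructor
// above only warns, assuming the network carries Q/DQ nodes from QAT.
void BuildQATConversionCtx() {
  trtorch::core::conversion::BuilderSettings settings;
  settings.enabled_precisions = {nvinfer1::DataType::kFLOAT,
                                 nvinfer1::DataType::kHALF,
                                 nvinfer1::DataType::kINT8};
  settings.calibrator = nullptr; // QAT: scales come from Q/DQ nodes, not calibration
  trtorch::core::conversion::ConversionCtx ctx(settings);
}
```
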
7 changes: 4 additions & 3 deletions core/conversion/conversionctx/ConversionCtx.h
@@ -2,6 +2,7 @@

#include <map>
#include <memory>
#include <set>
#include <unordered_map>

#include "NvInfer.h"
@@ -23,7 +24,8 @@ struct Device {
};

struct BuilderSettings {
nvinfer1::DataType op_precision = nvinfer1::DataType::kFLOAT;
bool sparse_weights = false;
std::set<nvinfer1::DataType> enabled_precisions = {nvinfer1::DataType::kFLOAT};
bool disable_tf32 = false;
bool refit = false;
bool debug = false;
@@ -57,8 +59,7 @@ struct ConversionCtx {
nvinfer1::IBuilder* builder;
nvinfer1::INetworkDefinition* net;
nvinfer1::IBuilderConfig* cfg;
nvinfer1::DataType input_type;
nvinfer1::DataType op_precision;
std::set<nvinfer1::DataType> enabled_precisions;
BuilderSettings settings;
util::logging::TRTorchLogger logger;
// Pointers to data that needs to remain alive until conversion is done
4 changes: 3 additions & 1 deletion core/conversion/converters/impl/activation.cpp
@@ -177,7 +177,9 @@ auto acthardtanh TRTORCH_UNUSED =
std::string pluginName = "CustomGeluPluginDynamic";
nvinfer1::PluginFieldCollection fc;
std::vector<nvinfer1::PluginField> f;
int type_id = ctx->settings.op_precision == nvinfer1::DataType::kFLOAT
// REVIEW is this right?
int type_id = ctx->settings.enabled_precisions.find(nvinfer1::DataType::kHALF) ==
ctx->settings.enabled_precisions.end()
? 0
: 1; // Integer encoding the DataType (0: FP32, 1: FP16)
f.emplace_back(nvinfer1::PluginField("type_id", &type_id, nvinfer1::PluginFieldType::kINT32, 1));
1 change: 1 addition & 0 deletions core/conversion/converters/impl/batch_norm.cpp
@@ -40,6 +40,7 @@ auto batch_norm_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns().
LOG_DEBUG("momentum disregarded");
LOG_DEBUG("training disregarded");
LOG_DEBUG("cudnn disregarded");
TRTORCH_CHECK(orig_shape.nbDims > 2, "Unable to create batch normalization layer from node: " << *n);

// Expand spatial dims from 1D to 2D if needed
bool expandDims = (orig_shape.nbDims < 4);
3 changes: 2 additions & 1 deletion core/conversion/converters/impl/conv_deconv.cpp
@@ -112,7 +112,8 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
LOG_DEBUG("out_padding: " << out_padding);
LOG_DEBUG("groups: " << groups);

// Expand spatial dims from 1D to 2D if needed
TRTORCH_CHECK(orig_dims.nbDims > 2, "Unable to create convolution layer from node: " << *n);

bool expandDims = (orig_dims.nbDims < 4);
if (expandDims) {
in = addPadding(ctx, n, in, 4);
