Skip to content

Commit

Permalink
feat(//cpp/api): Functional Dataloader based PTQ
Browse files Browse the repository at this point in the history
- A couple of assorted fixes in the conversion implementation
- Set up the space to have phase specific settings inside the compiler
- PTQ Calibrator implementation moved to the public API, which means Python
will need its own implementation, but it probably did anyway
- PTQ now works with a dataloader, and all the overrides for the Calibration
algorithm work
- CIFAR10 Dataloader implementation
- The application still has bugs in reporting accuracy and reading from
the calibration cache

Signed-off-by: Naren Dasan <naren@narendasan.com>
Signed-off-by: Naren Dasan <narens@nvidia.com>
  • Loading branch information
narendasan committed Apr 22, 2020
1 parent 676bf56 commit f022dfe
Show file tree
Hide file tree
Showing 28 changed files with 758 additions and 261 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,8 @@ py/.eggs
._DS_Store
*.pth
*.pyc
cpp/ptq/training/vgg16/data/
cpp/ptq/training/vgg16/data/*
*.bin
cpp/ptq/datasets/data/
._.DS_Store
*.tar.gz
2 changes: 1 addition & 1 deletion core/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ cc_library(
"@libtorch//:libtorch",
"@tensorrt//:nvinfer"
],
alwayslink=True,
alwayslink=True,
)


Expand Down
40 changes: 21 additions & 19 deletions core/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,24 @@
namespace trtorch {
namespace core {

c10::FunctionSchema GenerateGraphSchema(torch::jit::script::Module mod, std::string method_name, std::shared_ptr<torch::jit::Graph>& g) {
c10::FunctionSchema GenerateGraphSchema(torch::jit::script::Module mod, std::string method_name, std::shared_ptr<torch::jit::Graph>& g) {

std::vector<c10::Argument> args;
for (auto in : g->inputs()) {
args.push_back(c10::Argument(in->debugName(), in->type()));
}

std::vector<c10::Argument> returns;
for (auto out : g->outputs()) {
returns.push_back(c10::Argument(out->debugName(), out->type()));
}

return c10::FunctionSchema(method_name, method_name, args, returns);
}


void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr<torch::jit::Graph>& g, std::string& serialized_engine) {
execution::EngineID uid = execution::RegisterEngineFromSerializedEngine(serialized_engine);
execution::EngineID uid = execution::RegisterEngineFromSerializedEngine(serialized_engine);
auto schema = execution::GetEngineFunctionSchema(uid);
auto num_io = execution::GetEngineIO(uid);

Expand All @@ -53,14 +53,14 @@ void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr<torch::jit
in_val->setType(c10::TensorType::get());
graph_inputs.push_back(in_val);
}

auto engine_node = g->create(c10::Symbol::fromQualString(schema.name()), torch::jit::ArrayRef<torch::jit::Value*>(graph_inputs), num_io.second);
g->block()->appendNode(engine_node);

for (auto o : engine_node->outputs()) {
g->registerOutput(o);
}

return;
}

Expand All @@ -69,48 +69,50 @@ bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod,
auto g = mod.get_method(method_name).graph();
// Go through PyTorch Lowering to simplify graph and extract weight parameters
auto graph_and_parameters = torch::jit::LowerGraph(*g, mod._ivalue());

g = graph_and_parameters.first;

// Go through TRTorch Lowering to reformat graph to be conversion friendly
// and also segment for accelerators and executors (TRT-DLA, TRT-GPU, PYT)
lowering::LowerGraph(g);

auto params = graph_and_parameters.second;
auto named_params = conversion::get_named_params(g->inputs(), params);
LOG_DEBUG(*g << "(CheckMethodOperatorSupport)\n");

// Is this necessary?
lowering::LowerBlock(g->block());

return conversion::VerifyConverterSupportForBlock(g->block());
}

std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod,
std::string method_name,
conversion::ExtraInfo cfg) {
ExtraInfo cfg) {
auto convert_cfg = std::move(cfg.convert_info);

auto g = mod.get_method(method_name).graph();
// Go through PyTorch Lowering to simplify graph and extract weight parameters
auto graph_and_parameters = torch::jit::LowerGraph(*g, mod._ivalue());

g = graph_and_parameters.first;

// Go through TRTorch Lowering to reformat graph to be conversion friendly
// and also segment for accelerators and executors (TRT-DLA, TRT-GPU, PYT)
lowering::LowerGraph(g);

auto params = graph_and_parameters.second;
auto named_params = conversion::get_named_params(g->inputs(), params);
LOG_INFO(*g << "(CompileGraph)\n");

// Is this necessary?
lowering::LowerBlock(g->block());
auto engine = ConvertBlockToEngine(g->block(), cfg, named_params);
auto engine = ConvertBlockToEngine(g->block(), convert_cfg, named_params);
return std::move(engine);
}

torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod,
conversion::ExtraInfo cfg) {
ExtraInfo cfg) {
// TODO: Should be doing a functional transform but need PR #31978
// [jit] More robust mangling
// torch::jit::script::Module new_mod = mod.clone();
Expand All @@ -128,7 +130,7 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod,

return new_mod;
}

} // namespace core
} // namespace trtorch

13 changes: 10 additions & 3 deletions core/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,19 @@

namespace trtorch {
namespace core {

// Top-level settings object accepted by the compiler entry points
// (ConvertGraphToTRTEngine and CompileGraph take it by value).
// At present it only wraps the conversion-phase settings.
struct ExtraInfo {
// Builds the conversion settings from the given input ranges; the vector is
// taken by value and moved into convert_info.
ExtraInfo(std::vector<conversion::InputRange> input_ranges)
: convert_info(std::move(input_ranges)) {}
// Settings for the conversion phase.
conversion::ConversionInfo convert_info;
};

bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::string method_name);

std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod,
std::string method_name, conversion::ExtraInfo cfg);
std::string method_name, ExtraInfo cfg);

torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, conversion::ExtraInfo cfg);
torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, ExtraInfo cfg);

} // namespace core
} // namespace trtorch
} // namespace trtorch
4 changes: 2 additions & 2 deletions core/conversion/conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ void AddParamsToCtxValueMap(ConversionCtx* ctx, GraphParams& params) {
}
}

void ConvertBlockToNetDef(ConversionCtx* ctx, const torch::jit::Block* b, ExtraInfo build_info, GraphParams& static_params) {
void ConvertBlockToNetDef(ConversionCtx* ctx, const torch::jit::Block* b, ConversionInfo build_info, GraphParams& static_params) {
LOG_INFO(ctx->logger, "Converting Block");

auto inputs = b->inputs();
Expand Down Expand Up @@ -221,7 +221,7 @@ void ConvertBlockToNetDef(ConversionCtx* ctx, const torch::jit::Block* b, ExtraI
// a serialized TensorRT engine that can be deserialized and run

// Probably should consolidate these two functions
std::string ConvertBlockToEngine(const torch::jit::Block* b, ExtraInfo build_info, GraphParams& static_params) {
std::string ConvertBlockToEngine(const torch::jit::Block* b, ConversionInfo build_info, GraphParams& static_params) {
ConversionCtx ctx(build_info.engine_settings);
ConvertBlockToNetDef(&ctx, b, build_info, static_params);
std::string engine = ctx.SerializeEngine();
Expand Down
6 changes: 3 additions & 3 deletions core/conversion/conversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ struct InputRange {
std::vector<int64_t> max_shape);
};

struct ExtraInfo {
struct ConversionInfo {
std::vector<InputRange> input_ranges;
BuilderSettings engine_settings;
ExtraInfo(std::vector<InputRange> input_ranges)
ConversionInfo(std::vector<InputRange> input_ranges)
: input_ranges(std::move(input_ranges)), engine_settings(BuilderSettings()) {}
};

Expand All @@ -43,7 +43,7 @@ GraphParams get_named_params(c10::ArrayRef<torch::jit::Value*> inputs, std::vect

// Converts a already lowered block (blocks with no sub blocks) to
// a serialized TensorRT engine that can be deserialized and run
std::string ConvertBlockToEngine(const torch::jit::Block* b, ExtraInfo build_info, GraphParams& static_params);
std::string ConvertBlockToEngine(const torch::jit::Block* b, ConversionInfo build_info, GraphParams& static_params);

bool OpSupported(const torch::jit::Node* n);

Expand Down
2 changes: 1 addition & 1 deletion core/conversion/conversionctx/ConversionCtx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
<< "\n Max Workspace Size: " << s.workspace_size \
<< "\n Device Type: " << s.device \
<< "\n Engine Capability: " << s.capability \
<< "\n Calibrator Created: " << s.calibrator ? true : false;
<< "\n Calibrator Created: " << (s.calibrator != nullptr);
return os;
}

Expand Down
3 changes: 2 additions & 1 deletion core/conversion/converters/impl/batch_norm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ volatile auto batch_norm_registrations = RegisterNodeConversionPatterns()
auto gamma = args[1].unwrapToTensor();

if (/*training*/ args[5].unwrapToBool()) {
LOG_WARNING("TensorRT only converts forward pass of graphs, but saw training = True, may see undefined behavior, consider placing module in eval mode");
LOG_WARNING(R"WARN(TRTorch only converts forward pass of graphs, but saw training = True, may see
unexpected behavior, consider placing module in eval mode before exporting the TorchScript module)WARN");
}

// If gamma is None this fails
Expand Down
9 changes: 3 additions & 6 deletions core/conversion/converters/impl/pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,20 +79,17 @@ auto pooling_registrations = RegisterNodeConversionPatterns()
for (size_t i = 0; i < out_shape.size(); i++) {
stride[(stride.size() - 1) - i] = in_shape[(in_shape.size() - 1) - i] / out_shape[(out_shape.size() - 1) - i];
}
LOG_DEBUG("Stride" << util::toDims(stride));
LOG_DEBUG("Stride: " << util::toDims(stride));

std::vector<int64_t> window(out_shape.size());
for (size_t i = 0; i < out_shape.size(); i++) {
window[window.size() - 1 - i] = in_shape[in_shape.size() - 1 - i] - (out_shape[out_shape.size() - 1 - i] - 1) * stride[stride.size() - 1 - i];
}

LOG_DEBUG("Window" << util::toDims(window));
LOG_DEBUG("Window: " << util::toDims(window));

auto new_layer = ctx->net->addPoolingNd(*in, nvinfer1::PoolingType::kAVERAGE, util::toDims(window));
if (!new_layer) {
LOG_ERROR("Unable to create average pooling layer from node: " << *n);
return false;
}
TRTORCH_CHECK(new_layer, "Unable to create average pooling layer from node: " << *n);

new_layer->setStrideNd(util::toDims(stride));

Expand Down
Empty file removed core/quantization/BUILD
Empty file.
64 changes: 0 additions & 64 deletions core/quantization/TRTEntropyCalibrator.cpp

This file was deleted.

69 changes: 0 additions & 69 deletions core/quantization/quantization.h

This file was deleted.

Loading

0 comments on commit f022dfe

Please sign in to comment.