[Runtime] add set_output_zero_copy #8497

Merged: 72 commits, Aug 27, 2021
Changes shown from 65 commits

Commits (72):
629ec50  optimize resize vector (sunjiweiswift, Jun 25, 2021)
5005373  tmp (sunjiweiswift, Jun 25, 2021)
a1b749f  DoMultiLevelTiling (sunjiweiswift, Jun 26, 2021)
f1fc313  modify size_t to int (sunjiweiswift, Jun 26, 2021)
65a7a00  modify (sunjiweiswift, Jun 26, 2021)
2368df9  modify level fill (sunjiweiswift, Jun 26, 2021)
e8ba850  Update utils.cc (sunjiweiswift, Jun 26, 2021)
a832739  format lower count (sunjiweiswift, Jun 26, 2021)
258f382  Merge branch 'main' of https://github.com/sunjiweiswift/tvm (sunjiweiswift, Jun 26, 2021)
2de6c99  delete blank lines (sunjiweiswift, Jun 26, 2021)
cb99388  delete blank lines (sunjiweiswift, Jun 26, 2021)
ece0e1d  Merge branch 'main' of https://github.com/sunjiweiswift/tvm (sunjiweiswift, Jun 26, 2021)
9da6fa3  re-commit message (sunjiweiswift, Jun 27, 2021)
718e58b  Merge pull request #1 from apache/main (sunjiweiswift, Jul 15, 2021)
7377e43  Update graph_executor.h (sunjiweiswift, Jul 15, 2021)
8853436  Merge pull request #2 from apache/main (sunjiweiswift, Jul 19, 2021)
4a007ab  add setoutputzero (sunjiweiswift, Jul 19, 2021)
8ca606f  add set output zero (sunjiweiswift, Jul 19, 2021)
6afb609  Update graph_executor.cc (sunjiweiswift, Jul 19, 2021)
d71dece  Update graph_executor.h (sunjiweiswift, Jul 19, 2021)
145219c  delete const_cast (sunjiweiswift, Jul 20, 2021)
e45c77b  add common function chechDltensor (sunjiweiswift, Jul 20, 2021)
b7a27c5  Update graph_executor.h (sunjiweiswift, Jul 20, 2021)
bf6ed08  Update graph_executor.cc (sunjiweiswift, Jul 20, 2021)
80fc91f  add output_ sort (sunjiweiswift, Jul 20, 2021)
ab5f957  Update graph_executor.cc (sunjiweiswift, Jul 20, 2021)
07e80ad  add a.nodeid == b.nodeid (sunjiweiswift, Jul 20, 2021)
e67b839  add unit test for set output zero (sunjiweiswift, Jul 21, 2021)
052fa56  add include <algorithm> (sunjiweiswift, Jul 22, 2021)
847634e  modify Setoutput zero copy (sunjiweiswift, Jul 22, 2021)
b2d9471  modify by clang-format (sunjiweiswift, Jul 22, 2021)
5d0461a  add unit test for set output zero (sunjiweiswift, Jul 22, 2021)
4ebf2bd  relay ut go back (sunjiweiswift, Jul 22, 2021)
c221b51  relay ut go back (sunjiweiswift, Jul 22, 2021)
92294d3  modify input->output (sunjiweiswift, Jul 22, 2021)
dd54915  delete sort output input (sunjiweiswift, Jul 23, 2021)
66ef5fe  modify build_module_test.cc (sunjiweiswift, Jul 23, 2021)
7918c7b  re-pr (sunjiweiswift, Jul 24, 2021)
c7e00cb  empty commit (sunjiweiswift, Jul 24, 2021)
2558aee  empty commit (sunjiweiswift, Jul 25, 2021)
bf85d3e  empty commit (sunjiweiswift, Jul 25, 2021)
df24fc3  modify input to output (sunjiweiswift, Jul 28, 2021)
c1bf14c  modify zero output copy disorder issue (sunjiweiswift, Jul 29, 2021)
c666527  Merge remote-tracking branch 'upstream/main' (sunjiweiswift, Aug 2, 2021)
85b4fc3  Merge remote-tracking branch 'upstream/main' (sunjiweiswift, Aug 3, 2021)
81143b9  modify nid->eid to record output, add var to record the dltensor both… (sunjiweiswift, Aug 3, 2021)
6f7b068  character too long >= 100 (sunjiweiswift, Aug 3, 2021)
0d25674  modify zero copy UT add set input zero copy (sunjiweiswift, Aug 3, 2021)
6fc5047  modify zero copy UT add set input zero copy (sunjiweiswift, Aug 3, 2021)
969c80f  modify zero copy UT add set input zero copy (sunjiweiswift, Aug 3, 2021)
889106d  Merge branch 'main' of https://github.com/sunjiweiswift/tvm (sunjiweiswift, Aug 3, 2021)
5f858cc  empty commit (sunjiweiswift, Aug 3, 2021)
1762cb5  trigger CI (sunjiweiswift, Aug 4, 2021)
0575cb8  Merge pull request #4 from apache/main (sunjiweiswift, Aug 4, 2021)
2640e76  trigger CI (sunjiweiswift, Aug 4, 2021)
a10562f  trigger CI (sunjiweiswift, Aug 4, 2021)
07128aa  empty commit (sunjiweiswift, Aug 20, 2021)
c0e89f5  empty commit (sunjiweiswift, Aug 20, 2021)
3e46c0e  trigger CI (Aug 21, 2021)
6b3a126  trigger CI (sunjiweiswift, Aug 21, 2021)
37b69b1  trigger CI (sunjiweiswift, Aug 21, 2021)
d66c4e1  Merge pull request #5 from apache/main (sunjiweiswift, Aug 21, 2021)
e622619  trigger CI (sunjiweiswift, Aug 21, 2021)
8f9287f  trigger CI (sunjiweiswift, Aug 21, 2021)
1644d91  resolve conflicts (sunjiweiswift, Aug 24, 2021)
1c4f9e3  Merge pull request #6 from apache/main (sunjiweiswift, Aug 25, 2021)
13a1355  modify C style (sunjiweiswift, Aug 25, 2021)
cb09eab  add runtime test (sunjiweiswift, Aug 25, 2021)
3205590  add runtime test (sunjiweiswift, Aug 25, 2021)
aab0ef7  add runtime test (sunjiweiswift, Aug 25, 2021)
8c0dfb6  relay build generates the json (sunjiweiswift, Aug 26, 2021)
2603263  relay build generates the json (sunjiweiswift, Aug 26, 2021)
106 changes: 92 additions & 14 deletions src/runtime/graph_executor/graph_executor.cc
@@ -91,6 +91,11 @@ void GraphExecutor::Init(const std::string& graph_json, tvm::runtime::Module mod
std::string& name = nodes_[nid].name;
input_map_[name] = i;
}
for (size_t i = 0; i < outputs_.size(); i++) {
const uint32_t nid = outputs_[i].node_id;
std::string& name = nodes_[nid].name;
output_map_[name] = i;
}
}
/*!
* \brief Get the input index given the name of input.
@@ -104,6 +109,18 @@ int GraphExecutor::GetInputIndex(const std::string& name) {
}
return -1;
}
/*!
* \brief Get the output index given the name of output.
* \param name The name of the output.
* \return The index of output.
*/
int GraphExecutor::GetOutputIndex(const std::string& name) {
auto it = output_map_.find(name);
if (it != output_map_.end()) {
return it->second;
}
return -1;
}
/*!
* \brief set index-th input to the graph.
* \param index The input index.
@@ -114,6 +131,23 @@ void GraphExecutor::SetInput(int index, DLTensor* data_in) {
uint32_t eid = this->entry_id(input_nodes_[index], 0);
data_entry_[eid].CopyFrom(data_in);
}
/*!
* \brief Check the legality of external DLTensor*.
* \param external The external DLTensor*.
 * \param eid The data_entry_ index.
*/
void GraphExecutor::CheckExternalDLTensor(const DLTensor* external, uint32_t eid) const {
const DLTensor* internal = data_entry_[eid].operator->();

ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*external));
ICHECK_EQ(reinterpret_cast<size_t>(external->data) % kAllocAlignment, 0);
ICHECK_EQ(internal->ndim, static_cast<size_t>(external->ndim));
ICHECK_EQ(internal->device.device_type, external->device.device_type);
ICHECK_EQ(internal->device.device_id, external->device.device_id);
for (auto i = 0; i < external->ndim; ++i) {
ICHECK_EQ(internal->shape[i], external->shape[i]);
}
}
/*!
* \brief set index-th input to the graph without copying the data.
* \param index The input index.
@@ -122,23 +156,37 @@ void GraphExecutor::SetInput(int index, DLTensor* data_in) {
void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) {
ICHECK_LT(static_cast<size_t>(index), input_nodes_.size());
uint32_t eid = this->entry_id(input_nodes_[index], 0);
const DLTensor* old_t = data_entry_[eid].operator->();

// check the consistency of input
ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*data_ref));
ICHECK_EQ(reinterpret_cast<size_t>(data_ref->data) % kAllocAlignment, 0);
ICHECK_EQ(old_t->ndim, static_cast<size_t>(data_ref->ndim));
ICHECK_EQ(old_t->device.device_type, data_ref->device.device_type);
ICHECK_EQ(old_t->device.device_id, data_ref->device.device_id);
for (auto i = 0; i < data_ref->ndim; ++i) {
ICHECK_EQ(old_t->shape[i], data_ref->shape[i]);
}

CheckExternalDLTensor(data_ref, eid);
// Update the data pointer for each argument of each op
for (DLTensor* t : input_dltensors_[eid]) {
t->data = data_ref->data;
}
}
/*!
* \brief set index-th output to the graph without copying the data.
* \param index The output index.
* \param data_ref The output data that is referred.
*/
void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) {
ICHECK_LT(static_cast<size_t>(index), outputs_.size());
ICHECK_LT(static_cast<size_t>(index), output_dltensors_.size());
const NodeEntry& output_node = outputs_[index];
uint32_t output_node_eid = this->entry_id(output_node);

// check the consistency of output
CheckExternalDLTensor(data_ref, output_node_eid);

// Update the data pointer for output op
for (DLTensor* t : output_dltensors_[output_node_eid]) {
t->data = data_ref->data;
}

// Update the input of the op connected to the output
for (DLTensor* t : both_output_opinput_dltensors_[output_node_eid]) {
t->data = data_ref->data;
}
}
/*!
* \brief Get the number of outputs
*
@@ -358,11 +406,17 @@ void GraphExecutor::SetupStorage() {
void GraphExecutor::SetupOpExecs() {
op_execs_.resize(this->GetNumOfNodes());
input_dltensors_.resize(num_node_entries());
output_dltensors_.resize(num_node_entries());
both_output_opinput_dltensors_.resize(num_node_entries());
std::unordered_set<uint32_t> input_node_eids;
for (size_t i = 0; i < input_nodes_.size(); i++) {
uint32_t nid = input_nodes_[i];
input_node_eids.insert(entry_id(nid, 0));
}
std::unordered_set<uint32_t> output_node_eids;
for (size_t i = 0; i < outputs_.size(); i++) {
output_node_eids.insert(entry_id(outputs_[i]));
}

// setup the array and requirements.
for (uint32_t nid = 0; nid < this->GetNumOfNodes(); ++nid) {
@@ -383,10 +437,25 @@
std::tie(op_execs_[nid], op_args) = CreateTVMOp(inode.param, args);

for (size_t i = 0; i < inode.inputs.size(); i++) {
uint32_t eid = this->entry_id(inode.inputs[i]);
uint32_t input_eid = this->entry_id(inode.inputs[i]);
// check if op input is model input
if (input_node_eids.count(eid) > 0) {
input_dltensors_[eid].push_back(static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
if (input_node_eids.count(input_eid) > 0) {
input_dltensors_[input_eid].push_back(
static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
}
// check if any model output is the input of the op
if (output_node_eids.count(input_eid) > 0) {
both_output_opinput_dltensors_[input_eid].push_back(
static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
}
}

for (uint32_t i = inode.inputs.size(); i < inode.inputs.size() + inode.param.num_outputs; ++i) {
uint32_t output_eid = this->entry_id(nid, i - inode.inputs.size());
// check if op output is model output
if (output_node_eids.count(output_eid) > 0) {
output_dltensors_[output_eid].push_back(
static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
}
}
}
@@ -462,6 +531,15 @@ PackedFunc GraphExecutor::GetFunction(const std::string& name,
this->SetInputZeroCopy(args[0], args[1]);
}
});
} else if (name == "set_output_zero_copy") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
if (String::CanConvertFrom(args[0])) {
int out_idx = this->GetOutputIndex(args[0].operator String());
if (out_idx >= 0) this->SetOutputZeroCopy(out_idx, args[1]);
} else {
this->SetOutputZeroCopy(args[0], args[1]);
}
});
} else if (name == "get_output") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
if (args.num_args == 2) {
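For reference, a minimal caller-side sketch of the new interface. This is not part of the diff; the shapes and the names "A" and "elemwise_add" are illustrative, borrowed from the test below.

#include <tvm/runtime/module.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/packed_func.h>

void RunZeroCopy(tvm::runtime::Module mod) {
  using namespace tvm::runtime;
  // NDArray::Empty returns storage aligned to kAllocAlignment, so these
  // buffers satisfy the alignment/shape/device checks performed by
  // CheckExternalDLTensor; a raw malloc'd pointer might not.
  NDArray in = NDArray::Empty({4}, {kDLFloat, 32, 1}, {kDLCPU, 0});
  NDArray out = NDArray::Empty({4}, {kDLFloat, 32, 1}, {kDLCPU, 0});
  // Both packed functions accept either a name (resolved through
  // GetInputIndex / GetOutputIndex) or an integer index.
  mod.GetFunction("set_input_zero_copy")("A", in);
  mod.GetFunction("set_output_zero_copy")("elemwise_add", out);
  mod.GetFunction("run")();
  // The ops wrote directly into `out`; no output copy took place.
}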
28 changes: 28 additions & 0 deletions src/runtime/graph_executor/graph_executor.h
@@ -107,6 +107,13 @@ class TVM_DLL GraphExecutor : public ModuleNode {
*/
int GetInputIndex(const std::string& name);

/*!
* \brief Get the output index given the name of output.
* \param name The name of the output.
* \return The index of output.
*/
int GetOutputIndex(const std::string& name);

/*!
* \brief set index-th input to the graph.
* \param index The input index.
@@ -119,6 +126,12 @@ class TVM_DLL GraphExecutor : public ModuleNode {
* \param data_ref The input data that is referred.
*/
void SetInputZeroCopy(int index, DLTensor* data_ref);
/*!
* \brief set index-th output to the graph without copying the data.
* \param index The output index.
* \param data_ref The output data that is referred.
*/
void SetOutputZeroCopy(int index, DLTensor* data_ref);
/*!
* \brief Get the number of outputs
*
@@ -193,6 +206,9 @@
uint32_t node_id;
uint32_t index;
uint32_t version;
inline bool operator==(const NodeEntry& other) const {
return node_id == other.node_id && index == other.index && version == other.version;
}
// JSON Loader
void Load(dmlc::JSONReader* reader) {
reader->BeginArray();
@@ -377,6 +393,12 @@
void SetupStorage();
/*! \brief Setup the executors. */
void SetupOpExecs();
/*!
* \brief Check the legality of external DLTensor*.
* \param external The external DLTensor*.
 * \param eid The data_entry_ index.
*/
void CheckExternalDLTensor(const DLTensor* external, uint32_t eid) const;
/*!
* \brief Create an execution function given input.
* \param attrs The node attributes.
@@ -397,8 +419,14 @@
std::vector<uint32_t> input_nodes_;
/*! \brief Map of input names to input indices. */
std::unordered_map<std::string, uint32_t> input_map_;
/*! \brief Map of output names to output indices. */
std::unordered_map<std::string, uint32_t> output_map_;
/*! \brief Used for quick node input DLTensor* lookup given an input eid. */
std::vector<std::vector<DLTensor*>> input_dltensors_;
/*! \brief Used for quick node output DLTensor* lookup given an output eid. */
std::vector<std::vector<DLTensor*>> output_dltensors_;
/*! \brief Used for quick node(both model output and op input) DLTensor* lookup given an eid. */
std::vector<std::vector<DLTensor*>> both_output_opinput_dltensors_;
/*! \brief Used for quick entry indexing. */
std::vector<uint32_t> node_row_ptr_;
/*! \brief Output entries. */
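The three DLTensor* tables above implement a simple pointer-patching scheme: for each entry id they record every argument tensor the compiled ops use for that entry, so rebinding a buffer amounts to rewriting data pointers. A standalone sketch of the idea, with illustrative names rather than the actual TVM members:

#include <dlpack/dlpack.h>

#include <cstdint>
#include <vector>

struct ZeroCopyTable {
  // One slot per entry id; each slot lists the op-argument tensors that
  // alias that entry inside the compiled functions.
  std::vector<std::vector<DLTensor*>> tensors;

  void Rebind(uint32_t eid, void* new_data) {
    for (DLTensor* t : tensors[eid]) {
      t->data = new_data;  // the next run() reads/writes the new buffer
    }
  }
};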
126 changes: 126 additions & 0 deletions tests/cpp/build_module_test.cc
@@ -199,3 +199,129 @@ TEST(BuildModule, Heterogeneous) {
ICHECK_LT(std::fabs(p_out[i] - (i + (i + 1.0) - (i - 1.0))), 1e-5);
}
}

TEST(BuildModule, ZeroCopy) {
  /*
   *       A    B
   *        \  /
   *   elemwise_add(out0)
   *           \
   *      C    copy
   *       \   /
   *   elemwise_sub(out1)
   */

using namespace tvm;
using namespace tvm::te;

auto target_llvm = Target("llvm");

// The shape of input tensors.
const int n = 4;
Array<PrimExpr> shape{n};

auto A = placeholder(shape, DataType::Float(32), "A");
auto B = placeholder(shape, DataType::Float(32), "B");
auto C = placeholder(shape, DataType::Float(32), "C");

auto elemwise_add = compute(
A->shape, [&A, &B](PrimExpr i) { return A[i] + B[i]; }, "elemwise_add");

auto copy = placeholder(shape, DataType::Float(32), "__copy");
auto elemwise_sub = compute(
C->shape, [&copy, &C](PrimExpr i) { return copy[i] - C[i]; }, "elemwise_sub");

With<Target> llvm_scope(target_llvm);
auto s1 = create_schedule({elemwise_add->op});
auto s2 = create_schedule({elemwise_sub->op});

auto args1 = Array<Tensor>({A, B, elemwise_add});
auto args2 = Array<Tensor>({copy, C, elemwise_sub});

std::unordered_map<Tensor, Buffer> binds;
auto lowered_s1 = LowerSchedule(s1, args1, "elemwise_add", binds);
auto lowered_s2 = LowerSchedule(s2, args2, "elemwise_sub", binds);
Map<tvm::Target, IRModule> inputs = {{target_llvm, lowered_s1}, {target_llvm, lowered_s2}};
auto module = build(inputs, Target());

// Execute the graph and check the correctness.
// Setup graph json.
std::string json =
Review thread:

Contributor: I really don't like testing this way. Hard-coding the expected output (e.g., assembly, JSON, etc.) can make future maintenance difficult. IMHO, it should be sufficient to build two modules and set one of them to zero copy, so that the only difference between the two is execution latency while their outputs stay the same.

Also, it would be good to have a Python test so that we can demonstrate how this is used from Python; otherwise no one will know about this feature at all, since there is no documentation either.

Contributor Author: The graph JSON is not an expected output; it is the serialized input graph used to construct the graph executor.

I think set_output_zero_copy is only used when calling libtvm_runtime.so (the TVM graph executor) from other frameworks that allocate input and output memory in advance, so it is not a Python API.

Contributor: OK, I see. In that case it would be better to go through the Relay build process, which also generates the JSON.

Why can't set_output_zero_copy be a Python API even if, for now, it is only used when calling libtvm_runtime.so? Since it has a corresponding branch in GraphExecutor::GetFunction, I suppose a Python interface is doable.

Contributor Author: Fixed; the JSON is now generated by the Relay build.

Python has no raw pointers, so generally there is no such thing as zero copy in Python. I think that is also why set_input_zero_copy is not a Python API.

Member: I also don't understand why there is no Python API. It is totally reasonable to pass pre-allocated NDArray storage for get_output.

Contributor Author: OK, I will submit another PR for the Python API later.

"{\"nodes\": [{\"op\": \"null\", \"name\": \"A\", \"inputs\": []}, "
"{\"op\": \"null\", \"name\": \"B\", \"inputs\": []}, {\"op\": "
"\"tvm_op\", \"name\": \"elemwise_add\", \"attrs\": {\"flatten_data\": "
"\"1\", \"func_name\": \"elemwise_add\", \"num_inputs\": \"2\", "
"\"num_outputs\": \"1\"}, \"inputs\": [[0, 0, 0], [1, 0, 0]]}, {\"op\": "
"\"tvm_op\", \"name\": \"__copy_add_to_sub\", \"attrs\": "
"{\"flatten_data\": \"0\", \"func_name\": \"__copy\", \"num_inputs\": "
"\"1\", \"num_outputs\": \"1\"}, \"inputs\": [[2, 0, 0]]}, {\"op\": "
"\"null\", \"name\": \"C\", \"inputs\": []}, {\"op\": \"tvm_op\", "
"\"name\": \"elemwise_sub\", \"attrs\": {\"flatten_data\": \"0\", "
"\"func_name\": \"elemwise_sub\", \"num_inputs\": \"2\", "
"\"num_outputs\": \"1\"}, \"inputs\": [[3, 0, 0], [4, 0, 0]]}], "
"\"arg_nodes\": [0, 1, 4], \"node_row_ptr\": [0, 1, 2, 3, 4, 5, 6], "
"\"heads\": [[2, 0, 0], [5, 0, 0]], \"attrs\": {\"storage_id\": [\"list_int\", "
"[3, 4, 0, 1, 5, 2]], \"shape\": [\"list_shape\", [[4], [4], [4], [4], [4], "
"[4]]], \"device_index\": [\"list_int\", [2, 2, 2, 1, 1, 1]], \"dtype\": "
"[\"list_int\", [0, 0, 0, 0, 0, 0]], \"dltype\": [\"list_str\", "
"[\"float32\", \"float32\", \"float32\", \"float32\", \"float32\", "
"\"float32\"]]}}";
// Setup inputs.
auto a_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0});
auto b_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0});
auto c_val = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0});

auto pa = (float*)(a_val->data);
auto pb = (float*)(b_val->data);
auto pc = (float*)(c_val->data);

// Assign values.
for (int i = 0; i < n; i++) {
pa[i] = i;
pb[i] = i + 1.0;
pc[i] = i - 1.0;
}

// Initialize graph executor.
int device_type = static_cast<int>(kDLCPU);
int device_id = 0;

const runtime::PackedFunc* graph_executor =
tvm::runtime::Registry::Get("tvm.graph_executor.create");
runtime::Module mod = (*graph_executor)(json, module, device_type, device_id);

// test FFI for module.
auto test_ffi = PackedFunc([](TVMArgs args, TVMRetValue* rv) {
int tcode = args[1];
ICHECK_EQ(args[0].type_code(), tcode);
});

test_ffi(runtime::Module(mod), static_cast<int>(kTVMModuleHandle));
test_ffi(Optional<runtime::Module>(mod), static_cast<int>(kTVMModuleHandle));

PackedFunc set_input_zero_copy = mod.GetFunction("set_input_zero_copy", false);
PackedFunc set_output_zero_copy = mod.GetFunction("set_output_zero_copy", false);
PackedFunc run = mod.GetFunction("run", false);
set_input_zero_copy("A", a_val);
set_input_zero_copy("B", b_val);
set_input_zero_copy("C", c_val);

tvm::runtime::NDArray out0 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0});
tvm::runtime::NDArray out1 = runtime::NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0});
set_output_zero_copy("elemwise_add", out0);
set_output_zero_copy("elemwise_sub", out1);

run();
float* p_out0 = (float*)out0->data;
float* p_out1 = (float*)out1->data;

// Check correctness.
for (int i = 0; i < n; ++i) {
ICHECK_LT(std::fabs(p_out0[i] - (i + (i + 1.0))), 1e-5);
}

for (int i = 0; i < n; ++i) {
ICHECK_LT(std::fabs(p_out1[i] - (i + (i + 1.0) - (i - 1.0))), 1e-5);
}
}
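
Following the reviewer's suggestion above, a comparison-style check could avoid hard-coding expected values: run once through the regular set_input/get_output path and once with set_output_zero_copy, and require identical results. A sketch under the assumption that `mod` is the graph executor module created in the test; the helper name is made up.

#include <cmath>

#include <tvm/runtime/logging.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/packed_func.h>

void CheckZeroCopyMatchesCopy(tvm::runtime::Module mod, tvm::runtime::NDArray a,
                              tvm::runtime::NDArray b, int n) {
  using namespace tvm::runtime;
  PackedFunc set_input = mod.GetFunction("set_input", false);
  PackedFunc get_output = mod.GetFunction("get_output", false);
  PackedFunc set_output_zero_copy = mod.GetFunction("set_output_zero_copy", false);
  PackedFunc run = mod.GetFunction("run", false);

  // Reference path: the executor owns the output; copy it out.
  set_input("A", a);
  set_input("B", b);
  run();
  NDArray ref = NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0});
  get_output(0, ref);  // the two-argument form copies into `ref`

  // Zero-copy path: pre-bind a caller-owned buffer, then run again.
  NDArray out = NDArray::Empty({n}, {kDLFloat, 32, 1}, {kDLCPU, 0});
  set_output_zero_copy(0, out);
  run();

  // Both paths must agree element-wise.
  for (int i = 0; i < n; ++i) {
    ICHECK_LT(std::fabs(static_cast<float*>(out->data)[i] -
                        static_cast<float*>(ref->data)[i]),
              1e-5);
  }
}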