fix NPUDeviceContext in all c++ unittest #32198

Merged · 2 commits · Apr 12, 2021
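This PR applies one mechanical change across the NPU C++ unit tests: instead of constructing a `p::NPUDeviceContext` on the stack, each test now borrows the shared context that `DeviceContextPool` owns for the target place. A minimal before/after sketch of the pattern, with `f` and `p` as the tests' usual aliases for `paddle::framework` and `paddle::platform`, and the surrounding test scaffolding assumed:

```cpp
// Before: each TEST body constructed a private device context.
//   p::NPUDeviceContext ctx(p::NPUPlace(0));
//   Compare<float>(&scope, ctx);

// After: tests share the process-wide context owned by the pool.
// Get() returns a DeviceContext*, so call sites dereference it when
// passing to helpers that take a const p::DeviceContext&.
auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
Compare<float>(&scope, *ctx);
```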
@@ -120,12 +120,12 @@ void Compare(f::Scope *scope, const p::DeviceContext &ctx) {

 TEST(check_finite_and_unscale, NPU_fp32) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx);
+  auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx);
 }

 TEST(check_finite_and_unscale, NPU_fp16) {
   f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<p::float16>(&scope, ctx);
+  auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<p::float16>(&scope, *ctx);
 }
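The point of routing every test through the pool is ownership: a device context wraps per-device resources such as the stream and allocator, and the pool hands out exactly one context per place instead of letting each TEST body create and destroy its own. A self-contained toy sketch of that singleton-pool idea; this is an illustration of the pattern, not Paddle's actual implementation, and `Place`, `DeviceContext`, and the locking here are simplified stand-ins:

```cpp
#include <map>
#include <memory>
#include <mutex>

// Simplified stand-ins for platform::Place / platform::DeviceContext.
struct Place {
  int device_id;
  bool operator<(const Place& o) const { return device_id < o.device_id; }
};

struct DeviceContext {
  explicit DeviceContext(Place p) : place(p) {}  // acquire stream, allocator
  void Wait() {}  // block until queued device work finishes
  Place place;
};

// One lazily created context per place, shared by every caller.
class DeviceContextPool {
 public:
  static DeviceContextPool& Instance() {
    static DeviceContextPool pool;  // constructed once per process
    return pool;
  }

  DeviceContext* Get(const Place& place) {
    std::lock_guard<std::mutex> guard(mu_);
    auto& slot = contexts_[place];
    if (!slot) slot = std::make_unique<DeviceContext>(place);
    return slot.get();  // pool retains ownership; callers just borrow
  }

 private:
  std::mutex mu_;
  std::map<Place, std::unique_ptr<DeviceContext>> contexts_;
};
```

Two tests that ask for the same place now get the same object, so work one test queues is visible to the next test's `Wait()`, and nothing is torn down between TEST bodies.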
paddle/fluid/operators/assign_op_npu_test.cc (5 additions & 10 deletions)

@@ -56,10 +56,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
   auto out = scope->Var("Out");
   auto tensor_out = out->GetMutable<f::LoDTensor>();

-  auto op = f::OpRegistry::CreateOp(op_type,
-                                    {{"X", {"X"}}},
-                                    {{"Out", {"Out"}}},
-                                    {});
+  auto op =
+      f::OpRegistry::CreateOp(op_type, {{"X", {"X"}}}, {{"Out", {"Out"}}}, {});

   op->Run(*scope, place);

@@ -75,11 +73,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
   EXPECT_EQ(out_vec[3], static_cast<T>(4.0));
 }

-
 TEST(assign, NPU_fp32) {
-  f::Scope scope;
-  p::NPUDeviceContext ctx(p::NPUPlace(0));
-  Compare<float>(&scope, ctx, "assign");
+  f::Scope scope;
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx, "assign");
 }
-
-
paddle/fluid/operators/collective/c_allgather_op_npu_test.cc (27 additions & 29 deletions)

@@ -16,23 +16,23 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"

-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

-#include "paddle/fluid/operators/collective/c_broadcast_op.h"
-#include "paddle/fluid/operators/collective/c_allreduce_op.h"
 #include "paddle/fluid/operators/collective/c_allgather_op.h"
+#include "paddle/fluid/operators/collective/c_allreduce_op.h"
+#include "paddle/fluid/operators/collective/c_broadcast_op.h"
 #include "paddle/fluid/operators/collective/c_reducescatter_op.h"

 #if defined(PADDLE_WITH_ASCEND_CL)

@@ -50,25 +50,23 @@ USE_OP_DEVICE_KERNEL(c_allgather, NPU);

 DECLARE_string(selected_npus);

-template<typename T>
-void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
+template <typename T>
+void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
   std::string debugstring = "";
   for (auto ele : data) {
     debugstring += std::to_string(ele) + std::string(",");
   }
-  VLOG(2) << preStr << ":" << std::endl <<debugstring;
+  VLOG(2) << preStr << ":" << std::endl << debugstring;
 }

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
-
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));

-  VLOG(2) << "rank_id = " << rank_id
-          << "; device_id = " << device_id
-          << "; rank_id = " << rank_id
-          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
-
+  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
+          << "; rank_id = " << rank_id
+          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
+
   std::vector<int> rank_ids{0, 1};
   f::AttributeMap comm_init_attrs;
   comm_init_attrs["ring_id"] = 0;

@@ -90,7 +88,7 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {

   std::vector<float> init;
   int rank_id = atoi(getenv("RANK_ID"));
-
+
   int num1 = 1;
   int num2 = 4;

@@ -112,18 +110,18 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {

   // run
   f::AttributeMap attrs;
-  attrs["tag"]=std::string("tagx");
-  attrs["ring_id"]=0;
-  attrs["nranks"]=2;
+  attrs["tag"] = std::string("tagx");
+  attrs["ring_id"] = 0;
+  attrs["nranks"] = 2;

   auto op = f::OpRegistry::CreateOp("c_allgather", {{"X", {"X"}}},
-                                     {{"Out", {"Out"}}}, attrs);
+                                    {{"Out", {"Out"}}}, attrs);

-  for (int i = 0; i < 10; i ++) {
+  for (int i = 0; i < 10; i++) {
     op->Run(*scope, place);
   }
   ctx.Wait();
-
+
   std::vector<float> out_vec;
   TensorToVector(*tensor_out, ctx, &out_vec);
   ctx.Wait();

@@ -139,13 +137,13 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
   }
 }

-
 TEST(c_allgather, NPU) {
   f::Scope scope;

-  // only support one device, if more than one device, use first default
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
+  // only support one device, if more than one device, use first default
+  auto* ctx = p::DeviceContextPool::Instance().Get(
+      p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));

-  Prepare(&scope, ctx);
-  TestHCCLAllGatherOp(&scope, ctx);
+  Prepare(&scope, *ctx);
+  TestHCCLAllGatherOp(&scope, *ctx);
 }
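The collective tests in this file and the two allreduce files below share one flow: `Prepare()` reads `RANK_ID` and `DEVICE_ID` from the environment and initializes the communication ring, then the test creates the collective op, runs it repeatedly, and synchronizes the context before copying results back to the host. A condensed sketch of that flow, assembled from the calls visible in the diff; `ctx->GetPlace()` is an assumption (the tests build the place themselves from `FLAGS_selected_npus`), and input-tensor initialization plus the ring setup inside `Prepare()` are elided:

```cpp
f::Scope scope;
auto* ctx = p::DeviceContextPool::Instance().Get(
    p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
auto place = ctx->GetPlace();  // assumed accessor; the tests rebuild the place

// Output variable the op writes into (input "X" setup elided).
auto* out = scope.Var("Out");
auto* tensor_out = out->GetMutable<f::LoDTensor>();

f::AttributeMap attrs;
attrs["tag"] = std::string("tagx");
attrs["ring_id"] = 0;
attrs["nranks"] = 2;

auto op = f::OpRegistry::CreateOp("c_allgather", {{"X", {"X"}}},
                                  {{"Out", {"Out"}}}, attrs);
for (int i = 0; i < 10; i++) {
  op->Run(scope, place);  // enqueue the collective on the NPU stream
}
ctx->Wait();  // block until every queued run completes

std::vector<float> out_vec;
TensorToVector(*tensor_out, *ctx, &out_vec);  // device-to-host copy
ctx->Wait();
```

Because the context now comes from the pool, the `Wait()` here fences the same stream every other test in the process uses for that place.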
paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc (21 additions & 22 deletions)

@@ -16,23 +16,23 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"

-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

-#include "paddle/fluid/operators/collective/c_broadcast_op.h"
-#include "paddle/fluid/operators/collective/c_allreduce_op.h"
 #include "paddle/fluid/operators/collective/c_allgather_op.h"
+#include "paddle/fluid/operators/collective/c_allreduce_op.h"
+#include "paddle/fluid/operators/collective/c_broadcast_op.h"
 #include "paddle/fluid/operators/collective/c_reducescatter_op.h"

 #if defined(PADDLE_WITH_ASCEND_CL)

@@ -50,24 +50,22 @@ USE_OP_DEVICE_KERNEL(c_allreduce_max, NPU);

 DECLARE_string(selected_npus);

-template<typename T>
-void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
+template <typename T>
+void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
   std::string debugstring = "";
   for (auto ele : data) {
     debugstring += std::to_string(ele) + std::string(",");
   }
-  VLOG(2) << preStr << ":" << std::endl <<debugstring;
+  VLOG(2) << preStr << ":" << std::endl << debugstring;
 }

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
-
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));

-  VLOG(2) << "rank_id = " << rank_id
-          << "; device_id = " << device_id
-          << "; rank_id = " << rank_id
-          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
+  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
+          << "; rank_id = " << rank_id
+          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));

   std::vector<int> rank_ids{0, 1};
   f::AttributeMap comm_init_attrs;

@@ -112,13 +110,13 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) {

   // run
   f::AttributeMap attrs;
-  attrs["tag"]=std::string("tagx");
-  attrs["ring_id"]=0;
+  attrs["tag"] = std::string("tagx");
+  attrs["ring_id"] = 0;

   auto op = f::OpRegistry::CreateOp("c_allreduce_max", {{"X", {"X"}}},
-                                     {{"Out", {"Out"}}}, attrs);
+                                    {{"Out", {"Out"}}}, attrs);

-  for (int i = 0; i < 10; i ++) {
+  for (int i = 0; i < 10; i++) {
     op->Run(*scope, place);
   }
   ctx.Wait();

@@ -139,8 +137,9 @@ TEST(c_allreduce_max, NPU) {
   f::Scope scope;

   // only support one device, if more than one device, use first default
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
+  auto* ctx = p::DeviceContextPool::Instance().Get(
+      p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));

-  Prepare(&scope, ctx);
-  TestHCCLAllReduceOp(&scope, ctx);
+  Prepare(&scope, *ctx);
+  TestHCCLAllReduceOp(&scope, *ctx);
 }
paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc (23 additions & 25 deletions)

@@ -16,19 +16,19 @@ limitations under the License. */
 #include <unistd.h>
 #endif

+#include <stdio.h>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <stdio.h>

 #include "gtest/gtest.h"

-#include "paddle/fluid/string/printf.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"

 #include "paddle/fluid/operators/collective/c_allreduce_op.h"

@@ -47,24 +47,22 @@ USE_OP_DEVICE_KERNEL(c_allreduce_sum, NPU);

 DECLARE_string(selected_npus);

-template<typename T>
-void PrintDebugInfo(const std::string preStr, const std::vector<T> &data){
+template <typename T>
+void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
   std::string debugstring = "";
   for (auto ele : data) {
     debugstring += std::to_string(ele) + std::string(",");
   }
-  VLOG(3) << preStr << ":" << std::endl <<debugstring;
+  VLOG(3) << preStr << ":" << std::endl << debugstring;
 }

-void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
-
+void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {
   int rank_id = atoi(getenv("RANK_ID"));
   int device_id = atoi(getenv("DEVICE_ID"));

-  VLOG(2) << "rank_id = " << rank_id
-          << "; device_id = " << device_id
-          << "; rank_id = " << rank_id
-          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));
+  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
+          << "; rank_id = " << rank_id
+          << "; RANK_TABLE_FILE = " << atoi(getenv("RANK_TABLE_FILE"));

   std::vector<int> rank_ids{0, 1};
   f::AttributeMap comm_init_attrs;

@@ -80,7 +78,8 @@ void Prepare(f::Scope* scope, const p::DeviceContext& ctx){
   ctx.Wait();
 }

-void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) {
+void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx,
+                         int iter) {
   // init
   auto x = scope->Var("X");
   auto tensor_x = x->GetMutable<f::LoDTensor>();

@@ -109,15 +108,13 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter)

   // run
   f::AttributeMap attrs;
-  attrs["tag"]=std::string("tagx_"+ std::to_string(iter));
-  attrs["ring_id"]=0;
+  attrs["tag"] = std::string("tagx_" + std::to_string(iter));
+  attrs["ring_id"] = 0;

-  auto op = f::OpRegistry::CreateOp("c_allreduce_sum",
-                                    {{"X", {"X"}}},
-                                    {{"Out", {"Out"}}},
-                                    attrs);
+  auto op = f::OpRegistry::CreateOp("c_allreduce_sum", {{"X", {"X"}}},
+                                    {{"Out", {"Out"}}}, attrs);

-  for (int i = 0; i < 10; i ++) {
+  for (int i = 0; i < 10; i++) {
     op->Run(*scope, place);
   }
   ctx.Wait();

@@ -138,11 +135,12 @@ TEST(c_allreduce_sum, NPU) {
   f::Scope scope;

   // only support one device, if more than one device, use first default
-  p::NPUDeviceContext ctx(p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));
+  auto* ctx = p::DeviceContextPool::Instance().Get(
+      p::NPUPlace(atoi(FLAGS_selected_npus.c_str())));

-  Prepare(&scope, ctx);
-  for(int i = 0; i < 1; i ++){
+  Prepare(&scope, *ctx);
+  for (int i = 0; i < 1; i++) {
     VLOG(2) << "iter num: " << i;
-    TestHCCLAllReduceOp(&scope, ctx, i);
+    TestHCCLAllReduceOp(&scope, *ctx, i);
   }
 }