Commit

Merge branch 'develop' of github.com:PaddlePaddle/Paddle into complex_op
chenfeiyu committed Dec 16, 2021
2 parents 7e78eeb + 023ff4f commit 31a5ef4
Showing 486 changed files with 27,015 additions and 5,709 deletions.
6 changes: 3 additions & 3 deletions cmake/inference_lib.cmake
@@ -218,16 +218,16 @@ include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io)

# copy api headers for pten & custom op
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/ext/*
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/ext/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/ext/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/include/*
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/include/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/include/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/all.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/common/*
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/common/*.h
${PADDLE_SOURCE_DIR}/paddle/fluid/platform/bfloat16.h
${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex.h
${PADDLE_SOURCE_DIR}/paddle/fluid/platform/float16.h
3 changes: 1 addition & 2 deletions cmake/pten.cmake
@@ -20,8 +20,7 @@ string(FIND ${TARGET_PATH} "experimental" pos)
if (pos GREATER 1)
file(GLOB HEADERS "${TARGET_PATH}/*" "*.h")
foreach(header ${HEADERS})
string(FIND ${header} ".h" hpos)
if (hpos GREATER 1)
if (${header} MATCHES ".*.h$")
file(READ ${header} HEADER_CONTENT)
string(REPLACE "paddle/pten/" "paddle/include/experimental/pten/" HEADER_CONTENT "${HEADER_CONTENT}")
string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/" HEADER_CONTENT "${HEADER_CONTENT}")
2 changes: 1 addition & 1 deletion cmake/third_party.cmake
@@ -393,7 +393,7 @@ endif (WIN32)

if (WITH_INFRT)
include(external/llvm)
list(APPEND third_party_deps external_llvm)
list(APPEND third_party_deps ${llvm_libs})
endif()

if (WITH_IPU)
5 changes: 2 additions & 3 deletions paddle/fluid/distributed/common/cost_timer.h
@@ -75,9 +75,8 @@ class CostTimer {
}
~CostTimer() {
if (_is_print_cost) {
LOG(INFO) << "CostTimer label:" << _label
<< ", cost:" << butil::gettimeofday_ms() - _start_time_ms
<< "ms";
VLOG(3) << "CostTimer label:" << _label
<< ", cost:" << butil::gettimeofday_ms() - _start_time_ms << "ms";
} else {
*(_profiler_node->recorder) << butil::gettimeofday_ms() - _start_time_ms;
}
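For context on the cost_timer.h change above: downgrading LOG(INFO) to VLOG(3) keeps the per-destructor timing line out of the default log stream, since with standard glog semantics VLOG(n) is emitted only when the verbosity level is at least n. A minimal sketch, assuming a plain glog setup rather than Paddle's logging wrappers:

#include <glog/logging.h>

int main(int argc, char* argv[]) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = true;
  FLAGS_v = 3;  // equivalent to running the binary with GLOG_v=3

  LOG(INFO) << "always printed at INFO severity";
  VLOG(3) << "printed only because the verbosity level is >= 3";
  return 0;
}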
9 changes: 6 additions & 3 deletions paddle/fluid/distributed/fleet.cc
@@ -439,13 +439,16 @@ void FleetWrapper::PushSparseFromTensorAsync(
const LoDTensor* shows, const LoDTensor* clks,
std::vector<LoDTensor*>* outputs) {
int batch_size = -1;
bool batch_size_consist = true;
for (auto* input : *inputs) {
int cur_batch_size =
input->lod().size() ? input->lod()[0].size() - 1 : input->dims()[0];
if (batch_size == -1) {
batch_size = cur_batch_size;
} else {
CHECK(batch_size == cur_batch_size); // NOLINT
// CHECK(batch_size == cur_batch_size); // NOLINT
batch_size_consist = false;
break;
}
}
CHECK(batch_size > 0); // NOLINT
@@ -461,8 +464,8 @@ void FleetWrapper::PushSparseFromTensorAsync(
for (framework::LoDTensor* g_tensor : *outputs) {
float* g_ori = g_tensor->data<float>();
// no cvm
if (true) { // TODO(zhaocaibei123): add config
// scale_sparse_gradient_with_batch_size_
if (batch_size_consist) { // TODO(zhaocaibei123): add config
// scale_sparse_gradient_with_batch_size_
Eigen::Map<
Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
g_mat(g_ori, g_tensor->numel() / fea_dim, fea_dim);
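The hunk above ends at the Eigen::Map declaration, so the scaling itself is not shown. As a rough illustration (an assumption about the elided lines, not a copy of them): the flat gradient buffer is viewed as a row-major (numel / fea_dim) x fea_dim matrix and, when every input agrees on the batch size, scaled in place by 1 / batch_size.

#include <Eigen/Dense>
#include <iostream>
#include <vector>

int main() {
  const int fea_dim = 4;
  const int batch_size = 2;
  std::vector<float> g(batch_size * fea_dim, 8.0f);  // stand-in gradient buffer

  // View the flat buffer as a (numel / fea_dim) x fea_dim row-major matrix.
  Eigen::Map<
      Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
      g_mat(g.data(), static_cast<Eigen::Index>(g.size()) / fea_dim, fea_dim);

  // Only scale when the batch size is consistent across inputs,
  // mirroring the new batch_size_consist guard above.
  g_mat *= 1.0f / static_cast<float>(batch_size);

  std::cout << g[0] << std::endl;  // prints 4
  return 0;
}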
5 changes: 5 additions & 0 deletions paddle/fluid/distributed/fleet_executor/carrier.h
@@ -75,6 +75,11 @@ class Carrier final {

bool IsInit() const;

// NOTE: This mutex will be used in interceptor's RunOps function.
// This mutex is used to prevent forward ops and backward ops from running
// simultaneously, which would otherwise lead to a random hang for some sync ops.
std::mutex run;

DISABLE_COPY_AND_ASSIGN(Carrier);

private:
@@ -13,6 +13,7 @@
// limitations under the License.

#include "paddle/fluid/distributed/fleet_executor/compute_interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"

#include "paddle/fluid/distributed/fleet_executor/task_node.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
@@ -169,6 +170,8 @@ void ComputeInterceptor::ReplyCompletedToUpStream() {
}

void ComputeInterceptor::RunOps() {
Carrier& carrier_instance = Carrier::Instance();
std::unique_lock<std::mutex> lock(carrier_instance.run);
VLOG(3) << "ComputeInterceptor " << interceptor_id_ << " running ops for the "
<< step_ + 1 << " time.";
for (auto op : node_->ops()) {
20 changes: 16 additions & 4 deletions paddle/fluid/distributed/fleet_executor/fleet_executor.cc
@@ -33,10 +33,22 @@ FleetExecutor::FleetExecutor(const std::string& exe_desc_str) {

FleetExecutor::~FleetExecutor() { root_scope_->DropKids(); }

void FleetExecutor::Init(const framework::ProgramDesc& program_desc,
framework::Scope* scope,
const platform::Place& place) {
runtime_graph_ = std::make_shared<RuntimeGraph>(program_desc, exe_desc_);
void FleetExecutor::Init(
const framework::ProgramDesc& program_desc, framework::Scope* scope,
const platform::Place& place, const std::vector<TaskNode*>& task_nodes,
const std::unordered_map<int64_t, int64_t>& task_id_to_rank) {
if (task_nodes.size() == 0) {
runtime_graph_ = std::make_shared<RuntimeGraph>(program_desc, exe_desc_);
} else {
runtime_graph_ = std::make_shared<RuntimeGraph>();
std::unordered_map<int64_t, TaskNode*> interceptor_id_to_task;
for (auto task_node : task_nodes) {
int64_t interceptor_id = task_node->task_id();
interceptor_id_to_task.emplace(interceptor_id, task_node);
}
runtime_graph_->SetInterceptorIdToRank(task_id_to_rank);
runtime_graph_->SetInterceptorIdToNode(interceptor_id_to_task);
}
root_scope_ = scope;
place_ = place;
PADDLE_ENFORCE_NOT_NULL(root_scope_, platform::errors::InvalidArgument(
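The new Init overload either falls back to building the RuntimeGraph from the ProgramDesc (when no task nodes are supplied) or indexes the caller-provided nodes by task_id. A self-contained sketch of that dispatch with stand-in types; FakeTaskNode is hypothetical, the real TaskNode lives in fleet_executor/task_node.h and only its task_id() accessor is relied on here:

#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

struct FakeTaskNode {            // hypothetical stand-in for distributed::TaskNode
  int64_t id;
  int64_t task_id() const { return id; }
};

void Init(const std::vector<FakeTaskNode*>& task_nodes) {
  if (task_nodes.empty()) {
    // Old path: build the RuntimeGraph from the ProgramDesc and executor desc.
    std::cout << "build RuntimeGraph from ProgramDesc" << std::endl;
    return;
  }
  // New path: index the caller-provided task nodes by interceptor/task id.
  std::unordered_map<int64_t, FakeTaskNode*> interceptor_id_to_task;
  for (auto* node : task_nodes) {
    interceptor_id_to_task.emplace(node->task_id(), node);
  }
  std::cout << "using " << interceptor_id_to_task.size()
            << " caller-provided task nodes" << std::endl;
}

int main() {
  FakeTaskNode a{0}, b{1};
  Init({});         // falls back to the original behaviour
  Init({&a, &b});   // uses the externally built task graph
  return 0;
}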
5 changes: 4 additions & 1 deletion paddle/fluid/distributed/fleet_executor/fleet_executor.h
@@ -30,14 +30,17 @@ namespace distributed {
class RuntimeGraph;
class Carrier;
class MessageBus;
class TaskNode;

class FleetExecutor final {
public:
FleetExecutor() = delete;
explicit FleetExecutor(const std::string& exe_desc_str);
~FleetExecutor();
void Init(const framework::ProgramDesc& program_desc, framework::Scope* scope,
const platform::Place& place);
const platform::Place& place,
const std::vector<TaskNode*>& task_nodes,
const std::unordered_map<int64_t, int64_t>& task_id_to_rank);
void Run();

private:
8 changes: 8 additions & 0 deletions paddle/fluid/distributed/fleet_executor/runtime_graph.h
@@ -44,6 +44,14 @@ class RuntimeGraph final {
const std::unordered_map<int64_t, int64_t>& intercepter_id_to_rank() const {
return intercepter_id_to_rank_;
}
void SetInterceptorIdToRank(
const std::unordered_map<int64_t, int64_t>& intercepter_id_to_rank) {
intercepter_id_to_rank_ = intercepter_id_to_rank;
}
void SetInterceptorIdToNode(
const std::unordered_map<int64_t, TaskNode*>& intercepter_id_to_node) {
intercepter_id_to_node_ = intercepter_id_to_node;
}
std::string DebugString() const;

private:
4 changes: 4 additions & 0 deletions paddle/fluid/distributed/service/communicator.cc
@@ -949,6 +949,10 @@ void GeoCommunicator::InitDense(std::vector<std::string> &varnames,
auto *old_var = old_scope_->Var(t);
old_var->GetMutable<framework::LoDTensor>();
framework::CopyVariable(*global_var, old_var);
// init pserver_scope_
auto *pserver_var = pserver_scope_->Var(t);
pserver_var->GetMutable<framework::LoDTensor>();
framework::CopyVariable(*global_var, pserver_var);
}
VLOG(1) << "init dense table " << table_id << " done";
}
@@ -33,31 +33,31 @@ static void ScaleDeviceDispatch(const pten::DenseTensor& dense_tensor,
pten::DenseTensor* dense_out) {
switch (dense_tensor.dtype()) {
case pten::DataType::FLOAT64: {
pten::Scale<double>(dev_ctx, dense_tensor /* tensor */, scale /* scale */,
bias /* bias */,
bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
pten::Scale<double, DeviceContext>(
dev_ctx, dense_tensor /* tensor */, scale /* scale */,
bias /* bias */, bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
break;
}
case pten::DataType::FLOAT32: {
pten::Scale<float>(dev_ctx, dense_tensor /* tensor */, scale /* scale */,
bias /* bias */,
bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
pten::Scale<float, DeviceContext>(dev_ctx, dense_tensor /* tensor */,
scale /* scale */, bias /* bias */,
bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
break;
}
case pten::DataType::INT64: {
pten::Scale<int64_t>(dev_ctx, dense_tensor /* tensor */,
scale /* scale */, bias /* bias */,
bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
pten::Scale<int64_t, DeviceContext>(
dev_ctx, dense_tensor /* tensor */, scale /* scale */,
bias /* bias */, bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
break;
}
case pten::DataType::INT32: {
pten::Scale<int32_t>(dev_ctx, dense_tensor /* tensor */,
scale /* scale */, bias /* bias */,
bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
pten::Scale<int32_t, DeviceContext>(
dev_ctx, dense_tensor /* tensor */, scale /* scale */,
bias /* bias */, bias_after_scale /* bias_after_scale */,
dense_out /* out tensor */);
break;
}
default: {
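The change above threads the device context type through the dtype switch as an explicit template argument, i.e. pten::Scale<double, DeviceContext>(...) instead of pten::Scale<double>(...). A generic, self-contained sketch of this dispatch pattern (the kernel below is hypothetical, not the real pten::Scale signature):

#include <cstdint>
#include <iostream>

struct CPUContext {};  // hypothetical device context

enum class DataType { FLOAT32, INT32 };

// A kernel templated on both the element type and the device context.
template <typename T, typename Context>
void Scale(const Context& /*ctx*/, const T* in, T* out, int64_t n, float scale) {
  for (int64_t i = 0; i < n; ++i) {
    out[i] = static_cast<T>(in[i] * scale);
  }
}

// Runtime dtype to compile-time T dispatch, spelling out both template arguments.
template <typename Context>
void ScaleDispatch(const Context& ctx, DataType dtype, const void* in, void* out,
                   int64_t n, float scale) {
  switch (dtype) {
    case DataType::FLOAT32:
      Scale<float, Context>(ctx, static_cast<const float*>(in),
                            static_cast<float*>(out), n, scale);
      break;
    case DataType::INT32:
      Scale<int32_t, Context>(ctx, static_cast<const int32_t*>(in),
                              static_cast<int32_t*>(out), n, scale);
      break;
  }
}

int main() {
  CPUContext ctx;
  float in[3] = {1.f, 2.f, 3.f};
  float out[3] = {};
  ScaleDispatch(ctx, DataType::FLOAT32, in, out, 3, 2.0f);
  std::cout << out[0] << " " << out[1] << " " << out[2] << std::endl;  // 2 4 6
  return 0;
}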
2 changes: 1 addition & 1 deletion paddle/fluid/eager/api/utils/CMakeLists.txt
@@ -1,3 +1,3 @@
cc_library(tensor_utils SRCS tensor_utils.cc DEPS pten pten_api autograd_meta grad_node_info accumulation_node)
cc_library(hook_utils SRCS hook_utils.cc DEPS pten tensor_utils autograd_meta grad_node_info utils accumulation_node)
cc_library(global_utils SRCS global_utils.cc DEPS place)
cc_library(global_utils SRCS global_utils.cc DEPS place tracer)
44 changes: 32 additions & 12 deletions paddle/fluid/eager/api/utils/global_utils.h
@@ -17,7 +17,7 @@

#include <atomic>
#include <memory>
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/imperative/tracer.h"

namespace egr {

@@ -34,29 +34,49 @@ class UniqueNameGenerator {
};

// Global
// TODO(jiabin): Now we are using imperative tracer, move it here when we
// deprecate imperative.

class Controller {
public:
static Controller& Instance() { return *controller_; }
const paddle::platform::Place& GetExpectedPlace() const {
return *expected_place_.get();
paddle::platform::Place GetExpectedPlace() const {
return tracer_->ExpectedPlace();
}
void SetExpectedPlace(const paddle::platform::Place& place) {
expected_place_ = std::make_shared<paddle::platform::Place>(place);
tracer_->SetExpectedPlace(place);
}
void SetAMPLevel(paddle::imperative::AmpLevel level) {
tracer_->SetAmpLevel(level);
}
void SetAMPLevel(int level) { amp_level_ = level; }
int GetAMPLevel() const { return amp_level_; }
bool HasGrad() const { return has_grad_; }
paddle::imperative::AmpLevel GetAMPLevel() const {
return tracer_->GetAmpLevel();
}
bool HasGrad() const { return tracer_->HasGrad(); }
void SetHasGrad(bool has_grad) { tracer_->SetHasGrad(has_grad); }
std::string GenerateUniqueName(std::string key = "eager_tmp") {
return generator_->Generate(key);
return tracer_->GenerateUniqueName(key);
}
const std::shared_ptr<paddle::imperative::Tracer>& GetCurrentTracer() {
return tracer_;
}
void SetCurrentTracer(
const std::shared_ptr<paddle::imperative::Tracer>& tracer) {
tracer_ = tracer;
VLOG(6) << "Set current tracer: " << tracer_;
}

bool InEagerMode() const { return in_eager_mode_; }

void SetInEagerMode(bool in_eager_mode) { in_eager_mode_ = in_eager_mode; }

private:
Controller() = default;
static Controller* controller_;
std::shared_ptr<paddle::platform::Place> expected_place_ = nullptr;
int amp_level_ = 0;
bool has_grad_ = true;
std::unique_ptr<UniqueNameGenerator> generator_{new UniqueNameGenerator()};
std::shared_ptr<paddle::imperative::Tracer> tracer_{
new paddle::imperative::Tracer()};
// TODO(jiabin): remove when we don't need imperative.
bool in_eager_mode_{false};
DISABLE_COPY_AND_ASSIGN(Controller);
};

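With this change the eager Controller no longer stores its own place, AMP level, or name generator; every accessor forwards to the shared imperative Tracer, so eager and legacy dygraph code paths observe the same state. A minimal usage sketch, assuming the header above is available and that Controller::Instance() has been initialized by the framework:

#include <string>
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/platform/place.h"

void ConfigureEagerController() {
  auto& ctrl = egr::Controller::Instance();

  // These now read from and write to the underlying imperative::Tracer.
  ctrl.SetExpectedPlace(paddle::platform::CPUPlace());
  ctrl.SetHasGrad(true);
  std::string tmp = ctrl.GenerateUniqueName();  // forwarded to the tracer

  // The same tracer object can be shared with legacy dygraph code paths.
  auto tracer = ctrl.GetCurrentTracer();
  ctrl.SetCurrentTracer(tracer);
}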
6 changes: 4 additions & 2 deletions paddle/fluid/eager/auto_code_generator/CMakeLists.txt
@@ -47,12 +47,14 @@ if(WIN32)
endif()

add_custom_target(eager_codegen
COMMAND "${eager_generator_path}/eager_generator.exe" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt"
COMMAND "${eager_generator_path}/eager_generator.exe" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated"
DEPENDS ${EAGER_CODEGEN_DEPS}
VERBATIM)
else()
add_custom_target(eager_codegen
COMMAND "${CMAKE_CURRENT_BINARY_DIR}/eager_generator" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt"
COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:${CMAKE_CURRENT_BINARY_DIR}/../../pybind"
"${CMAKE_CURRENT_BINARY_DIR}/eager_generator"
"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated"
DEPENDS eager_generator
VERBATIM)
endif()