Commit 004fd3e

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into llama_ops

cyber-pioneer committed Oct 12, 2023
2 parents 9d19f03 + f04f6ee commit 004fd3e
Showing 158 changed files with 2,924 additions and 1,868 deletions.
2 changes: 1 addition & 1 deletion .clang-tidy
@@ -189,7 +189,7 @@ modernize-use-override,
modernize-use-transparent-functors,
-modernize-use-uncaught-exceptions,
performance-faster-string-find,
-performance-for-range-copy,
performance-for-range-copy,
-performance-implicit-conversion-in-loop,
-performance-inefficient-algorithm,
performance-inefficient-string-concatenation,
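Note: this hunk turns on the previously disabled `performance-for-range-copy` clang-tidy check (the leading `-` in the checks list means "disabled"). As a rough, hypothetical sketch of what the check flags — iterating a container by value copies every element, while binding by `const&` does not:

```cpp
#include <string>
#include <vector>

void PrintLengths(const std::vector<std::string>& names) {
  // Flagged by performance-for-range-copy: `name` is a fresh copy each iteration.
  for (std::string name : names) {
    (void)name.size();
  }
  // Preferred form: read-only access through a const reference, no copies.
  for (const std::string& name : names) {
    (void)name.size();
  }
}
```

Most of the `for (... const& ...)` changes later in this diff are fixes for exactly this diagnostic.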
8 changes: 8 additions & 0 deletions cmake/third_party.cmake
@@ -247,6 +247,14 @@ if(NOT DEFINED WITH_MKLDNN)
endif()
endif()

if(WIN32)
if(MSVC)
if(MSVC_VERSION LESS 1920)
set(WITH_MKLDNN OFF)
endif()
endif()
endif()

if(WIN32
OR APPLE
OR NOT WITH_GPU
4 changes: 2 additions & 2 deletions paddle/fluid/distributed/auto_parallel/dist_attr.cc
@@ -83,7 +83,7 @@ OperatorDistAttr& OperatorDistAttr::operator=(

void OperatorDistAttr::initialize(const OpDesc* op) {
if (op == nullptr) return;
for (std::string name : op->InputArgumentNames()) {
for (std::string const& name : op->InputArgumentNames()) {
VarDesc* input = op->Block()->FindVarRecursive(name);
VLOG(4) << "[OperatorDistAttr create input dist attr] " << name;
if (input == nullptr || op->Type() == "create_py_reader") {
@@ -92,7 +92,7 @@ void OperatorDistAttr::initialize(const OpDesc* op) {
input_dist_attrs_[name] = TensorDistAttr(get_tensor_shape(input));
}
}
for (std::string name : op->OutputArgumentNames()) {
for (std::string const& name : op->OutputArgumentNames()) {
VarDesc* output = op->Block()->FindVarRecursive(name);
VLOG(4) << "[OperatorDistAttr create output dist attr] " << name;
if (output == nullptr) {
7 changes: 7 additions & 0 deletions paddle/fluid/distributed/collective/reducer.cc
@@ -985,6 +985,7 @@ void EagerReducer::ProcessUnusedDenseVars() {
opts.reduce_op = ReduceOp::SUM;
std::vector<Tensor> reduce_tensors = {global_used_vars_};
std::vector<phi::DenseTensor> in_out;
in_out.reserve(reduce_tensors.size());
for (auto &t : reduce_tensors) {
in_out.push_back(*std::dynamic_pointer_cast<phi::DenseTensor>(t.impl()));
}
@@ -1081,6 +1082,7 @@ void EagerReducer::FusedAllReduceSchedule(EagerGroup *group,
// all_reduce
std::vector<Tensor> reduce_tensors = {group->dense_contents_};
std::vector<phi::DenseTensor> in_out;
in_out.reserve(reduce_tensors.size());
for (auto &t : reduce_tensors) {
in_out.push_back(*std::dynamic_pointer_cast<phi::DenseTensor>(t.impl()));
}
@@ -1166,6 +1168,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
opts.reduce_op = ReduceOp::SUM;
std::vector<Tensor> reduce_tensors = {rows_num_tensor};
std::vector<phi::DenseTensor> in_out;
in_out.reserve(reduce_tensors.size());
for (auto &t : reduce_tensors) {
in_out.push_back(*std::dynamic_pointer_cast<phi::DenseTensor>(t.impl()));
}
@@ -1214,6 +1217,8 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
std::vector<Tensor> dst_rows_tensors = {dst_rows_tensor};
std::vector<phi::DenseTensor> in;
std::vector<phi::DenseTensor> out;
in.reserve(src_rows_tensors.size());
out.reserve(dst_rows_tensors.size());
for (auto &t : src_rows_tensors) {
in.push_back(*std::dynamic_pointer_cast<phi::DenseTensor>(t.impl()));
}
@@ -1245,6 +1250,8 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
std::vector<Tensor> dst_value_tensors = {dst_value_tensor};
std::vector<phi::DenseTensor> src_dense;
std::vector<phi::DenseTensor> dst_dense;
src_dense.reserve(src_value_tensors.size());
dst_dense.reserve(dst_value_tensors.size());
for (auto &t : src_value_tensors) {
src_dense.push_back(
*std::dynamic_pointer_cast<phi::DenseTensor>(t.impl()));
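The `reserve()` calls added throughout reducer.cc follow the usual capacity pattern: when the number of elements to be pushed is known up front, reserving once avoids repeated vector reallocation (and the element moves that come with it). A minimal sketch with hypothetical names, not Paddle code:

```cpp
#include <vector>

std::vector<int> Squares(const std::vector<int>& values) {
  std::vector<int> out;
  out.reserve(values.size());  // single allocation instead of repeated growth
  for (int v : values) {
    out.push_back(v * v);
  }
  return out;
}
```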
3 changes: 2 additions & 1 deletion paddle/fluid/distributed/fleet_executor/carrier.cc
@@ -121,7 +121,7 @@ void Carrier::CopyParameters(
const framework::ProgramDesc& program,
const std::vector<std::string>& inference_root_scope_vars) {
std::map<std::string, int> inference_root_scope_var_map;
for (auto var_name : inference_root_scope_vars) {
for (auto const& var_name : inference_root_scope_vars) {
inference_root_scope_var_map.insert({var_name, 1});
}
for (size_t i = 0; i < program.Size(); ++i) {
@@ -392,6 +392,7 @@ void Carrier::CreateInterceptors(
}
}

cores.reserve(microbatch_scopes_.size());
for (framework::Scope* scope : microbatch_scopes_) {
cores.push_back(std::make_shared<InterpreterCore>(
place_, task_node->program()->Block(0), scope, execution_config));
@@ -85,7 +85,7 @@ InterceptorMessage ComputeInterceptor::PrepareVarsMsg() {
ready_msg.set_message_type(DATA_WITH_VARS);
ready_msg.set_scope_idx(cur_scope_id_);
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
for (auto iter : node_->vars_to_dtype()) {
for (auto const& iter : node_->vars_to_dtype()) {
VarList* vars = ready_msg.add_vars_list();
const auto& var_name = iter.first;
vars->set_name(var_name);
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/fleet_executor/dist_model.cc
@@ -295,7 +295,7 @@ void DistModel::InsertCommOp(std::string tmp_var_name,
<< ". The ring id is: " << ring_id << ". The group has: " << nranks
<< " ranks. Current rank in the group is: " << rank
<< ". The endpoint is: " << endpoint << ". Peer endpoints are: ";
for (auto ep : peer_endpoints) {
for (const auto &ep : peer_endpoints) {
ss << ep << ", ";
}
VLOG(3) << ss.str();
4 changes: 2 additions & 2 deletions paddle/fluid/distributed/fleet_executor/fleet_executor.cc
@@ -82,7 +82,7 @@ void PreventVarsDelete(
for (const auto& pair : *unused_vars) {
const framework::OperatorBase* op = pair.first;
std::vector<std::string> cur_unused = pair.second;
for (auto name : vars_not_gc) {
for (auto const& name : vars_not_gc) {
auto iter = std::find(cur_unused.begin(), cur_unused.end(), name);
if (iter != cur_unused.end()) {
VLOG(3) << "Removing var: [" << name
@@ -165,7 +165,7 @@ void FleetExecutor::Init(
while_block_vars = GetUnusedVarsAfterWhile(
program_desc, task_node, inference_root_scope_vars);
VLOG(3) << "Vars will be gced after while op";
for (auto var : while_block_vars) {
for (auto const& var : while_block_vars) {
VLOG(3) << var;
}
task_node->SetWhileBlockVars(while_block_vars);
22 changes: 11 additions & 11 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -410,7 +410,7 @@ static std::pair<std::string, std::string> GetAttrType(
ret = "std::vector<std::string>";
if (is_arg) ret += "&";
val += "{";
for (auto x : PADDLE_GET_CONST(std::vector<std::string>, attr)) {
for (auto const& x : PADDLE_GET_CONST(std::vector<std::string>, attr)) {
val += "\"" + x + "\"" + ",";
}
if (val.size() > 1) val.pop_back();
@@ -1238,7 +1238,7 @@ static std::string GenerateGradNodeCreationContent(
bool found_target_name = false;
for (const auto& iter : op_base_infos) {
const auto& grad_outs_slot_map = iter.GetGradOutsSlotnameMap();
for (auto iter : grad_outs_slot_map) {
for (auto const& iter : grad_outs_slot_map) {
if ((!found_target_name) && (input_name == iter.second)) {
const char* SET_GRAD_OUT_META_TEMPLATE =
" grad_node->SetGradOutMeta(%s, %d);\n";
@@ -1256,7 +1256,7 @@ static std::string GenerateGradNodeCreationContent(
bool found_target_name = false;
for (const auto& iter : op_base_infos) {
const auto& grad_outs_slot_map = iter.GetGradOutsSlotnameMap();
for (auto iter : grad_outs_slot_map) {
for (auto const& iter : grad_outs_slot_map) {
if ((!found_target_name) && (input_name == iter.second)) {
const char* SET_GRAD_OUT_META_TEMPLATE =
" grad_node->SetGradOutMeta(%s, %d);\n";
@@ -2142,7 +2142,7 @@ static std::string GenerateSingleOpBase(
// [Generation] Get Full Zero
std::string fill_zero_str = "";
if (ops_to_fill_zero_for_empty_grads.count(fwd_op_type)) {
for (auto iter : grad_ins) {
for (auto const& iter : grad_ins) {
const std::string& grad_input_name = iter.first;
if (grad_ins_grad_slotname_map.count(grad_input_name)) {
size_t fwd_output_position = fwd_outputs_name_pos_map.at(
@@ -2189,7 +2189,7 @@ static std::string GenerateSingleOpBase(
"backward_inplace_tensor" + std::to_string(*outs_size);
bool process_backward_inplace = false;
std::string ins_contents_str = "";
for (auto iter : grad_ins) {
for (auto const& iter : grad_ins) {
const std::string& grad_input_name = iter.first;

if (grad_ins_fwd_slotname_map.count(grad_input_name)) {
@@ -2293,7 +2293,7 @@ static std::string GenerateSingleOpBase(
paddle::string::Sprintf(BWD_INS_MAP_TEMPLATE, ins_name, ins_contents_str);
generated_grad_function_body += ins_map_str;

for (auto iter : grad_ins) {
for (auto const& iter : grad_ins) {
const std::string& grad_input_name = iter.first;

if (grad_ins_fwd_slotname_map.count(grad_input_name)) {
@@ -2335,7 +2335,7 @@ static std::string GenerateSingleOpBase(
VLOG(6) << "Generated Ins Map";
// [Generation] Get Outs Map
std::string outs_contents_str = "";
for (auto iter : grad_outs) {
for (auto const& iter : grad_outs) {
const std::string& grad_output_name = iter.first;

if (grad_outs_slotname_map.count(grad_output_name)) {
@@ -2440,7 +2440,7 @@ static std::string GenerateSingleOpBase(
generated_grad_function_body += outs_map_str;
generated_grad_function_body += outs_contents_str;
generated_grad_function_body += "\n";
for (auto iter : grad_outs) {
for (auto const& iter : grad_outs) {
const std::string& grad_output_name = iter.first;

if (grad_outs_slotname_map.count(grad_output_name)) {
@@ -2498,7 +2498,7 @@ static std::string GenerateSingleOpBase(
"%s[\"%s\"][0]);\n"
" };\n";
std::string backward_inplace_map_str = "";
for (auto iter : backward_inplace_map) {
for (auto const& iter : backward_inplace_map) {
std::string backward_inplace_input_name = iter.first;
std::string backward_inplace_output_name = iter.second;
backward_inplace_map_str += paddle::string::Sprintf(
@@ -2553,7 +2553,7 @@ static std::string GenerateSingleOpBase(
// [Generation] Get Return
std::string outputs_str = "";
size_t num_appended_outputs = 0;
for (auto iter : grad_outs) {
for (auto const& iter : grad_outs) {
const std::string& grad_out_name = iter.first;
const std::string& fwd_name = grad_outs_slotname_map.at(grad_out_name);

@@ -2594,7 +2594,7 @@ static std::string GenerateSingleOpBase(

/* Handle Special Case: "PullSparseOp", etc
For returns, append "GradOut" to the very end of return list. */
for (auto iter : grad_outs) {
for (auto const& iter : grad_outs) {
const std::string& grad_out_name = iter.first;
const std::string& fwd_name = grad_outs_slotname_map.at(grad_out_name);

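The `grad_ins` / `grad_outs` loops above iterate over maps whose mapped values are containers, so taking the element by value copies a whole `std::pair` (key plus value) on every pass. Binding with `const auto&` keeps the traversal read-only and copy-free. A small illustrative sketch with hypothetical data, not the generator's actual types:

```cpp
#include <map>
#include <string>
#include <vector>

size_t TotalSlots(const std::map<std::string, std::vector<std::string>>& grad_slots) {
  size_t total = 0;
  // const auto& avoids copying each {name, slot-list} pair per iteration.
  for (const auto& kv : grad_slots) {
    total += kv.second.size();
  }
  return total;
}
```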
2 changes: 1 addition & 1 deletion paddle/fluid/eager/backward.cc
@@ -85,7 +85,7 @@ void EnforceGradNodeHasInput(GradNodeBase* node) {
void DuplicateCheck(const std::vector<paddle::Tensor>& inputs, bool is_input) {
std::unordered_set<AutogradMeta*> visisted_ins;
std::string msg = is_input ? "inputs" : "outputs";
for (auto in : inputs) {
for (auto const& in : inputs) {
AutogradMeta* auto_grad_meta = EagerUtils::unsafe_autograd_meta(in);
PADDLE_ENFORCE_EQ(
visisted_ins.count(auto_grad_meta),
8 changes: 4 additions & 4 deletions paddle/fluid/eager/custom_operator/custom_operator_node.cc
@@ -190,12 +190,12 @@ RunCustomOpNode::operator()(paddle::small_vector<std::vector<paddle::Tensor>,
}
}

for (auto it : fwd_outs) {
for (auto it : fwd_outs) { // NOLINT
VLOG(7) << "Insert fwd_outs to grad_inputs: " << it.first;
tmp_ins[it.first] = RunCustomOpNode::Recover(&(it.second));
}

for (auto it : fwd_ins) {
for (auto it : fwd_ins) { // NOLINT
// NOTE(HongyuJia): returned tensor maybe un-defined tensor when inputs
// optional<Tensor>
VLOG(7) << "Insert fwd_ins to grad_inputs: " << it.first;
@@ -406,12 +406,12 @@ RunCustomOpDoubleGradNode::operator()(
}
}

for (auto it : fwd_outs) {
for (auto it : fwd_outs) { // NOLINT
VLOG(7) << "Insert fwd_outs to grad_inputs: " << it.first;
tmp_ins[it.first] = RunCustomOpDoubleGradNode::Recover(&(it.second));
}

for (auto it : fwd_ins) {
for (auto it : fwd_ins) { // NOLINT
VLOG(7) << "Insert fwd_ins to grad_inputs: " << it.first;
tmp_ins[it.first] = RunCustomOpDoubleGradNode::Recover(&(it.second));
}
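Here the by-value loops are kept and annotated with `// NOLINT` instead. A trailing `// NOLINT` comment suppresses clang-tidy diagnostics on that line, the usual escape hatch when the copy is deliberate — for example when a helper needs to mutate its own copy of the mapped value. A hedged sketch of the mechanism with made-up names (`Consume` is not a Paddle function):

```cpp
#include <map>
#include <string>
#include <vector>

// Hypothetical helper that mutates the container handed to it.
void Consume(std::vector<int>* values) { values->push_back(0); }

void Run(const std::map<std::string, std::vector<int>>& inputs) {
  // The copy is intentional: Consume() modifies `it.second`, and the map's
  // stored values must stay untouched, so the range-copy warning is silenced.
  for (auto it : inputs) {  // NOLINT
    Consume(&it.second);
  }
}
```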
9 changes: 9 additions & 0 deletions paddle/fluid/eager/grad_node_info.cc
@@ -277,6 +277,15 @@ void GradNodeBase::SetGradOutMeta(const paddle::Tensor& fwd_in,
meta.SetTensorMeta(dense_tensor.meta());
meta.SetPlace(fwd_in.place());
// Set DistAttr
PADDLE_ENFORCE_EQ(dist_tensor->defined(),
true,
phi::errors::InvalidArgument(
"The forward input DistTensor is not defined."));
PADDLE_ENFORCE_NE(
dist_tensor->dist_attr().empty(),
true,
phi::errors::InvalidArgument(
"The forward input DistTensor's dist attr is empty."));
meta.SetDistAttr(dist_tensor->dist_attr());
SetIsRunAutoParallel(true);
} else {
6 changes: 6 additions & 0 deletions paddle/fluid/eager/to_static/run_program_op_node.h
@@ -191,6 +191,12 @@ static auto GetNameFromValue(const ::pir::Block *block,
.dyn_cast<pir::StrAttribute>()
.AsString();
value2name[op->operand(0).source()] = name;
} else if (!is_input && op->name() == "builtin.shadow_output") {
name = op->attributes()
.at("output_name")
.dyn_cast<pir::StrAttribute>()
.AsString();
value2name[op->operand(0).source()] = name;
} else if (is_input && op->name() == "builtin.get_parameter") {
name = op->attributes()
.at("parameter_name")
32 changes: 32 additions & 0 deletions paddle/fluid/eager/utils.cc
@@ -28,6 +28,38 @@
#include "paddle/fluid/framework/variable.h"

namespace egr {

void SetGradOutputDistAttrIter::visit_element(paddle::Tensor* element,
const GradSlotMeta& meta) {
if (element == nullptr) {
VLOG(4) << "The input element is nullptr when calling "
"SetGradOutputDistAttrIter.";
return;
}
// Here the element is empty or defined DistTensor
VLOG(4) << "The input element is set DistTensor impl when calling "
"SetGradOutputDistAttrIter.";
element->set_impl(std::make_shared<phi::distributed::DistTensor>(
phi::DDim(), meta.DistAttr()));
}

void SetGradOutputDistAttrIter::visit(paddle::Tensor* element) {
if (!out_meta_[out_indexes_[cur_pos_]].empty()) {
visit_element(element, out_meta_[out_indexes_[cur_pos_]][0]);
}
cur_pos_++;
}

void SetGradOutputDistAttrIter::visit(
const std::vector<paddle::Tensor*>& elements) {
if (!out_meta_[out_indexes_[cur_pos_]].empty()) {
for (size_t i = 0; i < elements.size(); ++i) {
visit_element(elements.at(i), out_meta_[out_indexes_[cur_pos_]][i]);
}
}
cur_pos_++;
}

/**
* Implementation of Eager Utils.
**/
38 changes: 3 additions & 35 deletions paddle/fluid/eager/utils.h
@@ -97,41 +97,9 @@ class SetGradOutputDistAttrIter : public IterHelper<paddle::Tensor*> {
: out_meta_(out_meta), out_indexes_{out_indexes} {}

private:
void visit_element(paddle::Tensor* element, const GradSlotMeta& meta) {
if (element == nullptr) {
return;
}
if (meta.DistAttr().empty()) {
return;
}
if (element->defined()) {
if (element->is_dist_tensor()) {
PADDLE_THROW(phi::errors::Unimplemented(
"Unsupport set defined dist tensor now."));
} else {
// Only deal with dist tensor here
return;
}
} else {
element->set_impl(std::make_shared<phi::distributed::DistTensor>(
phi::DDim(), meta.DistAttr()));
}
}
void visit(paddle::Tensor* element) override {
if (!out_meta_[out_indexes_[cur_pos_]].empty()) {
visit_element(element, out_meta_[out_indexes_[cur_pos_]][0]);
}
cur_pos_++;
}

void visit(const std::vector<paddle::Tensor*>& elements) override {
if (!out_meta_[out_indexes_[cur_pos_]].empty()) {
for (size_t i = 0; i < elements.size(); ++i) {
visit_element(elements.at(i), out_meta_[out_indexes_[cur_pos_]][i]);
}
}
cur_pos_++;
}
void visit_element(paddle::Tensor* element, const GradSlotMeta& meta);
void visit(paddle::Tensor* element) override;
void visit(const std::vector<paddle::Tensor*>& elements) override;

const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
out_meta_;
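Paired with the utils.cc hunk above, this change moves (and simplifies) the `SetGradOutputDistAttrIter` visitor bodies out of the header: the class now only declares `visit_element` and the two `visit` overloads, and their definitions live in a single translation unit, so edits to the implementation no longer force every includer to recompile. A minimal sketch of the same header/source split, using a hypothetical `Widget` class rather than the Paddle one:

```cpp
// widget.h — declaration only; callers see the interface, not the body.
class Widget {
 public:
  void Process(int value);

 private:
  int total_ = 0;
};

// widget.cc — out-of-line definition; changing it does not touch widget.h.
#include "widget.h"

void Widget::Process(int value) { total_ += value; }
```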