From e8a7d3ee77bd1fa871eea9751756ce6a5aac1ed4 Mon Sep 17 00:00:00 2001 From: veyron95 Date: Mon, 1 Nov 2021 12:47:48 +0000 Subject: [PATCH 1/7] Expose func for varbase --- paddle/fluid/imperative/layer.cc | 64 ++++++++++++++++++++++++++ paddle/fluid/imperative/layer.h | 4 ++ paddle/fluid/pybind/imperative.cc | 75 +++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 53ae5b8127fdba..3ca2133657a68c 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -356,6 +356,70 @@ void VarBase::BumpInplaceVersion() { MutableVar()->BumpInplaceVersion(); } +std::shared_ptr VarBase::To(const platform::Place& dst_place, + framework::proto::VarType::Type data_type, + const bool blocking) const { + PADDLE_ENFORCE_EQ( + Var().IsInitialized() && (Var().IsType() || + Var().IsType()), + true, platform::errors::InvalidArgument( + "Variable is not initialized or Variable's type is not " + "LoDTensor or SelectedRows when getting numpy tensor")); + + if (Var().IsType()) { + auto& src_tensor = Var().Get(); + // TODO(Jiabin): change this after move unique_name generator to CXX + auto new_var = std::make_shared( + true, Name() + std::to_string(copied_counter_++)); + + new_var->SetPersistable(Persistable()); + new_var->SetDataType(data_type); + new_var->SetType(Type()); + auto* dst_tensor = + new_var->MutableVar()->GetMutable(); + dst_tensor->set_lod(src_tensor.lod()); + framework::TensorCopy(src_tensor, dst_place, dst_tensor); + if (blocking) { + platform::DeviceContextPool::Instance().Get(dst_place)->Wait(); + auto src_place = src_tensor.place(); + if (!(src_place == dst_place)) { + platform::DeviceContextPool::Instance().Get(src_place)->Wait(); + } + } + VLOG(4) << "copy tensor " << Name() << " from " << Place() << " to " + << dst_place; + VLOG(4) << "copy tensor " << Name() << " from " << DataType() << " to " + << data_type; + + return new_var; + } else { + auto& src_selected_rows = Var().Get(); + auto new_var = std::make_shared( + false, "Itmp" + std::to_string(copied_counter_++)); + new_var->SetType(framework::proto::VarType::SELECTED_ROWS); + new_var->SetDataType(data_type); + auto* dst_selected_rows = + new_var->MutableVar()->GetMutable(); + + framework::TensorCopy(src_selected_rows.value(), dst_place, + dst_selected_rows->mutable_value()); + if (blocking) { + platform::DeviceContextPool::Instance().Get(dst_place)->Wait(); + auto src_place = src_selected_rows.place(); + if (!(src_place == dst_place)) { + platform::DeviceContextPool::Instance().Get(src_place)->Wait(); + } + } + dst_selected_rows->set_height(src_selected_rows.height()); + dst_selected_rows->set_rows(src_selected_rows.rows()); + VLOG(4) << "copy tensor " << Name() << " from " << Place() << " to " + << dst_place; + VLOG(4) << "copy tensor " << Name() << " from " << DataType() << " to " + << data_type; + return new_var; + } +} + void OpBase::SetType(const std::string& type) { op_ = framework::OpRegistry::CreateOp(type, {}, {}, {}, false); } diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 16580627ed1964..3443a61e659d5d 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -252,6 +252,10 @@ class VarBase { std::forward>>(hook)); } + std::shared_ptr To(const platform::Place& dst_place, + framework::proto::VarType::Type data_type, + const bool blocking) const; + private: /** * NOTE(zengjinle): never remove the const qualifier of `var_` if you are diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 4403eb469723a5..f8a08fde47f269 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -1865,6 +1865,81 @@ void BindImperative(py::module *m_ptr) { py::return_value_policy::copy) .def("value", [](imperative::VarBase &self) { return self.MutableVar(); }, py::return_value_policy::reference) + .def("_clear", + [](const std::shared_ptr &self) { + auto *t = self->MutableVar()->GetMutable(); + PADDLE_ENFORCE_EQ(t->IsInitialized(), true, + platform::errors::InvalidArgument( + "tensor has not been initialized")); + t->clear(); + }) + .def("_offset", + [](const std::shared_ptr &self) { + auto *t = self->MutableVar()->GetMutable(); + PADDLE_ENFORCE_EQ(t->IsInitialized(), true, + platform::errors::InvalidArgument( + "tensor has not been initialized")); + return t->offset(); + }) + .def("_share_buffer_with", + [](const std::shared_ptr &self, + std::shared_ptr &target_t) { + auto *t = self->MutableVar()->GetMutable(); + auto *t_t = + target_t->MutableVar()->GetMutable(); + PADDLE_ENFORCE_EQ(t->IsInitialized(), true, + platform::errors::InvalidArgument( + "tensor has not been initialized")); + PADDLE_ENFORCE_EQ(t_t->IsInitialized(), true, + platform::errors::InvalidArgument( + "tensor has not been initialized")); + t->ShareBufferWith(*t_t); + }) + .def("_is_shared_buffer_with", + [](const std::shared_ptr &self, + std::shared_ptr &target_t) { + auto *t = self->MutableVar()->GetMutable(); + auto *t_t = + target_t->MutableVar()->GetMutable(); + PADDLE_ENFORCE_EQ(t->IsInitialized(), true, + platform::errors::InvalidArgument( + "tensor has not been initialized")); + PADDLE_ENFORCE_EQ(t_t->IsInitialized(), true, + platform::errors::InvalidArgument( + "tensor has not been initialized")); + return t->IsSharedBufferWith(*t_t); + }) + .def("_Slice", + [](const std::shared_ptr &self, + int64_t begin_idx, int64_t end_idx) { + auto *t = self->MutableVar()->GetMutable(); + PADDLE_ENFORCE_EQ(t->IsInitialized(), true, + platform::errors::InvalidArgument( + "tensor has not been initialized")); + return t->Slice(begin_idx, end_idx); + }) + .def("_To", + [](const std::shared_ptr &self, + const platform::Place &place, + framework::proto::VarType::Type data_type, bool blocking) { + auto new_var = self->To(place, data_type, blocking); + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) + .def("_To", + [](const std::shared_ptr &self, + const platform::CPUPlace &place, + framework::proto::VarType::Type data_type, bool blocking) { + auto new_var = self->To(place, data_type, blocking); + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) .def_property("name", &imperative::VarBase::Name, &imperative::VarBase::SetName) .def_property("stop_gradient", From 05268199ee0ff4da72365ca924b45f36005c74c6 Mon Sep 17 00:00:00 2001 From: veyron95 Date: Wed, 3 Nov 2021 07:36:04 +0000 Subject: [PATCH 2/7] Expose func for varbase and enhance varbase init func --- paddle/fluid/imperative/layer.cc | 85 +++++---------- paddle/fluid/imperative/layer.h | 6 +- paddle/fluid/pybind/imperative.cc | 66 ++++++++---- .../fluid/dygraph/varbase_patch_methods.py | 45 +++++++- .../fluid/tests/unittests/test_var_base.py | 100 ++++++++++++++++++ 5 files changed, 213 insertions(+), 89 deletions(-) diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 3ca2133657a68c..d24af5dfef826c 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -356,67 +356,30 @@ void VarBase::BumpInplaceVersion() { MutableVar()->BumpInplaceVersion(); } -std::shared_ptr VarBase::To(const platform::Place& dst_place, - framework::proto::VarType::Type data_type, - const bool blocking) const { - PADDLE_ENFORCE_EQ( - Var().IsInitialized() && (Var().IsType() || - Var().IsType()), - true, platform::errors::InvalidArgument( - "Variable is not initialized or Variable's type is not " - "LoDTensor or SelectedRows when getting numpy tensor")); - - if (Var().IsType()) { - auto& src_tensor = Var().Get(); - // TODO(Jiabin): change this after move unique_name generator to CXX - auto new_var = std::make_shared( - true, Name() + std::to_string(copied_counter_++)); - - new_var->SetPersistable(Persistable()); - new_var->SetDataType(data_type); - new_var->SetType(Type()); - auto* dst_tensor = - new_var->MutableVar()->GetMutable(); - dst_tensor->set_lod(src_tensor.lod()); - framework::TensorCopy(src_tensor, dst_place, dst_tensor); - if (blocking) { - platform::DeviceContextPool::Instance().Get(dst_place)->Wait(); - auto src_place = src_tensor.place(); - if (!(src_place == dst_place)) { - platform::DeviceContextPool::Instance().Get(src_place)->Wait(); - } - } - VLOG(4) << "copy tensor " << Name() << " from " << Place() << " to " - << dst_place; - VLOG(4) << "copy tensor " << Name() << " from " << DataType() << " to " - << data_type; - - return new_var; - } else { - auto& src_selected_rows = Var().Get(); - auto new_var = std::make_shared( - false, "Itmp" + std::to_string(copied_counter_++)); - new_var->SetType(framework::proto::VarType::SELECTED_ROWS); - new_var->SetDataType(data_type); - auto* dst_selected_rows = - new_var->MutableVar()->GetMutable(); - - framework::TensorCopy(src_selected_rows.value(), dst_place, - dst_selected_rows->mutable_value()); - if (blocking) { - platform::DeviceContextPool::Instance().Get(dst_place)->Wait(); - auto src_place = src_selected_rows.place(); - if (!(src_place == dst_place)) { - platform::DeviceContextPool::Instance().Get(src_place)->Wait(); - } - } - dst_selected_rows->set_height(src_selected_rows.height()); - dst_selected_rows->set_rows(src_selected_rows.rows()); - VLOG(4) << "copy tensor " << Name() << " from " << Place() << " to " - << dst_place; - VLOG(4) << "copy tensor " << Name() << " from " << DataType() << " to " - << data_type; - return new_var; +// NOTE(weilong wu): This func try to share grad_var_ data with target varbase +void VarBase::_ShareDataWith(const VarBase& src) { + if (Var().IsInitialized()) { + PADDLE_ENFORCE_EQ(DataType(), src.DataType(), + platform::errors::PreconditionNotMet( + "Tensor %s has different data type with Tensor %s", + Name(), src.Name())); + PADDLE_ENFORCE_EQ(Type(), src.Type(), + platform::errors::PreconditionNotMet( + "Tensor %s has different type with Tensor %s, Tensor " + "ShareGradientDataWith cannot be performed!", + Name(), src.Name())); + } + VLOG(4) << " VarBase ShareDataWith " << src.Name(); + if (grad_var_) { + auto& src_tensor = src.Var().Get(); + PADDLE_ENFORCE_EQ(src_tensor.IsInitialized(), true, + platform::errors::InvalidArgument( + "tensor has not been initialized", src.Name())); + auto* grad_t = grad_var_->MutableVar()->GetMutable(); + PADDLE_ENFORCE_EQ(grad_t->IsInitialized(), true, + platform::errors::InvalidArgument( + "tensor %s has not been initialized", Name())); + grad_t->ShareDataWith(src_tensor); } } diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 3443a61e659d5d..5834e3f76d71d3 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -230,6 +230,8 @@ class VarBase { void BumpInplaceVersion(); + void _ShareDataWith(const imperative::VarBase& src); + /* Hook related method: now only used for GradVarBase */ bool HasVariableWrapperHook() const { return var_->HasVariableWrapperHook(); } @@ -252,10 +254,6 @@ class VarBase { std::forward>>(hook)); } - std::shared_ptr To(const platform::Place& dst_place, - framework::proto::VarType::Type data_type, - const bool blocking) const; - private: /** * NOTE(zengjinle): never remove the const qualifier of `var_` if you are diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index f8a08fde47f269..05cfa127a27587 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -282,6 +282,27 @@ static void InitVarBaseFromTensorWithArgDefault( } } +template +static void InitVarBaseFromTensorWithArg(imperative::VarBase *self, + const framework::Tensor &tensor, + const P &place) { + VLOG(4) << "Init VarBase"; + new (self) imperative::VarBase( + imperative::GetCurrentTracer()->GenerateUniqueName("generated_tensor")); + self->SetPersistable(false); + self->SetType(framework::proto::VarType::LOD_TENSOR); + self->SetDataType(tensor.type()); + auto *new_tensor = self->MutableVar()->GetMutable(); + // Same place,share data directly + if (platform::is_same_place(place, tensor.place())) { + new_tensor->ShareDataWith(tensor); + VLOG(4) << "Same place, do ShareDataWith"; + } else { + framework::TensorCopy(tensor, place, new_tensor); + VLOG(4) << "Different place, do TensorCopy"; + } +} + static std::string GetTypeName(const imperative::VarBase &var) { if (var.Type() == framework::proto::VarType::RAW) { return "RAW"; @@ -899,6 +920,16 @@ void BindImperative(py::module *m_ptr) { py::arg("stop_gradient") = -1) .def("__init__", &InitVarBaseFromNumpyWithArgDefault, py::arg("value")) .def("__init__", &InitVarBaseFromTensorWithArgDefault, py::arg("tensor")) + .def("__init__", &InitVarBaseFromTensorWithArg, + py::arg("tensor"), py::arg("place")) + .def("__init__", &InitVarBaseFromTensorWithArg, + py::arg("tensor"), py::arg("place")) + .def("__init__", &InitVarBaseFromTensorWithArg, + py::arg("tensor"), py::arg("place")) + .def("__init__", &InitVarBaseFromTensorWithArg, + py::arg("tensor"), py::arg("place")) + .def("__init__", &InitVarBaseFromTensorWithArg, + py::arg("tensor"), py::arg("place")) .def("__init__", &InitVarBaseFromNumpyWithKwargs) .def( "__setitem_varbase__", @@ -1909,7 +1940,7 @@ void BindImperative(py::module *m_ptr) { "tensor has not been initialized")); return t->IsSharedBufferWith(*t_t); }) - .def("_Slice", + .def("_slice", [](const std::shared_ptr &self, int64_t begin_idx, int64_t end_idx) { auto *t = self->MutableVar()->GetMutable(); @@ -1918,28 +1949,17 @@ void BindImperative(py::module *m_ptr) { "tensor has not been initialized")); return t->Slice(begin_idx, end_idx); }) - .def("_To", - [](const std::shared_ptr &self, - const platform::Place &place, - framework::proto::VarType::Type data_type, bool blocking) { - auto new_var = self->To(place, data_type, blocking); - if (!blocking) { - IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); - } - return new_var; - }, - py::return_value_policy::copy) - .def("_To", - [](const std::shared_ptr &self, - const platform::CPUPlace &place, - framework::proto::VarType::Type data_type, bool blocking) { - auto new_var = self->To(place, data_type, blocking); - if (!blocking) { - IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); - } - return new_var; - }, - py::return_value_policy::copy) + .def("_share_data_with", + [](std::shared_ptr &self, + const imperative::VarBase &src) { self->_ShareDataWith(src); }) + .def("_numel", + [](std::shared_ptr &self) { + auto *t = self->MutableVar()->GetMutable(); + PADDLE_ENFORCE_EQ(t->IsInitialized(), true, + platform::errors::InvalidArgument( + "tensor has not been initialized")); + return t->numel(); + }) .def_property("name", &imperative::VarBase::Name, &imperative::VarBase::SetName) .def_property("stop_gradient", diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index e2fd36448ba654..e4d579e15e98b3 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -357,6 +357,49 @@ def double_hook_fn(grad): helper = TensorHookRemoveHelper(self, hook_id) return helper + @framework.dygraph_only + def _to(self, device=None, dtype=None, blocking=None): + + if device is None and dtype is None and blocking is None: + return self + + if device is not None: + if isinstance(device, str): + device = paddle.device._convert_to_place(device) + elif isinstance(device, (core.CPUPlace, core.CUDAPlace, + core.CUDAPinnedPlace, core.XPUPlace)): + pass + else: + raise ValueError( + "device value error, must be str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace() or paddle.XPUPlace(), but the type of device is " + + type(device).__name__) + + if blocking is None: + blocking = True + else: + assert isinstance( + blocking, + bool), "blocking value error, must be the True, False or None" + + def transform(t, device, dtype, blocking): + if device is None: + device = t.place + if dtype is None: + dtype = t.dtype + + new_t = t._copy_to(device, blocking) + + if dtype is not None and dtype != t.dtype: + new_t = new_t.cast(dtype=dtype) + + return new_t + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + return transform(self, device, dtype, blocking) + + return self + @property def grad(self): """ @@ -650,7 +693,7 @@ def is_combine_index(item): ("__deepcopy__", __deepcopy__), ("__module__", "paddle"), ("__name__", "Tensor"), ("__array__", __array__), ("__getitem__", __getitem__), ("item", item), - ("__setitem__", __setitem__)): + ("__setitem__", __setitem__), ("_to", _to)): setattr(core.VarBase, method_name, method) # NOTE(zhiqiu): pybind11 will set a default __str__ method of enum class. diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index cfaef15c1d335a..c909aa9bb48e61 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -1154,5 +1154,105 @@ def test_bump_inplace_version(self): self.assertEqual(var.inplace_version, 2) +class TestVarBaseSlice(unittest.TestCase): + def test_slice(self): + paddle.disable_static() + np_x = np.random.random((3, 8, 8)) + x = paddle.to_tensor(np_x, dtype="float64") + actual_x = x._slice(0, 1) + actual_x = paddle.to_tensor(actual_x) + self.assertEqual(actual_x.numpy().all(), np_x[0:1].all()) + + +class TestVarBaseClear(unittest.TestCase): + def test_clear(self): + paddle.disable_static() + np_x = np.random.random((3, 8, 8)) + x = paddle.to_tensor(np_x, dtype="float64") + x._clear() + self.assertEqual(str(x), "Tensor(Not initialized)") + + +class TestVarBaseOffset(unittest.TestCase): + def test_offset(self): + paddle.disable_static() + np_x = np.random.random((3, 8, 8)) + x = paddle.to_tensor(np_x, dtype="float64") + expected_offset = 0 + actual_x = x._slice(expected_offset, 1) + actual_x = paddle.to_tensor(actual_x) + self.assertEqual(actual_x._offset(), expected_offset) + + +class TestVarBaseShareBufferWith(unittest.TestCase): + def test_share_buffer_with(self): + paddle.disable_static() + np_x = np.random.random((3, 8, 8)) + np_y = np.random.random((3, 8, 8)) + x = paddle.to_tensor(np_x, dtype="float64") + y = paddle.to_tensor(np_y, dtype="float64") + x._share_buffer_with(y) + self.assertEqual(x._is_shared_buffer_with(y), True) + + +class TestVarBaseTo(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.np_x = np.random.random((3, 8, 8)) + self.x = paddle.to_tensor(self.np_x, dtype="float64") + + def test_to_api(self): + x_double = self.x._to(dtype='double') + self.assertEqual(x_double.dtype, paddle.fluid.core.VarDesc.VarType.FP64) + self.assertTrue(np.allclose(self.np_x, x_double)) + + x_ = self.x._to() + self.assertEqual(self.x.dtype, paddle.fluid.core.VarDesc.VarType.FP64) + self.assertTrue(np.allclose(self.np_x, x_)) + + if paddle.fluid.is_compiled_with_cuda(): + x_gpu = self.x._to(device=paddle.CUDAPlace(0)) + self.assertTrue(x_gpu.place.is_gpu_place()) + self.assertEqual(x_gpu.place.gpu_device_id(), 0) + + x_gpu0 = self.x._to(device='gpu:0') + self.assertTrue(x_gpu0.place.is_gpu_place()) + self.assertEqual(x_gpu0.place.gpu_device_id(), 0) + + x_cpu = self.x._to(device=paddle.CPUPlace()) + self.assertTrue(x_cpu.place.is_cpu_place()) + + x_cpu0 = self.x._to(device='cpu') + self.assertTrue(x_cpu0.place.is_cpu_place()) + + +class TestVarBaseInitVarBaseFromTensorWithDevice(unittest.TestCase): + def test_varbase_init(self): + paddle.disable_static() + t = fluid.Tensor() + np_x = np.random.random((3, 8, 8)) + t.set(np_x, fluid.CPUPlace()) + + if paddle.fluid.is_compiled_with_cuda(): + device = paddle.CUDAPlace(0) + tmp = fluid.core.VarBase(t, device) + self.assertTrue(tmp.place.is_gpu_place()) + self.assertEqual(tmp.numpy().all(), np_x.all()) + + device = paddle.CPUPlace() + tmp = fluid.core.VarBase(t, device) + self.assertEqual(tmp.numpy().all(), np_x.all()) + + +class TestVarBaseNumel(unittest.TestCase): + def test_numel(self): + paddle.disable_static() + np_x = np.random.random((3, 8, 8)) + x = paddle.to_tensor(np_x, dtype="float64") + x_actual_numel = x._numel() + x_expected_numel = np.product((3, 8, 8)) + self.assertEqual(x_actual_numel, x_expected_numel) + + if __name__ == '__main__': unittest.main() From 8e1312a27649fb41690596a308bfcd19ab5f8c6b Mon Sep 17 00:00:00 2001 From: veyron95 Date: Wed, 3 Nov 2021 14:22:56 +0000 Subject: [PATCH 3/7] Change func name and add test case for _CopyGradientWith --- paddle/fluid/imperative/layer.cc | 10 +++++++--- paddle/fluid/imperative/layer.h | 2 +- paddle/fluid/pybind/imperative.cc | 4 ++-- .../paddle/fluid/tests/unittests/test_var_base.py | 13 +++++++++++++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index d24af5dfef826c..27c9c659fe6fb5 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -356,8 +356,10 @@ void VarBase::BumpInplaceVersion() { MutableVar()->BumpInplaceVersion(); } -// NOTE(weilong wu): This func try to share grad_var_ data with target varbase -void VarBase::_ShareDataWith(const VarBase& src) { +// NOTE(weilong wu): +// This function try to copy the data from target varbase, +// and fill into the grad_var_ of the current varbase. +void VarBase::_CopyGradientWith(const VarBase& src) { if (Var().IsInitialized()) { PADDLE_ENFORCE_EQ(DataType(), src.DataType(), platform::errors::PreconditionNotMet( @@ -369,7 +371,7 @@ void VarBase::_ShareDataWith(const VarBase& src) { "ShareGradientDataWith cannot be performed!", Name(), src.Name())); } - VLOG(4) << " VarBase ShareDataWith " << src.Name(); + VLOG(4) << " VarBase copy gradient with " << src.Name(); if (grad_var_) { auto& src_tensor = src.Var().Get(); PADDLE_ENFORCE_EQ(src_tensor.IsInitialized(), true, @@ -379,7 +381,9 @@ void VarBase::_ShareDataWith(const VarBase& src) { PADDLE_ENFORCE_EQ(grad_t->IsInitialized(), true, platform::errors::InvalidArgument( "tensor %s has not been initialized", Name())); + auto* var_ = MutableVar()->GetMutable(); grad_t->ShareDataWith(src_tensor); + grad_t->Resize(var_->dims()); } } diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 5834e3f76d71d3..650998a356e329 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -230,7 +230,7 @@ class VarBase { void BumpInplaceVersion(); - void _ShareDataWith(const imperative::VarBase& src); + void _CopyGradientWith(const imperative::VarBase& src); /* Hook related method: now only used for GradVarBase */ bool HasVariableWrapperHook() const { return var_->HasVariableWrapperHook(); } diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 05cfa127a27587..1f60da627cdc23 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -1949,9 +1949,9 @@ void BindImperative(py::module *m_ptr) { "tensor has not been initialized")); return t->Slice(begin_idx, end_idx); }) - .def("_share_data_with", + .def("_copy_gradient_with", [](std::shared_ptr &self, - const imperative::VarBase &src) { self->_ShareDataWith(src); }) + const imperative::VarBase &src) { self->_CopyGradientWith(src); }) .def("_numel", [](std::shared_ptr &self) { auto *t = self->MutableVar()->GetMutable(); diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index c909aa9bb48e61..31b6c6c5b83bd4 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -1254,5 +1254,18 @@ def test_numel(self): self.assertEqual(x_actual_numel, x_expected_numel) +class TestVarBaseCopyGradientWith(unittest.TestCase): + def test_copy_gradient_with(self): + paddle.disable_static() + np_x = np.random.random((2, 2)) + np_y = np.random.random((2, 2)) + x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False) + y = paddle.to_tensor(np_y, dtype="float64") + out = x + x + out.backward() + x._copy_gradient_with(y) + self.assertEqual(x.grad.numpy().all(), np_y.all()) + + if __name__ == '__main__': unittest.main() From ed8980d8c1d2887869a79f92dd1753744886a09e Mon Sep 17 00:00:00 2001 From: veyron95 Date: Wed, 3 Nov 2021 18:42:43 +0000 Subject: [PATCH 4/7] Rename func --- paddle/fluid/imperative/layer.cc | 2 +- paddle/fluid/imperative/layer.h | 2 +- paddle/fluid/pybind/imperative.cc | 4 ++-- python/paddle/fluid/tests/unittests/test_var_base.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 27c9c659fe6fb5..f2469e613b0d6c 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -359,7 +359,7 @@ void VarBase::BumpInplaceVersion() { // NOTE(weilong wu): // This function try to copy the data from target varbase, // and fill into the grad_var_ of the current varbase. -void VarBase::_CopyGradientWith(const VarBase& src) { +void VarBase::_CopyGradientFrom(const VarBase& src) { if (Var().IsInitialized()) { PADDLE_ENFORCE_EQ(DataType(), src.DataType(), platform::errors::PreconditionNotMet( diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 650998a356e329..f66f72a48fba9e 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -230,7 +230,7 @@ class VarBase { void BumpInplaceVersion(); - void _CopyGradientWith(const imperative::VarBase& src); + void _CopyGradientFrom(const imperative::VarBase& src); /* Hook related method: now only used for GradVarBase */ bool HasVariableWrapperHook() const { return var_->HasVariableWrapperHook(); } diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 1f60da627cdc23..ced5d0390a02e3 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -1949,9 +1949,9 @@ void BindImperative(py::module *m_ptr) { "tensor has not been initialized")); return t->Slice(begin_idx, end_idx); }) - .def("_copy_gradient_with", + .def("_copy_gradient_from", [](std::shared_ptr &self, - const imperative::VarBase &src) { self->_CopyGradientWith(src); }) + const imperative::VarBase &src) { self->_CopyGradientFrom(src); }) .def("_numel", [](std::shared_ptr &self) { auto *t = self->MutableVar()->GetMutable(); diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index 31b6c6c5b83bd4..de02d08fdda6d4 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -1254,8 +1254,8 @@ def test_numel(self): self.assertEqual(x_actual_numel, x_expected_numel) -class TestVarBaseCopyGradientWith(unittest.TestCase): - def test_copy_gradient_with(self): +class TestVarBaseCopyGradientFrom(unittest.TestCase): + def test_copy_gradient_from(self): paddle.disable_static() np_x = np.random.random((2, 2)) np_y = np.random.random((2, 2)) @@ -1263,7 +1263,7 @@ def test_copy_gradient_with(self): y = paddle.to_tensor(np_y, dtype="float64") out = x + x out.backward() - x._copy_gradient_with(y) + x._copy_gradient_from(y) self.assertEqual(x.grad.numpy().all(), np_y.all()) From c54c549d67986bd428e2db2fb1d36ec6cbd8799a Mon Sep 17 00:00:00 2001 From: veyron95 Date: Thu, 4 Nov 2021 13:51:17 +0000 Subject: [PATCH 5/7] Add test cases to increase coverage --- python/paddle/fluid/dygraph/varbase_patch_methods.py | 2 -- python/paddle/fluid/tests/unittests/test_var_base.py | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index e4d579e15e98b3..6f155360e4e165 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -398,8 +398,6 @@ def transform(t, device, dtype, blocking): warnings.filterwarnings("ignore", category=UserWarning) return transform(self, device, dtype, blocking) - return self - @property def grad(self): """ diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index de02d08fdda6d4..5a855bfc5ff1dd 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -1225,6 +1225,9 @@ def test_to_api(self): x_cpu0 = self.x._to(device='cpu') self.assertTrue(x_cpu0.place.is_cpu_place()) + self.assertRaises(ValueError, self.x._to, device=1) + self.assertRaises(AssertionError, self.x._to, blocking=1) + class TestVarBaseInitVarBaseFromTensorWithDevice(unittest.TestCase): def test_varbase_init(self): From fe60c02e78927c3fdd2b8d1e120827a6df3c695e Mon Sep 17 00:00:00 2001 From: veyron95 Date: Tue, 9 Nov 2021 08:39:39 +0000 Subject: [PATCH 6/7] Refine the logic of _to func --- .../fluid/dygraph/varbase_patch_methods.py | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 6f155360e4e165..75df8e7f29d31b 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -387,12 +387,46 @@ def transform(t, device, dtype, blocking): if dtype is None: dtype = t.dtype - new_t = t._copy_to(device, blocking) + # 1. gpu place need to determine whether the memory is sufficient for allocation. + if t.place.is_gpu_place(): + gpu_memory_available = core.gpu_memory_available() + # for gpu, minimum memory allocation unit is 256 bytes. + if type(dtype) is str: + size_dtype = core.size_of_dtype( + framework.convert_np_dtype_to_dtype_(dtype)) + else: + size_dtype = core.size_of_dtype(dtype) + # Note(weilong wu): Paddle GPU minimum memory allocation unit is 256 bytes, + # waiting_alloc_memory will compute the memory space occupied by 't'. + # Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough. + waiting_alloc_memory = ( + (t.numel().numpy()[0] * size_dtype) / 256 + 1) * 256 * 1.2 + if gpu_memory_available < waiting_alloc_memory: + # Copy Tensor to cpu + t_used = t._copy_to(paddle.CPUPlace(), blocking) + # Release memory of t + t.value().get_tensor()._clear() + else: + # Tensor still in GPU + t_used = t + else: + t_used = t + + # 2. cast Tensor to dtype + if dtype is not None and dtype != t_used.dtype: + t_casted = t_used.cast(dtype=dtype) + else: + t_casted = t_used + + # 3. Copy casted Tensor(in CPU or GPU) to device + new_t = t_casted._copy_to(device, blocking) - if dtype is not None and dtype != t.dtype: - new_t = new_t.cast(dtype=dtype) + # 4. Share Tensor to origin Tensor + dst_tensor = t.value().get_tensor() + src_tensor = new_t.value().get_tensor() + dst_tensor._share_data_with(src_tensor) - return new_t + return t with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) From c6a16b313ae4d28e2c2348995c14918961ff346f Mon Sep 17 00:00:00 2001 From: veyron95 Date: Tue, 9 Nov 2021 14:22:51 +0000 Subject: [PATCH 7/7] Replace numel() with _numel(), Add test code --- .../fluid/dygraph/varbase_patch_methods.py | 4 ++-- .../fluid/tests/unittests/test_var_base.py | 22 ++++++++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 75df8e7f29d31b..32a4b5145effe3 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -400,12 +400,12 @@ def transform(t, device, dtype, blocking): # waiting_alloc_memory will compute the memory space occupied by 't'. # Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough. waiting_alloc_memory = ( - (t.numel().numpy()[0] * size_dtype) / 256 + 1) * 256 * 1.2 + (t._numel() * size_dtype) / 256 + 1) * 256 * 1.2 if gpu_memory_available < waiting_alloc_memory: # Copy Tensor to cpu t_used = t._copy_to(paddle.CPUPlace(), blocking) # Release memory of t - t.value().get_tensor()._clear() + t._clear() else: # Tensor still in GPU t_used = t diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index 5a855bfc5ff1dd..95f7c0aca788aa 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -1199,7 +1199,7 @@ class TestVarBaseTo(unittest.TestCase): def setUp(self): paddle.disable_static() self.np_x = np.random.random((3, 8, 8)) - self.x = paddle.to_tensor(self.np_x, dtype="float64") + self.x = paddle.to_tensor(self.np_x, dtype="float32") def test_to_api(self): x_double = self.x._to(dtype='double') @@ -1219,12 +1219,32 @@ def test_to_api(self): self.assertTrue(x_gpu0.place.is_gpu_place()) self.assertEqual(x_gpu0.place.gpu_device_id(), 0) + x_gpu1 = self.x._to(device='gpu:0', dtype="float64") + self.assertTrue(x_gpu1.place.is_gpu_place()) + self.assertEqual(x_gpu1.place.gpu_device_id(), 0) + self.assertEqual(x_gpu1.dtype, + paddle.fluid.core.VarDesc.VarType.FP64) + + x_gpu2 = self.x._to(device='gpu:0', dtype="float16") + self.assertTrue(x_gpu2.place.is_gpu_place()) + self.assertEqual(x_gpu2.place.gpu_device_id(), 0) + self.assertEqual(x_gpu2.dtype, + paddle.fluid.core.VarDesc.VarType.FP16) + x_cpu = self.x._to(device=paddle.CPUPlace()) self.assertTrue(x_cpu.place.is_cpu_place()) x_cpu0 = self.x._to(device='cpu') self.assertTrue(x_cpu0.place.is_cpu_place()) + x_cpu1 = self.x._to(device=paddle.CPUPlace(), dtype="float64") + self.assertTrue(x_cpu1.place.is_cpu_place()) + self.assertEqual(x_cpu1.dtype, paddle.fluid.core.VarDesc.VarType.FP64) + + x_cpu2 = self.x._to(device='cpu', dtype="float16") + self.assertTrue(x_cpu2.place.is_cpu_place()) + self.assertEqual(x_cpu2.dtype, paddle.fluid.core.VarDesc.VarType.FP16) + self.assertRaises(ValueError, self.x._to, device=1) self.assertRaises(AssertionError, self.x._to, blocking=1)