Skip to content

Commit

Permalink
[PTen] Move dot kernel impl (#38359)
Browse files Browse the repository at this point in the history
* move dot kernel impl

* remove needless cmake items
  • Loading branch information
chenwhql authored Dec 23, 2021
1 parent ebbd356 commit 0a4ffbc
Show file tree
Hide file tree
Showing 10 changed files with 156 additions and 118 deletions.
4 changes: 2 additions & 2 deletions paddle/pten/api/lib/kernel_declare.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ limitations under the License. */
// the kernel declare statement is automatically generated according to the
// file name of the kernel, and this header file will be removed

PT_DECLARE_KERNEL(dot, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(cast, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(cast, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
Expand Down
3 changes: 2 additions & 1 deletion paddle/pten/include/linalg.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/linalg.h"
#include "paddle/pten/kernels/dot_kernel.h"
#include "paddle/pten/kernels/gpu/linalg.h"

namespace pten {
Expand All @@ -31,7 +32,7 @@ DenseTensor Dot(const ContextT& dev_ctx,
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
Dot<T>(dev_ctx, x, y, &dense_out);
Dot<T, ContextT>(dev_ctx, x, y, &dense_out);
return dense_out;
}

Expand Down
61 changes: 61 additions & 0 deletions paddle/pten/kernels/cpu/dot_kernel.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/pten/kernels/dot_kernel.h"

#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/kernel_registry.h"

// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/complex.h"

namespace pten {

// CPU dot-product kernel: element-wise multiply x and y, sum-reducing along
// the innermost (fastest-varying) axis, so that
//   out[j] = sum_i x[j * B + i] * y[j * B + i]
// where B is the extent of the last axis. For 1-D inputs this yields a
// single scalar; for batched inputs, one dot product per leading row.
template <typename T, typename ContextT>
void Dot(const ContextT& dev_ctx,
         const DenseTensor& x,
         const DenseTensor& y,
         DenseTensor* out) {
  const T* x_data = x.data<T>();
  const T* y_data = y.data<T>();
  T* out_data = out->mutable_data<T>();

  auto&& dims = x.dims();
  // Use 64-bit loop indices throughout: numel() is 64-bit, so the original
  // `int` counters could overflow for tensors with more than INT_MAX
  // elements. Also hoist the row count out of the loop condition instead of
  // re-evaluating N / B on every iteration.
  const int64_t numel = x.numel();
  const int64_t width = dims[dims.size() - 1];  // size of the reduced axis
  const int64_t rows = numel / width;

  for (int64_t j = 0; j < rows; ++j) {
    T sum = 0;
    for (int64_t i = 0; i < width; ++i) {
      sum += x_data[j * width + i] * y_data[j * width + i];
    }
    out_data[j] = sum;
  }
}

} // namespace pten

using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;

PT_REGISTER_CTX_KERNEL(dot,
CPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}
33 changes: 0 additions & 33 deletions paddle/pten/kernels/cpu/linalg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,6 @@

namespace pten {

// Legacy CPU dot-product kernel (this copy is superseded by the
// ContextT-templated implementation in paddle/pten/kernels/cpu/dot_kernel.cc).
template <typename T>
void Dot(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
// Raw input pointers; the *_ptr_ aliases are walked forward during reduction
// while x_ptr/y_ptr keep the original base addresses.
auto const *x_ptr = x.data<T>(), *x_ptr_ = &x_ptr[0];
auto const *y_ptr = y.data<T>(), *y_ptr_ = &y_ptr[0];
auto* z = out->mutable_data<T>();

// Loop over the total N elements of both operands while sum-reducing every
// B pairs along the way where B is the dimension of the least ordered axis
auto&& d = x.dims();
auto const N = x.numel();
auto const B = d[d.size() - 1];

// One output element per group of B consecutive input pairs: z[j] is the
// dot product of the j-th rows of x and y.
for (int j = 0; j < N / B; j++) {
T ss = 0;
for (int i = 0; i < B; i++) ss += (*x_ptr_++) * (*y_ptr_++);
z[j] = ss;
}
}

template <typename T>
void Matmul(const CPUContext& dev_ctx,
const DenseTensor& x,
Expand All @@ -73,17 +51,6 @@ void Matmul(const CPUContext& dev_ctx,
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;

PT_REGISTER_KERNEL(dot,
CPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}

PT_REGISTER_KERNEL(matmul,
CPU,
ALL_LAYOUT,
Expand Down
6 changes: 0 additions & 6 deletions paddle/pten/kernels/cpu/linalg.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,6 @@

namespace pten {

template <typename T>
void Dot(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);

template <typename T>
void Matmul(const CPUContext& dev_ctx,
const DenseTensor& x,
Expand Down
27 changes: 27 additions & 0 deletions paddle/pten/kernels/dot_kernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/pten/core/dense_tensor.h"

namespace pten {

// Computes the dot product of x and y, writing the result into out.
// The reduction runs along the innermost axis of the inputs (see the CPU and
// GPU implementations in kernels/cpu/dot_kernel.cc and
// kernels/gpu/dot_kernel.cu); ContextT selects the backend device context.
template <typename T, typename ContextT>
void Dot(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);

} // namespace pten
64 changes: 64 additions & 0 deletions paddle/pten/kernels/gpu/dot_kernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/pten/kernels/dot_kernel.h"

#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/hybird/eigen/common.h"

// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/complex.h"

namespace pten {

// GPU dot-product kernel: element-wise multiply x and y, then sum-reduce via
// Eigen expressions evaluated on the device's Eigen stream.
template <typename T, typename ContextT>
void Dot(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
// Allocate the output buffer before mapping it into Eigen views.
out->mutable_data<T>();
if (1 == out->dims().size()) {
// Rank-1 output: flatten both operands and reduce everything to a scalar.
auto eigen_out = pten::EigenScalar<T>::From(*out);
auto eigen_x = pten::EigenVector<T>::Flatten(x);
auto eigen_y = pten::EigenVector<T>::Flatten(y);

auto& dev = *dev_ctx.eigen_device();
eigen_out.device(dev) = (eigen_x * eigen_y).sum();
} else {
// Batched case: view the operands as matrices and reduce along axis 1,
// producing one dot product per row.
auto eigen_out = pten::EigenMatrix<T>::From(*out);
auto eigen_x = pten::EigenMatrix<T>::From(x);
auto eigen_y = pten::EigenMatrix<T>::From(y);

auto& dev = *dev_ctx.eigen_device();
eigen_out.device(dev) = (eigen_x * eigen_y).sum(Eigen::DSizes<int, 1>(1));
}
}

} // namespace pten

using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;

PT_REGISTER_CTX_KERNEL(dot,
GPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}
20 changes: 0 additions & 20 deletions paddle/pten/kernels/gpu/linalg.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,13 @@
#include "paddle/pten/kernels/gpu/linalg.h"

#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/hybird/eigen/dot.h"
#include "paddle/pten/kernels/hybird/math/matmul_func.h"

// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/complex.h"

namespace pten {

// Legacy GPU dot kernel: thin wrapper delegating to the shared Eigen helper
// (this copy is superseded by paddle/pten/kernels/gpu/dot_kernel.cu).
template <typename T>
void Dot(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
eigen::Dot<GPUContext, T>(dev_ctx, x, y, out);
}

template <typename T>
void Matmul(const GPUContext& dev_ctx,
const DenseTensor& x,
Expand Down Expand Up @@ -58,17 +49,6 @@ using float16 = paddle::platform::float16;
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;

PT_REGISTER_KERNEL(dot,
GPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}

PT_REGISTER_KERNEL(matmul,
GPU,
ALL_LAYOUT,
Expand Down
6 changes: 0 additions & 6 deletions paddle/pten/kernels/gpu/linalg.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,6 @@

namespace pten {

template <typename T>
void Dot(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);

template <typename T>
void Matmul(const GPUContext& dev_ctx,
const DenseTensor& x,
Expand Down
50 changes: 0 additions & 50 deletions paddle/pten/kernels/hybird/eigen/dot.h

This file was deleted.

0 comments on commit 0a4ffbc

Please sign in to comment.