Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove some copyfrom in AgentLayer and ExpandLayer, fix warning in se… #183

Merged
merged 1 commit into from
Oct 14, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions demo/seqToseq/seqToseq_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,12 @@ def gru_encoder_decoder(data_conf,
encoded_vector = concat_layer(input=[src_forward, src_backward])

with mixed_layer(size=decoder_size) as encoded_proj:
encoded_proj += full_matrix_projection(encoded_vector)
encoded_proj += full_matrix_projection(input=encoded_vector)

backward_first = first_seq(input=src_backward)
with mixed_layer(size=decoder_size,
act=TanhActivation(), ) as decoder_boot:
decoder_boot += full_matrix_projection(backward_first)
decoder_boot += full_matrix_projection(input=backward_first)

def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = memory(name='gru_decoder',
Expand All @@ -113,8 +113,8 @@ def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_state=decoder_mem, )

with mixed_layer(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += full_matrix_projection(context)
decoder_inputs += full_matrix_projection(current_word)
decoder_inputs += full_matrix_projection(input=context)
decoder_inputs += full_matrix_projection(input=current_word)

gru_step = gru_step_layer(name='gru_decoder',
input=decoder_inputs,
Expand Down
2 changes: 1 addition & 1 deletion paddle/cuda/include/hl_sequence.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ extern void hl_context_projection_backward_weight(real* outputGrad,
*/
extern void hl_sequence2batch_copy(real *batch,
real *sequence,
int *batchIndex,
const int *batchIndex,
int seqWidth,
int batchCount,
bool seq2batch);
Expand Down
2 changes: 1 addition & 1 deletion paddle/cuda/include/stub/hl_sequence_stub.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ inline void hl_context_projection_backward_weight(real* outputGrad,

inline void hl_sequence2batch_copy(real *batch,
real *sequence,
int *batchIndex,
const int *batchIndex,
int seqWidth,
int batchCount,
bool seq2batch) {}
Expand Down
4 changes: 2 additions & 2 deletions paddle/cuda/src/hl_cuda_sequence.cu
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ template<int blockDimX, int blockDimY, int gridDimX, bool seq2batch, bool isAdd>
__global__
void KeSequence2Batch(real *batch,
real *sequence,
int *batchIndex,
const int *batchIndex,
int seqWidth,
int batchCount) {
int idx = threadIdx.x;
Expand Down Expand Up @@ -405,7 +405,7 @@ void KeSequence2Batch(real *batch,

void hl_sequence2batch_copy(real *batch,
real *sequence,
int *batchIndex,
const int *batchIndex,
int seqWidth,
int batchCount,
bool seq2batch) {
Expand Down
29 changes: 12 additions & 17 deletions paddle/gserver/layers/AgentLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */


#include "AgentLayer.h"

#include "paddle/utils/Logging.h"
Expand Down Expand Up @@ -62,8 +61,8 @@ void SequenceAgentLayer::forward(PassType passType) {

// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realNumSequences) {
int numRows = realOutput.sequenceStartPositions->
getData(false)[numSamples_];
int numRows =
realOutput.sequenceStartPositions->getData(false)[numSamples_];
CHECK(!realOutput.ids) << "Not supported";
output_.subArgFrom(realOutput, /* offset */ 0, numRows, getSize(), useGpu_,
/* trans */ false, /* seqFlag */ true,
Expand Down Expand Up @@ -141,8 +140,8 @@ void ScatterAgentLayer::forward(PassType passType) {

int width = this->getSize();
if (realOutArg_.value || realOutArg_.ids) {
output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
width, useGpu_);
output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, width,
useGpu_);
} else { // used in generation
if (realLayer_->getOutput().ids) {
IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
Expand Down Expand Up @@ -224,8 +223,8 @@ void SequenceScatterAgentLayer::forward(PassType passType) {

if (realOutArg_.value || realOutArg_.ids) {
CHECK(realOutArg_.sequenceStartPositions);
output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
width, useGpu_, /* trans */ false, /* seqFlag */ true,
output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, width,
useGpu_, /* trans */ false, /* seqFlag */ true,
/* seqStart */ seqStartPosIndex_,
/* seqSize */ numSequences_);
} else {
Expand All @@ -249,11 +248,12 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
CHECK_NE(input.sequenceStartPositions.get(),
output_.sequenceStartPositions.get());
ICpuGpuVector::resizeOrCreate(output_.sequenceStartPositions,
numSequences + 1, false);
numSequences + 1, false);
int* outStarts = output_.sequenceStartPositions->getMutableData(false);

IVector::resizeOrCreate(cpuInputStartPos_, height, false);
int* inStarts = cpuInputStartPos_->getData();
ICpuGpuVector::resizeOrCreate(inputStartPos_, height, false);
int* inStarts = inputStartPos_->getMutableData(false);

size_t offsetOut = 0;
for (size_t i = 0; i < numSequences; ++i) {
outStarts[i] = offsetOut;
Expand All @@ -266,13 +266,8 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
}
outStarts[numSequences] = offsetOut;

if (useGpu_) {
IVector::resizeOrCreate(inputStartPos_, height, true);
inputStartPos_->copyFrom(*cpuInputStartPos_, HPPL_STREAM_DEFAULT);
} else {
inputStartPos_ = cpuInputStartPos_;
}
outputValue->copyByRowIndex(*input.value, *inputStartPos_);
outputValue->copyByRowIndex(*input.value,
*inputStartPos_->getVector(useGpu_));
}
}

Expand Down
6 changes: 1 addition & 5 deletions paddle/gserver/layers/AgentLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,7 @@ class SequenceScatterAgentLayer : public ScatterAgentLayer {
protected:
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
IVectorPtr cpuInputStartPos_;

// point to cpuInputStartPos_ when useGpu_ is false
// copy from cpuInputStartPos_ when useGpu_ is true
IVectorPtr inputStartPos_;
ICpuGpuVectorPtr inputStartPos_;

public:
explicit SequenceScatterAgentLayer(const LayerConfig& config)
Expand Down
31 changes: 10 additions & 21 deletions paddle/gserver/layers/ExpandLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */


#include "ExpandLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
Expand Down Expand Up @@ -53,9 +52,8 @@ void ExpandLayer::forward(PassType passType) {
const Argument& shapeInput = getInput(1);
const Argument& dataInput = getInput(0);
size_t outputBatchSize = shapeInput.getBatchSize();
auto startPositions =
type_ ? shapeInput.subSequenceStartPositions
: shapeInput.sequenceStartPositions;
auto startPositions = type_ ? shapeInput.subSequenceStartPositions
: shapeInput.sequenceStartPositions;
size_t numSequences = startPositions->getSize() - 1;
const int* starts = startPositions->getData(false);

Expand All @@ -71,8 +69,7 @@ void ExpandLayer::forward(PassType passType) {
// set output sequence info as shape sequence
output_.sequenceStartPositions = shapeInput.sequenceStartPositions;
if (shapeInput.hasSubseq()) {
output_.subSequenceStartPositions =
shapeInput.subSequenceStartPositions;
output_.subSequenceStartPositions = shapeInput.subSequenceStartPositions;
}

// reserve output: Expand output to batchsize of sequence data.
Expand All @@ -81,24 +78,17 @@ void ExpandLayer::forward(PassType passType) {
MatrixPtr inputValue = getInputValue(0);
MatrixPtr outputValue = getOutputValue();

IVector::resizeOrCreate(cpuExpandStartsPos_, outputBatchSize, false);
int* expandStarts = cpuExpandStartsPos_->getData();
ICpuGpuVector::resizeOrCreate(expandStartsPos_, outputBatchSize, false);
int* expandStarts = expandStartsPos_->getMutableData(false);
for (size_t sequenceId = 0; sequenceId < numSequences; ++sequenceId) {
int sequenceLength = starts[sequenceId + 1] - starts[sequenceId];
for (int j = 0; j < sequenceLength; j++) {
expandStarts[starts[sequenceId] + j] = sequenceId;
}
}

if (useGpu_) {
// TODO(Dangqingqing) move copyFrom
IVector::resizeOrCreate(expandStartsPos_, outputBatchSize, true);
expandStartsPos_->copyFrom(*cpuExpandStartsPos_, HPPL_STREAM_DEFAULT);
} else {
expandStartsPos_ = cpuExpandStartsPos_;
}

outputValue->copyByRowIndex(*inputValue, *expandStartsPos_);
outputValue->copyByRowIndex(*inputValue,
*expandStartsPos_->getVector(useGpu_));

if (biases_.get() != NULL) {
outputValue->addBias(*(biases_->getW()), 1);
Expand All @@ -108,16 +98,15 @@ void ExpandLayer::forward(PassType passType) {
void ExpandLayer::backward(const UpdateCallback& callback) {
if (biases_ && biases_->getWGrad()) {
biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
/* Increasing the number of gradient */
/* Increasing the number of gradient */
biases_->getParameterPtr()->incUpdate(callback);
}

if (!getInputGrad(0)) return;
MatrixPtr inputGrad = getInputGrad(0);
MatrixPtr outputGrad = getOutputGrad();
auto cpuSeqStartPos =
type_ ? getInput(1).subSequenceStartPositions
: getInput(1).sequenceStartPositions;
auto cpuSeqStartPos = type_ ? getInput(1).subSequenceStartPositions
: getInput(1).sequenceStartPositions;
size_t numSequences = cpuSeqStartPos->getSize() - 1;
const int* starts = cpuSeqStartPos->getData(false);

Expand Down
7 changes: 1 addition & 6 deletions paddle/gserver/layers/ExpandLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,9 @@ class ExpandLayer : public Layer {
enum ExpandLevel { kNonSeq = 0, kSeq = 1 };
/// store the ExpandLevel
int type_;
// TODO(luotao) use ICpuGpuVectorPtr to merge cpuExpandStartsPos_
// and expandStartsPos_
/// expanded sequenceStartPositions or subSequenceStartPositions
/// of input[1]
IVectorPtr cpuExpandStartsPos_;
/// point to cpuExpandStartsPos_ when useGpu_ is false,
/// copy from cpuExpandStartsPos_ when useGpu_ is true
IVectorPtr expandStartsPos_;
ICpuGpuVectorPtr expandStartsPos_;

public:
explicit ExpandLayer(const LayerConfig& config) : Layer(config) {}
Expand Down
8 changes: 4 additions & 4 deletions paddle/math/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,13 +282,13 @@ void GpuMatrix::copyFrom(const IVector& src) {
copyFrom(matrix);
}

void GpuMatrix::copyByRowIndex(Matrix& b, IVector& rowIndex) {
void GpuMatrix::copyByRowIndex(Matrix& b, const IVector& rowIndex) {
size_t height = getHeight();
size_t width = getWidth();
CHECK_EQ(b.getWidth(), width);
real* dst = getData();
real* src = b.getData();
int* index = rowIndex.getData();
const int* index = rowIndex.getData();
hl_sequence2batch_copy(dst, src, index, width, height, true);
}

Expand Down Expand Up @@ -1278,11 +1278,11 @@ void CpuMatrix::copyFrom(const IVector& src) {
}
}

void CpuMatrix::copyByRowIndex(Matrix& b, IVector& rowIndex) {
void CpuMatrix::copyByRowIndex(Matrix& b, const IVector& rowIndex) {
size_t height = getHeight();
size_t width = getWidth();
CHECK_EQ(b.getWidth(), width);
int* index = rowIndex.getData();
const int* index = rowIndex.getData();
for (size_t i = 0; i < height; i++) {
CHECK_LT(static_cast<size_t>(index[i]), b.getHeight());
real* src = b.getData() + index[i] * width;
Expand Down
6 changes: 3 additions & 3 deletions paddle/math/Matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ class Matrix : public BaseMatrix {
LOG(FATAL) << "copy data from int vector only available on CpuMatrix.";
}

virtual void copyByRowIndex(Matrix& b, IVector& rowIndex) {
virtual void copyByRowIndex(Matrix& b, const IVector& rowIndex) {
LOG(FATAL) << "Not implemented";
}

Expand Down Expand Up @@ -979,7 +979,7 @@ class GpuMatrix : public Matrix {

void copyFrom(const IVector& src);

void copyByRowIndex(Matrix& b, IVector& rowIndex);
void copyByRowIndex(Matrix& b, const IVector& rowIndex);

MatrixPtr clone(size_t height, size_t width, bool useGpu = false);

Expand Down Expand Up @@ -1241,7 +1241,7 @@ class CpuMatrix : public Matrix {

void copyFrom(CpuSparseMatrix& src);

void copyByRowIndex(Matrix& b, IVector& rowIndex);
void copyByRowIndex(Matrix& b, const IVector& rowIndex);

MatrixPtr clone(size_t height, size_t width, bool useGpu = false);

Expand Down