void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1UL, inputs.size()); CHECK_EQ(1UL, outputs.size()); CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) << "SequenceArg required here"; const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]); const auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]); CHECK(in_seq.data() && out_seq.data() && in_seq.getSequenceId().data()); CHECK_EQ(out_seq.shape().ndims(), 2UL); CHECK_EQ(in_seq.shape().ndims(), 2UL); CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL); /// output layer grad dim == input layer grad dim * context_length_ CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_); /// input and output has the same batch_size CHECK_EQ(in_seq.shape()[0], out_seq.shape()[0]); CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); const auto out_grad_mat = in_seq.matrix<Device>(); const auto seq_vec = in_seq.getSequenceId().vector<int, Device>(); auto in_grad_mat = out_seq.matrix<Device>(); ContextProjectionBackwardData<Device>( out_grad_mat, in_grad_mat, seq_vec, context_length_, context_start_); }
void CosSimVecMatLayer::backward(const UpdateCallback& callback) { CHECK_EQ(backward_.size(), 1UL) << "Only one forward function needed"; MatrixPtr inV0 = getInputValue(0); MatrixPtr inV1 = getInputValue(1); MatrixPtr inG0 = getInputGrad(0); MatrixPtr inG1 = getInputGrad(1); MatrixPtr outV = getOutputValue(); MatrixPtr outG = getOutputGrad(); size_t batchSize = inV0->getHeight(); CHECK(inV0 && inV1 && inG0 && inG1 && outV && outG); REGISTER_TIMER_INFO("BwCosVMTimer", getName().c_str()); for (size_t i = 0; i < batchSize; i++) { tmpRow0->setData(inV0->rowBuf(i)); tmpRow1->setData(inG0->rowBuf(i)); tmpMtx0->setData(inV1->rowBuf(i)); tmpMtx1->setData(inG1->rowBuf(i)); tmpRow2->setData(outV->rowBuf(i)); tmpRow3->setData(outG->rowBuf(i)); BufferArgs inputs; BufferArgs outputs; inputs.addArg(*tmpRow3); inputs.addArg(*tmpRow2); inputs.addArg(*tmpMtx0); inputs.addArg(*tmpRow0); outputs.addArg(*tmpMtx1, ADD_TO); outputs.addArg(*tmpRow1, ADD_TO); backward_[0]->calc(inputs, outputs); } }
/// Forward pass of BlockExpandLayer: expands each image into a sequence of
/// blocks, then fills in the sequence metadata of the output argument.
void BlockExpandLayer::forward(PassType passType) {
  Layer::forward(passType);

  const size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  const size_t blockNum = getBlockNum();
  const size_t blockSize = blockH_ * blockW_ * channels_;
  resetOutput(blockNum * batchSize, blockSize);

  // calculate output_.value
  inputShape_ = TensorShape({batchSize, channels_, imgSizeH_, imgSizeW_});
  outputShape_ = TensorShape({batchSize, blockNum, blockSize});
  BufferArgs fwInputs;
  BufferArgs fwOutputs;
  fwInputs.addArg(*getInputValue(0), inputShape_);
  fwOutputs.addArg(*getOutputValue(), outputShape_, ASSIGN_TO);
  forward_[0]->calc(fwInputs, fwOutputs);

  // calculate output_.sequenceStartPositions and output_.cpuSequenceDims
  Argument& out = getOutput();
  ICpuGpuVector::resizeOrCreate(
      out.sequenceStartPositions, batchSize + 1, false);
  IVector::resizeOrCreate(out.cpuSequenceDims, 2 * batchSize, false);
  int* seqStarts = out.sequenceStartPositions->getMutableData(false);
  int* seqDims = out.cpuSequenceDims->getData();
  for (size_t b = 0; b < batchSize; b++) {
    seqStarts[b] = b * blockNum;
    seqDims[2 * b] = outputH_;
    seqDims[2 * b + 1] = outputW_;
  }
  seqStarts[batchSize] = batchSize * blockNum;
}
/// Runs each checker in `check` against the buffer argument at the same
/// index; the two containers must have equal length.
void testBufferArgs(const BufferArgs& inputs,
                    const std::vector<CheckBufferArg>& check) {
  EXPECT_EQ(inputs.size(), check.size());
  size_t idx = 0;
  while (idx < inputs.size()) {
    check[idx](inputs[idx]);
    ++idx;
  }
}
/// Forward pass of CosSimVecMatLayer: computes, per sample, the cosine
/// similarity between a vector input and each row of a matrix input.
void CosSimVecMatLayer::forward(PassType passType) {
  Layer::forward(passType);
  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";

  MatrixPtr inV0 = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);
  // Fix: validate the inputs BEFORE dereferencing them; the original read
  // inV0->getHeight() ahead of any null-check.
  CHECK(inV0 && inV1);
  size_t batchSize = inV0->getHeight();
  size_t numKeys = getSize();
  CHECK_EQ(batchSize, inV1->getHeight());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, numKeys);
  }
  MatrixPtr outV = getOutputValue();
  CHECK(outV);

  REGISTER_TIMER_INFO("FwCosVMTimer", getName().c_str());
  for (size_t i = 0; i < batchSize; i++) {
    // Point the pre-allocated temporaries at row i of each matrix.
    tmpRow0->setData(inV0->rowBuf(i));
    tmpMtx0->setData(inV1->rowBuf(i));
    tmpRow2->setData(outV->rowBuf(i));

    BufferArgs inputs;
    BufferArgs outputs;
    inputs.addArg(*tmpMtx0);
    inputs.addArg(*tmpRow0);
    outputs.addArg(*tmpRow2, ASSIGN_TO);

    forward_[0]->calc(inputs, outputs);
  }
}
/// Forward pass of ScaleSubRegionLayer: scales the region of each image
/// described by the indices input, copying the rest through unchanged.
void ScaleSubRegionLayer::forward(PassType passType) {
  Layer::forward(passType);

  auto input0 = getInput(0);
  imgH_ = input0.getFrameHeight();
  imgW_ = input0.getFrameWidth();
  // Fall back to the configured image size when the input carries none.
  if (0 == imgH_ || 0 == imgW_) {
    auto& conf = config_.inputs(0).scale_sub_region_conf();
    imgH_ = conf.image_conf().img_size_y();
    imgW_ = conf.image_conf().img_size();
  }

  MatrixPtr imageValue = input0.value;
  const size_t batchSize = imageValue->getHeight();
  const size_t spatialSize = imgH_ * imgW_;
  channelsNum_ = imageValue->getWidth() / spatialSize;
  shape_ = TensorShape({batchSize, channelsNum_, imgH_, imgW_});

  resetOutput(batchSize, imageValue->getWidth());
  auto& out = getOutput();
  out.setFrameHeight(imgH_);
  out.setFrameWidth(imgW_);

  MatrixPtr indicesValue = getInputValue(1);
  indicesShape_ = TensorShape({batchSize, 6});

  REGISTER_TIMER_INFO("ScaleSubRegionForward", getName().c_str());
  BufferArgs fwInputs;
  BufferArgs fwOutputs;
  fwInputs.addArg(*imageValue, shape_);
  fwInputs.addArg(*indicesValue, indicesShape_);
  fwOutputs.addArg(*out.value, shape_, ASSIGN_TO);
  forward_[0]->calc(fwInputs, fwOutputs);
}
/// Forward pass of CosSimLayer: row-wise cosine similarity of two inputs.
void CosSimLayer::forward(PassType passType) {
  Layer::forward(passType);

  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";
  MatrixPtr prevOut1 = getInputValue(0);
  MatrixPtr prevOut2 = getInputValue(1);
  // Fix: validate before dereferencing; the original called
  // getInputValue(0)->getHeight() ahead of any null-check.
  CHECK(prevOut1 && prevOut2);

  /* malloc memory for the output_ if necessary */
  int batchSize = prevOut1->getHeight();
  int size = getSize();
  {
    REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str());
    reserveOutput(batchSize, size);
  }
  MatrixPtr outV = getOutputValue();
  CHECK(outV);

  /* activation */ {
    REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str());
    BufferArgs inputs;
    BufferArgs outputs;
    inputs.addArg(*prevOut1);
    inputs.addArg(*prevOut2);
    outputs.addArg(*outV, ASSIGN_TO);
    forward_[0]->calc(inputs, outputs);
  }
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1UL, inputs.size()); CHECK_EQ(1UL, outputs.size()); CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here"; const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]); CHECK(in_seq.data() && in_seq.getSequenceId().data() && outputs[0].data()); CHECK_EQ(outputs[0].shape().ndims(), 2UL); CHECK_EQ(in_seq.shape().ndims(), 2UL); CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL); CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]); /// output layer grad dim == weight dim * context_length_ CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_); CHECK_EQ(outputs[0].getArgType(), ADD_TO); const auto seq_vec = in_seq.getSequenceId().vector<int, Device>(); const auto out_grad_mat = in_seq.matrix<Device>(); auto w_grad_mat = outputs[0].matrix<Device>(); ContextProjectionBackwardWeight<Device>(out_grad_mat, w_grad_mat, seq_vec, context_length_, context_start_, total_pad_, begin_pad_); }
/// Validates argument counts, then requires inputs[0] to be 4-D and both
/// outputs to have exactly the same shape as inputs[0].
void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
  CHECK_EQ(numInputs_, inputs.size());
  CHECK_EQ(numOutputs_, outputs.size());

  const auto& inShape = inputs[0].shape();
  CHECK_EQ(inShape.ndims(), (size_t)4);
  CHECK(inShape == outputs[0].shape());
  CHECK(inShape == outputs[1].shape());
}
void ScaleSubRegionLayer::backward(const UpdateCallback& callback) { REGISTER_TIMER_INFO("ScaleSubRegionBackward", getName().c_str()); BufferArgs inArgs; BufferArgs outArgs; inArgs.addArg(*getOutputGrad(), shape_); inArgs.addArg(*getInputValue(1), indicesShape_); outArgs.addArg(*getInputGrad(0), shape_, ADD_TO); backward_[0]->calc(inArgs, outArgs); }
void BlockExpandLayer::backward(const UpdateCallback& callback) { /* Calculate the input layers error */ if (getInputGrad(0)) { BufferArgs inputs; BufferArgs outputs; inputs.addArg(*getOutputGrad(), outputShape_); outputs.addArg(*getInputGrad(0), inputShape_, ADD_TO); backward_[0]->calc(inputs, outputs); } }
// Verifies that a raw BufferArg preserves the 3-D shape it was built with.
TEST(Arguments, BufferArg) {
  BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3});

  CheckBufferArg verifyShape = [=](const BufferArg& a) {
    EXPECT_EQ(a.shape().ndims(), 3U);
    EXPECT_EQ(a.shape()[0], 1U);
    EXPECT_EQ(a.shape()[1], 2U);
    EXPECT_EQ(a.shape()[2], 3U);
  };

  BufferArgs args;
  args.addArg(arg);
  testBufferArgs(args, verifyShape);
}
/// Backward propagation for a context projection: propagates the output
/// layer's gradient (inputs[0], a SequenceArg) into the input layer's
/// gradient (outputs[0], a SequenceArg) and, when a second output is given,
/// into the padding weight's gradient (outputs[1]). Both outputs accumulate
/// (ADD_TO).
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
  CHECK_EQ(1UL, inputs.size());
  // Either only the input gradient, or input gradient + weight gradient.
  CHECK(1UL == outputs.size() || 2UL == outputs.size());
  CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
      << "SequenceArg required here";
  const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
  auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);

  CHECK(in_seq.data() && in_seq.getSequenceId().data());
  CHECK_EQ(in_seq.shape().ndims(), 2UL);
  CHECK_EQ(out_seq.shape().ndims(), 2UL);
  CHECK_EQ(out_seq.getSequenceId().shape().ndims(), 1UL);
  /// input and output grad has the same batch_size
  CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]);
  /// dim of output grad = dim of input grad * context_length
  CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_);
  CHECK_EQ(out_seq.getArgType(), ADD_TO);

  if (2UL == outputs.size()) {
    CHECK_EQ(outputs[1].shape().ndims(), 2UL);
    /// dim of input grad == dim of weight
    CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]);
    CHECK_EQ(outputs[1].getArgType(), ADD_TO);
  }

  const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
  const auto out_grad_mat = in_seq.matrix<Device>();
  // A null data pointer means that particular gradient is not wanted;
  // pass an empty matrix so the kernel can skip it.
  auto in_grad_mat =
      !out_seq.data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
                      : out_seq.matrix<Device>();
  auto w_grad_mat =
      (2UL == outputs.size() && outputs[1].data())
          ? outputs[1].matrix<Device>()
          : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);

  ContextProjectionBackward<Device>(out_grad_mat,
                                    in_grad_mat,
                                    w_grad_mat,
                                    seq_vec,
                                    context_length_,
                                    context_start_,
                                    begin_pad_,
                                    is_padding_,
                                    total_pad_);
}
// Verifies that a Vector wrapped as a BufferArg keeps its size, data
// pointer, and round-trips through the typed vector<> accessor.
TEST(Arguments, Vector) {
  VectorPtr vector = Vector::create(100, false);

  CheckBufferArg verify = [=](const BufferArg& a) {
    EXPECT_EQ(a.shape().ndims(), 1U);
    EXPECT_EQ(a.shape()[0], 100U);
    EXPECT_EQ(a.data(), vector->getData());

    CpuVector wrapped = a.vector<real, DEVICE_TYPE_CPU>();
    EXPECT_EQ(wrapped.getSize(), vector->getSize());
    EXPECT_EQ(wrapped.getData(), vector->getData());
  };

  BufferArgs args;
  args.addArg(*vector);
  std::vector<CheckBufferArg> checkers;
  checkers.push_back(verify);
  testBufferArgs(args, checkers);
}
// Verifies that a Matrix wrapped as a BufferArg keeps its 2-D shape, data
// pointer, and round-trips through the typed matrix<> accessor.
TEST(Arguments, Matrix) {
  MatrixPtr matrix = Matrix::create(100, 200);

  CheckBufferArg verify = [=](const BufferArg& a) {
    EXPECT_EQ(a.shape().ndims(), 2U);
    EXPECT_EQ(a.shape()[0], 100U);
    EXPECT_EQ(a.shape()[1], 200U);
    EXPECT_EQ(a.data(), matrix->getData());

    EXPECT_EQ(a.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
    EXPECT_EQ(a.matrix<DEVICE_TYPE_CPU>().getWidth(), matrix->getWidth());
    EXPECT_EQ(a.matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
  };

  BufferArgs args;
  args.addArg(*matrix);
  std::vector<CheckBufferArg> checkers;
  checkers.push_back(verify);
  testBufferArgs(args, checkers);
}
// Verifies that a CpuSparseMatrix wrapped as a BufferArg exposes its dense
// shape plus the row/column index buffers of the sparse representation.
TEST(Arguments, CpuSparseMatrix) {
  CpuSparseMatrix sparse(200, 300, 50);

  CheckBufferArg verify = [=](const BufferArg& a) {
    EXPECT_EQ(a.shape().ndims(), 2U);
    EXPECT_EQ(a.shape()[0], 200U);
    EXPECT_EQ(a.shape()[1], 300U);
    EXPECT_EQ(a.data(), sparse.getData());
    // CHECK_EQ(arg.sparse().nnz(), 50);
    // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT);
    // CHECK_EQ(arg.sparse().dataType(), SPARSE_FLOAT_VALUE);
    EXPECT_EQ(a.sparse().getRowBuf(), sparse.getRows());
    EXPECT_EQ(a.sparse().getColBuf(), sparse.getCols());
  };

  BufferArgs args;
  args.addArg(sparse);
  std::vector<CheckBufferArg> checkers;
  checkers.push_back(verify);
  testBufferArgs(args, checkers);
}
// Only need the shape of one input, can calculate the // floating-point operation. size_t ops(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_LT((size_t)1, inputs.size()); size_t batchSize = inputs[0].shape()[0]; size_t maps = inputs[0].shape()[1]; size_t rows = inputs[0].shape()[2]; size_t columns = inputs[0].shape()[3]; // number of floating-point operations // an approximate value size_t ops = batchSize * maps * rows * columns * (size_ * 4 + 2); return ops; }
void CosSimLayer::backward(const UpdateCallback& callback) { /* activation */ { REGISTER_TIMER_INFO("CosBpAtvTimer", getName().c_str()); CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed"; const auto outG = this->getOutputGrad(); const auto outV = this->getOutputValue(); const auto inV1 = this->getInputValue(0); const auto inV2 = this->getInputValue(1); auto inG1 = this->getInputGrad(0); auto inG2 = this->getInputGrad(1); CHECK(outG && outV && inV1 && inV2 && inG1 && inG2); BufferArgs inputs; BufferArgs outputs; inputs.addArg(*outG); inputs.addArg(*outV); inputs.addArg(*inV1); inputs.addArg(*inV2); outputs.addArg(*inG1, ADD_TO); outputs.addArg(*inG2, ADD_TO); backward_[0]->calc(inputs, outputs); } }
// Exercises the Function entry point with CPU- and GPU-resident arguments.
TEST(Function, BufferArgs) {
  CpuMatrix cpuInput(100, 200);
  CpuMatrix cpuOutput(100, 200);
  BufferArgs cpuArgs;
  cpuArgs.addArg(cpuInput);
  cpuArgs.addArg(cpuOutput);
  Function<DEVICE_TYPE_CPU>(cpuArgs);

  GpuMatrix gpuInput(10, 20);
  GpuMatrix gpuOutput(10, 20);
  BufferArgs gpuArgs;
  gpuArgs.addArg(gpuInput);
  gpuArgs.addArg(gpuOutput);
  Function<DEVICE_TYPE_GPU>(gpuArgs);
}
/// Single-checker overload: expects exactly one buffer argument and runs
/// the checker against it.
void testBufferArgs(const BufferArgs& inputs, const CheckBufferArg& check) {
  const size_t expectedCount = 1U;
  EXPECT_EQ(inputs.size(), expectedCount);
  check(inputs[0]);
}