/**
 * Transposes a random sparse matrix both through the sparse path and through
 * a dense copy, then checks that the two results agree. Runs over every
 * combination of shape, value type, storage format and sparsity rate below.
 */
TEST(Matrix, SparseMatrixTranspose) {
  for (auto rows : {10, 50, 100}) {
    for (auto cols : {10, 50, 100}) {
      auto totalElems = rows * cols;
      for (auto valType : {FLOAT_VALUE, NO_VALUE}) {
        for (auto storage : {SPARSE_CSR, SPARSE_CSC}) {
          for (auto rate : {0.1, 0.2, 0.5}) {
            // Non-zero budget shared by the source and the transposed target.
            const size_t nonZeros = size_t(totalElems * rate);

            // Sparse path: source is rows x cols, target is cols x rows.
            MatrixPtr sparseSrc = Matrix::createSparseMatrix(
                rows, cols, nonZeros, valType, storage);
            MatrixPtr sparseDst(new CpuSparseMatrix(
                cols, rows, nonZeros, valType, storage));
            sparseSrc->randomizeUniform();
            sparseSrc->transpose(sparseDst, false);

            // Dense path: copy the same data into a dense matrix and
            // transpose that.
            CpuMatrixPtr denseSrc(new CpuMatrix(rows, cols));
            denseSrc->copyFrom(*sparseSrc);
            MatrixPtr denseDst(new CpuMatrix(cols, rows));
            denseSrc->transpose(denseDst, false);

            // Both transposes must describe the same matrix.
            checkSMatrixEqual2Dense(
                std::dynamic_pointer_cast<CpuSparseMatrix>(sparseDst),
                std::dynamic_pointer_cast<CpuMatrix>(denseDst));
          }
        }
      }
    }
  }
}
void Reflection::generateReflectionIds() { if (millerCount() == 0) { std::cout << "Warning! Miller count is 0" << std::endl; } int h = miller(0)->getH(); int k = miller(0)->getK(); int l = miller(0)->getL(); cctbx::miller::index<> cctbxMiller = cctbx::miller::index<>(h, k, l); for (int i = 0; i < ambiguityCount(); i++) { MatrixPtr ambiguityMat = matrixForAmbiguity(i); cctbx::miller::index<> cctbxTwinnedMiller = ambiguityMat->multiplyIndex(&cctbxMiller); asym_index asymmetricMiller = asym_index(spaceGroup, asymmetricUnit, cctbxTwinnedMiller); // sym_equiv_indices equivMaker = sym_equiv_indices(spaceGroup, cctbxTwinnedMiller); // cctbx::miller::index<> asymmetricMiller = equivMaker(0).h(); int newId = reflectionIdForMiller(asymmetricMiller.h()); // int newId = reflectionIdForMiller(cctbxMiller); reflectionIds.push_back(newId); } }
/**
 * Copies a CPU sparse matrix into a GPU clone on a non-default stream, then
 * multiplies both by the same dense operand and checks that the CPU and GPU
 * products match.
 */
TEST(Matrix, CopySparseMatrixToGpuSparseMatrix) {
  const size_t kRows = 20;
  const size_t kCols = 10;
  const size_t kRhsCols = 15;

  // Build the CPU sparse matrix from random dense data.
  MatrixPtr cpuSparse(
      new CpuSparseMatrix(kRows, kCols, kRows * 2, FLOAT_VALUE, SPARSE_CSR));
  MatrixPtr denseSeed(new CpuMatrix(kRows, kCols));
  denseSeed->randomizeUniform();
  cpuSparse->copyFrom(*denseSeed, HPPL_STREAM_DEFAULT);

  // Clone to the GPU and copy over an explicit stream; synchronize before use.
  MatrixPtr gpuSparse = cpuSparse->clone(kRows, kCols, true);
  hl_stream_t copyStream(HPPL_STREAM_3);
  gpuSparse->copyFrom(*cpuSparse, copyStream);
  hl_stream_synchronize(copyStream);

  // Same right-hand operand on both devices.
  MatrixPtr cpuRhs(new CpuMatrix(kCols, kRhsCols));
  cpuRhs->randomizeUniform();
  MatrixPtr gpuRhs(new GpuMatrix(kCols, kRhsCols));
  gpuRhs->copyFrom(*cpuRhs);

  MatrixPtr cpuProduct(new CpuMatrix(kRows, kRhsCols));
  MatrixPtr gpuProduct(new GpuMatrix(kRows, kRhsCols));
  cpuProduct->zeroMem();
  gpuProduct->zeroMem();

  cpuProduct->mul(*cpuSparse, *cpuRhs, 1.0, 1.0);
  gpuProduct->mul(*gpuSparse, *gpuRhs, 1.0, 1.0);

  checkMatrixEqual(cpuProduct, gpuProduct);
}
/**
 * @brief Update by mean vectors and covariance matrices.
 *
 * Feeds three (mean, covariance) predictions into the fixture one by one,
 * checking both running sums after each, then checks the final result.
 */
TEST_F(TestBCM, CovTest) {
  // Neither accumulator may be set before the first update.
  EXPECT_FALSE(m_pSumOfWeightedMeans);
  EXPECT_FALSE(m_pSumOfInvCovs);

  // After the first prediction.
  update(pMean1, pCov1);
  EXPECT_TRUE(m_pSumOfInvCovs->isApprox(*pSumOfInvCovs1));
  EXPECT_TRUE(m_pSumOfWeightedMeans->isApprox(*pSumOfWeightedMeansByCov1));

  // After the second prediction.
  update(pMean2, pCov2);
  EXPECT_TRUE(m_pSumOfInvCovs->isApprox(*pSumOfInvCovs2));
  EXPECT_TRUE(m_pSumOfWeightedMeans->isApprox(*pSumOfWeightedMeansByCov2));

  // After the third prediction.
  update(pMean3, pCov3);
  EXPECT_TRUE(m_pSumOfInvCovs->isApprox(*pSumOfInvCovs3));
  EXPECT_TRUE(m_pSumOfWeightedMeans->isApprox(*pSumOfWeightedMeansByCov3));

  // Final result. The second output is compared against the diagonal of the
  // expected covariance, i.e. it is treated as a variance here.
  VectorPtr pFusedMean;
  MatrixPtr pFusedVar;
  get(pFusedMean, pFusedVar);
  EXPECT_TRUE(pFusedVar->isApprox(pCovFinal->diagonal()));
  EXPECT_TRUE(pFusedMean->isApprox(*pMeanByCovFinal));
}
/**
 * Forward pass. Input 0 must hold one value per sample (width 1); input 1 is
 * batchSize x getSize(). The output is filled by `rowPow` from the two inputs.
 */
void PowerLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr perRowV = getInputValue(0);
  MatrixPtr dataV = getInputValue(1);

  const size_t rows = dataV->getHeight();
  const size_t cols = dataV->getWidth();

  // Shape contract between the layer size and its two inputs.
  CHECK_EQ(getSize(), cols);
  CHECK_EQ(1U, perRowV->getWidth());
  CHECK_EQ(rows, perRowV->getHeight());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(rows, cols);
  }

  MatrixPtr result = getOutputValue();

  {
    REGISTER_TIMER_INFO("FwPowerTimer", getName().c_str());
    result->rowPow(0, *dataV, *perRowV);
  }
}
/**
 * Forward pass: out = inV1 * weight + inV2 * (1 - weight), where the weight
 * is applied per row.
 *
 * Input 0 is the weight, inputs 1 and 2 are the two matrices to blend. Both
 * data inputs must be batchSize x getSize(). The weight must be
 * batchSize x 1: it is subtracted from the batchSize x 1 `weightLast_`
 * buffer and used as a per-row scale.
 */
void InterpolationLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr weightV = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);
  MatrixPtr inV2 = getInputValue(2);

  size_t batchSize = inV1->getHeight();
  size_t dataDim = inV1->getWidth();

  CHECK_EQ(dataDim, getSize());
  CHECK_EQ(dataDim, inV2->getWidth());
  // Validate the weight input. The previous check compared batchSize with
  // inV1's height, which is how batchSize is defined, so it could never fail
  // and left the weight's shape unchecked.
  CHECK_EQ(batchSize, weightV->getHeight());
  CHECK_EQ(1U, weightV->getWidth());
  CHECK_EQ(batchSize, inV2->getHeight());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    resetOutput(batchSize, dataDim);
  }

  MatrixPtr outV = getOutputValue();

  // weightLast_ = 1 - weight
  Matrix::resizeOrCreate(weightLast_, batchSize, 1, false, useGpu_);
  weightLast_->one();
  weightLast_->sub(*weightV);

  REGISTER_TIMER_INFO("FwInterpTimer", getName().c_str());
  // outV = inV1 * weight + inV2 * weightLast
  outV->addRowScale(0, *inV1, *weightV);
  outV->addRowScale(0, *inV2, *weightLast_);
}
/**
 * Forward pass. Works out the image geometry (from the input's frame size,
 * falling back to the layer config when either dimension is 0), resets the
 * output to the input's shape, and runs the registered forward function on
 * the image and the batchSize x 6 indices input.
 */
void ScaleSubRegionLayer::forward(PassType passType) {
  Layer::forward(passType);

  auto imageArg = getInput(0);
  imgH_ = imageArg.getFrameHeight();
  imgW_ = imageArg.getFrameWidth();
  if (imgH_ == 0 || imgW_ == 0) {
    // Frame size missing on the input: take it from the configuration.
    auto& regionConf = config_.inputs(0).scale_sub_region_conf();
    imgH_ = regionConf.image_conf().img_size_y();
    imgW_ = regionConf.image_conf().img_size();
  }

  MatrixPtr imageV = imageArg.value;
  size_t batchSize = imageV->getHeight();
  size_t pixelsPerChannel = imgH_ * imgW_;
  channelsNum_ = imageV->getWidth() / pixelsPerChannel;
  shape_ = TensorShape({batchSize, channelsNum_, imgH_, imgW_});

  // Output has the same layout as the input image.
  resetOutput(batchSize, imageV->getWidth());
  auto& result = getOutput();
  result.setFrameHeight(imgH_);
  result.setFrameWidth(imgW_);

  MatrixPtr regionIndicesV = getInputValue(1);
  indicesShape_ = TensorShape({batchSize, 6});

  REGISTER_TIMER_INFO("ScaleSubRegionForward", getName().c_str());

  BufferArgs inputs;
  inputs.addArg(*imageV, shape_);
  inputs.addArg(*regionIndicesV, indicesShape_);

  BufferArgs outputs;
  outputs.addArg(*result.value, shape_, ASSIGN_TO);

  forward_[0]->calc(inputs, outputs);
}
/**
 * Forward pass. Input 0 carries ids; inputs 1..N are equally shaped value
 * matrices. Row ranges are copied into the output according to the copy
 * schedule built from the ids, then the activation is applied.
 */
void MultiplexLayer::forward(PassType passType) {
  Layer::forward(passType);

  IVectorPtr selectorIds = getInput(0).ids;
  MatrixPtr firstValue = getInputValue(1);
  CHECK_EQ(selectorIds->getSize(), firstValue->getHeight());

  // Every further value input must match the first one in shape.
  for (size_t inputIdx = 2; inputIdx < inputLayers_.size(); ++inputIdx) {
    CHECK_EQ(firstValue->getHeight(), getInputValue(inputIdx)->getHeight());
    CHECK_EQ(firstValue->getWidth(), getInputValue(inputIdx)->getWidth());
  }

  calculateCopySchedule(selectorIds, inputLayers_.size() - 1);

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(firstValue->getHeight(), firstValue->getWidth());
  }

  MatrixPtr result = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwLMultplexingTimer", getName().c_str());
    AsyncGpuBlock block;
    for (const CopyInfo& step : copySchedule_) {
      // copyIdx is offset by one because input 0 holds the ids.
      auto destRows = result->subMatrix(step.startIdx, step.length, tmpDest_);
      auto srcRows = getInputValue(step.copyIdx + 1)
                         ->subMatrix(step.startIdx, step.length, tmpSrc_);
      destRows->copyFrom(*srcRows);
    }
  }

  /* activation */
  {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
void CosSimVecMatLayer::backward(const UpdateCallback& callback) { CHECK_EQ(backward_.size(), 1UL) << "Only one forward function needed"; MatrixPtr inV0 = getInputValue(0); MatrixPtr inV1 = getInputValue(1); MatrixPtr inG0 = getInputGrad(0); MatrixPtr inG1 = getInputGrad(1); MatrixPtr outV = getOutputValue(); MatrixPtr outG = getOutputGrad(); size_t batchSize = inV0->getHeight(); CHECK(inV0 && inV1 && inG0 && inG1 && outV && outG); REGISTER_TIMER_INFO("BwCosVMTimer", getName().c_str()); for (size_t i = 0; i < batchSize; i++) { tmpRow0->setData(inV0->rowBuf(i)); tmpRow1->setData(inG0->rowBuf(i)); tmpMtx0->setData(inV1->rowBuf(i)); tmpMtx1->setData(inG1->rowBuf(i)); tmpRow2->setData(outV->rowBuf(i)); tmpRow3->setData(outG->rowBuf(i)); BufferArgs inputs; BufferArgs outputs; inputs.addArg(*tmpRow3); inputs.addArg(*tmpRow2); inputs.addArg(*tmpMtx0); inputs.addArg(*tmpRow0); outputs.addArg(*tmpMtx1, ADD_TO); outputs.addArg(*tmpRow1, ADD_TO); backward_[0]->calc(inputs, outputs); } }
/**
 * Builds a new image from the working and training images.
 *
 * The result has max(height) rows. Within each row, for every x up to
 * max(width), the working image's pixel at (x, y) is appended first (when
 * inside that image) and then the training image's pixel at (x, y) (when
 * inside that image), so pixels of the two images alternate wherever both
 * cover the coordinate.
 */
ImagePtr Container::Join() const {
  const auto trainingWidth = training_image_->Width();
  const auto trainingHeight = training_image_->Height();
  const auto workingWidth = working_image_->Width();
  const auto workingHeight = working_image_->Height();

  const unsigned height = std::max(trainingHeight, workingHeight);
  const unsigned width = std::max(trainingWidth, workingWidth);

  const MatrixPtr rows = std::make_shared<Matrix>();
  for (unsigned y = 0; y != height; ++y) {
    const bool rowInWorking = y < workingHeight;
    const bool rowInTraining = y < trainingHeight;

    const PixelsPtr row = std::make_shared<Pixels>();
    for (unsigned x = 0; x != width; ++x) {
      if (rowInWorking && x < workingWidth) {
        row->push_back(working_image_->PixelData(x, y));
      }
      if (rowInTraining && x < trainingWidth) {
        row->push_back(training_image_->PixelData(x, y));
      }
    }
    rows->push_back(row);
  }

  return std::make_shared<Image>(rows);
}
/**
 * Forward pass. For each sample, points the temporary views at that sample's
 * row in the two inputs and in the output, then calls the registered forward
 * function, which assigns the output row (ASSIGN_TO).
 *
 * Both inputs must have the same number of rows; the output is
 * batchSize x getSize().
 */
void CosSimVecMatLayer::forward(PassType passType) {
  Layer::forward(passType);
  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";

  MatrixPtr inV0 = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);
  // Check the inputs before dereferencing them for their dimensions.
  CHECK(inV0 && inV1);

  size_t batchSize = inV0->getHeight();
  size_t numKeys = getSize();

  CHECK_EQ(batchSize, inV1->getHeight());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, numKeys);
  }

  MatrixPtr outV = getOutputValue();
  CHECK(outV);

  REGISTER_TIMER_INFO("FwCosVMTimer", getName().c_str());
  for (size_t i = 0; i < batchSize; i++) {
    // Re-point the reusable views at sample i.
    tmpRow0->setData(inV0->rowBuf(i));
    tmpMtx0->setData(inV1->rowBuf(i));
    tmpRow2->setData(outV->rowBuf(i));

    BufferArgs inputs;
    BufferArgs outputs;
    inputs.addArg(*tmpMtx0);
    inputs.addArg(*tmpRow0);
    outputs.addArg(*tmpRow2, ASSIGN_TO);
    forward_[0]->calc(inputs, outputs);
  }
}
/** Generates twinned/untwinned reflection IDs for easy searching. */ void Reflection::generateReflectionIds() { if (millerCount() == 0) { std::cout << "Warning! Miller count is 0" << std::endl; } int h = miller(0)->getH(); int k = miller(0)->getK(); int l = miller(0)->getL(); vec hkl = new_vector(h, k, l); for (int i = 0; i < ambiguityCount(); i++) { MatrixPtr ambiguityMat = matrixForAmbiguity(i); int asuH, asuK, asuL; ccp4spg_put_in_asu(spaceGroup, hkl.h, hkl.k, hkl.l, &asuH, &asuK, &asuL); vec hklAsu = new_vector(asuH, asuK, asuL); ambiguityMat->multiplyVector(&hklAsu); int newId = reflectionIdForMiller(hklAsu); reflectionIds.push_back(newId); } }
void PowerLayer::backward(const UpdateCallback& callback) { MatrixPtr inV0 = getInputValue(0); MatrixPtr inV1 = getInputValue(1); MatrixPtr inG0 = getInputGrad(0); MatrixPtr inG1 = getInputGrad(1); MatrixPtr outV = getOutputValue(); MatrixPtr outG = getOutputGrad(); size_t batchSize = inV1->getHeight(); size_t dataDim = inV1->getWidth(); { REGISTER_TIMER_INFO("BwPowerTimer", getName().c_str()); Matrix::resizeOrCreate(tmpMtx, batchSize, dataDim, false, useGpu_); if (inG0) { tmpMtx->log2(*inV1); tmpMtx->dotMul(*tmpMtx, *outV); // inG0 += outG .* (log(inV1) * outV) inG0->rowDotMul(0, *outG, *tmpMtx); } if (inG1) { // tmp = (outV / inV1) * inV0 tmpMtx->dotDiv(*outV, *inV1); tmpMtx->rowScale(0, *tmpMtx, *inV0); inG1->addDotMul(*outG, *tmpMtx, 1, 1); } } }
/**
 * @brief Update by mean vectors and variance vectors.
 *
 * Feeds three (mean, variance) predictions into the fixture one by one,
 * checking both running sums after each, then checks the final result.
 */
TEST_F(TestBCM, VarTest) {
  // Neither accumulator may be set before the first update.
  EXPECT_FALSE(m_pSumOfWeightedMeans);
  EXPECT_FALSE(m_pSumOfInvCovs);

  // After the first prediction.
  update(pMean1, pVar1);
  EXPECT_TRUE(m_pSumOfInvCovs->isApprox(*pSumOfInvVar1));
  EXPECT_TRUE(m_pSumOfWeightedMeans->isApprox(*pSumOfWeightedMeansByVar1));

  // After the second prediction.
  update(pMean2, pVar2);
  EXPECT_TRUE(m_pSumOfInvCovs->isApprox(*pSumOfInvVar2));
  EXPECT_TRUE(m_pSumOfWeightedMeans->isApprox(*pSumOfWeightedMeansByVar2));

  // After the third prediction.
  update(pMean3, pVar3);
  EXPECT_TRUE(m_pSumOfInvCovs->isApprox(*pSumOfInvVar3));
  EXPECT_TRUE(m_pSumOfWeightedMeans->isApprox(*pSumOfWeightedMeansByVar3));

  // Final result.
  VectorPtr pFusedMean;
  MatrixPtr pFusedVar;
  get(pFusedMean, pFusedVar);
  EXPECT_TRUE(pFusedVar->isApprox(*pVarFinal));
  EXPECT_TRUE(pFusedMean->isApprox(*pMeanByVarFinal));
}
void MKLPackedRecurrentLayer::forwardBatch(int batchSize, size_t numSequences, const int* starts) { if (!batchValue_) { batchValue_.reset(new SequenceToBatch(useGpu_)); } batchValue_->resizeOrCreateBatch(batchSize, numSequences, starts, reversed_); batchValue_->copyFromSeq(*output_.value); { REGISTER_TIMER_INFO("RecurrentFwBatch", getName().c_str()); /* forward one batch */ for (size_t n = 0; n < batchValue_->getNumBatch(); n++) { MatrixPtr batchValue = batchValue_->getBatchValue(n); if (n != 0) { MatrixPtr preBatchValue = batchValue_->getBatchValue(n - 1, batchValue->getHeight()); packed_weight_->gemm_compute(preBatchValue, batchValue); } Argument arg; arg.value = batchValue; activation_->forward(arg).check(); } } batchValue_->copyBackSeq(*output_.value); }
double Miller::expectedRadius(double spotSize, double mosaicity, vec *hkl) { vec usedHKL; if (hkl == NULL) { MatrixPtr newMatrix = MatrixPtr(); rotateMatrixHKL(latestHRot, latestKRot, 0, matrix, &newMatrix); usedHKL = new_vector(h, k, l); hkl = &usedHKL; newMatrix->multiplyVector(hkl); } spotSize = fabs(spotSize); double radMos = fabs(mosaicity) * M_PI / 180; double distanceFromOrigin = length_of_vector(*hkl); double spotSizeIncrease = fabs(radMos * distanceFromOrigin); double radius = (spotSize + spotSizeIncrease); return radius; }
/**
 * Writes a gate to the stream in three parts: a "Gate:" line with the label
 * sequence joined by '*', a "--Gate cost:" line, and a "--Gate matrix:"
 * header followed by one line per matrix row, each value formatted by
 * printVal.
 */
void FullGateWriterImpl::write(GatePtr pGate, std::ostream& outputStream) {
  const std::string separator = "*";

  // Label sequence, separator between consecutive labels only.
  outputStream << "Gate:";
  const unsigned int labelCount = pGate->getLabelSeq().size();
  for (unsigned int idx = 0; idx < labelCount; ++idx) {
    if (idx > 0) {
      outputStream << separator;
    }
    outputStream << pGate->getLabelSeq()[idx];
  }
  outputStream << std::endl;

  outputStream << "--Gate cost:" << pGate->getCost();
  outputStream << std::endl;

  outputStream << "--Gate matrix:" << std::endl;
  MatrixPtr pGateMatrix = pGate->getMatrix();
  int rowCount, columnCount;
  pGateMatrix->getSize(rowCount, columnCount);

  for (int row = 0; row < rowCount; ++row) {
    for (int col = 0; col < columnCount; ++col) {
      char formatted[PRINT_BUFFER_LENGTH];
      ComplexVal entry = pGateMatrix->getValue(row, col);
      printVal(formatted, entry);
      outputStream << formatted;
    }
    outputStream << std::endl;
  }
}
void ImageTab::calculateAutoThreshold() { MatrixPtr matrix = _matrix->selectedMatrix(); if (matrix) { matrix->readLock(); _lowerThreshold->setText(QString::number(matrix->minValue())); _upperThreshold->setText(QString::number(matrix->maxValue())); matrix->unlock(); } }
void SlopeInterceptLayer::backward(const UpdateCallback& callback) { MatrixPtr inG = getInputGrad(0); MatrixPtr outG = getOutputGrad(); if (inG) { REGISTER_TIMER_INFO("BwSlopeInterceptTimer", getName().c_str()); inG->add(*outG, config_.slope()); } }
void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b) { EXPECT_EQ(a->getWidth(), b->getWidth()); EXPECT_EQ(a->getHeight(), b->getHeight()); EXPECT_EQ(a->isTransposed(), b->isTransposed()); for (size_t r = 0; r < a->getHeight(); ++r) { for (size_t c = 0; c < a->getWidth(); ++c) { EXPECT_FLOAT_EQ(a->getElement(r, c), b->getElement(r, c)); } } }
/**
 * Runs `sigmoid` on a 10x10 matrix whose elements were all set to the given
 * value, using a second random 10x10 matrix as the argument. Nothing is
 * asserted here.
 *
 * @param illegal Value written into every element of the first matrix.
 */
void testSigmoid(real illegal) {
  MatrixPtr lhs = std::make_shared<Matrix>(10, 10);
  MatrixPtr rhs = std::make_shared<Matrix>(10, 10);

  lhs->randomizeUniform();
  rhs->randomizeUniform();

  // Overwrite the first matrix with the value under test.
  SetTensorValue(*lhs, illegal);

  lhs->sigmoid(*rhs);
}
/**
 * Returns a pointer through which the matrix's elements can be read.
 *
 * For a non-GPU matrix this is the matrix's own buffer. For a GPU matrix the
 * contents are copied into a newly created matrix (created with the last
 * `Matrix::create` flag set to false — presumably "not GPU") and a pointer
 * into that copy is returned.
 *
 * Lifetime: previously the copy's only owner was a local MatrixPtr, so the
 * returned pointer dangled as soon as the function returned. The copy is now
 * kept alive in a small per-thread ring, so the pointer stays valid until
 * this function has been called kKeepAliveSlots more times for GPU matrices
 * on the same thread. Callers must not hold it longer than that.
 */
const real* getData(const Matrix& matrix) {
  if (!matrix.useGpu()) {
    return matrix.getData();
  }

  // Enough for a caller to fetch and compare a handful of matrices at once.
  constexpr size_t kKeepAliveSlots = 4;
  static thread_local MatrixPtr keepAlive[kKeepAliveSlots];
  static thread_local size_t nextSlot = 0;

  MatrixPtr cpuMatrix = Matrix::create(
      matrix.getHeight(), matrix.getWidth(), matrix.isTransposed(), false);
  cpuMatrix->copyFrom(matrix);

  // Replace the oldest retained copy with this one.
  keepAlive[nextSlot] = cpuMatrix;
  nextSlot = (nextSlot + 1) % kKeepAliveSlots;

  return cpuMatrix->getData();
}
/**
 * Checks `batchTranspose` against a reference computed on the host: each of
 * the numSamples rows holds an ny x nx block that is transposed to nx x ny.
 */
TEST(MatrixBatchTransTest, test_batch_matrix_transpose) {
  const int nx = 100;
  const int ny = 50;
  const int numSamples = 50;

  MatrixPtr cMat = Matrix::create(numSamples, nx * ny, false, false);
  MatrixPtr gMat = Matrix::create(numSamples, nx * ny, false, true);
  MatrixPtr cBatchTransMat = Matrix::create(numSamples, nx * ny, false, false);
  MatrixPtr gBatchTransMat = Matrix::create(numSamples, nx * ny, false, true);
  MatrixPtr cMat_d2h = Matrix::create(numSamples, nx * ny, false, false);

  real* hostInput = cMat->getData();
  real* expected = cBatchTransMat->getData();

  // Host input: element (row, col) of every sample holds its linear index.
  for (int sample = 0; sample < numSamples; ++sample) {
    real* block = hostInput + sample * nx * ny;
    for (int row = 0; row < ny; ++row) {
      for (int col = 0; col < nx; ++col) {
        block[row * nx + col] = row * nx + col;
      }
    }
  }

  // Reference result: per-sample transpose of the host input.
  for (int sample = 0; sample < numSamples; ++sample) {
    const real* block = hostInput + sample * nx * ny;
    real* transposed = expected + sample * nx * ny;
    for (int row = 0; row < ny; ++row) {
      for (int col = 0; col < nx; ++col) {
        transposed[col * ny + row] = block[row * nx + col];
      }
    }
  }

  // Device: upload, transpose, download, compare.
  gMat->copyFrom(*cMat, HPPL_STREAM_DEFAULT);
  batchTranspose(
      gMat->getData(), gBatchTransMat->getData(), nx, ny, numSamples);
  cMat_d2h->copyFrom(*gBatchTransMat, HPPL_STREAM_DEFAULT);
  checkMatrixEqual(cBatchTransMat, cMat_d2h);
}
void ExpandLayer::backward(const UpdateCallback& callback) { if (biases_ && biases_->getWGrad()) { biases_->getWGrad()->collectBias(*getOutputGrad(), 1); /* Increasing the number of gradient */ biases_->getParameterPtr()->incUpdate(callback); } if (!getInputGrad(0)) return; MatrixPtr inputGrad = getInputGrad(0); MatrixPtr outputGrad = getOutputGrad(); auto cpuSeqStartPos = type_ ? getInput(1).subSequenceStartPositions : getInput(1).sequenceStartPositions; size_t numSequences = cpuSeqStartPos->getSize() - 1; const int* starts = cpuSeqStartPos->getData(false); CHECK_EQ(inputGrad->getWidth(), outputGrad->getWidth()); CHECK_EQ(outputGrad->getHeight(), (size_t)starts[numSequences]); AsyncGpuBlock asyncGpuBlock; // sum to get the grad real scale = 1; for (size_t sequenceId = 0; sequenceId < numSequences; sequenceId++) { // TODO(Dangqingqing) optimization for GPU int sequenceLength = starts[sequenceId + 1] - starts[sequenceId]; if (sequenceLength == 0) { // empty sequence continue; } MatrixPtr copyData = inputGrad->subMatrix(sequenceId, 1); copyData->collectBias( *outputGrad->subMatrix(starts[sequenceId], sequenceLength), scale); } }
void MaxOutLayer::backward(const UpdateCallback& callback) { (void)callback; /* Do derivation */ MatrixPtr inputG = getInputGrad(0); MatrixPtr outG = getOutputGrad(); if (inputG) { inputG->maxoutBackward(*outG, *maxoutId_, outputChannels_, groups_); } }
/**
 * Forward pass: re-shapes the input into rows of getSize() elements. The
 * input's element count must be a multiple of getSize(). The data is copied
 * by viewing the output buffer with the input's shape and assigning into it.
 */
void ResizeLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& source = getInput(0);
  const size_t srcRows = source.value->getHeight();
  const size_t srcCols = source.value->getWidth();
  CHECK_EQ((srcRows * srcCols) % getSize(), 0UL);

  reserveOutput(srcRows * srcCols / getSize(), getSize());

  // View over the output buffer with the input's shape.
  MatrixPtr outputView = Matrix::create(
      output_.value->getData(), srcRows, srcCols, false, useGpu_);
  outputView->assign(*source.value);
}
/**
 * Forward pass. Input 0 supplies the data rows, input 1 only supplies the
 * sequence layout. Each data row is copied to every output position of the
 * sequence with the same index, so the output has as many rows as input 1's
 * batch. The bias is added afterwards when present.
 */
void ExpandLayer::forward(PassType passType) {
  Layer::forward(passType);

  // Exactly two inputs: data (0) and sequence layout (1).
  CHECK_EQ(2U, inputLayers_.size());

  const Argument& layoutArg = getInput(1);
  const Argument& dataArg = getInput(0);
  size_t outRows = layoutArg.getBatchSize();

  // type_ selects sub-sequence boundaries over sequence boundaries.
  auto boundaries = type_ ? layoutArg.subSequenceStartPositions
                          : layoutArg.sequenceStartPositions;
  size_t sequenceCount = boundaries->getSize() - 1;
  const int* offsets = boundaries->getData(false);

  CHECK_EQ(offsets[sequenceCount], layoutArg.getBatchSize());
  if (type_) {
    // when trans_type = seq, input[1] must hasSubseq
    CHECK_EQ(layoutArg.hasSubseq(), 1UL);
    CHECK_EQ(dataArg.getNumSequences(), layoutArg.getNumSequences());
  } else {
    CHECK_EQ(dataArg.getBatchSize(), layoutArg.getNumSequences());
  }

  // The output carries the layout input's sequence information.
  output_.sequenceStartPositions = layoutArg.sequenceStartPositions;
  if (layoutArg.hasSubseq()) {
    output_.subSequenceStartPositions = layoutArg.subSequenceStartPositions;
  }

  // reserve output: Expand output to batchsize of sequence data.
  reserveOutput(outRows, dataArg.value->getWidth());

  MatrixPtr dataV = getInputValue(0);
  MatrixPtr resultV = getOutputValue();

  // For every output row, record the index of the sequence it belongs to.
  ICpuGpuVector::resizeOrCreate(expandStartsPos_, outRows, false);
  int* rowToSequence = expandStartsPos_->getMutableData(false);
  for (size_t seq = 0; seq < sequenceCount; ++seq) {
    for (int pos = offsets[seq]; pos < offsets[seq + 1]; ++pos) {
      rowToSequence[pos] = seq;
    }
  }

  resultV->copyByRowIndex(*dataV, *expandStartsPos_->getVector(useGpu_));

  if (biases_.get() != NULL) {
    resultV->addBias(*(biases_->getW()), 1);
  }
}
void DeConv3DLayer::addBias() { MatrixPtr outMat = getOutputValue(); MatrixPtr bias = Matrix::create(biases_->getW()->getData(), 1, biases_->getW()->getElementCnt(), false, useGpu_); if (this->sharedBiases_) { outMat->addSharedBias(*(bias), 1.0f); } else { outMat->addBias(*(bias), 1.0f); } }
void GatedRecurrentLayer::forwardBatch(int batchSize, size_t numSequences, const int* starts, MatrixPtr inputValue) { REGISTER_TIMER_INFO("GruFwBatchTime", getName().c_str()); hl_gru_value gruValue; gruValue.gateWeight = (gateWeight_->getW())->getData(); gruValue.stateWeight = (stateWeight_->getW())->getData(); if (!batchValue_) { batchValue_.reset(new SequenceToBatch(useGpu_)); } batchValue_->resizeOrCreateBatch(batchSize, numSequences, starts, reversed_); batchValue_->resizeOrCreate(*output_.value); batchValue_->copy(*inputValue, *gate_.value, /* seq2batch */true); if (bias_ && bias_->getWGrad()) { gate_.value->addBias(*(bias_->getW()), 1); } { int numBatch = batchValue_->getNumBatch(); int batchSize = 0; AsyncGpuBlock asyncGpuBlock; for (int n = 0; n < numBatch; n++) { MatrixPtr outputValueTmp = batchValue_->getBatchValue(n); gruValue.outputValue = outputValueTmp->getData(); gruValue.gateValue = (batchValue_->getBatchValue(*gate_.value, n))->getData(); gruValue.resetOutputValue = (batchValue_->getBatchValue(*resetOutput_.value, n))->getData(); batchSize = outputValueTmp->getHeight(); gruValue.prevOutValue = (n == 0 ? nullptr : (batchValue_->getBatchValue(n - 1, batchSize))->getData()); { if (useGpu_) { GruCompute::forward<1>(gruValue, getSize(), batchSize); } else { GruCompute::forward<0>(gruValue, getSize(), batchSize); } } } } { batchValue_->copyBackSeq(*output_.value); } }
/**
 * Forward pass: resets the output to batchSize x getSize(), sizes the id
 * vector to one entry per output element, and runs maxoutForward, which
 * fills both the output and maxoutId_.
 */
void MaxOutLayer::forward(PassType passType) {
  Layer::forward(passType);

  /* malloc memory for the output_ if necessary */
  /* note (from the original author): one sample correspond to one column */
  const size_t sampleCount = getInput(0).getBatchSize();
  const size_t layerSize = getSize();
  resetOutput(sampleCount, layerSize);

  MatrixPtr inputValue = getInputValue(0);
  MatrixPtr outputValue = getOutputValue();

  IVector::resizeOrCreate(maxoutId_, layerSize * sampleCount, useGpu_);
  outputValue->maxoutForward(
      *inputValue, *maxoutId_, outputChannels_, groups_);
}