void MultiplexLayer::forward(PassType passType) {
  Layer::forward(passType);

  IVectorPtr copyIds = getInput(0).ids;
  MatrixPtr inV1 = getInputValue(1);
  CHECK_EQ(copyIds->getSize(), inV1->getHeight());
  for (size_t i = 2; i < inputLayers_.size(); i++) {
    CHECK_EQ(inV1->getHeight(), getInputValue(i)->getHeight());
    CHECK_EQ(inV1->getWidth(), getInputValue(i)->getWidth());
  }

  calculateCopySchedule(copyIds, inputLayers_.size() - 1);
  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(inV1->getHeight(), inV1->getWidth());
  }

  MatrixPtr outV = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwLMultplexingTimer", getName().c_str());
    AsyncGpuBlock block;
    for (const CopyInfo& info : copySchedule_) {
      outV->subMatrix(info.startIdx, info.length, tmpDest_)
          ->copyFrom(*getInputValue(info.copyIdx + 1)
                          ->subMatrix(info.startIdx, info.length, tmpSrc_));
    }
  }

  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
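// The forward pass above is a row-wise multiplex: input 0 carries the ids,
// and output row i is copied from row i of input (copyIds[i] + 1). Below is a
// minimal, self-contained sketch of that row-selection semantics, using plain
// std::vector "matrices" instead of Paddle's Matrix/IVector types; all names
// here are illustrative, not part of the layer's API.
#include <cassert>
#include <vector>

using Row = std::vector<float>;
using Mat = std::vector<Row>;  // a "matrix" is a vector of equally sized rows

// Row-wise multiplexing: out[i] = inputs[ids[i]][i].
// (In the real layer the id stream is input 0, so the selected data inputs
// are offset by one; that offset is omitted here.)
Mat multiplexRows(const std::vector<int>& ids, const std::vector<Mat>& inputs) {
  Mat out(ids.size());
  for (size_t i = 0; i < ids.size(); ++i) {
    assert(ids[i] >= 0 && ids[i] < static_cast<int>(inputs.size()));
    out[i] = inputs[ids[i]][i];  // copy row i from the selected input
  }
  return out;
}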
TEST(Argument, poolSequenceWithStride) {
  Argument input, output;
  ICpuGpuVector::resizeOrCreate(input.sequenceStartPositions, 5, false);
  int* inStart = input.sequenceStartPositions->getMutableData(false);
  inStart[0] = 0;
  inStart[1] = 9;
  inStart[2] = 14;
  inStart[3] = 17;
  inStart[4] = 30;

  int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
  int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};

  for (auto reversed : {false, true}) {
    IVectorPtr stridePositions;
    output.poolSequenceWithStride(
        input, 5 /* stride */, &stridePositions, reversed);

    const int* outStart = output.sequenceStartPositions->getData(false);
    CHECK_EQ(outStart[0], 0);
    CHECK_EQ(outStart[1], 2);
    CHECK_EQ(outStart[2], 3);
    CHECK_EQ(outStart[3], 4);
    CHECK_EQ(outStart[4], 7);

    CHECK_EQ(stridePositions->getSize(), 8);
    auto result = reversed ? strideResultReversed : strideResult;
    for (int i = 0; i < 8; i++) {
      CHECK_EQ(stridePositions->getData()[i], result[i]);
    }
  }
}
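// The expected arrays follow from chopping each sequence [start, end) into
// windows of length `stride`: forward mode aligns windows to the start of the
// sequence, reversed mode aligns them to the end, and the final boundary is
// always appended. A standalone sketch that reproduces both expected arrays
// (plain C++, not the Argument API; the function name is illustrative):
#include <algorithm>
#include <vector>

std::vector<int> stridePositions(const std::vector<int>& starts,
                                 int stride, bool reversed) {
  std::vector<int> pos;
  for (size_t i = 0; i + 1 < starts.size(); ++i) {
    int begin = starts[i], end = starts[i + 1];
    std::vector<int> seq;
    if (!reversed) {
      // Windows begin at start, start + stride, ...
      for (int p = begin; p < end; p += stride) seq.push_back(p);
    } else {
      // Windows are anchored at the sequence end; the leftover partial
      // window sits at the front and always starts at `begin`.
      for (int p = end - stride; p > begin; p -= stride) seq.push_back(p);
      seq.push_back(begin);
      std::reverse(seq.begin(), seq.end());
    }
    pos.insert(pos.end(), seq.begin(), seq.end());
  }
  pos.push_back(starts.back());  // closing boundary
  return pos;
}
// stridePositions({0, 9, 14, 17, 30}, 5, false) -> {0, 5, 9, 14, 17, 22, 27, 30}
// stridePositions({0, 9, 14, 17, 30}, 5, true)  -> {0, 4, 9, 14, 17, 20, 25, 30}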
// "zadatak prvi" ("first task"): exercises the basic vector and matrix
// operations of the linear-algebra library (addition, dot and cross product,
// normalization, transpose, multiplication, inversion).
void zadatakprvi() {
  IVectorPtr a = Vector::parseSimple("2 3 -4");
  IVectorPtr b = Vector::parseSimple("-1 4 -3");
  IVectorPtr c = Vector::parseSimple("2 2 4");

  IMatrixPtr d = Matrix::parseSimple("1 2 3 | 2 1 3 | 4 5 1");
  IMatrixPtr e = Matrix::parseSimple("-1 2 -3 | 5 -2 7 | -4 -1 3");
  IMatrixPtr f = Matrix::parseSimple("-24 18 5 | 20 -15 -4 | -5 4 1");
  IMatrixPtr g = Matrix::parseSimple("1 2 3 | 0 1 4 | 5 6 0");

  IVectorPtr v1 = a->nAdd(b);
  cout << "v1: " << endl << v1->toString() << endl;

  double s = v1->scalarProduct(b);
  cout << "s: " << endl << s << endl << endl;

  IVectorPtr v2 = v1->nVectorProduct(a);
  cout << "v2: " << endl << v2->toString() << endl;

  IVectorPtr v3 = v2->copy()->normalize();
  cout << "v3: " << endl << v3->toString() << endl;

  IVectorPtr v4 = v2->copy()->nScalarMultiply(-1);
  cout << "v4: " << endl << v4->toString() << endl;

  IMatrixPtr m1 = d->nAdd(e);
  cout << "m1: " << endl << m1->toString() << endl;

  IMatrixPtr m2 = d->nMultiply(e->nTranspose(true));
  cout << "m2: " << endl << m2->toString() << endl;

  IMatrixPtr m3 = f->nInvert()->nMultiply(g->nInvert());
  cout << "m3: " << endl << m3->toString() << endl;
}
void SparsePrefetchRowCpuMatrix::addRows(IVectorPtr ids) {
  std::vector<unsigned int>& localIndices = indexDictHandle_->localIndices;
  size_t numSamples = ids->getSize();
  int* index = ids->getData();
  for (size_t i = 0; i < numSamples; ++i) {
    if (index[i] == -1) continue;

    unsigned int id = (unsigned int)index[i];
    CHECK_LT(id, this->getHeight())
        << "id: " << id << ", height: " << this->getHeight()
        << "; sparse id value exceeds the max input dimension, "
        << "which could be caused by invalid input data samples";
    localIndices.push_back(id);
  }
}
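// Conceptually, addRows just accumulates the valid row ids a batch touches;
// the ids are later used to decide which parameter rows to prefetch. A rough,
// self-contained sketch of that id-gathering step, with deduplication shown
// purely for illustration (plain C++; LocalIndexDict and addRowIds are
// hypothetical names, not Paddle's types):
#include <algorithm>
#include <vector>

struct LocalIndexDict {
  std::vector<unsigned int> localIndices;  // distinct global row ids to fetch
};

void addRowIds(LocalIndexDict& dict, const std::vector<int>& ids,
               size_t height) {
  for (int id : ids) {
    if (id == -1) continue;  // -1 marks "no id" and is skipped
    if (static_cast<size_t>(id) >= height) continue;  // out-of-range id
    dict.localIndices.push_back(static_cast<unsigned int>(id));
  }
  // Deduplicate so each row is fetched at most once.
  std::sort(dict.localIndices.begin(), dict.localIndices.end());
  dict.localIndices.erase(
      std::unique(dict.localIndices.begin(), dict.localIndices.end()),
      dict.localIndices.end());
}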
void prepareSamples() {
  CHECK(!useGpu_) << "GPU is not supported";

  int batchSize = getInput(*labelLayer_).getBatchSize();
  IVectorPtr label = getInput(*labelLayer_).ids;

  CpuSparseMatrixPtr multiLabel =
      std::dynamic_pointer_cast<CpuSparseMatrix>(getInput(*labelLayer_).value);

  CHECK(label || multiLabel)
      << "The label layer must have ids or NonValueSparseMatrix value";

  auto& randEngine = ThreadLocalRandomEngine::get();

  samples_.clear();
  samples_.reserve(batchSize * (1 + config_.num_neg_samples()));

  real* weight =
      weightLayer_ ? getInputValue(*weightLayer_)->getData() : nullptr;

  for (int i = 0; i < batchSize; ++i) {
    real w = weight ? weight[i] : 1;
    if (label) {
      int* ids = label->getData();
      samples_.push_back({i, ids[i], true, w});
    } else {
      const int* cols = multiLabel->getRowCols(i);
      int n = multiLabel->getColNum(i);
      for (int j = 0; j < n; ++j) {
        samples_.push_back({i, cols[j], true, w});
      }
    }
    for (int j = 0; j < config_.num_neg_samples(); ++j) {
      int id = sampler_ ? sampler_->gen(randEngine) : rand_(randEngine);
      samples_.push_back({i, id, false, w});
    }
  }
  prepared_ = true;
}
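// The sampling scheme above is the usual noise-contrastive setup: each batch
// entry keeps its true label(s) as positive samples and draws a fixed number
// of random ids as negatives, all carrying the entry's weight. A minimal
// sketch of that positive/negative sample generation, assuming uniform
// negative sampling and single labels (plain C++; all names illustrative):
#include <random>
#include <vector>

struct SampleEntry {
  int sampleIdx;    // which batch entry this sample belongs to
  int labelId;      // class id (true label or sampled negative)
  bool isPositive;  // true for the ground-truth label
  float weight;     // per-entry weight (fixed to 1 here)
};

std::vector<SampleEntry> makeSamples(const std::vector<int>& labels,
                                     int numClasses, int numNeg,
                                     std::mt19937& rng) {
  std::uniform_int_distribution<int> negDist(0, numClasses - 1);
  std::vector<SampleEntry> samples;
  samples.reserve(labels.size() * (1 + numNeg));
  for (size_t i = 0; i < labels.size(); ++i) {
    samples.push_back({static_cast<int>(i), labels[i], true, 1.0f});
    for (int j = 0; j < numNeg; ++j) {
      samples.push_back({static_cast<int>(i), negDist(rng), false, 1.0f});
    }
  }
  return samples;
}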
void generateMDimSequenceData(const IVectorPtr& sequenceStartPositions,
                              IVectorPtr& cpuSequenceDims) {
  /* generate sequences with 2 dims */
  int numSeqs = sequenceStartPositions->getSize() - 1;
  int numDims = 2;

  cpuSequenceDims = IVector::create(numSeqs * numDims, /* useGpu= */ false);
  int* bufStarts = sequenceStartPositions->getData();
  int* bufDims = cpuSequenceDims->getData();

  for (int i = 0; i < numSeqs; i++) {
    int len = bufStarts[i + 1] - bufStarts[i];
    /* pick width and height randomly among the factorizations of len */
    std::vector<int> dimVec;
    for (int j = 0; j < len; j++) {
      if (len % (j + 1) == 0) {
        dimVec.push_back(j + 1);  // collect every divisor of len
      }
    }
    int idx = rand() % dimVec.size();  // NOLINT use rand_r
    bufDims[i * numDims] = dimVec[idx];
    bufDims[i * numDims + 1] = len / dimVec[idx];
  }
}
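// The inner loop enumerates the divisors of `len` so that a random
// factorization len = width * height can be chosen for each sequence. The
// same step in isolation (plain C++; the function name is illustrative):
#include <cstdlib>
#include <utility>
#include <vector>

// Choose a random (width, height) pair with width * height == len,
// by picking a random divisor of len as the width. Assumes len >= 1.
std::pair<int, int> randomFactorization(int len) {
  std::vector<int> divisors;
  for (int d = 1; d <= len; ++d) {
    if (len % d == 0) divisors.push_back(d);
  }
  int width = divisors[rand() % divisors.size()];  // NOLINT use rand_r
  return {width, len / width};
}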
void MultiplexLayer::calculateCopySchedule(const IVectorPtr& copyIds,
                                           size_t numIns) {
  copySchedule_.clear();
  CopyInfo prevCopyInfo(0, 0, -1);
  for (size_t i = 0; i < copyIds->getSize(); i++) {
    int copyId = copyIds->getElement(i);
    CHECK_GE(copyId, 0);
    CHECK_LT(copyId, int(numIns));

    // Consecutive rows that copy from the same input layer as the previous
    // row are merged into a single CopyInfo, so they can be copied at once.
    if (copyId == prevCopyInfo.copyIdx) {
      ++prevCopyInfo.length;
    } else {
      if (prevCopyInfo.copyIdx != -1) {
        copySchedule_.emplace_back(prevCopyInfo);
      }
      prevCopyInfo.startIdx = i;
      prevCopyInfo.length = 1;
      prevCopyInfo.copyIdx = copyId;
    }
  }
  if (prevCopyInfo.copyIdx != -1) {
    copySchedule_.emplace_back(prevCopyInfo);
  }
}
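// In other words, the id vector is run-length encoded: each maximal run of
// identical ids becomes one (startIdx, length, copyIdx) entry, which forward()
// later turns into a single sub-matrix copy per run. A standalone sketch of
// that run-length grouping (plain C++, with an illustrative CopyInfo struct):
#include <vector>

struct RunInfo {
  size_t startIdx;  // first row of the run
  size_t length;    // number of consecutive rows in the run
  int copyIdx;      // which input layer the run copies from
};

// Run-length encode `copyIds`: one RunInfo per maximal run of equal ids.
std::vector<RunInfo> buildCopySchedule(const std::vector<int>& copyIds) {
  std::vector<RunInfo> schedule;
  for (size_t i = 0; i < copyIds.size(); ++i) {
    if (!schedule.empty() && schedule.back().copyIdx == copyIds[i]) {
      ++schedule.back().length;  // extend the current run
    } else {
      schedule.push_back({i, 1, copyIds[i]});  // start a new run
    }
  }
  return schedule;
}
// e.g. {1, 1, 0, 0, 0, 2} -> {(0,2,1), (2,3,0), (5,1,2)}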
void prefetch() override {
  prepareSamples();
  IVector::resizeOrCreate(labelIds_, samples_.size(), useGpu_);
  int* ids = labelIds_->getData();
  for (size_t i = 0; i < samples_.size(); ++i) {
    ids[i] = samples_[i].labelId;
  }

  for (int i = 0; i < numInputs_; ++i) {
    auto sparseParam =
        dynamic_cast<SparsePrefetchRowCpuMatrix*>(weights_[i]->getW().get());
    if (sparseParam) {
      sparseParam->addRows(labelIds_);
    }
  }
}
void checkVectorEqual(const IVectorPtr& a, const IVectorPtr& b) {
  EXPECT_EQ(a->getSize(), b->getSize());
  for (size_t r = 0; r < a->getSize(); ++r) {
    EXPECT_FLOAT_EQ(a->get(r), b->get(r));
  }
}
void prepareData(DataBatch* batch,
                 const int* numPerSlotType,
                 bool iid,
                 bool useGpu) {
  batch->clear();
  int64_t size = uniformRandom(100) + 10;
  batch->setSize(size);

  ICpuGpuVectorPtr sequenceStartPositions;
  ICpuGpuVectorPtr subSequenceStartPositions;
  if (!iid) {
    int numSeqs = uniformRandom(10) + 1;
    sequenceStartPositions =
        ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
    int* buf = sequenceStartPositions->getMutableData(false);
    subSequenceStartPositions =
        ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
    int* subBuf = subSequenceStartPositions->getMutableData(false);
    int64_t pos = 0;
    int maxLen = 2 * size / numSeqs;
    for (int i = 0; i < numSeqs; ++i) {
      int len =
          uniformRandom(min<int64_t>(maxLen, size - pos - numSeqs + i)) + 1;
      buf[i] = pos;
      subBuf[i] = pos;
      pos += len;
      VLOG(1) << " len=" << len;
    }
    buf[numSeqs] = size;
    subBuf[numSeqs] = size;
  }

  vector<Argument>& arguments = batch->getStreams();
  for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_DENSE]; ++i) {
    int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    MatrixPtr mat = Matrix::create(size, dim, /* trans= */ false, false);
    mat->randomizeUniform();
    Argument arg;
    arg.value = mat;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE]; ++i) {
    MatrixPtr mat =
        makeRandomSparseMatrix(size, kSpraseMatrixDim, false, useGpu);
    Argument arg;
    arg.value = mat;
    arg.sequenceStartPositions = sequenceStartPositions;
    arg.subSequenceStartPositions = subSequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_SPARSE_VALUE]; ++i) {
    MatrixPtr mat =
        makeRandomSparseMatrix(size, kSpraseMatrixDim, true, useGpu);
    Argument arg;
    arg.value = mat;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::STRING]; ++i) {
    int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    SVectorPtr vec = std::make_shared<std::vector<std::string>>();
    for (int j = 0; j < size; ++j) {
      vec->push_back(randStr(dim));
    }
    Argument arg;
    arg.strs = vec;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::INDEX]; ++i) {
    int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    IVectorPtr vec = IVector::create(size, /* useGpu= */ false);
    int* buf = vec->getData();
    for (int j = 0; j < size; ++j) {
      buf[j] = uniformRandom(dim);
    }
    Argument arg;
    arg.ids = vec;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
}