// Checks Argument::poolSequenceWithStride for both scan directions:
// pooled sequence-start positions and the generated stride positions.
TEST(Argument, poolSequenceWithStride) {
  Argument input, output;
  ICpuGpuVector::resizeOrCreate(input.sequenceStartPositions, 5, false);
  int* seqStart = input.sequenceStartPositions->getMutableData(false);
  // Four sequences: [0,9), [9,14), [14,17), [17,30).
  seqStart[0] = 0;
  seqStart[1] = 9;
  seqStart[2] = 14;
  seqStart[3] = 17;
  seqStart[4] = 30;

  int expectedForward[] = {0, 5, 9, 14, 17, 22, 27, 30};
  int expectedReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};

  for (bool reversed : {false, true}) {
    IVectorPtr stridePositions;
    output.poolSequenceWithStride(
        input, 5 /* stride */, &stridePositions, reversed);

    // Pooling with stride 5 yields 2, 1, 1, 3 windows per sequence.
    const int* pooledStart = output.sequenceStartPositions->getData(false);
    CHECK_EQ(pooledStart[0], 0);
    CHECK_EQ(pooledStart[1], 2);
    CHECK_EQ(pooledStart[2], 3);
    CHECK_EQ(pooledStart[3], 4);
    CHECK_EQ(pooledStart[4], 7);

    CHECK_EQ(stridePositions->getSize(), 8);
    auto expected = reversed ? expectedReversed : expectedForward;
    for (int i = 0; i < 8; i++) {
      CHECK_EQ(stridePositions->getData()[i], expected[i]);
    }
  }
}
// Appends every valid id from `ids` to the shared local-index dictionary.
// Entries equal to -1 are treated as sentinels and skipped; any other id
// must be smaller than the matrix height.
void SparsePrefetchRowCpuMatrix::addRows(IVectorPtr ids) {
  std::vector<unsigned int>& localIndices = indexDictHandle_->localIndices;
  const size_t count = ids->getSize();
  const int* idData = ids->getData();
  for (size_t pos = 0; pos < count; ++pos) {
    const int rawId = idData[pos];
    if (rawId == -1) continue;  // sentinel: no id at this position
    unsigned int rowId = (unsigned int)rawId;
    CHECK_LT(rowId, this->getHeight())
        << "id:" << rowId << "Height:" << this->getHeight()
        << "sparse id value exceeds the max input dimension, "
        << "it could be caused invalid input data samples";
    localIndices.push_back(rowId);
  }
}
// Generates random 2-D dimensions (width, height) for every sequence in
// `sequenceStartPositions`, writing them into `cpuSequenceDims` as
// [w0, h0, w1, h1, ...] with w * h == sequence length.
//
// Fix: the divisor-collection loop pushed the constant 1 instead of the
// candidate divisor `j + 1`, so every sampled width was 1 and the height
// was always the full sequence length — the random pick did nothing.
void generateMDimSequenceData(const IVectorPtr& sequenceStartPositions,
                              IVectorPtr& cpuSequenceDims) {
  /* generate sequences with 2 dims */
  int numSeqs = sequenceStartPositions->getSize() - 1;
  int numDims = 2;

  cpuSequenceDims = IVector::create(numSeqs * numDims, /* useGpu= */ false);
  int* bufStarts = sequenceStartPositions->getData();
  int* bufDims = cpuSequenceDims->getData();

  for (int i = 0; i < numSeqs; i++) {
    int len = bufStarts[i + 1] - bufStarts[i];
    /* get width and height randomly */
    std::vector<int> dimVec;
    for (int j = 0; j < len; j++) {
      if (len % (j + 1) == 0) {
        // Collect every divisor of len as a candidate width
        // (was: push_back(1), which made the width always 1).
        dimVec.push_back(j + 1);
      }
    }
    // len >= 1 guarantees at least divisor 1, so dimVec is never empty.
    int idx = rand() % dimVec.size();  // NOLINT use rand_r
    bufDims[i * numDims] = dimVec[idx];
    bufDims[i * numDims + 1] = len / dimVec[idx];
  }
}
void prefetch() override { prepareSamples(); IVector::resizeOrCreate(labelIds_, samples_.size(), useGpu_); int* ids = labelIds_->getData(); for (size_t i = 0; i < samples_.size(); ++i) { ids[i] = samples_[i].labelId; } for (int i = 0; i < numInputs_; ++i) { auto sparseParam = dynamic_cast<SparsePrefetchRowCpuMatrix*>(weights_[i]->getW().get()); if (sparseParam) { sparseParam->addRows(labelIds_); } } }
void prepareSamples() { CHECK(!useGpu_) << "GPU is not supported"; int batchSize = getInput(*labelLayer_).getBatchSize(); IVectorPtr label = getInput(*labelLayer_).ids; CpuSparseMatrixPtr multiLabel = std::dynamic_pointer_cast<CpuSparseMatrix>( getInput(*labelLayer_).value); CHECK(label || multiLabel) << "The label layer must have ids or NonValueSparseMatrix value"; auto& randEngine = ThreadLocalRandomEngine::get(); samples_.clear(); samples_.reserve(batchSize * (1 + config_.num_neg_samples())); real* weight = weightLayer_ ? getInputValue(*weightLayer_)->getData() : nullptr; for (int i = 0; i < batchSize; ++i) { real w = weight ? weight[i] : 1; if (label) { int* ids = label->getData(); samples_.push_back({i, ids[i], true, w}); } else { const int* cols = multiLabel->getRowCols(i); int n = multiLabel->getColNum(i); for (int j = 0; j < n; ++j) { samples_.push_back({i, cols[j], true, w}); } } for (int j = 0; j < config_.num_neg_samples(); ++j) { int id = sampler_ ? sampler_->gen(randEngine) : rand_(randEngine); samples_.push_back({i, id, false, w}); } } prepared_ = true; }
void prepareData(DataBatch* batch, const int* numPerSlotType, bool iid, bool useGpu) { batch->clear(); int64_t size = uniformRandom(100) + 10; batch->setSize(size); ICpuGpuVectorPtr sequenceStartPositions; ICpuGpuVectorPtr subSequenceStartPositions; if (!iid) { int numSeqs = uniformRandom(10) + 1; sequenceStartPositions = ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false); int* buf = sequenceStartPositions->getMutableData(false); subSequenceStartPositions = ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false); int* subBuf = subSequenceStartPositions->getMutableData(false); int64_t pos = 0; int maxLen = 2 * size / numSeqs; for (int i = 0; i < numSeqs; ++i) { int len = uniformRandom(min<int64_t>(maxLen, size - pos - numSeqs + i)) + 1; buf[i] = pos; subBuf[i] = pos; pos += len; VLOG(1) << " len=" << len; } buf[numSeqs] = size; subBuf[numSeqs] = size; } vector<Argument>& arguments = batch->getStreams(); for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_DENSE]; ++i) { int64_t dim = rand() % 10 + 4; // NOLINT rand_r MatrixPtr mat = Matrix::create(size, dim, /* trans= */ false, false); mat->randomizeUniform(); Argument arg; arg.value = mat; arg.sequenceStartPositions = sequenceStartPositions; arguments.push_back(arg); } for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE]; ++i) { MatrixPtr mat = makeRandomSparseMatrix(size, kSpraseMatrixDim, false, useGpu); Argument arg; arg.value = mat; arg.sequenceStartPositions = sequenceStartPositions; arg.subSequenceStartPositions = subSequenceStartPositions; arguments.push_back(arg); } for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_SPARSE_VALUE]; ++i) { MatrixPtr mat = makeRandomSparseMatrix(size, kSpraseMatrixDim, true, useGpu); Argument arg; arg.value = mat; arg.sequenceStartPositions = sequenceStartPositions; arguments.push_back(arg); } for (int i = 0; i < numPerSlotType[SlotDef::STRING]; ++i) { int64_t dim = rand() % 10 + 4; // NOLINT rand_r SVectorPtr vec = 
std::make_shared<std::vector<std::string>>(); for (int j = 0; j < size; ++j) { vec->push_back(randStr(dim)); } Argument arg; arg.strs = vec; arg.sequenceStartPositions = sequenceStartPositions; arguments.push_back(arg); } for (int i = 0; i < numPerSlotType[SlotDef::INDEX]; ++i) { int64_t dim = rand() % 10 + 4; // NOLINT rand_r IVectorPtr vec = IVector::create(size, /* useGpu= */ false); int* buf = vec->getData(); for (int j = 0; j < size; ++j) { buf[j] = uniformRandom(dim); } Argument arg; arg.ids = vec; arg.sequenceStartPositions = sequenceStartPositions; arguments.push_back(arg); } }