Example #1
TEST(Argument, poolSequenceWithStride) {
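  // Input Argument holds four sequences: [0, 9), [9, 14), [14, 17), [17, 30).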
  Argument input, output;
  ICpuGpuVector::resizeOrCreate(input.sequenceStartPositions, 5, false);
  int* inStart = input.sequenceStartPositions->getMutableData(false);
  inStart[0] = 0;
  inStart[1] = 9;
  inStart[2] = 14;
  inStart[3] = 17;
  inStart[4] = 30;

  int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
  int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};
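  // With stride 5, every sequence is split into chunks of at most 5 elements,
  // counted from the front (forward) or from the back (reversed).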

  for (auto reversed : {false, true}) {
    IVectorPtr stridePositions;
    output.poolSequenceWithStride(
        input, 5 /* stride */, &stridePositions, reversed);

    const int* outStart = output.sequenceStartPositions->getData(false);
    CHECK_EQ(outStart[0], 0);
    CHECK_EQ(outStart[1], 2);
    CHECK_EQ(outStart[2], 3);
    CHECK_EQ(outStart[3], 4);
    CHECK_EQ(outStart[4], 7);

    CHECK_EQ(stridePositions->getSize(), 8UL);
    auto result = reversed ? strideResultReversed : strideResult;
    for (int i = 0; i < 8; i++) {
      CHECK_EQ(stridePositions->getData()[i], result[i]);
    }
  }
}
Example #2
void SparsePrefetchRowCpuMatrix::addRows(IVectorPtr ids) {
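  // Append every valid id in `ids` to the local index dictionary,
  // skipping entries that are -1.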
  std::vector<unsigned int>& localIndices = indexDictHandle_->localIndices;
  size_t numSamples = ids->getSize();
  int* index = ids->getData();
  for (size_t i = 0; i < numSamples; ++i) {
    if (index[i] == -1) continue;

    unsigned int id = (unsigned int)index[i];
    CHECK_LT(id, this->getHeight())
        << "id: " << id << ", height: " << this->getHeight()
        << "; the sparse id value exceeds the max input dimension, "
        << "which could be caused by invalid input data samples";
    localIndices.push_back(id);
  }
}
Example #3
void generateMDimSequenceData(const IVectorPtr& sequenceStartPositions,
                              IVectorPtr& cpuSequenceDims) {
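  /* for each sequence, emit a (width, height) pair whose product equals the
     sequence length */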
  /* generate sequences with 2 dims */
  int numSeqs = sequenceStartPositions->getSize() - 1;
  int numDims = 2;

  cpuSequenceDims = IVector::create(numSeqs * numDims, /* useGpu= */ false);
  int* bufStarts = sequenceStartPositions->getData();
  int* bufDims = cpuSequenceDims->getData();

  for (int i = 0; i < numSeqs; i++) {
    int len = bufStarts[i + 1] - bufStarts[i];
    /* pick width and height at random: collect every divisor of len, then use
       a random divisor as the width and len / width as the height */
    std::vector<int> dimVec;
    for (int j = 0; j < len; j++) {
      if (len % (j + 1) == 0) {
        dimVec.push_back(j + 1);
      }
    }
    int idx = rand() % dimVec.size();  // NOLINT use rand_r
    bufDims[i * numDims] = dimVec[idx];
    bufDims[i * numDims + 1] = len / dimVec[idx];
  }
}
Example #4
  void prefetch() override {
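    // Gather the label ids of the prepared samples and pre-fetch the
    // corresponding rows of any sparse-row weight matrices.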
    prepareSamples();
    IVector::resizeOrCreate(labelIds_, samples_.size(), useGpu_);
    int* ids = labelIds_->getData();
    for (size_t i = 0; i < samples_.size(); ++i) {
      ids[i] = samples_[i].labelId;
    }

    for (int i = 0; i < numInputs_; ++i) {
      auto sparseParam =
          dynamic_cast<SparsePrefetchRowCpuMatrix*>(weights_[i]->getW().get());
      if (sparseParam) {
        sparseParam->addRows(labelIds_);
      }
    }
  }
Example #5
  void prepareSamples() {
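    // Build the per-batch sample list: positive samples from the label input,
    // plus randomly drawn negative samples for every instance.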
    CHECK(!useGpu_) << "GPU is not supported";

    int batchSize = getInput(*labelLayer_).getBatchSize();
    IVectorPtr label = getInput(*labelLayer_).ids;

    CpuSparseMatrixPtr multiLabel = std::dynamic_pointer_cast<CpuSparseMatrix>(
        getInput(*labelLayer_).value);

    CHECK(label || multiLabel)
        << "The label layer must have ids or NonValueSparseMatrix value";

    auto& randEngine = ThreadLocalRandomEngine::get();

    samples_.clear();
    samples_.reserve(batchSize * (1 + config_.num_neg_samples()));

    real* weight =
        weightLayer_ ? getInputValue(*weightLayer_)->getData() : nullptr;

    for (int i = 0; i < batchSize; ++i) {
      real w = weight ? weight[i] : 1;
      if (label) {
        int* ids = label->getData();
        samples_.push_back({i, ids[i], true, w});
      } else {
        const int* cols = multiLabel->getRowCols(i);
        int n = multiLabel->getColNum(i);
        for (int j = 0; j < n; ++j) {
          samples_.push_back({i, cols[j], true, w});
        }
      }
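      // draw config_.num_neg_samples() random ids as negative samples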
      for (int j = 0; j < config_.num_neg_samples(); ++j) {
        int id = sampler_ ? sampler_->gen(randEngine) : rand_(randEngine);
        samples_.push_back({i, id, false, w});
      }
    }
    prepared_ = true;
  }
Example #6
void prepareData(DataBatch* batch, const int* numPerSlotType, bool iid,
                 bool useGpu) {
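  // Fill `batch` with random data for every requested slot type; when iid is
  // false, also attach randomly generated sequence start positions.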
  batch->clear();
  int64_t size = uniformRandom(100) + 10;
  batch->setSize(size);

  ICpuGpuVectorPtr sequenceStartPositions;
  ICpuGpuVectorPtr subSequenceStartPositions;
  if (!iid) {
    int numSeqs = uniformRandom(10) + 1;
    sequenceStartPositions =
        ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
    int* buf = sequenceStartPositions->getMutableData(false);
    subSequenceStartPositions =
        ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
    int* subBuf = subSequenceStartPositions->getMutableData(false);
    int64_t pos = 0;
    int maxLen = 2 * size / numSeqs;
    for (int i = 0; i < numSeqs; ++i) {
      int len =
          uniformRandom(min<int64_t>(maxLen, size - pos - numSeqs + i)) + 1;
      buf[i] = pos;
      subBuf[i] = pos;
      pos += len;
      VLOG(1) << " len=" << len;
    }
    buf[numSeqs] = size;
    subBuf[numSeqs] = size;
  }

  vector<Argument>& arguments = batch->getStreams();
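  // one loop per slot type, in order: dense, sparse (non-value),
  // sparse (value), string, index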
  for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_DENSE]; ++i) {
    int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    MatrixPtr mat = Matrix::create(size, dim, /* trans= */ false, false);
    mat->randomizeUniform();
    Argument arg;
    arg.value = mat;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE]; ++i) {
    MatrixPtr mat =
        makeRandomSparseMatrix(size, kSpraseMatrixDim, false, useGpu);
    Argument arg;
    arg.value = mat;
    arg.sequenceStartPositions = sequenceStartPositions;
    arg.subSequenceStartPositions = subSequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_SPARSE_VALUE]; ++i) {
    MatrixPtr mat =
        makeRandomSparseMatrix(size, kSpraseMatrixDim, true, useGpu);
    Argument arg;
    arg.value = mat;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::STRING]; ++i) {
    int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    SVectorPtr vec = std::make_shared<std::vector<std::string>>();
    for (int j = 0; j < size; ++j) {
      vec->push_back(randStr(dim));
    }
    Argument arg;
    arg.strs = vec;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::INDEX]; ++i) {
    int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    IVectorPtr vec = IVector::create(size, /* useGpu= */ false);
    int* buf = vec->getData();
    for (int j = 0; j < size; ++j) {
      buf[j] = uniformRandom(dim);
    }
    Argument arg;
    arg.ids = vec;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
}
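
The examples above exercise a small IVector surface: IVector::create and IVector::resizeOrCreate to allocate an integer vector on CPU or GPU, and getData() / getSize() to fill and read its buffer. The minimal sketch below summarizes that pattern; the include path, the paddle namespace usings, and the wrapper function are assumptions for illustration, not code from the examples.

// Minimal sketch of the IVector usage pattern shared by the examples above
// (assumed include path; adjust to the PaddlePaddle version in use).
#include "paddle/math/Vector.h"

using paddle::IVector;
using paddle::IVectorPtr;

void ivectorSketch() {
  // Allocate an integer vector with 8 elements on the CPU.
  IVectorPtr vec = IVector::create(8, /* useGpu= */ false);

  // Fill it through the raw buffer, as generateMDimSequenceData() does.
  int* buf = vec->getData();
  for (size_t i = 0; i < vec->getSize(); ++i) {
    buf[i] = static_cast<int>(i);
  }

  // Resize in place (or create lazily), as prefetch() does with labelIds_.
  IVector::resizeOrCreate(vec, 16, /* useGpu= */ false);
}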