Пример #1
0
void generateSubSequenceStartPositions(
    const ICpuGpuVectorPtr& sequenceStartPositions,
    ICpuGpuVectorPtr& subSequenceStartPositions) {
  int numSeqs = sequenceStartPositions->getSize() - 1;
  const int* buf = sequenceStartPositions->getData(false);
  int numOnes = 0;
  for (int i = 0; i < numSeqs; ++i) {
    if (buf[i + 1] - buf[i] == 1) {
      ++numOnes;
    }
  }
  // each seq has two sub-seq except length 1
  int numSubSeqs = numSeqs * 2 - numOnes;
  subSequenceStartPositions =
      ICpuGpuVector::create(numSubSeqs + 1, /* useGpu= */ false);
  int* subBuf = subSequenceStartPositions->getMutableData(false);
  int j = 0;
  for (int i = 0; i < numSeqs; ++i) {
    if (buf[i + 1] - buf[i] == 1) {
      subBuf[j++] = buf[i];
    } else {
      int len = uniformRandom(buf[i + 1] - buf[i] - 1) + 1;
      subBuf[j++] = buf[i];
      subBuf[j++] = buf[i] + len;
    }
  }
  subBuf[j] = buf[numSeqs];
}
Пример #2
0
TEST(Argument, poolSequenceWithStride) {
  Argument input, output;
  ICpuGpuVector::resizeOrCreate(input.sequenceStartPositions, 5, false);
  int* inStart = input.sequenceStartPositions->getMutableData(false);
  inStart[0] = 0;
  inStart[1] = 9;
  inStart[2] = 14;
  inStart[3] = 17;
  inStart[4] = 30;

  int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
  int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};

  for (auto reversed : {false, true}) {
    ICpuGpuVectorPtr stridePositions;
    output.poolSequenceWithStride(
        input, 5 /* stride */, &stridePositions, reversed);

    const int* outStart = output.sequenceStartPositions->getData(false);
    CHECK_EQ(outStart[0], 0);
    CHECK_EQ(outStart[1], 2);
    CHECK_EQ(outStart[2], 3);
    CHECK_EQ(outStart[3], 4);
    CHECK_EQ(outStart[4], 7);

    CHECK_EQ(stridePositions->getSize(), 8UL);
    auto result = reversed ? strideResultReversed : strideResult;
    for (int i = 0; i < 8; i++) {
      CHECK_EQ(stridePositions->getData(false)[i], result[i]);
    }
  }
}
Пример #3
0
void generateSequenceStartPositions(size_t batchSize,
                                    ICpuGpuVectorPtr& sequenceStartPositions) {
  int numSeqs;
  if (FLAGS_fixed_seq_length != 0) {
    numSeqs = std::ceil((float)batchSize / (float)FLAGS_fixed_seq_length);
  } else {
    numSeqs = batchSize / 10 + 1;
  }
  sequenceStartPositions =
      ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
  int* buf = sequenceStartPositions->getMutableData(false);
  int64_t pos = 0;
  int len = FLAGS_fixed_seq_length;
  int maxLen = 2 * batchSize / numSeqs;
  for (int i = 0; i < numSeqs; ++i) {
    if (FLAGS_fixed_seq_length == 0) {
      len = uniformRandom(
                std::min<int64_t>(maxLen, batchSize - pos - numSeqs + i)) +
            1;
    }
    buf[i] = pos;
    pos += len;
    VLOG(1) << " len=" << len;
  }
  buf[numSeqs] = batchSize;
}
Пример #4
0
void writeData(const DataBatch& batch, bool useGpu, bool dataCompression) {
  DataHeader header;
  const vector<Argument>& arguments = batch.getStreams();
  for (auto& argument : arguments) {
    SlotDef* slotDef = header.add_slot_defs();
    slotDef->set_type(getSlotType(argument));
    slotDef->set_dim(getSlotDim(argument));
  }
  VLOG(1) << "header=" << header.DebugString();

  int64_t totalSeqs = batch.getNumSequences();
  int64_t seq = 0;
  ICpuGpuVectorPtr sequenceStartPositions =
      arguments[0].sequenceStartPositions;
  int64_t numWritten = 0;
  vector<string> curProtoFiles =
      dataCompression ? protoFilesCompressed : protoFiles;
  for (size_t i = 0; i < curProtoFiles.size(); ++i) {
    int64_t numSeqs = totalSeqs * (i + 1) / curProtoFiles.size() -
                      totalSeqs * i / curProtoFiles.size();
    ofstream os(curProtoFiles[i]);
    CHECK(os) << "Fail to open " << curProtoFiles[i];
    unique_ptr<ProtoWriter> writer(new ProtoWriter(&os, dataCompression));
    CHECK(writer->write(header));
    for (int j = 0; j < numSeqs; ++j, ++seq) {
      int64_t begin = seq;
      int64_t end = seq + 1;
      if (sequenceStartPositions) {
        begin = sequenceStartPositions->getElement(seq);
        end = sequenceStartPositions->getElement(seq + 1);
      }
      for (int pos = begin; pos < end; ++pos) {
        DataSample sample;
        makeSample(arguments, pos, pos == begin, &sample, useGpu);
        CHECK(writer->write(sample));
        ++numWritten;
      }
    }

    writer.reset(nullptr);
    os.close();
  }
  CHECK_EQ(arguments[0].getBatchSize(), numWritten);
}
Пример #5
0
void KmaxSeqScoreLayer::kmaxScorePerSeq(const real* scores,
                                        real* sortedIds,
                                        const ICpuGpuVectorPtr seqStartPos) {
  int* starts = seqStartPos->getMutableData(false);
  std::vector<real> indices;
  for (size_t i = 0; i < seqStartPos->getSize() - 1; ++i) {
    int seqLen = starts[i + 1] - starts[i];
    int k = std::min(static_cast<int>(beamSize_), seqLen);

    indices.resize(seqLen, 0);
    std::iota(begin(indices), end(indices), 0.);
    std::vector<real> tmpScore(scores + starts[i], scores + starts[i + 1]);
    std::partial_sort(
        begin(indices),
        begin(indices) + k,
        end(indices),
        [&](size_t a, size_t b) { return tmpScore[a] > tmpScore[b]; });
    memcpy(sortedIds + (i * beamSize_), indices.data(), k * sizeof(real));
  }
}
Пример #6
0
void generateMDimSequenceData(const ICpuGpuVectorPtr& sequenceStartPositions,
                              IVectorPtr& cpuSequenceDims) {
  /* generate sequences with 2 dims */
  int numSeqs = sequenceStartPositions->getSize() - 1;
  int numDims = 2;

  cpuSequenceDims = IVector::create(numSeqs * numDims, /* useGpu= */ false);
  const int* bufStarts = sequenceStartPositions->getData(false);
  int* bufDims = cpuSequenceDims->getData();

  for (int i = 0; i < numSeqs; i++) {
    int len = bufStarts[i + 1] - bufStarts[i];
    /* get width and height randomly */
    std::vector<int> dimVec;
    for (int j = 0; j < len; j++) {
      if (len % (j + 1) == 0) {
        dimVec.push_back(1);
      }
    }
    int idx = rand() % dimVec.size();  // NOLINT use rand_r
    bufDims[i * numDims] = dimVec[idx];
    bufDims[i * numDims + 1] = len / dimVec[idx];
  }
}
Пример #7
0
void generateSequenceStartPositions(size_t batchSize,
                                    IVectorPtr& sequenceStartPositions) {
  ICpuGpuVectorPtr gpuCpuVec;
  generateSequenceStartPositions(batchSize, gpuCpuVec);
  sequenceStartPositions = gpuCpuVec->getMutableVector(false);
}
Пример #8
0
void prepareData(DataBatch* batch, const int* numPerSlotType, bool iid,
                 bool useGpu) {
  batch->clear();
  int64_t size = uniformRandom(100) + 10;
  batch->setSize(size);

  ICpuGpuVectorPtr sequenceStartPositions;
  ICpuGpuVectorPtr subSequenceStartPositions;
  if (!iid) {
    int numSeqs = uniformRandom(10) + 1;
    sequenceStartPositions =
        ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
    int* buf = sequenceStartPositions->getMutableData(false);
    subSequenceStartPositions =
        ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
    int* subBuf = subSequenceStartPositions->getMutableData(false);
    int64_t pos = 0;
    int maxLen = 2 * size / numSeqs;
    for (int i = 0; i < numSeqs; ++i) {
      int len =
          uniformRandom(min<int64_t>(maxLen, size - pos - numSeqs + i)) + 1;
      buf[i] = pos;
      subBuf[i] = pos;
      pos += len;
      VLOG(1) << " len=" << len;
    }
    buf[numSeqs] = size;
    subBuf[numSeqs] = size;
  }

  vector<Argument>& arguments = batch->getStreams();
  for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_DENSE]; ++i) {
    int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    MatrixPtr mat = Matrix::create(size, dim, /* trans= */ false, false);
    mat->randomizeUniform();
    Argument arg;
    arg.value = mat;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE]; ++i) {
    MatrixPtr mat =
        makeRandomSparseMatrix(size, kSpraseMatrixDim, false, useGpu);
    Argument arg;
    arg.value = mat;
    arg.sequenceStartPositions = sequenceStartPositions;
    arg.subSequenceStartPositions = subSequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_SPARSE_VALUE]; ++i) {
    MatrixPtr mat =
        makeRandomSparseMatrix(size, kSpraseMatrixDim, true, useGpu);
    Argument arg;
    arg.value = mat;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::STRING]; ++i) {
    int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    SVectorPtr vec = std::make_shared<std::vector<std::string>>();
    for (int j = 0; j < size; ++j) {
      vec->push_back(randStr(dim));
    }
    Argument arg;
    arg.strs = vec;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
  for (int i = 0; i < numPerSlotType[SlotDef::INDEX]; ++i) {
    int64_t dim = rand() % 10 + 4;  // NOLINT rand_r
    IVectorPtr vec = IVector::create(size, /* useGpu= */ false);
    int* buf = vec->getData();
    for (int j = 0; j < size; ++j) {
      buf[j] = uniformRandom(dim);
    }
    Argument arg;
    arg.ids = vec;
    arg.sequenceStartPositions = sequenceStartPositions;
    arguments.push_back(arg);
  }
}