示例#1
0
// Checks the device batchTranspose() against a transpose computed on the
// host. Each of the kBatch rows of the matrices holds one flattened
// kHeight x kWidth (row-major) sample.
TEST(MatrixBatchTransTest, test_batch_matrix_transpose) {
  const int kWidth = 100;
  const int kHeight = 50;
  const int kBatch = 50;
  const int kElemsPerSample = kWidth * kHeight;

  MatrixPtr hostInput = Matrix::create(kBatch, kElemsPerSample, false, false);
  MatrixPtr deviceInput = Matrix::create(kBatch, kElemsPerSample, false, true);

  MatrixPtr hostExpected =
      Matrix::create(kBatch, kElemsPerSample, false, false);
  MatrixPtr deviceOutput = Matrix::create(kBatch, kElemsPerSample, false, true);
  MatrixPtr hostActual = Matrix::create(kBatch, kElemsPerSample, false, false);

  real* inputData = hostInput->getData();
  real* expectedData = hostExpected->getData();

  for (int sample = 0; sample < kBatch; ++sample) {
    real* sampleIn = inputData + sample * kElemsPerSample;
    real* sampleExpected = expectedData + sample * kElemsPerSample;

    // Host input: every element stores its own flat index inside the sample.
    for (int idx = 0; idx < kElemsPerSample; ++idx) {
      sampleIn[idx] = idx;
    }

    // Reference result: element (row, col) of the input lands at (col, row).
    for (int row = 0; row < kHeight; ++row) {
      for (int col = 0; col < kWidth; ++col) {
        sampleExpected[col * kHeight + row] = sampleIn[row * kWidth + col];
      }
    }
  }

  // Device: upload, transpose the whole batch, download and compare.
  deviceInput->copyFrom(*hostInput, HPPL_STREAM_DEFAULT);
  batchTranspose(deviceInput->getData(),
                 deviceOutput->getData(),
                 kWidth,
                 kHeight,
                 kBatch);
  hostActual->copyFrom(*deviceOutput, HPPL_STREAM_DEFAULT);
  checkMatrixEqual(hostExpected, hostActual);
}
示例#2
0
// Copies a CPU CSR sparse matrix to its GPU clone on a non-default stream,
// then checks that multiplying each copy by the same dense matrix gives
// equal products.
TEST(Matrix, CopySparseMatrixToGpuSparseMatrix) {
  const size_t kRows = 20;
  const size_t kCols = 10;
  const size_t kRhsCols = 15;

  // Build the CPU sparse operand from a randomly filled dense matrix.
  MatrixPtr cpuSparse(
      new CpuSparseMatrix(kRows, kCols, kRows * 2, FLOAT_VALUE, SPARSE_CSR));
  MatrixPtr denseSeed(new CpuMatrix(kRows, kCols));
  denseSeed->randomizeUniform();
  cpuSparse->copyFrom(*denseSeed, HPPL_STREAM_DEFAULT);

  // Copy the sparse operand to the GPU clone and wait for the stream.
  MatrixPtr gpuSparse = cpuSparse->clone(kRows, kCols, true);
  const hl_stream_t copyStream = HPPL_STREAM_3;
  gpuSparse->copyFrom(*cpuSparse, copyStream);
  hl_stream_synchronize(copyStream);

  // Dense right-hand side, identical on both devices.
  MatrixPtr cpuRhs(new CpuMatrix(kCols, kRhsCols));
  cpuRhs->randomizeUniform();
  MatrixPtr gpuRhs(new GpuMatrix(kCols, kRhsCols));
  gpuRhs->copyFrom(*cpuRhs);

  // Both accumulators start from zero so the two products can be compared.
  MatrixPtr cpuProduct(new CpuMatrix(kRows, kRhsCols));
  MatrixPtr gpuProduct(new GpuMatrix(kRows, kRhsCols));
  cpuProduct->zeroMem();
  gpuProduct->zeroMem();

  cpuProduct->mul(*cpuSparse, *cpuRhs, 1.0, 1.0);
  gpuProduct->mul(*gpuSparse, *gpuRhs, 1.0, 1.0);
  checkMatrixEqual(cpuProduct, gpuProduct);
}
示例#3
0
/**
 * Returns a host-readable pointer to the elements of `matrix`.
 *
 * For a CPU matrix this is the matrix's own buffer. For a GPU matrix the
 * contents are copied into a host matrix first and a pointer into that copy
 * is returned.
 *
 * Lifetime: the host copy is kept in a thread-local holder. The original code
 * returned a pointer into a function-local MatrixPtr, which was released on
 * return and left the caller with a dangling pointer. The pointer returned
 * for a GPU matrix is therefore valid only until the next call to getData()
 * with a GPU matrix on the same thread; copy the data out if it must live
 * longer.
 *
 * @param matrix matrix whose elements are to be read on the host.
 * @return pointer to the matrix elements in host memory.
 */
const real* getData(const Matrix& matrix) {
  if (!matrix.useGpu()) {
    return matrix.getData();
  }

  // Owns the most recent host copy so the returned pointer outlives this call.
  static thread_local MatrixPtr hostCopy;
  hostCopy = Matrix::create(
      matrix.getHeight(), matrix.getWidth(), matrix.isTransposed(), false);
  hostCopy->copyFrom(matrix);
  return hostCopy->getData();
}
示例#4
0
/**
 * Forward pass of the k-max sequence score layer.
 *
 * Validates that input 0 is a (nested) sequence of width-1 scores, makes the
 * scores available in a CPU matrix, fills the output with -1 and then calls
 * kmaxScorePerSeq() with the scores, the output buffer and the start
 * positions of the (sub-)sequences.
 */
void KmaxSeqScoreLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& input = getInput(0);
  const MatrixPtr inputScore = getInputValue(0);

  CHECK(input.hasSeq() || input.hasSubseq())
      << "input of " << getName()
      << " must be a sequence or a nested sequence.";
  CHECK_EQ(input.value->getWidth(), 1UL)
      << "input of " << getName() << " are scores over a sequence or "
      << "a nested sequence, so its width must be 1.";

  // A nested input is processed per sub-sequence, a plain one per sequence.
  const bool isNested = input.hasSubseq();

  if (!useGpu_) {
    scores_ = inputScore;
  } else {
    /*
     * Currently this layer only runs on CPU. If the rest of the model is
     * running on GPU, copy the input of this layer from GPU to CPU.
     */
    Matrix::resizeOrCreate(scores_,
                           inputScore->getHeight(),
                           1,
                           false /* trans */,
                           false /* useGpu */);
    scores_->copyFrom(*inputScore);
  }

  /*
   * TODO(caoying)
   * In PaddlePaddle, all matrices currently hold real numbers, but the
   * output of this layer (selected indices of the given sequence) is
   * logically of integer type. Storing integers in a real-number matrix is
   * dangerous, because the real numbers will later be converted back to
   * integers.
   */
  const auto numRows =
      isNested ? input.getNumSubSequences() : input.getNumSequences();
  Matrix::resizeOrCreate(output_.value,
                         numRows,
                         beamSize_,
                         false /* trans */,
                         false /* useGpu */);

  // Initialise every output entry to -1 (all ones, scaled by -1).
  output_.value->one();
  output_.value->mulScalar(-1.);

  const auto& startPositions = isNested ? input.subSequenceStartPositions
                                        : input.sequenceStartPositions;
  kmaxScorePerSeq(scores_->getData(), output_.value->getData(), startPositions);
}
示例#5
0
void testBilinearFwdBwd(int numSamples,
                        int imgSizeH,
                        int imgSizeW,
                        int channels) {
  int inWidth = imgSizeH * imgSizeW * channels;
  int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels;
  real ratioH = 0.5;
  real ratioW = 0.5;

  // forward
  MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
  MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);

  MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
  MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
  MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);

  input->randomizeUniform();
  inputGpu->copyFrom(*input);

  {
    // nvprof: GPU Proflier
    REGISTER_GPU_PROFILER("testBilinearFwdBwd");
    target->bilinearForward(*input,
                            imgSizeH,
                            imgSizeW,
                            2 * imgSizeH,
                            2 * imgSizeW,
                            channels,
                            ratioH,
                            ratioW);
    targetGpu->bilinearForward(*inputGpu,
                               imgSizeH,
                               imgSizeW,
                               2 * imgSizeH,
                               2 * imgSizeW,
                               channels,
                               ratioH,
                               ratioW);
  }

  // check
  targetCheck->copyFrom(*targetGpu);
  MatrixCheckErr(*target, *targetCheck);

  // backward
  MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
  MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);

  MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
  MatrixPtr targetGpuGrad =
      GpuMatrix::create(numSamples, outWidth, false, true);
  MatrixPtr targetCheckGrad =
      CpuMatrix::create(numSamples, inWidth, false, false);

  inputGrad->randomizeUniform();
  targetGrad->randomizeUniform();
  inputGpuGrad->copyFrom(*inputGrad);
  targetGpuGrad->copyFrom(*targetGrad);

  inputGrad->bilinearBackward(*targetGrad,
                              2 * imgSizeH,
                              2 * imgSizeW,
                              imgSizeH,
                              imgSizeW,
                              channels,
                              ratioH,
                              ratioW);
  inputGpuGrad->bilinearBackward(*targetGpuGrad,
                                 2 * imgSizeH,
                                 2 * imgSizeW,
                                 imgSizeH,
                                 imgSizeW,
                                 channels,
                                 ratioH,
                                 ratioW);

  // check
  targetCheckGrad->copyFrom(*inputGpuGrad);
  MatrixCheckErr(*inputGrad, *targetCheckGrad);
}
示例#6
0
// Runs doOnePriorBoxTest() for three parameter configurations on CPU and,
// unless PADDLE_ONLY_CPU is defined, repeats the same three on GPU against
// the same expected values.
TEST(Layer, priorBoxLayerFwd) {
  // The expected data below is laid out in groups of 8 values; the last four
  // of each group equal the `variance` vector in use for that case.
  const int kValuesPerPrior = 8;

  // Expected output, case 1: min and max size, no extra aspect ratio.
  real expectedCase1[] = {0.04, 0.04, 0.96, 0.96, 0.1, 0.1, 0.2, 0.2,
                          0,    0,    1,    1,    0.1, 0.1, 0.2, 0.2};
  // Expected output, case 2: 2x2 feature map, min size only.
  real expectedCase2[] = {0,     0,     0.595, 0.595, 0.1, 0.2, 0.2, 0.1,
                          0.405, 0,     1,     0.595, 0.1, 0.2, 0.2, 0.1,
                          0,     0.405, 0.595, 1,     0.1, 0.2, 0.2, 0.1,
                          0.405, 0.405, 1,     1,     0.1, 0.2, 0.2, 0.1};
  // Expected output, case 3: min size only, aspect ratio 2.
  real expectedCase3[] = {
      0.04,       0.04,       0.96,     0.96,     0.1, 0.2, 0.2, 0.1,
      0,          0.17473088, 1,        0.825269, 0.1, 0.2, 0.2, 0.1,
      0.17473088, 0,          0.825269, 1,        0.1, 0.2, 0.2, 0.1};

  vector<int> minSize;
  vector<int> maxSize;
  vector<real> aspectRatio;
  vector<real> variance;
  bool useGpu = false;

  minSize.push_back(276);
  maxSize.push_back(330);
  variance.push_back(0.1);
  variance.push_back(0.1);
  variance.push_back(0.2);
  variance.push_back(0.2);

  // Runs one check with the current parameter vectors and device flag; the
  // captures are by reference so later mutations are picked up.
  auto runCase = [&](size_t featureMapWidth,
                     size_t featureMapHeight,
                     size_t imageWidth,
                     size_t imageHeight,
                     MatrixPtr& expected) {
    doOnePriorBoxTest(featureMapWidth,
                      featureMapHeight,
                      imageWidth,
                      imageHeight,
                      minSize,
                      maxSize,
                      aspectRatio,
                      variance,
                      useGpu,
                      expected);
  };

  // CPU case 1.
  MatrixPtr result = Matrix::create(1, 2 * kValuesPerPrior, false, useGpu);
  result->setData(expectedCase1);
  runCase(1, 1, 300, 300, result);

  // CPU case 2.
  variance[1] = 0.2;
  variance[3] = 0.1;
  maxSize.pop_back();
  Matrix::resizeOrCreate(result, 1, 4 * kValuesPerPrior, false, useGpu);
  result->setData(expectedCase2);
  runCase(2, 2, 400, 400, result);

  // CPU case 3.
  aspectRatio.push_back(2);
  Matrix::resizeOrCreate(result, 1, 3 * kValuesPerPrior, false, useGpu);
  result->setData(expectedCase3);
  runCase(1, 1, 300, 300, result);

#ifndef PADDLE_ONLY_CPU
  // Restore the parameters used by case 1 before switching to GPU.
  variance[1] = 0.1;
  variance[3] = 0.2;
  maxSize.push_back(330);
  aspectRatio.pop_back();
  useGpu = true;

  // GPU case 1.
  MatrixPtr resultGpu = Matrix::create(1, 2 * kValuesPerPrior, false, useGpu);
  resultGpu->copyFrom(expectedCase1, 2 * kValuesPerPrior);
  runCase(1, 1, 300, 300, resultGpu);

  // GPU case 2.
  variance[1] = 0.2;
  variance[3] = 0.1;
  maxSize.pop_back();
  Matrix::resizeOrCreate(resultGpu, 1, 4 * kValuesPerPrior, false, useGpu);
  resultGpu->copyFrom(expectedCase2, 4 * kValuesPerPrior);
  runCase(2, 2, 400, 400, resultGpu);

  // GPU case 3.
  aspectRatio.push_back(2);
  Matrix::resizeOrCreate(resultGpu, 1, 3 * kValuesPerPrior, false, useGpu);
  resultGpu->copyFrom(expectedCase3, 3 * kValuesPerPrior);
  runCase(1, 1, 300, 300, resultGpu);
#endif
}