// Extracts one row of a sparse Argument: the non-zero count, the column
// indices, and (for VECTOR_SPARSE_VALUE slots) the values, reading from the
// GPU or CPU matrix depending on useGpu.
void getColRow(const Argument& arg, int64_t pos, bool useGpu, int* colNum,
               const int** rowCols, const real** rowValues) {
  SlotDef::SlotType type = getSlotType(arg);
  GpuSparseMatrixPtr matGpu;
  CpuSparseMatrixPtr matCpu;
  if (useGpu) {
    matGpu = dynamic_pointer_cast<GpuSparseMatrix>(arg.value);
    ASSERT_TRUE(matGpu != nullptr);
  } else {
    matCpu = dynamic_pointer_cast<CpuSparseMatrix>(arg.value);
    ASSERT_TRUE(matCpu != nullptr);
  }
  *colNum = useGpu ? matGpu->getColNum(pos) : matCpu->getColNum(pos);
  *rowCols = useGpu ? matGpu->getRowCols(pos) : matCpu->getRowCols(pos);
  if (type == SlotDef::VECTOR_SPARSE_VALUE) {
    *rowValues = useGpu ? matGpu->getRowValues(pos) : matCpu->getRowValues(pos);
  } else {
    *rowValues = nullptr;
  }
}
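
// A hypothetical usage sketch (checkRowEqual, arg1, arg2 are illustrative
// names, not from the original source): comparing the same sparse row across
// two Arguments, e.g. a CPU result against its GPU counterpart.
void checkRowEqual(const Argument& arg1, bool useGpu1, const Argument& arg2,
                   bool useGpu2, int64_t pos) {
  int colNum1 = 0, colNum2 = 0;
  const int *rowCols1 = nullptr, *rowCols2 = nullptr;
  const real *rowValues1 = nullptr, *rowValues2 = nullptr;
  getColRow(arg1, pos, useGpu1, &colNum1, &rowCols1, &rowValues1);
  getColRow(arg2, pos, useGpu2, &colNum2, &rowCols2, &rowValues2);
  ASSERT_EQ(colNum1, colNum2);
  for (int i = 0; i < colNum1; ++i) {
    EXPECT_EQ(rowCols1[i], rowCols2[i]);
    if (rowValues1 != nullptr && rowValues2 != nullptr) {
      EXPECT_FLOAT_EQ(rowValues1[i], rowValues2[i]);
    }
  }
}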
  // Builds the positive (ground-truth) and sampled negative training pairs
  // for the current batch; labels may come as ids or as a multi-label
  // sparse matrix.
  void prepareSamples() {
    CHECK(!useGpu_) << "GPU is not supported";

    int batchSize = getInput(*labelLayer_).getBatchSize();
    IVectorPtr label = getInput(*labelLayer_).ids;

    CpuSparseMatrixPtr multiLabel = std::dynamic_pointer_cast<CpuSparseMatrix>(
        getInput(*labelLayer_).value);

    CHECK(label || multiLabel)
        << "The label layer must have ids or NonValueSparseMatrix value";

    auto& randEngine = ThreadLocalRandomEngine::get();

    samples_.clear();
    samples_.reserve(batchSize * (1 + config_.num_neg_samples()));

    real* weight =
        weightLayer_ ? getInputValue(*weightLayer_)->getData() : nullptr;

    for (int i = 0; i < batchSize; ++i) {
      real w = weight ? weight[i] : 1;
      if (label) {
        int* ids = label->getData();
        samples_.push_back({i, ids[i], true, w});
      } else {
        const int* cols = multiLabel->getRowCols(i);
        int n = multiLabel->getColNum(i);
        for (int j = 0; j < n; ++j) {
          samples_.push_back({i, cols[j], true, w});
        }
      }
      for (int j = 0; j < config_.num_neg_samples(); ++j) {
        int id = sampler_ ? sampler_->gen(randEngine) : rand_(randEngine);
        samples_.push_back({i, id, false, w});
      }
    }
    prepared_ = true;
  }
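
  // The brace-initializers above suggest a POD record along these lines; a
  // sketch inferred from usage, not necessarily the layer's exact definition:
  struct Sample {
    int sampleId;  // index of the example within the batch
    int labelId;   // class id: a true label or a negative draw
    bool target;   // true for ground-truth entries, false for negatives
    real weight;   // per-example weight, 1 when no weight layer is given
  };
  std::vector<Sample> samples_;  // filled by prepareSamples()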
TEST(Matrix, CpuSparseMatrixCopyFrom) {
  size_t height = 10;
  size_t width = 8;
  int64_t indices[11] = {0, 1, 5, 5, 9, 13, 15, 17, 19, 30, 32};
  sparse_non_value_t data[32];
  for (size_t i = 0; i < 32; i++) {
    data[i].col = ::rand() % width;
  }
  CpuSparseMatrixPtr mat = std::make_shared<CpuSparseMatrix>(
      height, width, 32, NO_VALUE, SPARSE_CSR, false);
  mat->copyFrom(indices, data);

  /*compare indices*/
  size_t sum = 0;
  CHECK_EQ(sum, size_t(mat->getRows()[0]));
  for (size_t i = 1; i < height + 1; i++) {
    sum += indices[i] - indices[i - 1];
    CHECK_EQ(sum, size_t(mat->getRows()[i]));
  }
  CHECK_EQ(mat->getElementCnt(), size_t(indices[height] - indices[0]));
  for (size_t i = 0; i < mat->getElementCnt(); i++) {
    CHECK_EQ(size_t(mat->getCols()[i]), size_t(data[i].col));
  }
}
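
// What the loop above verifies, as a standalone sketch: after copyFrom, the
// matrix's row array should equal the prefix sums of the per-row counts
// implied by `indices` (i.e. indices rebased to start at zero). Hypothetical
// helper, not part of the test suite; requires <vector> and <cstdint>.
std::vector<int> expectedRowOffsets(const int64_t* indices, size_t height) {
  std::vector<int> rows(height + 1, 0);
  for (size_t i = 1; i <= height; ++i) {
    rows[i] = rows[i - 1] + static_cast<int>(indices[i] - indices[i - 1]);
  }
  return rows;
}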
// Packs each sample's candidate column ids into one CSR selection matrix
// (cpuSelCols_), then shares or copies it to selCols_ depending on useGpu_.
void paddle::SelectiveFullyConnectedLayer::fillSelectiveData(
    const std::shared_ptr<std::vector<std::pair<int*, size_t>>>& candidates) {
  if (candidates == nullptr) {
    fillFullySelectiveData();
    return;
  }

  size_t sampleNum = candidates->size();
  size_t outputWidth = getSize();
  size_t nnz =
      std::accumulate(candidates->begin(),
                      candidates->end(),
                      0UL,
                      [](size_t a, const std::pair<int*, size_t>& arr) {
                        return a + arr.second;
                      });

  Matrix::resizeOrCreateSparseMatrix(this->cpuSelCols_,
                                     sampleNum,
                                     outputWidth,
                                     nnz,
                                     NO_VALUE,
                                     SPARSE_CSR,
                                     false,
                                     false);
  CHECK(this->cpuSelCols_ != nullptr);
  CpuSparseMatrixPtr selCols =
      std::dynamic_pointer_cast<CpuSparseMatrix>(cpuSelCols_);
  int* rowOffsets = selCols->getRows();
  int* colIndices = selCols->getCols();

  rowOffsets[0] = 0;
  int idx = 0;
  for (size_t i = 0; i < sampleNum; ++i) {
    if ((*candidates)[i].second > 0) {
      rowOffsets[i + 1] = rowOffsets[i] + (*candidates)[i].second;
      for (size_t j = 0; j < (*candidates)[i].second; ++j) {
        colIndices[idx] = (*candidates)[i].first[j];
        idx++;
      }
    } else {
      rowOffsets[i + 1] = rowOffsets[i];
    }
  }

  CHECK_EQ(static_cast<size_t>(rowOffsets[sampleNum]), nnz);
  if (!useGpu_) {
    this->selCols_ = this->cpuSelCols_;
  } else {
    Matrix::resizeOrCreateSparseMatrix(this->selCols_,
                                       sampleNum,
                                       outputWidth,
                                       nnz,
                                       NO_VALUE,
                                       SPARSE_CSR,
                                       false,
                                       true);
    this->selCols_->copyFrom(*cpuSelCols_, HPPL_STREAM_1);
    hl_stream_synchronize(HPPL_STREAM_1);
  }

  fullOutput_ = false;
}
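
// A hypothetical call site (the identifiers selectCandidates and candidateIds
// are illustrative, not from the original source): each pair points at one
// sample's candidate column ids plus their count. Note the pairs hold raw
// pointers into candidateIds, which must outlive the call.
void selectCandidates(paddle::SelectiveFullyConnectedLayer& layer,
                      std::vector<std::vector<int>>& candidateIds) {
  auto candidates =
      std::make_shared<std::vector<std::pair<int*, size_t>>>();
  for (auto& row : candidateIds) {
    candidates->emplace_back(row.data(), row.size());
  }
  layer.fillSelectiveData(candidates);
}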
TEST(Matrix, SparseMatrixCSCFormatTrimFrom) {
  size_t height = 8;
  size_t width = 10;
  int indices[11] = {0, 1, 5, 5, 9, 13, 15, 17, 19, 27, 32};
  int value[32] = {
      1,                       // col_0 : 1
      5, 3, 1, 6,              // col_1 : 4
      0, 1, 2, 3,              // col_3 : 4
      4, 5, 6, 7,              // col_4 : 4
      2, 3,                    // col_5 : 2
      3, 5,                    // col_6 : 2
      0, 1,                    // col_7 : 2
      0, 1, 2, 3, 4, 5, 6, 7,  // col_8 : 8
      2, 4, 7, 3, 1            // col_9 : 5
  };
  std::vector<int> rows(value, value + 32);
  std::vector<int> cols(indices, indices + 11);
  std::vector<real> values(value, value + 32);
  CpuSparseMatrixPtr mat = std::make_shared<CpuSparseMatrix>(
      height, width, 32, FLOAT_VALUE, SPARSE_CSC, false);
  mat->copyFrom(rows, cols, values);

  /*compare indices*/
  size_t sum = 0;
  CHECK_EQ(sum, size_t(mat->getCols()[0]));
  for (size_t i = 1; i < width + 1; i++) {
    sum += indices[i] - indices[i - 1];
    CHECK_EQ(sum, size_t(mat->getCols()[i]));
  }
  CHECK_EQ(mat->getElementCnt(), size_t(indices[width] - indices[0]));
  for (size_t i = 0; i < mat->getElementCnt(); i++) {
    CHECK_EQ(size_t(mat->getRows()[i]), size_t(value[i]));
  }

  size_t trimedWidth = 5;
  int trimedIndices[6] = {0, 1, 5, 5, 9, 13};
  int trimedValue[13] = {
      1,           // col_0 : 1
      5, 3, 1, 6,  // col_1 : 4
      0, 1, 2, 3,  // col_3 : 4
      4, 5, 6, 7   // col_4 : 4
  };
  std::vector<int> rowsA(trimedValue, trimedValue + 13);
  std::vector<int> colsA(trimedIndices, trimedIndices + 6);
  std::vector<real> valuesA(trimedValue, trimedValue + 13);
  CpuSparseMatrixPtr matA = std::make_shared<CpuSparseMatrix>(
      height, trimedWidth, 13, FLOAT_VALUE, SPARSE_CSC, false);
  matA->copyFrom(rowsA, colsA, valuesA);

  /*compare indices*/
  sum = 0;
  CHECK_EQ(sum, size_t(matA->getCols()[0]));
  for (size_t i = 1; i < trimedWidth + 1; i++) {
    sum += trimedIndices[i] - trimedIndices[i - 1];
    CHECK_EQ(sum, size_t(matA->getCols()[i]));
  }
  CHECK_EQ(matA->getElementCnt(),
           size_t(trimedIndices[trimedWidth] - trimedIndices[0]));
  for (size_t i = 0; i < matA->getElementCnt(); i++) {
    CHECK_EQ(size_t(matA->getRows()[i]), size_t(rowsA[i]));
  }

  CpuSparseMatrixPtr matB = std::make_shared<CpuSparseMatrix>(
      height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSC, false);
  matB->trimFrom(*mat);
  checkSMatrixEqual2(matA, matB);

#ifndef PADDLE_ONLY_CPU
  GpuSparseMatrixPtr matC = std::make_shared<GpuSparseMatrix>(
      height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSC, true);
  matC->trimFrom(*mat);

  CpuSparseMatrixPtr matD =
      std::make_shared<CpuSparseMatrix>(height,
                                        trimedWidth,
                                        matC->getElementCnt(),
                                        FLOAT_VALUE,
                                        SPARSE_CSC,
                                        false);
  matD->copyFrom(*matC, HPPL_STREAM_DEFAULT);
  hl_stream_synchronize(HPPL_STREAM_DEFAULT);
  checkSMatrixEqual2(matA, matD);
#endif
}
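
// For CSC, trimming to the first trimedWidth columns is just a truncation of
// the three arrays at offset colOffsets[trimedWidth]. A standalone sketch of
// that idea (an assumption about what trimFrom computes, not its source; it
// reproduces trimedIndices/trimedValue above for this input):
void trimCscSketch(const int* colOffsets, const int* rowIdx, const real* vals,
                   size_t trimedWidth, std::vector<int>* outOffsets,
                   std::vector<int>* outRows, std::vector<real>* outVals) {
  int nnz = colOffsets[trimedWidth];
  outOffsets->assign(colOffsets, colOffsets + trimedWidth + 1);
  outRows->assign(rowIdx, rowIdx + nnz);
  outVals->assign(vals, vals + nnz);
}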
TEST(Matrix, SparseMatrixCSRFormatTrimFrom) {
  size_t height = 10;
  size_t width = 8;
  int64_t indices[11] = {0, 1, 5, 5, 9, 13, 15, 17, 19, 27, 32};
  sparse_float_value_t data[32];
  int value[32] = {
      1,                       // row_0 : 1
      5, 3, 1, 6,              // row_1 : 4
      0, 1, 2, 3,              // row_3 : 4
      4, 5, 6, 7,              // row_4 : 4
      2, 3,                    // row_5 : 2
      3, 5,                    // row_6 : 2
      0, 1,                    // row_7 : 2
      0, 1, 2, 3, 4, 5, 6, 7,  // row_8 : 8
      2, 4, 7, 3, 1            // row_9 : 5
  };
  for (size_t i = 0; i < 32; i++) {
    data[i].col = value[i];
    data[i].value = float(value[i]);
  }
  CpuSparseMatrixPtr mat = std::make_shared<CpuSparseMatrix>(
      height, width, 32, FLOAT_VALUE, SPARSE_CSR, false);
  mat->copyFrom(indices, data);

  /*compare indices*/
  size_t sum = 0;
  CHECK_EQ(sum, size_t(mat->getRows()[0]));
  for (size_t i = 1; i < height + 1; i++) {
    sum += indices[i] - indices[i - 1];
    CHECK_EQ(sum, size_t(mat->getRows()[i]));
  }
  CHECK_EQ(mat->getElementCnt(), size_t(indices[height] - indices[0]));
  for (size_t i = 0; i < mat->getElementCnt(); i++) {
    CHECK_EQ(size_t(mat->getCols()[i]), size_t(data[i].col));
  }

  size_t trimedWidth = 4;
  int64_t trimedIndices[11] = {0, 1, 3, 3, 7, 7, 9, 10, 12, 16, 19};
  sparse_float_value_t trimedData[19];
  int trimedValue[19] = {
      1,           // row_0 : 1
      3, 1,        // row_1 : 2
      0, 1, 2, 3,  // row_3 : 4
      2, 3,        // row_5 : 2
      3,           // row_6 : 1
      0, 1,        // row_7 : 2
      0, 1, 2, 3,  // row_8 : 4
      2, 3, 1      // row_9 : 3
  };
  for (size_t i = 0; i < 19; i++) {
    trimedData[i].col = trimedValue[i];
    trimedData[i].value = float(trimedValue[i]);
  }
  CpuSparseMatrixPtr matA = std::make_shared<CpuSparseMatrix>(
      height, trimedWidth, 19, FLOAT_VALUE, SPARSE_CSR, false);
  matA->copyFrom(trimedIndices, trimedData);

  /*compare indices*/
  sum = 0;
  CHECK_EQ(sum, size_t(matA->getRows()[0]));
  for (size_t i = 1; i < height + 1; i++) {
    sum += trimedIndices[i] - trimedIndices[i - 1];
    CHECK_EQ(sum, size_t(matA->getRows()[i]));
  }
  CHECK_EQ(matA->getElementCnt(),
           size_t(trimedIndices[height] - trimedIndices[0]));
  for (size_t i = 0; i < matA->getElementCnt(); i++) {
    CHECK_EQ(size_t(matA->getCols()[i]), size_t(trimedData[i].col));
  }

  CpuSparseMatrixPtr matB = std::make_shared<CpuSparseMatrix>(
      height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSR, false);
  matB->trimFrom(*mat);
  checkSMatrixEqual2(matA, matB);

#ifndef PADDLE_ONLY_CPU
  GpuSparseMatrixPtr matC = std::make_shared<GpuSparseMatrix>(
      height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSR, true);
  matC->trimFrom(*mat);

  CpuSparseMatrixPtr matD =
      std::make_shared<CpuSparseMatrix>(height,
                                        trimedWidth,
                                        matC->getElementCnt(),
                                        FLOAT_VALUE,
                                        SPARSE_CSR,
                                        false);
  matD->copyFrom(*matC, HPPL_STREAM_DEFAULT);
  hl_stream_synchronize(HPPL_STREAM_DEFAULT);
  checkSMatrixEqual2(matA, matD);
#endif
}
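
// Unlike the CSC case, trimming columns from a CSR matrix cannot be a simple
// truncation: each row must be filtered for cols[k] < trimedWidth and the row
// offsets rebuilt. A standalone sketch (an assumption about what trimFrom
// computes; it reproduces trimedIndices/trimedValue above for this input):
void trimCsrSketch(const int64_t* rowOffsets, const int* cols, size_t height,
                   size_t trimedWidth, std::vector<int64_t>* outOffsets,
                   std::vector<int>* outCols) {
  outOffsets->assign(1, 0);
  for (size_t r = 0; r < height; ++r) {
    for (int64_t k = rowOffsets[r]; k < rowOffsets[r + 1]; ++k) {
      if (size_t(cols[k]) < trimedWidth) outCols->push_back(cols[k]);
    }
    outOffsets->push_back(int64_t(outCols->size()));
  }
}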
TEST(Matrix, CpuSparseMatrixRandUniform) {
  const size_t HEIGHT = 5;
  const size_t WIDTH = 10;
  const size_t NNZ = HEIGHT * WIDTH;
  int* major = nullptr;
  int* minor = nullptr;
  size_t majorLen = 0;
  size_t minorLen = 0;
  size_t nnz = 0;
  for (auto valueType : {NO_VALUE, FLOAT_VALUE}) {
    for (auto format : {SPARSE_CSR, SPARSE_CSC}) {
      CpuSparseMatrixPtr matA = std::make_shared<CpuSparseMatrix>(
          HEIGHT, WIDTH, size_t(NNZ * 0.1), valueType, format);
      matA->randomizeUniform();
      nnz = matA->getElementCnt();
      if (format == SPARSE_CSR) {
        majorLen = matA->getHeight() + 1;
        minorLen = matA->getElementCnt();
        major = matA->getRows();
        minor = matA->getCols();
      } else {
        majorLen = matA->getWidth() + 1;
        minorLen = matA->getElementCnt();
        major = matA->getCols();
        minor = matA->getRows();
      }
      sparseValid(major, minor, nnz, majorLen, minorLen);
    }
  }
}
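
// sparseValid (defined elsewhere in this test file) presumably checks the
// basic index invariants. A hypothetical standalone version of such a check:
void sparseValidSketch(const int* major, const int* minor, size_t nnz,
                       size_t majorLen, size_t minorLen) {
  EXPECT_EQ(0, major[0]);  // offsets start at zero
  for (size_t i = 1; i < majorLen; i++) {
    EXPECT_LE(major[i - 1], major[i]);  // offsets are non-decreasing
  }
  EXPECT_EQ(nnz, size_t(major[majorLen - 1]));  // offsets end at nnz
  EXPECT_EQ(nnz, minorLen);  // one minor index per non-zero
  (void)minor;  // a fuller check would also bound each minor index
}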
TEST(Matrix, CpuSparseMatrixSubMatrix) {
  const size_t HEIGHT = 10;
  const size_t WIDTH = 10;
  const size_t NNZ = HEIGHT * WIDTH;
  for (auto valueType : {FLOAT_VALUE, NO_VALUE}) {
    size_t startRow = 3;
    size_t rowNum = 2;
    real sparseRate = 0.1;
    /*sparse matrix init and get subMatrix*/
    CpuSparseMatrixPtr matA = std::make_shared<CpuSparseMatrix>(
        HEIGHT, WIDTH, size_t(NNZ * sparseRate), valueType, SPARSE_CSR);
    matA->randomizeUniform();
    CpuSparseMatrixPtr matB = std::dynamic_pointer_cast<CpuSparseMatrix>(
        matA->subMatrix(startRow, rowNum));

    int start = matA->getRows()[startRow];
    int end = matA->getRows()[startRow + rowNum];

    /*compare two matrix*/
    ASSERT_EQ(matB->getElementCnt(), size_t(end - start));
    if (valueType == FLOAT_VALUE) {
      for (size_t i = 0; i < matB->getElementCnt(); i++) {
        ASSERT_FLOAT_EQ(matB->getValue()[start + i],
                        matA->getValue()[start + i]);
      }
    }

    for (size_t i = 0; i < matB->getElementCnt(); i++) {
      ASSERT_EQ(matB->getCols()[start + i], matA->getCols()[start + i]);
    }
    for (size_t i = 0; i < rowNum; i++) {
      ASSERT_EQ(matB->getRows()[i], matA->getRows()[startRow + i]);
    }
  }
}
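
// Note: the absolute indexing (start + i) into matB above works because
// subMatrix appears to return a view sharing matA's underlying arrays rather
// than a compacted copy; only the height and row-offset window change.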