// Checks batchTranspose() against a transpose computed on the host.
// Every sample is stored as an ny x nx row-major image (element (row, col)
// lives at row * nx + col); the reference output holds the same sample with
// element (col, row) at col * ny + row.
TEST(MatrixBatchTransTest, test_batch_matrix_transpose) {
  const int nx = 100;
  const int ny = 50;
  const int numSamples = 50;
  const int sampleSize = nx * ny;

  // Last argument of Matrix::create selects the device-side variant.
  MatrixPtr hostInput = Matrix::create(numSamples, sampleSize, false, false);
  MatrixPtr deviceInput = Matrix::create(numSamples, sampleSize, false, true);
  MatrixPtr hostExpected =
      Matrix::create(numSamples, sampleSize, false, false);
  MatrixPtr deviceOutput =
      Matrix::create(numSamples, sampleSize, false, true);
  MatrixPtr hostActual = Matrix::create(numSamples, sampleSize, false, false);

  real* inputData = hostInput->getData();
  real* expectedData = hostExpected->getData();

  // Fill the host input and, in the same sweep, the reference transpose used
  // for error checking.
  for (int sample = 0; sample < numSamples; ++sample) {
    real* in = inputData + sample * sampleSize;
    real* out = expectedData + sample * sampleSize;
    for (int row = 0; row < ny; ++row) {
      for (int col = 0; col < nx; ++col) {
        in[row * nx + col] = row * nx + col;
        out[col * ny + row] = in[row * nx + col];
      }
    }
  }

  // Upload, transpose on the device, download, compare.
  deviceInput->copyFrom(*hostInput, HPPL_STREAM_DEFAULT);
  batchTranspose(
      deviceInput->getData(), deviceOutput->getData(), nx, ny, numSamples);
  hostActual->copyFrom(*deviceOutput, HPPL_STREAM_DEFAULT);

  checkMatrixEqual(hostExpected, hostActual);
}
// Copies a CSR sparse CPU matrix into a clone created with the GPU flag set,
// then multiplies both copies by the same dense matrix and expects equal
// products.
TEST(Matrix, CopySparseMatrixToGpuSparseMatrix) {
  const size_t kRows = 20;
  const size_t kCols = 10;
  const size_t kProductCols = 15;

  // Sparse operand: filled from a uniformly random dense CPU matrix.
  MatrixPtr sparseCpu(
      new CpuSparseMatrix(kRows, kCols, kRows * 2, FLOAT_VALUE, SPARSE_CSR));
  MatrixPtr denseSeed(new CpuMatrix(kRows, kCols));
  denseSeed->randomizeUniform();
  sparseCpu->copyFrom(*denseSeed, HPPL_STREAM_DEFAULT);

  // Copy on a dedicated stream and wait for it before using the result.
  MatrixPtr sparseGpu = sparseCpu->clone(kRows, kCols, true);
  hl_stream_t copyStream(HPPL_STREAM_3);
  sparseGpu->copyFrom(*sparseCpu, copyStream);
  hl_stream_synchronize(copyStream);

  // Dense right-hand side, identical on both devices.
  MatrixPtr rhsCpu(new CpuMatrix(kCols, kProductCols));
  rhsCpu->randomizeUniform();
  MatrixPtr rhsGpu(new GpuMatrix(kCols, kProductCols));
  rhsGpu->copyFrom(*rhsCpu);

  // product = 1.0 * sparse * rhs + 1.0 * product, starting from zero.
  MatrixPtr productCpu(new CpuMatrix(kRows, kProductCols));
  productCpu->zeroMem();
  productCpu->mul(*sparseCpu, *rhsCpu, 1.0, 1.0);

  MatrixPtr productGpu(new GpuMatrix(kRows, kProductCols));
  productGpu->zeroMem();
  productGpu->mul(*sparseGpu, *rhsGpu, 1.0, 1.0);

  checkMatrixEqual(productCpu, productGpu);
}
/**
 * Returns a host-readable pointer to the elements of `matrix`.
 *
 * - CPU matrix: the matrix's own buffer is returned directly.
 * - GPU matrix: the contents are copied into a CPU staging matrix with the
 *   same height, width and transpose flag, and the staging buffer is
 *   returned.
 *
 * Lifetime of the GPU-path result: the staging matrix must outlive this call,
 * otherwise the caller receives a pointer into a buffer that was released
 * when the local MatrixPtr went out of scope (assuming MatrixPtr is an owning
 * smart pointer, as its use with Matrix::create suggests). Staging copies are
 * therefore parked in a small per-thread ring. A returned pointer stays valid
 * until kStagingSlots further GPU-matrix calls have been made on the same
 * thread, which covers the usual "fetch two buffers, compare them" pattern.
 * Callers must not retain the pointer beyond that.
 *
 * @param matrix  matrix whose elements should be made readable on the host.
 * @return pointer to the first element; never owned by the caller.
 */
const real* getData(const Matrix& matrix) {
  if (!matrix.useGpu()) {
    return matrix.getData();
  }

  constexpr unsigned kStagingSlots = 4;
  static thread_local MatrixPtr stagingRing[kStagingSlots];
  static thread_local unsigned nextSlot = 0;

  // Claim the oldest slot; assigning to it drops the copy it held before.
  MatrixPtr& staging = stagingRing[nextSlot];
  nextSlot = (nextSlot + 1) % kStagingSlots;

  staging = Matrix::create(matrix.getHeight(),
                           matrix.getWidth(),
                           matrix.isTransposed(),
                           false /* useGpu */);
  staging->copyFrom(matrix);
  return staging->getData();
}
/**
 * Forward pass.
 *
 * Validates that input 0 is a (possibly nested) sequence of width-1 scores,
 * makes those scores available in CPU memory, sizes the output to one row per
 * sequence (or per sub-sequence for nested input) with beamSize_ columns, and
 * hands both buffers to kmaxScorePerSeq together with the matching start
 * positions.
 */
void KmaxSeqScoreLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& input = getInput(0);
  const MatrixPtr inputScore = getInputValue(0);

  CHECK(input.hasSeq() || input.hasSubseq())
      << "input of " << getName()
      << " must be a sequence or a nested sequence.";
  CHECK_EQ(input.value->getWidth(), 1UL)
      << "input of " << getName()
      << " are scores over a sequence or "
      << "a nested sequence, so its width must be 1.";

  if (!useGpu_) {
    scores_ = inputScore;
  } else {
    /*
     * Currently this layer only runs on CPU. If the rest of the model is
     * running on GPU, copy this layer's input from GPU to CPU first.
     */
    Matrix::resizeOrCreate(scores_,
                           inputScore->getHeight(),
                           1,
                           false /* trans */,
                           false /* useGpu */);
    scores_->copyFrom(*inputScore);
  }

  const bool isNested = input.hasSubseq();
  const auto outputRows =
      isNested ? input.getNumSubSequences() : input.getNumSequences();

  /*
   * TODO(caoying)
   * In PaddlePaddle, currently all matrices are of real number type, but the
   * output of this layer -- selected indices of the given sequence -- is
   * really integer data. Storing integer information in a real-valued matrix
   * is dangerous, since the real numbers will be converted to int types.
   */
  Matrix::resizeOrCreate(output_.value, outputRows, beamSize_, false, false);

  // one() followed by scaling with -1: presumably leaves every entry at -1
  // before kmaxScorePerSeq writes its results.
  output_.value->one();
  output_.value->mulScalar(-1.);

  const auto& startPositions = isNested ? input.subSequenceStartPositions
                                        : input.sequenceStartPositions;
  kmaxScorePerSeq(
      scores_->getData(), output_.value->getData(), startPositions);
}
void testBilinearFwdBwd(int numSamples, int imgSizeH, int imgSizeW, int channels) { int inWidth = imgSizeH * imgSizeW * channels; int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels; real ratioH = 0.5; real ratioW = 0.5; // forward MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); input->randomizeUniform(); inputGpu->copyFrom(*input); { // nvprof: GPU Proflier REGISTER_GPU_PROFILER("testBilinearFwdBwd"); target->bilinearForward(*input, imgSizeH, imgSizeW, 2 * imgSizeH, 2 * imgSizeW, channels, ratioH, ratioW); targetGpu->bilinearForward(*inputGpu, imgSizeH, imgSizeW, 2 * imgSizeH, 2 * imgSizeW, channels, ratioH, ratioW); } // check targetCheck->copyFrom(*targetGpu); MatrixCheckErr(*target, *targetCheck); // backward MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth, false, true); MatrixPtr targetCheckGrad = CpuMatrix::create(numSamples, inWidth, false, false); inputGrad->randomizeUniform(); targetGrad->randomizeUniform(); inputGpuGrad->copyFrom(*inputGrad); targetGpuGrad->copyFrom(*targetGrad); inputGrad->bilinearBackward(*targetGrad, 2 * imgSizeH, 2 * imgSizeW, imgSizeH, imgSizeW, channels, ratioH, ratioW); inputGpuGrad->bilinearBackward(*targetGpuGrad, 2 * imgSizeH, 2 * imgSizeW, imgSizeH, imgSizeW, channels, ratioH, ratioW); // check targetCheckGrad->copyFrom(*inputGpuGrad); MatrixCheckErr(*inputGrad, *targetCheckGrad); }
// Runs doOnePriorBoxTest on three configurations and compares against
// hand-written expected values, first on CPU and then (unless built with
// PADDLE_ONLY_CPU) on GPU.
//
// Each expected table is laid out with eight values per prior: four box
// values followed by the four variance values in effect for that case.
TEST(Layer, priorBoxLayerFwd) {
  const int kValuesPerPrior = 8;

  vector<int> minSize(1, 276);
  vector<int> maxSize(1, 330);
  vector<real> aspectRatio;
  const real initialVariance[] = {0.1, 0.1, 0.2, 0.2};
  vector<real> variance(initialVariance, initialVariance + 4);
  bool useGpu = false;

  // CPU case 1.
  MatrixPtr result;
  real expected1[] = {
      0.04, 0.04, 0.96, 0.96, 0.1, 0.1, 0.2, 0.2,
      0,    0,    1,    1,    0.1, 0.1, 0.2, 0.2};
  result = Matrix::create(1, 2 * kValuesPerPrior, false, useGpu);
  result->setData(expected1);
  doOnePriorBoxTest(/* feature_map_width */ 1,
                    /* feature_map_height */ 1,
                    /* image_width */ 300,
                    /* image_height */ 300,
                    minSize,
                    maxSize,
                    aspectRatio,
                    variance,
                    useGpu,
                    result);

  // CPU case 2: different variance, no max size, 2x2 feature map.
  variance[1] = 0.2;
  variance[3] = 0.1;
  maxSize.pop_back();
  real expected2[] = {
      0,     0,     0.595, 0.595, 0.1, 0.2, 0.2, 0.1,
      0.405, 0,     1,     0.595, 0.1, 0.2, 0.2, 0.1,
      0,     0.405, 0.595, 1,     0.1, 0.2, 0.2, 0.1,
      0.405, 0.405, 1,     1,     0.1, 0.2, 0.2, 0.1};
  Matrix::resizeOrCreate(result, 1, 4 * kValuesPerPrior, false, useGpu);
  result->setData(expected2);
  doOnePriorBoxTest(/* feature_map_width */ 2,
                    /* feature_map_height */ 2,
                    /* image_width */ 400,
                    /* image_height */ 400,
                    minSize,
                    maxSize,
                    aspectRatio,
                    variance,
                    useGpu,
                    result);

  // CPU case 3: adds an aspect ratio of 2.
  aspectRatio.push_back(2);
  real expected3[] = {
      0.04,       0.04,       0.96,     0.96,     0.1, 0.2, 0.2, 0.1,
      0,          0.17473088, 1,        0.825269, 0.1, 0.2, 0.2, 0.1,
      0.17473088, 0,          0.825269, 1,        0.1, 0.2, 0.2, 0.1};
  Matrix::resizeOrCreate(result, 1, 3 * kValuesPerPrior, false, useGpu);
  result->setData(expected3);
  doOnePriorBoxTest(/* feature_map_width */ 1,
                    /* feature_map_height */ 1,
                    /* image_width */ 300,
                    /* image_height */ 300,
                    minSize,
                    maxSize,
                    aspectRatio,
                    variance,
                    useGpu,
                    result);

#ifndef PADDLE_ONLY_CPU
  // Restore the input parameters to their case-1 values.
  variance[1] = 0.1;
  variance[3] = 0.2;
  maxSize.push_back(330);
  aspectRatio.pop_back();

  MatrixPtr resultGpu;
  useGpu = true;

  // GPU case 1.
  resultGpu = Matrix::create(1, 2 * kValuesPerPrior, false, useGpu);
  resultGpu->copyFrom(expected1, 2 * kValuesPerPrior);
  doOnePriorBoxTest(/* feature_map_width */ 1,
                    /* feature_map_height */ 1,
                    /* image_width */ 300,
                    /* image_height */ 300,
                    minSize,
                    maxSize,
                    aspectRatio,
                    variance,
                    useGpu,
                    resultGpu);

  // GPU case 2.
  variance[1] = 0.2;
  variance[3] = 0.1;
  maxSize.pop_back();
  Matrix::resizeOrCreate(resultGpu, 1, 4 * kValuesPerPrior, false, useGpu);
  resultGpu->copyFrom(expected2, 4 * kValuesPerPrior);
  doOnePriorBoxTest(/* feature_map_width */ 2,
                    /* feature_map_height */ 2,
                    /* image_width */ 400,
                    /* image_height */ 400,
                    minSize,
                    maxSize,
                    aspectRatio,
                    variance,
                    useGpu,
                    resultGpu);

  // GPU case 3.
  aspectRatio.push_back(2);
  Matrix::resizeOrCreate(resultGpu, 1, 3 * kValuesPerPrior, false, useGpu);
  resultGpu->copyFrom(expected3, 3 * kValuesPerPrior);
  doOnePriorBoxTest(/* feature_map_width */ 1,
                    /* feature_map_height */ 1,
                    /* image_width */ 300,
                    /* image_height */ 300,
                    minSize,
                    maxSize,
                    aspectRatio,
                    variance,
                    useGpu,
                    resultGpu);
#endif
}