int test_image_process_bgr2bgr565() { // Blog: http://blog.csdn.net/fengbingchun/article/details/78995720 #ifdef __linux__ const std::string image_name{ "test_data/images/lena.png" }; #else const std::string image_name{ "E:/GitCode/CUDA_Test/test_data/images/lena.png" }; #endif cv::Mat mat = cv::imread(image_name, 1); CHECK(mat.data); const int width{ 1513 }, height{ 1473 }; cv::resize(mat, mat, cv::Size(width, height)); std::unique_ptr<unsigned char[]> data1(new unsigned char[width * height * 2]), data2(new unsigned char[width * height * 2]); float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds cv::Mat bgr565; cv::cvtColor(mat, bgr565, cv::COLOR_BGR2BGR565); CHECK(bgr2bgr565_cpu(mat.data, width, height, data1.get(), &elapsed_time1) == 0); CHECK(bgr2bgr565_gpu(mat.data, width, height, data2.get(), &elapsed_time2) == 0); fprintf(stdout, "image bgr to bgr565: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2); CHECK(compare_result(data1.get(), bgr565.data, width*height * 2) == 0); CHECK(compare_result(data1.get(), data2.get(), width*height*2) == 0); return 0; }
int test_layer_prior_vbox() { // Blog: http://blog.csdn.net/fengbingchun/article/details/77850422 std::vector<float> vec1{423.f, 245.f, 1333.f, 1444.f, 123.f, 23.f, 32.f, 66.f}; std::vector<float> vec2(vec1[6]); std::vector<float> vec3(4); int length = int(vec1[0] * vec1[1] * vec1[6] * 4 * 2); std::unique_ptr<float[]> data1(new float[length]), data2(new float[length]); std::for_each(data1.get(), data1.get() + length, [](float& n) {n = 0.f; }); std::for_each(data2.get(), data2.get() + length, [](float& n) {n = 0.f; }); generator_random_number(vec2.data(), vec2.size(), 10.f, 100.f); generator_random_number(vec3.data(), vec3.size(), 1.f, 10.f); float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds int ret = layer_prior_vbox_cpu(data1.get(), length, vec1, vec2, vec3, &elapsed_time1); if (ret != 0) PRINT_ERROR_INFO(layer_prior_vbox_cpu); ret = layer_prior_vbox_gpu(data2.get(), length, vec1, vec2, vec3, &elapsed_time2); if (ret != 0) PRINT_ERROR_INFO(layer_prior_vbox_gpu); compare_result(data1.get(), data2.get(), length); fprintf(stderr, "test layer prior vbox: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2); return 0; }
int test_layer_reverse() { // Blog: http://blog.csdn.net/fengbingchun/article/details/77160872 #ifdef __linux__ std::string image_name{ "test_data/images/lena.png" }; #else std::string image_name{ "E:/GitCode/CUDA_Test/test_data/images/lena.png" }; #endif cv::Mat matSrc = cv::imread(image_name); CHECK(matSrc.data); cv::cvtColor(matSrc, matSrc, CV_BGR2GRAY); const int width{ 1511 }, height{ 1473 }; const auto length = width * height; cv::resize(matSrc, matSrc, cv::Size(width, height)); cv::Mat matTmp1; matSrc.convertTo(matTmp1, CV_32FC1); float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds const std::vector<int> vec{ 5, 7}; std::unique_ptr<float[]> dst1(new float[length]), dst2(new float[length]); std::for_each(dst1.get(), dst1.get() + length, [](float& n) {n = 0.f; }); std::for_each(dst2.get(), dst2.get() + length, [](float& n) {n = 0.f; }); int ret = layer_reverse_cpu((float*)matTmp1.data, dst1.get(), length, vec, &elapsed_time1); if (ret != 0) PRINT_ERROR_INFO(image_reverse_cpu); ret = layer_reverse_gpu((float*)matTmp1.data, dst2.get(), length, vec, &elapsed_time2); if (ret != 0) PRINT_ERROR_INFO(image_reverse_gpu); compare_result(dst1.get(), dst2.get(), length); cv::Mat matTmp2(height, width, CV_32FC1, dst2.get()), matDst; matTmp2.convertTo(matDst, CV_8UC1); #ifdef __linux__ save_image(matSrc, matDst, 400, 200, "test_data/images/image_reverse.png"); #else save_image(matSrc, matDst, 400, 200, "E:/GitCode/CUDA_Test/test_data/images/image_reverse.png"); #endif fprintf(stderr, "test layer reverse: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2); return 0; }
int test_image_process_laplacian() { // Blog: http://blog.csdn.net/fengbingchun/article/details/79321200 #ifdef __linux__ cv::Mat src = cv::imread("test_data/images/lena.png", 0); #else cv::Mat src = cv::imread("E:/GitCode/CUDA_Test/test_data/images/lena.png", 0); #endif if (!src.data || src.channels() != 1) { fprintf(stderr, "read image fail\n"); return -1; } int width{ 400 }, height{ 400 }; cv::resize(src, src, cv::Size(width, height)); std::unique_ptr<unsigned char[]> data1(new unsigned char[width * height]), data2(new unsigned char[width * height]); float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds int ksize{ 1 }; CHECK(laplacian_cpu(src.data, width, height, ksize, data1.get(), &elapsed_time1) == 0); //CHECK(laplacian_gpu(src.data, width, height, data2.get(), &elapsed_time2) == 0); //fprintf(stdout, "gray image edge detection: laplacian: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2); cv::Mat dst; cv::Laplacian(src, dst, src.depth(), ksize); #ifdef __linux__ cv::imwrite("test_data/images/laplacian.png", dst); #else cv::imwrite("E:/GitCode/CUDA_Test/test_data/images/laplacian.png", dst); #endif CHECK(compare_result(data1.get(), dst.data, width*height) == 0); //CHECK(compare_result(data1.get(), data2.get(), width*height) == 0); #ifdef __linux__ save_image(src, dst, width, height / 2, "test_data/images/laplacian_result.png"); #else save_image(src, dst, width, height / 2, "E:/GitCode/CUDA_Test/test_data/images/laplacian_result.png"); #endif return 0; }
int test_image_process_histogram_equalization() { // Blog: http://blog.csdn.net/fengbingchun/article/details/79188021 #ifdef __linux__ const std::string image_name{ "test_data/images/lena.png" }; #else const std::string image_name{ "E:/GitCode/CUDA_Test/test_data/images/lena.png" }; #endif cv::Mat mat = cv::imread(image_name, 0); CHECK(mat.data); const int width{ mat.cols/*1513*/ }, height{ mat.rows/*1473*/ }; cv::resize(mat, mat, cv::Size(width, height)); std::unique_ptr<unsigned char[]> data1(new unsigned char[width * height]), data2(new unsigned char[width * height]); float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds CHECK(histogram_equalization_cpu(mat.data, width, height, data1.get(), &elapsed_time1) == 0); //CHECK(histogram_equalization_gpu(mat.data, width, height, data2.get(), &elapsed_time2) == 0); //fprintf(stdout, "image histogram equalization: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2); cv::Mat dst; cv::equalizeHist(mat, dst); #ifdef __linux__ cv::imwrite("test_data/images/histogram_equalization.png", dst); #else cv::imwrite("E:/GitCode/CUDA_Test/test_data/images/histogram_equalization.png", dst); #endif CHECK(compare_result(data1.get(), dst.data, width*height) == 0); //CHECK(compare_result(data1.get(), data2.get(), width*height) == 0); #ifdef __linux__ save_image(mat, dst, width, height/2, "test_data/images/histogram_equalization_result.png"); #else save_image(mat, dst, width, height/2, "E:/GitCode/CUDA_Test/test_data/images/histogram_equalization_result.png"); #endif return 0; }
int test_image_process_bgr2gray() { // Blog: http://blog.csdn.net/fengbingchun/article/details/78821765 #ifdef __linux__ const std::string image_name{ "test_data/images/lena.png" }; #else const std::string image_name{ "E:/GitCode/CUDA_Test/test_data/images/lena.png" }; #endif cv::Mat mat = cv::imread(image_name); CHECK(mat.data); const int width{ 1513 }, height{ 1473 }; cv::resize(mat, mat, cv::Size(width, height)); std::unique_ptr<unsigned char[]> data1(new unsigned char[width * height]), data2(new unsigned char[width * height]); float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds CHECK(bgr2gray_cpu(mat.data, width, height, data1.get(), &elapsed_time1) == 0); CHECK(bgr2gray_gpu(mat.data, width, height, data2.get(), &elapsed_time2) == 0); cv::Mat dst(height, width, CV_8UC1, data1.get()); #ifdef __linux__ cv::imwrite("test_data/images/bgr2gray_cpu.png", dst); #else cv::imwrite("E:/GitCode/CUDA_Test/test_data/images/bgr2gray_cpu.png", dst); #endif cv::Mat dst2(height, width, CV_8UC1, data2.get()); #ifdef __linux__ cv::imwrite("test_data/images/bgr2gray_gpu.png", dst2); #else cv::imwrite("E:/GitCode/CUDA_Test/test_data/images/bgr2gray_gpu.png", dst2); #endif fprintf(stdout, "image bgr to gray: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2); CHECK(compare_result(data1.get(), data2.get(), width*height) == 0); return 0; }
// Test driver: applies a Dummy_t sketch transform to a distributed sparse
// n x m matrix A, both column-wise (n -> n_s rows) and row-wise (m -> m_s
// columns), for several output matrix types, and checks each sketched result
// against an explicitly computed product of the sketch matrix and A.
//
// argc/argv are forwarded to El::Initialize and to the boost::mpi environment.
// Returns 0 when the end of the function is reached; mismatches are reported
// by compare_result / BOOST_FAIL (both defined outside this block).
//
// NOTE(review): no El::Finalize() call is visible in this function - confirm
// whether finalization happens in the caller.
int test_main(int argc, char *argv[]) {

    //////////////////////////////////////////////////////////////////////////
    //[> Parameters <]

    //FIXME: use random sizes?
    // A is n x m; column-wise sketches have n_s rows, row-wise sketches have
    // m_s columns.
    const size_t n   = 100;
    const size_t m   = 50;
    const size_t n_s = 60;
    const size_t m_s = 30;

    //////////////////////////////////////////////////////////////////////////
    //[> Setup test <]
    namespace mpi = boost::mpi;

    El::Initialize(argc, argv);

    mpi::environment env(argc, argv);
    mpi::communicator world;

    MPI_Comm mpi_world(world);
    El::Grid grid(mpi_world);

    const size_t rank = world.rank();

    // Context constructed with 0; it is shared by every Dummy_t created below.
    skylark::base::context_t context (0);

    // Build A from (row, col, value) triples: entry i goes to row i / m,
    // column i % m, with values 1, 2, ..., n * m.
    double count = 1.0;

    const size_t matrix_full = n * m;
    mpi_vector_t colsf(matrix_full);
    mpi_vector_t rowsf(matrix_full);
    mpi_vector_t valsf(matrix_full);

    for(size_t i = 0; i < matrix_full; ++i) {
        colsf.SetElement(i, i % m);
        rowsf.SetElement(i, i / m);
        valsf.SetElement(i, count);
        count++;
    }

    DistMatrixType A(n, m, rowsf, colsf, valsf);
    // Default-constructed vector, passed three times below to create the
    // pi_sketch matrices without explicit entries.
    mpi_vector_t zero;

    // Local dense copy of A, filled with the same 1, 2, ... sequence (rows in
    // the outer loop, columns in the inner loop); used by the rank-0 checks.
    count = 1.0;
    El::Matrix<double> local_A(n, m);
    for( size_t j = 0; j < local_A.Height(); j++ )
        for( size_t i = 0; i < local_A.Width(); i++ )
            local_A.Set(j, i, count++);

    // [STAR, STAR] matrix that each distributed sketch result is assigned to
    // before it is compared.
    El::DistMatrix<double, El::STAR, El::STAR> result(grid);

    // columnwise application
    DistMatrixType expected_A;
    DistMatrixType pi_sketch(n_s, n, zero, zero, zero);

    // rowwise application
    DistMatrixType expected_AR;
    DistMatrixType pi_sketch_r(m_s, m, zero, zero, zero);

    //////////////////////////////////////////////////////////////////////////
    //[> Column wise application DistSparseMatrix -> DistMatrix[MC/MR] <]

    typedef El::DistMatrix<double> mcmr_target_t;

    //[> 1. Create the sketching matrix <]
    Dummy_t<DistMatrixType, mcmr_target_t> Sparse(n, n_s, context);

    //[> 2. Create space for the sketched matrix <]
    mcmr_target_t sketch_A(n_s, m, grid);
    El::Zero(sketch_A);

    //[> 3. Apply the transform <]
    Sparse.apply(A, sketch_A, skylark::sketch::columnwise_tag());

    //[> 4. Build structure to compare <]
    // easier to check if all processors own result
    result = sketch_A;

    // Expected value: the explicit sketch matrix multiplied by A.
    // (compute_sketch_matrix and Mult_AnXBn_Synch are defined elsewhere;
    // presumably they fill pi_sketch from the transform and multiply the two
    // sparse matrices - confirm against their definitions.)
    compute_sketch_matrix(Sparse, A, pi_sketch);
    expected_A = Mult_AnXBn_Synch<PTDD, double, col_t>(
            pi_sketch, A, false, false);

    compare_result(rank, expected_A, result);

    //////////////////////////////////////////////////////////////////////////
    //[> Column wise application DistSparseMatrix -> DistMatrix[VC/*] <]

    typedef El::DistMatrix<double, El::VC, El::STAR> vcs_target_t;

    //[> 1. Create the sketching matrix <]
    Dummy_t<DistMatrixType, vcs_target_t> SparseVC(n, n_s, context);

    //[> 2. Create space for the sketched matrix <]
    vcs_target_t sketch_A_vcs(n_s, m, grid);
    El::Zero(sketch_A_vcs);

    //[> 3. Apply the transform <]
    SparseVC.apply(A, sketch_A_vcs, skylark::sketch::columnwise_tag());

    //[> 4. Build structure to compare <]
    // easier to check if all processors own result
    result = sketch_A_vcs;

    compute_sketch_matrix(SparseVC, A, pi_sketch);
    expected_A = Mult_AnXBn_Synch<PTDD, double, col_t>(
            pi_sketch, A, false, false);

    compare_result(rank, expected_A, result);

    //////////////////////////////////////////////////////////////////////////
    //[> Column wise application DistSparseMatrix -> DistMatrix[*/VR] <]

    typedef El::DistMatrix<double, El::STAR, El::VR> svr_target_t;

    //[> 1. Create the sketching matrix <]
    Dummy_t<DistMatrixType, svr_target_t> SparseVR(n, n_s, context);

    //[> 2. Create space for the sketched matrix <]
    svr_target_t sketch_A_svr(n_s, m, grid);
    El::Zero(sketch_A_svr);

    //[> 3. Apply the transform <]
    SparseVR.apply(A, sketch_A_svr, skylark::sketch::columnwise_tag());

    //[> 4. Build structure to compare <]
    // easier to check if all processors own result
    result = sketch_A_svr;

    compute_sketch_matrix(SparseVR, A, pi_sketch);
    expected_A = Mult_AnXBn_Synch<PTDD, double, col_t>(
            pi_sketch, A, false, false);

    compare_result(rank, expected_A, result);

    //////////////////////////////////////////////////////////////////////////
    //[> Column wise application DistSparseMatrix -> DistMatrix[*/*] <]

    typedef El::DistMatrix<double, El::STAR, El::STAR> st_target_t;

    //[> 1. Create the sketching matrix <]
    Dummy_t<DistMatrixType, st_target_t> SparseST(n, n_s, context);

    //[> 2. Create space for the sketched matrix <]
    st_target_t sketch_A_st(n_s, m, grid);
    El::Zero(sketch_A_st);

    //[> 3. Apply the transform <]
    SparseST.apply(A, sketch_A_st, skylark::sketch::columnwise_tag());

    //[> 4. Compare <]
    // The target already has the [STAR, STAR] type of `result`, so it is
    // passed to compare_result directly.
    compute_sketch_matrix(SparseST, A, pi_sketch);
    expected_A = Mult_AnXBn_Synch<PTDD, double, col_t>(
            pi_sketch, A, false, false);

    compare_result(rank, expected_A, sketch_A_st);

    //////////////////////////////////////////////////////////////////////////
    //[> Column wise application DistSparseMatrix -> LocalDenseMatrix <]

    Dummy_t<DistMatrixType, El::Matrix<double>> LocalSparse(n, n_s, context);
    El::Matrix<double> local_sketch_A(n_s, m);
    El::Zero(local_sketch_A);
    LocalSparse.apply(A, local_sketch_A, skylark::sketch::columnwise_tag());

    El::Matrix<double> pi_sketch_l(n_s, n);
    El::Zero(pi_sketch_l);
    El::Matrix<double> expected_A_l(n_s, m);
    El::Zero(expected_A_l);

    // The local result is only verified on rank 0.
    if(rank == 0) {
        // PI generated by random number gen
        std::vector<size_t> row_idx = LocalSparse.getRowIdx();
        std::vector<double> row_val = LocalSparse.getRowValues();

        int sketch_size = row_val.size();
        // NOTE(review): `coords` is declared but not used in this block.
        typename LocalMatrixType::coords_t coords;

        // Dense sketch matrix: entry i of the transform is placed at
        // (row_idx[i], i).
        for(int i = 0; i < sketch_size; ++i)
            pi_sketch_l.Set(row_idx[i], i, row_val[i]);

        // expected_A_l = pi_sketch_l * local_A
        El::Gemm(El::NORMAL, El::NORMAL, 1.0, pi_sketch_l, local_A,
                 0.0, expected_A_l);

        // Entries are compared for exact equality.
        for(int col = 0; col < expected_A_l.Width(); col++) {
            for(int row = 0; row < expected_A_l.Height(); row++) {
                if(local_sketch_A.Get(row, col) !=
                        expected_A_l.Get(row, col))
                    BOOST_FAIL("Result of local colwise application not as expected");
            }
        }
    }

    //////////////////////////////////////////////////////////////////////////
    //[> Row wise application DistSparseMatrix -> DistMatrix[MC/MR] <]

    //[> 1. Create the sketching matrix <]
    Dummy_t<DistMatrixType, mcmr_target_t> Sparse_r(m, m_s, context);

    //[> 2. Create space for the sketched matrix <]
    mcmr_target_t sketch_A_r(n, m_s, grid);
    El::Zero(sketch_A_r);

    //[> 3. Apply the transform <]
    Sparse_r.apply(A, sketch_A_r, skylark::sketch::rowwise_tag());

    //[> 4. Build structure to compare <]
    // easier to check if all processors own result
    result = sketch_A_r;

    // Row-wise expected value is A times the transposed sketch matrix;
    // pi_sketch_r is left transposed after this section.
    compute_sketch_matrix(Sparse_r, A, pi_sketch_r);
    pi_sketch_r.Transpose();
    expected_AR = Mult_AnXBn_Synch<PTDD, double, col_t>(
            A, pi_sketch_r, false, false);

    compare_result(rank, expected_AR, result);

    //////////////////////////////////////////////////////////////////////////
    //[> Row wise application DistSparseMatrix -> DistMatrix[VC/*] <]

    //[> 1. Create the sketching matrix <]
    Dummy_t<DistMatrixType, vcs_target_t> Sparse_r_vcs(m, m_s, context);

    //[> 2. Create space for the sketched matrix <]
    vcs_target_t sketch_A_r_vcs(n, m_s, grid);
    El::Zero(sketch_A_r_vcs);

    //[> 3. Apply the transform <]
    Sparse_r_vcs.apply(A, sketch_A_r_vcs, skylark::sketch::rowwise_tag());

    //[> 4. Build structure to compare <]
    // easier to check if all processors own result
    result = sketch_A_r_vcs;

    // pi_sketch_r was left transposed by the previous section; this first
    // Transpose() presumably restores its m_s x m orientation before it is
    // recomputed, and the second one transposes it again for the product.
    pi_sketch_r.Transpose();
    compute_sketch_matrix(Sparse_r_vcs, A, pi_sketch_r);
    pi_sketch_r.Transpose();
    expected_AR = Mult_AnXBn_Synch<PTDD, double, col_t>(
            A, pi_sketch_r, false, false);

    compare_result(rank, expected_AR, result);

    //////////////////////////////////////////////////////////////////////////
    //[> Row wise application DistSparseMatrix -> LocalDenseMatrix <]

    Dummy_t<DistMatrixType, El::Matrix<double>> LocalSparse_r(m, m_s, context);
    El::Matrix<double> local_sketch_A_r(n, m_s);
    El::Zero(local_sketch_A_r);
    LocalSparse_r.apply(A, local_sketch_A_r, skylark::sketch::rowwise_tag());

    El::Matrix<double> local_pi_sketch_r(m_s, m);
    El::Zero(local_pi_sketch_r);
    El::Matrix<double> expected_A_r(n, m_s);
    El::Zero(expected_A_r);

    // The local result is only verified on rank 0.
    if(rank == 0) {
        // PI generated by random number gen
        std::vector<size_t> row_idx = LocalSparse_r.getRowIdx();
        std::vector<double> row_val = LocalSparse_r.getRowValues();

        int sketch_size = row_val.size();
        // NOTE(review): `coords` is declared but not used in this block.
        typename LocalMatrixType::coords_t coords;

        for(int i = 0; i < sketch_size; ++i)
            local_pi_sketch_r.Set(row_idx[i], i, row_val[i]);

        // expected_A_r = local_A * local_pi_sketch_r^T
        El::Gemm(El::NORMAL, El::TRANSPOSE, 1.0, local_A, local_pi_sketch_r,
                 0.0, expected_A_r);

        // Entries are compared for exact equality.
        for(int col = 0; col < expected_A_r.Width(); col++) {
            for(int row = 0; row < expected_A_r.Height(); row++) {
                if(local_sketch_A_r.Get(row, col) !=
                        expected_A_r.Get(row, col))
                    BOOST_FAIL("Result of local rowwise application not as expected");
            }
        }
    }

    return 0;
}
int main (int argc, char **argv) { HKGDW_JPGENC_PARAM enc_param; HKGDW_JPGENC_IMAGE_PARAM image; YUV_FRAME frame; void* handle; char input_file_name[256]; char output_file_name[256]; char output_main_name[256]; int width; int height ; int quality; int frames = 0; int frame_num = 0; int size; int length; int i; FILE * input_file; FILE * output_file; FILE * ref_file; int tmp_time, tot_time = 0; unsigned __int64 start_count, end_count; int ret; if(argc <= 5) { printf("usage: inputfile outputfile width height quality frames\n", argv[0]); return 0; } // get parameters sprintf(input_file_name, "%s", argv[1]); sprintf(output_main_name, "%s", argv[2]); width = atoi(argv[3]); height = atoi(argv[4]); quality = atoi(argv[5]); if(argc > 6) frames = atoi(argv[6]); /* Open the input file. */ if ((input_file = fopen(input_file_name, "rb")) == NULL) { printf("can't open %s\n", argv[1]); return 0; } if ((ref_file = fopen("MARLBOR_std.jpg", "rb")) == NULL) { printf("cloud not open reffile.\n"); return 0; } if((width % 16) || (height % 16)) { printf("Unsupported image format x=%d,y=%d, must be a multiple of 16", width, height); return 0; } if(frames <= 0) { frames = 120; } printf("********************************************************************\n"); printf("* input file : %s\n", input_file_name); printf("* output file: %s\n", output_main_name); printf("* width : %d\n", width); printf("* height : %d\n", height); printf("* quality : %d\n", quality); printf("* frames : %d\n", frames); printf("********************************************************************\n"); printf("\n encode start...\n"); // init buffer size = width * height * 3 /2; if((frame.y = malloc(size)) == NULL) { printf("\nERROR! 
image buffer malloc failed!"); return 0; } frame.u = frame.y + width * height; frame.v = frame.y + width * height * 5 / 4; /*++++++++++++++++++++++++++++++++++++++++++++++++*/ enc_param.width = width; enc_param.height = height; enc_param.quality = quality; enc_param.insert_watermarker = 1; if(HKGDW_JPGENC_GetMemSize(&enc_param) != HIK_JPGENC_LIB_S_OK) { printf("\nJPGENC_GetMemSize ERROR!"); return 0; } if((enc_param.sdram_buf = malloc(enc_param.sdram_buf_size)) == NULL) { printf("\nERROR! sdram_buf malloc failed!"); return 0; } HKGDW_JPGENC_Create(&enc_param, &handle); /*+++++++++++++++++++++++++++++++++++++++++++++++++++++*/ image.frame = &frame; image.width = width; image.height = height; image.size = size; //HIK定义的水印格式为: 第0~4 byte为水印起始码,一般为0x494d5748(海康定义)或0x494d5755(用户定义) // 第5~6 byte为水印长度 image.watermarker_data = g_hik_watermark; image.watermarker_len = *(unsigned short*)(&g_hik_watermark[4]); image.comment_data = (unsigned char*)malloc(1024); //调试信息 image.comment_len = 1024; //调试信息长度,如果没有调试信息,长度一定要置0 for (i = 0; i < 100; i++) { image.comment_data[i] = i; } if((image.bitstream = malloc(size)) == NULL) { printf("\nERROR! stream_buffer malloc failed!"); return 0; } HKGDW_JPGENC_SetQuality(handle, quality); for(i = 0; i < 1; i ++) { if(!fread(frame.y, size, 1, input_file)) { printf("reach the end of the file.\n"); break; } if(!quality) HKGDW_JPGENC_SetQuality(handle, i%101); // compress one picture ReadTimestampCounter(&start_count); ret = HKGDW_JPGENC_Compress(handle, &image); if(ret != HIK_JPGENC_LIB_S_OK) { printf("\nERROR(0x%x)! jpeg_compress_data failed!", ret); break;; } ReadTimestampCounter(&end_count); tmp_time = (int)((end_count - start_count) / 2800); tot_time += tmp_time; /*************** change here for m-jpeg *************************/ length = image.length; // ouput one encoded picture sprintf(output_file_name, "out.jpg",output_main_name, i); if ((output_file = fopen(output_file_name, "wb")) == NULL) { printf("\nERROR! 
can't open %s\n", output_main_name); break; } compare_result(image.bitstream, length, ref_file); if(!fwrite(image.bitstream, length, 1, output_file)) { printf("\nERROR! output stream file failed!"); fclose(output_file); break; } /* After finish_compress, we can close the output file. */ fclose(output_file); /********************************************************/ frame_num ++; printf("\n [%d]_%s size = %d, time = %d us", i%101, output_file_name, length, tmp_time); } fclose(ref_file); fclose(input_file); free(frame.y); free(enc_param.sdram_buf); free(image.bitstream); free(image.comment_data); /* And we're done! */ if(frame_num > 0) { printf("\n总共压缩 %d 帧\n平均每帧 = %d us\n", frame_num, tot_time/frame_num); } printf("\n\n encode end...\n\n"); }