void genRefs() { gen( TPointX, TPoint, x ); gen( TPointY, TPoint, y ); genx( TViewSizeX, TView, size ); geny( TViewSizeY, TView, size ); gen( TViewState, TView, state ); gen( TViewOwner, TView, owner ); geny( TViewOriginY, TView, origin ); genx( TViewOriginX, TView, origin ); geny( TViewCursorY, TView, cursor ); genx( TViewCursorX, TView, cursor ); gen( TViewNext, TView, next ); gen( TViewOptions, TView, options ); genay( TGroupClipAY, TGroup, clip ); genax( TGroupClipAX, TGroup, clip ); genby( TGroupClipBY, TGroup, clip ); genbx( TGroupClipBX, TGroup, clip ); gen( TGroupLast, TGroup, last ); gen( TGroupBuffer, TGroup, buffer ); gen( TGroupLockFlag, TGroup, lockFlag ); genx( MsEventWhereX, MouseEventType, where ); geny( MsEventWhereY, MouseEventType, where ); genx( TFrameSizeX, TFrame, size ); gen( TFrameOwner, TFrame, owner ); gen( TDrawBufferData, TDrawBuffer, data ); gen( TEditorCurPtr, TEditor, curPtr ); gen( TEditorGapLen, TEditor, gapLen ); gen( TEditorBuffer, TEditor, buffer ); gen( TEditorSelStart, TEditor, selStart ); gen( TEditorSelEnd, TEditor, selEnd ); gen( TEditorBufSize, TEditor, bufSize ); gen( TEditorBufLen, TEditor, bufLen ); gen( TTerminalBuffer, TTerminal, buffer ); gen( TTerminalBufSize, TTerminal, bufSize ); gen( TTerminalQueBack, TTerminal, queBack ); genConst( sfVisible ); genConst( sfCursorVis ); genConst( sfCursorIns ); genConst( sfFocused ); genConst( sfShadow ); genConst( sfExposed ); genConst( ofFramed ); }
void time_hog( const std::vector<carp::record_t>& pool, const std::vector<float>& sizes, int num_positions, int repeat ) { carp::Timing timing("HOG"); for (;repeat>0; --repeat) { for ( auto & size : sizes ) { for ( auto & item : pool ) { std::mt19937 rng(0); //uses same seed, reseed for all iteration cv::Mat cpu_gray; cv::cvtColor( item.cpuimg(), cpu_gray, CV_RGB2GRAY ); cv::Mat_<float> locations(num_positions, 2); cv::Mat_<float> blocksizes(num_positions, 2); size_t max_blocksize_x = std::ceil(size); size_t max_blocksize_y = std::ceil(size); //fill locations and blocksizes std::uniform_real_distribution<float> genx(size/2+1, cpu_gray.rows-1-size/2-1); std::uniform_real_distribution<float> geny(size/2+1, cpu_gray.cols-1-size/2-1); for( int i = 0; i < num_positions; ++i) { locations(i, 0) = genx(rng); locations(i, 1) = geny(rng); blocksizes(i, 0) = size; blocksizes(i, 1) = size; } const int HISTOGRAM_BINS = NUMBER_OF_CELLS * NUMBER_OF_CELLS * NUMBER_OF_BINS; std::vector<float> cpu_result(num_positions * HISTOGRAM_BINS), gpu_result(num_positions * HISTOGRAM_BINS), pen_result(num_positions * HISTOGRAM_BINS); std::chrono::duration<double> elapsed_time_cpu, elapsed_time_gpu_p_copy, elapsed_time_gpu_nocopy, elapsed_time_pencil; { //CPU implement static nel::HOGDescriptorCPP descriptor( NUMBER_OF_CELLS , NUMBER_OF_BINS , GAUSSIAN_WEIGHTS , SPARTIAL_WEIGHTS , SIGNED_HOG ); const auto cpu_start = std::chrono::high_resolution_clock::now(); const auto result = descriptor.compute(cpu_gray, locations, blocksizes); const auto cpu_end = std::chrono::high_resolution_clock::now(); std::copy(result.begin(), result.end(), cpu_result.begin()); elapsed_time_cpu = cpu_end - cpu_start; //Free up resources } { //GPU implement static nel::HOGDescriptorOCL descriptor( NUMBER_OF_CELLS , NUMBER_OF_BINS , GAUSSIAN_WEIGHTS , SPARTIAL_WEIGHTS , SIGNED_HOG ); const auto gpu_start = std::chrono::high_resolution_clock::now(); const auto result = descriptor.compute(cpu_gray, locations, blocksizes, max_blocksize_x, max_blocksize_y, elapsed_time_gpu_nocopy); const auto gpu_end = std::chrono::high_resolution_clock::now(); std::copy(result.begin(), result.end(), gpu_result.begin()); elapsed_time_gpu_p_copy = gpu_end - gpu_start; //Free up resources } { pen_result.resize(num_positions * HISTOGRAM_BINS, 0.0f); const auto pencil_start = std::chrono::high_resolution_clock::now(); pencil_hog( NUMBER_OF_CELLS, NUMBER_OF_BINS, GAUSSIAN_WEIGHTS, SPARTIAL_WEIGHTS, SIGNED_HOG , cpu_gray.rows, cpu_gray.cols, cpu_gray.step1(), cpu_gray.ptr<uint8_t>() , num_positions , reinterpret_cast<const float (*)[2]>(locations.data) , reinterpret_cast<const float (*)[2]>(blocksizes.data) , pen_result.data() ); const auto pencil_end = std::chrono::high_resolution_clock::now(); elapsed_time_pencil = pencil_end - pencil_start; //Free up resources } // Verifying the results if ( cv::norm( cpu_result, gpu_result, cv::NORM_INF) > cv::norm( gpu_result, cv::NORM_INF)*1e-5 || cv::norm( cpu_result, pen_result, cv::NORM_INF) > cv::norm( cpu_result, cv::NORM_INF)*1e-5 ) { std::vector<float> diff; std::transform(cpu_result.begin(), cpu_result.end(), gpu_result.begin(), std::back_inserter(diff), std::minus<float>()); std::cerr << "ERROR: Results don't match." << std::endl; std::cerr << "CPU norm:" << cv::norm(cpu_result, cv::NORM_INF) << std::endl; std::cerr << "GPU norm:" << cv::norm(gpu_result, cv::NORM_INF) << std::endl; std::cerr << "PEN norm:" << cv::norm(pen_result, cv::NORM_INF) << std::endl; std::cerr << "GPU-CPU norm:" << cv::norm(gpu_result, cpu_result, cv::NORM_INF) << std::endl; std::cerr << "PEN-CPU norm:" << cv::norm(pen_result, cpu_result, cv::NORM_INF) << std::endl; throw std::runtime_error("The OpenCL or PENCIL results are not equivalent with the C++ results."); } timing.print( elapsed_time_cpu, elapsed_time_gpu_p_copy, elapsed_time_gpu_nocopy, elapsed_time_pencil ); } } } }