/**
 * Builds a dense cell histogram on the device from a filter-frame result.
 *
 * Two 2D matrices are obtained from ff_out via makeDeviceMatrixCL(*ff_out, 0)
 * and makeDeviceMatrixCL(*ff_out, 1); the original code names them the
 * "assignment" and "weight" matrices respectively.
 *
 * @param ff_out     3D device matrix the two 2D matrices are taken from.
 * @param max_bin    Width of the output histogram matrix; forwarded to the kernel wrapper.
 * @param cell_size  Edge length used to divide the region into cells (must be non-zero).
 * @param start_y    First row of the region of interest.
 * @param start_x    First column of the region of interest.
 * @param stop_y     End row of the region; only used to derive the cell count.
 * @param stop_x     End column of the region; only used to derive the cell count.
 * @return A device matrix of height (cells_y * cells_x) and width max_bin,
 *         filled by cell_histogram_dense_device_cl.
 */
DeviceMatrixCL::Ptr cell_histogram_dense_cl(
    const DeviceMatrixCL3D::Ptr& ff_out,
    const int max_bin,
    const int cell_size,
    const int start_y,
    const int start_x,
    const int stop_y,
    const int stop_x)
{
    DeviceMatrixCL::Ptr assignments = makeDeviceMatrixCL(*ff_out, 0);
    DeviceMatrixCL::Ptr weights = makeDeviceMatrixCL(*ff_out, 1);

    // Integer division: a partial cell at the end of the region is dropped.
    int cells_y = (stop_y - start_y) / cell_size;
    int cells_x = (stop_x - start_x) / cell_size;

#ifdef METHOD_2
    // Adds the remainder modulo 2, so a positive odd cell count becomes even.
    cells_y += (cells_y % 2);
    cells_x += (cells_x % 2);
#endif

    DeviceMatrixCL::Ptr histogram = makeDeviceMatrixCL(cells_y * cells_x, max_bin);

    cell_histogram_dense_device_cl(
        histogram.get(),
        assignments.get(),
        weights.get(),
        max_bin,
        cell_size,
        start_y,
        start_x,
        cells_y,
        cells_x);

    return histogram;
}
/**
 * Pairwise distance between two feature matrices using the CITYBLOCK selector.
 *
 * @param features_train  First feature matrix; its height gives the output height.
 * @param features_test   Second feature matrix; its height gives the output width.
 * @return Newly allocated (features_train->height x features_test->height)
 *         device matrix populated by pwdist_genericCL.
 */
DeviceMatrixCL::Ptr pwcityblock_cl(
    const DeviceMatrixCL::Ptr& features_train,
    const DeviceMatrixCL::Ptr& features_test)
{
    DeviceMatrixCL::Ptr distances =
        makeDeviceMatrixCL(features_train->height, features_test->height);

    pwdist_genericCL(
        features_train.get(),
        features_test.get(),
        distances.get(),
        CITYBLOCK);

    return distances;
}
/**
 * Pairwise distance between two feature matrices using the CHISQUARED selector.
 *
 * @param features_train  First feature matrix; its height gives the output height.
 * @param features_test   Second feature matrix; its height gives the output width.
 * @return Newly allocated (features_train->height x features_test->height)
 *         device matrix populated by pwdist_genericCL.
 */
DeviceMatrixCL::Ptr pwchisq_cl(
    const DeviceMatrixCL::Ptr& features_train,
    const DeviceMatrixCL::Ptr& features_test)
{
    DeviceMatrixCL::Ptr distances =
        makeDeviceMatrixCL(features_train->height, features_test->height);

    pwdist_genericCL(
        features_train.get(),
        features_test.get(),
        distances.get(),
        CHISQUARED);

    return distances;
}
/**
 * Pairwise comparison of two feature matrices using the ABSDOTPRODUCT selector.
 *
 * @param features_train  First feature matrix; its height gives the output height.
 * @param features_test   Second feature matrix; its height gives the output width.
 * @return Newly allocated (features_train->height x features_test->height)
 *         device matrix populated by pwdist_genericCL.
 */
DeviceMatrixCL::Ptr pwabsdot_cl(
    const DeviceMatrixCL::Ptr& features_train,
    const DeviceMatrixCL::Ptr& features_test)
{
    DeviceMatrixCL::Ptr products =
        makeDeviceMatrixCL(features_train->height, features_test->height);

    pwdist_genericCL(
        features_train.get(),
        features_test.get(),
        products.get(),
        ABSDOTPRODUCT);

    return products;
}
/**
 * Pairwise distance between two feature matrices via the dedicated
 * pwdist_eucCL routine.
 *
 * The original source carried commented-out alternatives: a call to
 * pwdist_genericCL with the EUCLIDEAN selector, and omp_get_wtime()-based
 * timing around a 10000-iteration loop. Neither is active.
 *
 * @param features_train  First feature matrix; its height gives the output height.
 * @param features_test   Second feature matrix; its height gives the output width.
 * @return Newly allocated (features_train->height x features_test->height)
 *         device matrix populated by pwdist_eucCL.
 */
DeviceMatrixCL::Ptr pwdist_cl(
    const DeviceMatrixCL::Ptr& features_train,
    const DeviceMatrixCL::Ptr& features_test)
{
    DeviceMatrixCL::Ptr distances =
        makeDeviceMatrixCL(features_train->height, features_test->height);

    pwdist_eucCL(features_train.get(), features_test.get(), distances.get());

    return distances;
}
Classifier(const int window_height, const int window_width, const int cell_size, const int block_size, const int dict_size): _window_height(window_height), _window_width(window_width), _dict_size(dict_size), _cell_size(cell_size), _block_size(block_size) { _n_cells_x = _window_width / cell_size; _n_cells_y = _window_height / cell_size; _n_blocks_x = _n_cells_x - _block_size + 1; _n_blocks_y = _n_cells_y - _block_size + 1; _n_total_coeff = _block_size * _block_size * _n_blocks_x * _n_blocks_y, _dict_size; coefficients = new float[_n_total_coeff]; classifierCL = makeDeviceMatrixCL(_n_total_coeff / _dict_size, _dict_size); for (int i = 0; i < _n_total_coeff; i++) { coefficients[i] = float( std::rand() ) / RAND_MAX; } DeviceMatrixCL_copyToDevice(*classifierCL, coefficients); };
/**
 * Runs max_cl_local over a matrix and returns its single-column result.
 *
 * @param matrix  Input device matrix.
 * @return Newly allocated (matrix->height x 1) device matrix written by
 *         max_cl_local. Presumably one maximum per input row; confirm
 *         against the max_cl_local implementation.
 */
DeviceMatrixCL::Ptr max_cl(const DeviceMatrixCL::Ptr& matrix)
{
    DeviceMatrixCL::Ptr maxima = makeDeviceMatrixCL(matrix->height, 1);

    max_cl_local(matrix.get(), maxima.get());

    return maxima;
}
int main(int argc, char* argv[]) { if(argc!=2){ printf("Please provide kernel path as argument!"); exit(1); } kernelPath=argv[1]; const int height = 640; const int width = 480; const int MAX_BIN = 100; float* weights = new float[height * width]; float* assignments = new float[height * width]; for (int i = 0; i < height * width; i++) { weights[i] = float(std::rand()) / RAND_MAX; assignments[i] = float(std::rand() % MAX_BIN); } DeviceMatrixCL::Ptr asCL = makeDeviceMatrixCL(height, width); DeviceMatrixCL_copyFromDevice(*asCL, assignments); DeviceMatrixCL::Ptr wtCL = makeDeviceMatrixCL(height, width); DeviceMatrixCL_copyFromDevice(*wtCL, weights); DeviceMatrixCL3D::Ptr block_histograms = cell_histogram_dense_cl( asCL, wtCL, MAX_BIN, 8, 0, 0, height, width); delete[] weights; delete[] assignments; // cv::Mat exampleImage = cv::imread(exampleImagePath, 0); // // //convert to float // exampleImage.convertTo(exampleImage, CV_32FC1); // // //pull the data // float* f_imData = (float*) exampleImage.data; // // const int height = exampleImage.size().height; // const int width = exampleImage.size().width; // // //create a random filterbank // const int num_filters = 100; // const int filter_dim = 3; // // float* filter_bank = new float[num_filters * filter_dim * filter_dim]; // // for (int i = 0; i < num_filters * filter_dim * filter_dim; i++) // { // filter_bank[i] = float( std::rand() ) / RAND_MAX; // } // // //OPENCL Reference // DeviceMatrixCL::Ptr dmpCL = makeDeviceMatrixCL(height, width); // DeviceMatrixCL_copyToDevice(*dmpCL, f_imData); // set_filter_bank_cl(filter_bank, num_filters * filter_dim * filter_dim); // DeviceMatrixCL3D::Ptr retdm = filter_frame_cl_3(dmpCL, num_filters, 1, FF_OPTYPE_COSINE); // float* retval = new float[height * width * 2]; // DeviceMatrixCL3D_copyFromDevice(*retdm, retval); // // std::ofstream test_out_cl("testcl.out", std::ios_base::out); // for (int j = 0; j < height; j++) // { // for (int i = 0; i < width; i++) // { // test_out_cl << 
retval[j * width + i] << ", "; // } // // test_out_cl << std::endl; // } // // test_out_cl << std::endl << std::endl << std::endl; // // for (int j = 0; j < height; j++) // { // for (int i = 0; i < width; i++) // { // test_out_cl << retval[height * width + j * width + i] << ", "; // } // test_out_cl << std::endl; // } // test_out_cl.close(); // // delete[] filter_bank; // delete[] retval; // //delete[] retvalCU; // // return 0; }
int main(int argc, char* argv[]) { device_use = 0; if(argc>1) device_use = atoi(argv[1]); static char* exampleImagePath = "..\\..\\..\\media\\kewell1.jpg"; //create a random filterbank const int num_filters = 256; //number of pipeline passes const int num_iters = 125; const int filter_dim = 3; FilterBank fb(filter_dim, num_filters); fb.set_on_device(); Classifier clf(128, 64, 8, 2, num_filters); //load the image on device cv::Mat exampleImage = cv::imread(exampleImagePath, 0); //convert to float exampleImage.convertTo(exampleImage, CV_32FC1); cv::resize(exampleImage, exampleImage, cv::Size(exampleImage.cols, exampleImage.rows)); if(device_use==0) std::cout << "running on CPU" <<std::endl; else std::cout << "running on GPU" <<std::endl; std::cout << "Image dimensions:" << exampleImage.size().height <<" "<< exampleImage.size().width <<std::endl; //pull the data float* f_imData = (float*) exampleImage.data; DeviceMatrixCL::Ptr dmpCL = makeDeviceMatrixCL(exampleImage.size().height, exampleImage.size().width); DeviceMatrixCL_copyToDevice(*dmpCL, f_imData); /* for(int i=0; i<20; i++) { DeviceMatrixCL3D::Ptr ff_im = fb.apply_cl(dmpCL); // tic1= omp_get_wtime(); DeviceMatrixCL::Ptr block_histogram = cell_histogram_dense_cl( ff_im, num_filters, 8, 0, 0, exampleImage.size().height, exampleImage.size().width); // tic2= omp_get_wtime(); DeviceMatrixCL::Ptr result = clf.apply(block_histogram); } */ double tic0, tic1, tic2, tic3; double tim1 = 0.0; double tim2 = 0.0; double tim3 = 0.0; for(int i=0; i<num_iters; i++) { tic0= omp_get_wtime(); DeviceMatrixCL3D::Ptr ff_im = fb.apply_cl(dmpCL); tic1= omp_get_wtime(); tim1 += tic1 - tic0; DeviceMatrixCL::Ptr block_histogram = cell_histogram_dense_cl( ff_im, num_filters, 8, 0, 0, exampleImage.size().height, exampleImage.size().width); tic2= omp_get_wtime(); tim2 += tic2 - tic1; DeviceMatrixCL::Ptr result = clf.apply(block_histogram); TheContext* tc = new TheContext(); clFinish(tc->getMyContext()->cqCommandQueue); tic3 = 
omp_get_wtime(); tim3 += tic3 - tic2; } std::cout << "full pipeline time: " << tim1 + tim2 + tim3 << std::endl; std::cout << "filter pipeline time: " << tim1 << std::endl; std::cout << "histogram pipeline time: " << tim2 << std::endl; std::cout << "classifier pipeline time: " << tim3 << std::endl; return 0; }