int main(int argc, char* argv[]) { if(argc!=2){ printf("Please provide kernel path as argument!"); exit(1); } kernelPath=argv[1]; const int height = 640; const int width = 480; const int MAX_BIN = 100; float* weights = new float[height * width]; float* assignments = new float[height * width]; for (int i = 0; i < height * width; i++) { weights[i] = float(std::rand()) / RAND_MAX; assignments[i] = float(std::rand() % MAX_BIN); } DeviceMatrixCL::Ptr asCL = makeDeviceMatrixCL(height, width); DeviceMatrixCL_copyFromDevice(*asCL, assignments); DeviceMatrixCL::Ptr wtCL = makeDeviceMatrixCL(height, width); DeviceMatrixCL_copyFromDevice(*wtCL, weights); DeviceMatrixCL3D::Ptr block_histograms = cell_histogram_dense_cl( asCL, wtCL, MAX_BIN, 8, 0, 0, height, width); delete[] weights; delete[] assignments; // cv::Mat exampleImage = cv::imread(exampleImagePath, 0); // // //convert to float // exampleImage.convertTo(exampleImage, CV_32FC1); // // //pull the data // float* f_imData = (float*) exampleImage.data; // // const int height = exampleImage.size().height; // const int width = exampleImage.size().width; // // //create a random filterbank // const int num_filters = 100; // const int filter_dim = 3; // // float* filter_bank = new float[num_filters * filter_dim * filter_dim]; // // for (int i = 0; i < num_filters * filter_dim * filter_dim; i++) // { // filter_bank[i] = float( std::rand() ) / RAND_MAX; // } // // //OPENCL Reference // DeviceMatrixCL::Ptr dmpCL = makeDeviceMatrixCL(height, width); // DeviceMatrixCL_copyToDevice(*dmpCL, f_imData); // set_filter_bank_cl(filter_bank, num_filters * filter_dim * filter_dim); // DeviceMatrixCL3D::Ptr retdm = filter_frame_cl_3(dmpCL, num_filters, 1, FF_OPTYPE_COSINE); // float* retval = new float[height * width * 2]; // DeviceMatrixCL3D_copyFromDevice(*retdm, retval); // // std::ofstream test_out_cl("testcl.out", std::ios_base::out); // for (int j = 0; j < height; j++) // { // for (int i = 0; i < width; i++) // { // test_out_cl << retval[j * width + i] << ", "; // } // // test_out_cl << std::endl; // } // // test_out_cl << std::endl << std::endl << std::endl; // // for (int j = 0; j < height; j++) // { // for (int i = 0; i < width; i++) // { // test_out_cl << retval[height * width + j * width + i] << ", "; // } // test_out_cl << std::endl; // } // test_out_cl.close(); // // delete[] filter_bank; // delete[] retval; // //delete[] retvalCU; // // return 0; }
int main(int argc, char* argv[]) { device_use = 0; if(argc>1) device_use = atoi(argv[1]); static char* exampleImagePath = "..\\..\\..\\media\\kewell1.jpg"; //create a random filterbank const int num_filters = 256; //number of pipeline passes const int num_iters = 125; const int filter_dim = 3; FilterBank fb(filter_dim, num_filters); fb.set_on_device(); Classifier clf(128, 64, 8, 2, num_filters); //load the image on device cv::Mat exampleImage = cv::imread(exampleImagePath, 0); //convert to float exampleImage.convertTo(exampleImage, CV_32FC1); cv::resize(exampleImage, exampleImage, cv::Size(exampleImage.cols, exampleImage.rows)); if(device_use==0) std::cout << "running on CPU" <<std::endl; else std::cout << "running on GPU" <<std::endl; std::cout << "Image dimensions:" << exampleImage.size().height <<" "<< exampleImage.size().width <<std::endl; //pull the data float* f_imData = (float*) exampleImage.data; DeviceMatrixCL::Ptr dmpCL = makeDeviceMatrixCL(exampleImage.size().height, exampleImage.size().width); DeviceMatrixCL_copyToDevice(*dmpCL, f_imData); /* for(int i=0; i<20; i++) { DeviceMatrixCL3D::Ptr ff_im = fb.apply_cl(dmpCL); // tic1= omp_get_wtime(); DeviceMatrixCL::Ptr block_histogram = cell_histogram_dense_cl( ff_im, num_filters, 8, 0, 0, exampleImage.size().height, exampleImage.size().width); // tic2= omp_get_wtime(); DeviceMatrixCL::Ptr result = clf.apply(block_histogram); } */ double tic0, tic1, tic2, tic3; double tim1 = 0.0; double tim2 = 0.0; double tim3 = 0.0; for(int i=0; i<num_iters; i++) { tic0= omp_get_wtime(); DeviceMatrixCL3D::Ptr ff_im = fb.apply_cl(dmpCL); tic1= omp_get_wtime(); tim1 += tic1 - tic0; DeviceMatrixCL::Ptr block_histogram = cell_histogram_dense_cl( ff_im, num_filters, 8, 0, 0, exampleImage.size().height, exampleImage.size().width); tic2= omp_get_wtime(); tim2 += tic2 - tic1; DeviceMatrixCL::Ptr result = clf.apply(block_histogram); TheContext* tc = new TheContext(); clFinish(tc->getMyContext()->cqCommandQueue); tic3 = omp_get_wtime(); tim3 += tic3 - tic2; } std::cout << "full pipeline time: " << tim1 + tim2 + tim3 << std::endl; std::cout << "filter pipeline time: " << tim1 << std::endl; std::cout << "histogram pipeline time: " << tim2 << std::endl; std::cout << "classifier pipeline time: " << tim3 << std::endl; return 0; }