int main(void) { /* Data */ std::string fileName("square.cl"); std::string routineName("square"); int routines = 1; int clCount = CL_COUNT; float clIn[CL_COUNT] = { 2.f }; float clOut[CL_COUNT]; std::ifstream sourceFile(fileName.c_str()); if (sourceFile.fail()) std::cout << "Failed to open OpenCL source file" << std::endl; std::string sourceCode(std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>())); OCLutil ocl(CL_DEVICE_TYPE_GPU, fileName, "", routineName, routines); ocl.CarregarBuffer(clIn, clCount, 0, 0, false); ocl.CarregarBuffer(clOut, clCount, 0, 1, true); ocl.CarregarInt(clCount, 0, 2); ocl.Exec(0, cl::NDRange(1), cl::NullRange); ocl.LerBuffer(clOut, 1, 1); for (int i = 0; i < clCount; ++i) std::cout << clOut[i] << std::endl; /* Image */ fileName = "imgcpy.cl"; routineName = "imgcpy"; cv::Mat clImgIn = cv::imread("./alaor.jpg"); cv::Mat clImgOut(clImgIn.size(),CV_8UC3); std::ifstream sourceFileImg(fileName.c_str()); if (sourceFileImg.fail()) std::cout << "Failed to open OpenCL source file" << std::endl; std::string sourceCodeImg(std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>())); OCLutil oclImg(CL_DEVICE_TYPE_GPU, fileName, "", routineName, routines); oclImg.CarregarCVMatf(clImgIn, 0, 0, false); oclImg.CarregarCVMatf(clImgOut, 0, 1, true); oclImg.Exec(0, cl::NDRange(clImgIn.cols, clImgIn.rows), cl::NullRange); oclImg.LerBufferImgf(clImgOut, 1); cv::imshow("Output", clImgOut); cv::waitKey(); return EXIT_SUCCESS; }
void computeMIonGPU(SequenceSet& sequence, Matrix<float>& MI, bool GPU) { // initializes context and kernel and stores them OCL ocl(GPU); cl_int oclError1, oclError2; timeval start, end; // memory sizes size_t sequenceLength = sequence.getSequenceLength(); size_t numSequences = sequence.getNumberOfSequences(); // matrix MI is of size numElements size_t numElements = sequenceLength * sequenceLength; size_t sequenceSize = sequence.getNumberOfSequences() * sequenceLength; size_t onePointProbsSize = sequenceLength * NUMPROTEINCHARS; // host memory float * dst = new float[MI.size()]; memset(dst, 0, MI.size()); // device memory for sequences, one point probablities and resulting matrix cl_mem oclDevSrcSequence, oclDevSrcOnePointProbs, oclDevDstMI; // size for a work group: each workgroup computes one matrix entry, thus computes the correlation // one time for each character => 25 work items are sufficient size_t localWorkSize[2] = { 5, 5 }; if (sequenceLength % localWorkSize[0] != 0) throw std::runtime_error("sequence length ^ 2 not divisable by local work size"); // global work size defines the total amount of threads over all work group, thus needs to be a multiple of the local // work size in each dimension. size_t globalWorkSize[2] = { sequenceLength, sequenceLength }; // create buffer on device, one for each input array oclDevSrcSequence = clCreateBuffer( ocl.oclContext, CL_MEM_READ_ONLY, sizeof(cl_uchar) * sequenceSize, 0, &oclError1); oclDevSrcOnePointProbs = clCreateBuffer(ocl.oclContext, CL_MEM_READ_ONLY, sizeof(cl_float) * onePointProbsSize, 0, &oclError2); oclError1 |= oclError2; oclDevDstMI = clCreateBuffer( ocl.oclContext, CL_MEM_WRITE_ONLY, sizeof(cl_float) * numElements, 0, &oclError2); oclError1 |= oclError2; if (oclError1 != CL_SUCCESS) { std::cout << "error while allocating buffers" << std::endl; exit(1); } // set buffer to appropriate kernel arguments oclError1 = clSetKernelArg(ocl.oclKernel, 0, sizeof(cl_mem), (void*)&oclDevSrcSequence); oclError1 |= clSetKernelArg(ocl.oclKernel, 1, sizeof(cl_mem), (void*)&oclDevSrcOnePointProbs); oclError1 |= clSetKernelArg(ocl.oclKernel, 2, sizeof(cl_mem), (void*)&oclDevDstMI); oclError1 |= clSetKernelArg(ocl.oclKernel, 3, sizeof(cl_uint), &sequenceLength); oclError1 |= clSetKernelArg(ocl.oclKernel, 4, sizeof(cl_uint), &numSequences); if (oclError1 != CL_SUCCESS) { std::cout << "error while setting arguments: " << ocl.oclErrorString(oclError1) << std::endl; exit(1); } // copy host memory to device, non-blocking copy oclError1 = clEnqueueWriteBuffer( ocl.oclCmdQueue, oclDevSrcSequence, CL_FALSE, 0, sizeof(cl_uchar) * sequenceSize, (const void *) sequence.getData(), 0, 0, 0); oclError1 |= clEnqueueWriteBuffer( ocl.oclCmdQueue, oclDevSrcOnePointProbs, CL_FALSE, 0, sizeof(cl_float) * onePointProbsSize, (const void *) sequence.getOnePointProbs(), 0, 0, 0); if (oclError1 != CL_SUCCESS) { std::cout << "error while writing to device " << ocl.oclErrorString(oclError1) << std::endl; exit(1); } // execute kernel LOOPCOUNT times and measure execution time // TODO LOOPCOUNT aendern, um Kernel mehrfach auszufuehren gettimeofday(&start, 0); for (int i = 0; i < LOOPCOUNT; ++i) { oclError1 = clEnqueueNDRangeKernel( ocl.oclCmdQueue, ocl.oclKernel, 2, // dimension 0, globalWorkSize, localWorkSize, 0, 0, 0); if (oclError1 != CL_SUCCESS) { std::cout << "error while executing kernel: " << ocl.oclErrorString(oclError1) << std::endl; exit(1); } } // clFinish blocks until all issued commands so far are completed, necessary for computing execution time oclError1 = clFinish(ocl.oclCmdQueue); gettimeofday(&end, 0); // read memory from device, store in temporary array and if no error happend copy to result matrix oclError1 = clEnqueueReadBuffer( ocl.oclCmdQueue, oclDevDstMI, CL_TRUE, 0, sizeof(cl_float) * numElements, dst, 0, 0, 0); if (oclError1 != CL_SUCCESS) { std::cout << "error while reading from device: " << ocl.oclErrorString(oclError1) << std::endl; exit(1); } std::cout << "execution time: " << (end.tv_sec - start.tv_sec ) * 1000 + ( end.tv_usec - start.tv_usec) / 1000 << " milliseconds" << std::endl; // fill the matrix with the computed results MI.copyElements(dst); // release used memory, can cause really bad crashes otherwise clReleaseMemObject(oclDevSrcSequence); clReleaseMemObject(oclDevSrcOnePointProbs); clReleaseMemObject(oclDevDstMI); }
/** * Init GUI and load existing quick config */ quickconf::quickconf() { int x; for (x=0; x<8; x++) { botfiles[x] = ""; shownames[x] = new QLabel (this); shownames[x]->setGeometry (10,50+x*25,190,20); shownames[x]->show(); team[x] = new QComboBox (this); team[x]->setGeometry (215,50+x*25,40,20); team[x]->insertItem ("1"); team[x]->insertItem ("2"); team[x]->insertItem ("3"); team[x]->insertItem ("4"); } // press[0] = new PixButton( "load",1,this ); // press[0]->setGeometry( 0,0,80,40 ); // press[1] = new PixButton( "remove",1,this ); // press[1]->setGeometry( 85,0,80,40 ); ifteams = new QCheckBox ("Teams",this); ifteams->setGeometry (170,10,70,20); QObject::connect (press[0],SIGNAL (clicked()),this, SLOT (choosefile())); QObject::connect (press[1],SIGNAL (clicked()),this, SLOT (dechoosefile())); readyb = new PixButton ("write file",1,this); readyb->setGeometry (100,500,80,40); cancelb = new PixButton ("Exit",1,this); cancelb->setGeometry (200,500,80,40); QObject::connect (readyb,SIGNAL (clicked()),this,SLOT (ocl())); QObject::connect (cancelb,SIGNAL (clicked()),this,SLOT (ccl())); tnumfights = new QLabel ("Number of fights:",this); tnumfights->setGeometry (10,300,100,20); wnumfights = new QLineEdit (this); wnumfights->setGeometry (120,300,40,20); numfix = new QIntValidator (this); wnumfights->setValidator (numfix); lengthfight = new QLabel ("Max length of fight ( 50 ~ 1sec ):",this); lengthfight->setGeometry (10,320,200,20); length = new QLineEdit (this); length->setGeometry (200,320,60,20); maxxinfo = new QLabel ("The xsize of the battlearea: ",this); maxxinfo->setGeometry (10,350,200,20); maxx = new QSpinBox (8192,65535,512,this); maxx->setGeometry (210,350,80,30); maxx->setValue (32768); maxyinfo = new QLabel ("The ysize of the battlearea: ",this); maxyinfo->setGeometry (10,380,200,20); maxy = new QSpinBox (8192,65535,512,this); maxy->setGeometry (210,380,80,30); maxy->setValue (32768); QString temp = QDir::homeDirPath(); temp += "/droidbattles/quick.conf"; QFile f (temp); if (f.exists() && f.open (QIODevice::ReadOnly)) { Q3TextStream s (&f); for (int x=0; x<8; x++) { s >> botfiles[x]; s >> temp; if (botfiles[x] == QString ("fff")) botfiles[x] = ""; team[x]->setCurrentItem (temp.toInt()); shownames[x]->setText (botfiles[x]); } s >> temp; ifteams->setChecked (temp.toInt()); s >> temp; wnumfights->setText (temp); s >> temp; length->setText (temp); s >> temp; maxx->setValue (temp.toInt()); s >> temp; maxy->setValue (temp.toInt()); f.close(); }