void CodeManager::substitute() { Q_ASSERT(templateIsSubstitutable()); QString substTxt("/*!POA!*/\n"); QValueList<PinModel*> pins = model_->pins(); for (QValueListIterator<PinModel *> it = pins.begin(); it != pins.end(); ++it) { PinModel *pin = *it; substTxt.append(QString(" np_pio *%1 = (np_pio*) 0x%2;\n") .arg(pin->name()) .arg(pin->address(), 0, 16)); } QString source = sourceCode(); // Note: f*****g QRegExp doesn't work over newlines, so // we've to double-match... - so QRegExp is useless indeed :(( int firstIndex = source.find("/*!POA!*/"); int lastIndex = source.find("/*!AOP!*/"); Q_ASSERT(firstIndex < lastIndex); source.replace(firstIndex, lastIndex - firstIndex, substTxt); model_->setSource(source); saveSource(); }
// Forwards a user style sheet to DumpRenderTreeSupportGtk for the main frame.
// The JSStringRef is first copied into a C string (JSStringCopyUTF8CString);
// the GOwnPtr holds that copy for the duration of the call.
void LayoutTestController::addUserStyleSheet(JSStringRef source, bool allFrames)
{
    // FIXME: needs more investigation why userscripts/user-style-top-frame-only.html fails when allFrames is false.
    GOwnPtr<gchar> styleSheetText(JSStringCopyUTF8CString(source));
    gchar* rawText = styleSheetText.get();
    DumpRenderTreeSupportGtk::addUserStyleSheet(mainFrame, rawText, allFrames);
}
void CodeManager::prependSubstitutionMarkers() { Q_ASSERT(model_->source().isNull()); QString source = sourceCode(); source.prepend("\n/*!POA!*/\n\n/*!AOP!*/\n"); model_->setSource(source); saveSource(); }
// Callback for the pending script resource (asserted to be m_pendingScript).
// Detaches from the resource, then either dispatches an error event on the
// script element or executes the script and dispatches a load event, and
// finally resumes parsing unless a script request is in progress.
void XMLTokenizer::notifyFinished(CachedResource* unusedResource)
{
    ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
    ASSERT(m_pendingScript->accessCount() > 0);

    // Capture everything needed from the resource before letting go of it.
    ScriptSourceCode pendingSource(m_pendingScript.get());
    bool loadFailed = m_pendingScript->errorOccurred();
    m_pendingScript->removeClient(this);
    m_pendingScript = 0;

    // Hold a reference to the element locally while the member is cleared.
    RefPtr<Element> protectedElement = m_scriptElement;
    m_scriptElement = 0;

    ScriptElement* scriptElement = toScriptElement(protectedElement.get());
    ASSERT(scriptElement);

    if (!loadFailed) {
        m_view->frame()->loader()->executeScript(pendingSource);
        scriptElement->dispatchLoadEvent();
    } else
        scriptElement->dispatchErrorEvent();

    m_scriptElement = 0;

    if (m_requestingScript)
        return;
    resumeParsing();
}
//------------------------------------------------------------------------- void MainWindow::processContent() { QString fileName = getCurrentSourceFile(); if(fileName.isEmpty()) { return; } QPlainTextEdit *currentSourceCode = getCurrentSourceCode(); std::vector<const char*> Args; std::list<std::string> commandLine; QByteArray sourceCode(currentSourceCode->toPlainText().toUtf8()); APP::Application::addCmdArgsFromProject(Args, commandLine); std::string processedFile; std::stringstream diagnostics; APP::Application::processFile(sourceCode.constData(), sourceCode.constData() + sourceCode.size(), QFileInfo(fileName).canonicalFilePath().toUtf8().constData(), Args, processedFile, diagnostics); clearLog(); //diagnostics<<processedFile; log(diagnostics.str()); setTransformedSource(processedFile.c_str()); ui.tabWidget_2->setTabText(0, QFileInfo(fileName).fileName()); }
bool CodeManager::templateIsSubstitutable() { Q_ASSERT(model_->source().isNull()); QString source = sourceCode(); // only one block is allowed! return (source.find("/*!POA!*/") != -1) && (source.find("/*!AOP!*/") != -1); }
/*!
  Returns true if this QScriptProgram is equal to \a other;
  otherwise returns false.

  Two programs are equal when they share the same private data, or when
  their source code, file name and first line number all match.
*/
bool QScriptProgram::operator==(const QScriptProgram &other) const
{
    Q_D(const QScriptProgram);

    // Same private object: equal without comparing the contents.
    if (d == other.d_func())
        return true;

    if (!(sourceCode() == other.sourceCode()))
        return false;
    if (!(fileName() == other.fileName()))
        return false;
    return firstLineNumber() == other.firstLineNumber();
}
// Evaluates `script` in `world`, attributing it to the frame document's URL.
// A user-gesture indicator is held for the duration of the call; it is given
// ProcessingUserGesture when forceUserGesture is set and Nullopt otherwise.
// Returns an empty JSValue when scripts may not execute or the controller is
// paused.
JSValue ScriptController::executeScriptInWorld(DOMWrapperWorld& world, const String& script, bool forceUserGesture)
{
    UserGestureIndicator gestureIndicator(forceUserGesture ? Optional<ProcessingUserGestureState>(ProcessingUserGesture) : Nullopt);
    ScriptSourceCode scriptSource(script, m_frame.document()->url());

    if (canExecuteScripts(AboutToExecuteScript) && !isPaused())
        return evaluateInWorld(scriptSource, world);

    return { };
}
// Evaluates `script` in `world`, attributing it to the frame document's URL.
// A user-gesture indicator is held for the duration of the call, set to
// DefinitelyProcessingUserGesture when forceUserGesture is true and
// PossiblyProcessingUserGesture otherwise. Returns a default ScriptValue
// when scripts may not execute or the controller is paused.
ScriptValue ScriptController::executeScriptInWorld(DOMWrapperWorld* world, const String& script, bool forceUserGesture)
{
    UserGestureIndicator gestureIndicator(forceUserGesture ? DefinitelyProcessingUserGesture : PossiblyProcessingUserGesture);
    ScriptSourceCode scriptSource(script, m_frame->document()->url());

    if (canExecuteScripts(AboutToExecuteScript) && !isPaused())
        return evaluateInWorld(scriptSource, world);

    return ScriptValue();
}
// Equality: two null scripts are equal; a null and a non-null script are
// not. Otherwise world id, sub-frame flag, injection point, name and source
// code must all match.
bool UserScript::operator==(const UserScript &other) const
{
    if (isNull() != other.isNull())
        return false;

    // Both sides agree on null-ness here; if this one is null, so is other.
    if (isNull())
        return true;

    if (!(worldId() == other.worldId()))
        return false;
    if (!(runsOnSubFrames() == other.runsOnSubFrames()))
        return false;
    if (!(injectionPoint() == other.injectionPoint()))
        return false;
    if (!(name() == other.name()))
        return false;
    return sourceCode() == other.sourceCode();
}
/**
 * Reads an OpenCL source file, creates a cl::Program from it in m_context
 * and builds it for m_devices.
 *
 * @param sourceFile  path of the OpenCL source file to read
 * @return            heap-allocated program; the caller takes ownership
 *
 * NOTE(review): a file that cannot be opened yields an empty source string,
 * and the result of build() is not inspected here.
 */
cl::Program * CLM::buildProgram(std::string sourceFile){
    // Read the whole source file into memory.
    std::ifstream file(sourceFile.c_str());
    std::string sourceCode(
        std::istreambuf_iterator<char>(file),
        (std::istreambuf_iterator<char>())
        );

    // The length passed to OpenCL is the character count of the source; the
    // terminating NUL is not program text and must not be counted.
    cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()));

    cl::Program * result = new cl::Program(m_context, source);
    result->build(m_devices);
    return result;
}
// Creates the ModuleProgramExecutable for this record's source code, stores
// it in m_moduleProgramExecutable and instantiates the declarations.
// Throws a syntax error on `exec` when no executable could be created.
void JSModuleRecord::link(ExecState* exec)
{
    VM& vm = exec->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    if (ModuleProgramExecutable* executable = ModuleProgramExecutable::create(exec, sourceCode())) {
        m_moduleProgramExecutable.set(vm, this, executable);
        instantiateDeclarations(exec, executable);
        return;
    }

    throwSyntaxError(exec, scope);
}
int main(void) { /* Data */ std::string fileName("square.cl"); std::string routineName("square"); int routines = 1; int clCount = CL_COUNT; float clIn[CL_COUNT] = { 2.f }; float clOut[CL_COUNT]; std::ifstream sourceFile(fileName.c_str()); if (sourceFile.fail()) std::cout << "Failed to open OpenCL source file" << std::endl; std::string sourceCode(std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>())); OCLutil ocl(CL_DEVICE_TYPE_GPU, fileName, "", routineName, routines); ocl.CarregarBuffer(clIn, clCount, 0, 0, false); ocl.CarregarBuffer(clOut, clCount, 0, 1, true); ocl.CarregarInt(clCount, 0, 2); ocl.Exec(0, cl::NDRange(1), cl::NullRange); ocl.LerBuffer(clOut, 1, 1); for (int i = 0; i < clCount; ++i) std::cout << clOut[i] << std::endl; /* Image */ fileName = "imgcpy.cl"; routineName = "imgcpy"; cv::Mat clImgIn = cv::imread("./alaor.jpg"); cv::Mat clImgOut(clImgIn.size(),CV_8UC3); std::ifstream sourceFileImg(fileName.c_str()); if (sourceFileImg.fail()) std::cout << "Failed to open OpenCL source file" << std::endl; std::string sourceCodeImg(std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>())); OCLutil oclImg(CL_DEVICE_TYPE_GPU, fileName, "", routineName, routines); oclImg.CarregarCVMatf(clImgIn, 0, 0, false); oclImg.CarregarCVMatf(clImgOut, 0, 1, true); oclImg.Exec(0, cl::NDRange(clImgIn.cols, clImgIn.rows), cl::NullRange); oclImg.LerBufferImgf(clImgOut, 1); cv::imshow("Output", clImgOut); cv::waitKey(); return EXIT_SUCCESS; }
// Evaluates `script` in `world`. The source is attributed to an empty KURL
// when forceUserGesture is set, otherwise to the frame document's URL.
// m_inExecuteScript is raised for the duration of the evaluation; only the
// call that raised it lowers it again and then triggers
// Document::updateStyleForAllDocuments(). Returns a default ScriptValue when
// scripts may not execute or the controller is paused.
ScriptValue ScriptController::executeScriptInWorld(DOMWrapperWorld* world, const String& script, bool forceUserGesture)
{
    ScriptSourceCode scriptSource(script, forceUserGesture ? KURL() : m_frame->document()->url());

    if (!canExecuteScripts(AboutToExecuteScript) || isPaused())
        return ScriptValue();

    // Remember whether an enclosing call already set the flag.
    const bool alreadyExecuting = m_inExecuteScript;
    m_inExecuteScript = true;

    ScriptValue evaluated = evaluateInWorld(scriptSource, world);

    if (alreadyExecuting)
        return evaluated;

    m_inExecuteScript = false;
    Document::updateStyleForAllDocuments();
    return evaluated;
}
void CLGLRayCast::ChangeSource(std::string sourcePath){ cl_int error; // Read source file std::ifstream sourceFile(sourcePath.c_str()); std::string sourceCode( std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>())); cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); // Make program of the source code in the context m_program = cl::Program(m_context, source, &error); if (error != 0) std::cout << "Change of source code failed : error " << error << std::endl; else std::cout << "Change of source code successfull" << std::endl; // Build program for these specific devices m_program.build(m_devices); }
/**
 * Sets up an OpenCL context, command queue and kernels.
 *
 * Steps: pick the first platform whose device query for `type` does not
 * throw, create a context on it, load and build the program in `arq`,
 * print information about every device in the context, create the command
 * queue on the device with the most compute units, and create `n` kernels
 * from the comma-separated names in `nomeRot`.
 *
 * If no platform or no device is available a message is printed and the
 * constructor returns early, leaving the queue and kernels unset.
 *
 * @param type          OpenCL device type to look for
 * @param arq           path of the OpenCL source file
 * @param buildOptions  options passed to the program build
 * @param nomeRot       comma-separated kernel (routine) names
 * @param n             number of kernels to create from nomeRot
 */
OCLutil::OCLutil(cl_device_type type,std::string arq,std::string buildOptions,std::string nomeRot,int n)
{
    VECTOR_CLASS<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if(platforms.size() == 0){
        std::cout<<"No OpenCL platforms were found"<<std::endl;
    }

    int platformID = -1;
    for(unsigned int i = 0; i < platforms.size(); i++) {
        try {
            VECTOR_CLASS<cl::Device> devices;
            platforms[i].getDevices(type, &devices);
            platformID = static_cast<int>(i);
            break;
        } catch(const std::exception&) {
            std::cout<<"Error ao ler plataforma: "<<std::endl;
            continue;
        }
    }

    if(platformID == -1){
        std::cout<<"No compatible OpenCL platform found"<<std::endl;
        // Indexing platforms[-1] below would be undefined behaviour.
        return;
    }

    cl::Platform platform = platforms[platformID];
    std::cout << "Using platform vendor: " << platform.getInfo<CL_PLATFORM_VENDOR>() << std::endl;

    // Use the preferred platform and create a context
    cl_context_properties cps[] = {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)(platform)(),
        0
    };

    try {
        context = cl::Context(type, cps);
    } catch(const std::exception&) {
        std::cout<<"Failed to create an OpenCL context!"<<std::endl;
    }

    std::string filename = arq;
    std::ifstream sourceFile(filename.c_str());
    if(sourceFile.fail())
        std::cout<<"Failed to open OpenCL source file"<<std::endl;
    std::string sourceCode(
        std::istreambuf_iterator<char>(sourceFile),
        (std::istreambuf_iterator<char>()));

    // The length is the character count of the source; the terminating NUL
    // is not program text and must not be counted.
    cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()));

    // Make program of the source code in the context
    cl::Program program = cl::Program(context, source);

    VECTOR_CLASS<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
    if(devices.size() == 0){
        // devices[0] / devices[indexDev] below need at least one device.
        std::cout<<"No OpenCL devices available in the context"<<std::endl;
        return;
    }

    std::string deviceInfo;
    cl_ulong memInfo;
    size_t tam;
    cl_uint clUnit;
    int indexDev = 0;
    int maxU = 0;

    // Print a summary of each device and remember the one with the most
    // compute units.
    for (unsigned int i = 0; i < devices.size(); ++i) {
        devices[i].getInfo(static_cast<cl_device_info>(CL_DEVICE_NAME), &deviceInfo);
        std::cout << "Device info: " << deviceInfo << std::endl;
        devices[i].getInfo(static_cast<cl_device_info>(CL_DEVICE_VERSION), &deviceInfo);
        std::cout << "Versão CL: " << deviceInfo << std::endl;
        devices[i].getInfo(static_cast<cl_device_info>(CL_DRIVER_VERSION), &deviceInfo);
        std::cout << "Versão Driver: " << deviceInfo << std::endl;
        devices[i].getInfo(static_cast<cl_device_info>(CL_DEVICE_GLOBAL_MEM_SIZE), &memInfo);
        std::cout << "Memoria Global: " << memInfo << std::endl;
        devices[i].getInfo(static_cast<cl_device_info>(CL_DEVICE_LOCAL_MEM_SIZE), &memInfo);
        std::cout << "Memoria Local: " << memInfo << std::endl;
        // The work-group limit is CL_DEVICE_MAX_WORK_GROUP_SIZE (a size_t);
        // the local-memory size was queried here a second time by mistake.
        devices[i].getInfo(static_cast<cl_device_info>(CL_DEVICE_MAX_WORK_GROUP_SIZE), &tam);
        std::cout << "Max tamanho Work-group: " << tam << std::endl;
        devices[i].getInfo(static_cast<cl_device_info>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS), &clUnit);
        std::cout << "Max dimensao: " << clUnit << std::endl;
        devices[i].getInfo(static_cast<cl_device_info>(CL_DEVICE_MAX_COMPUTE_UNITS), &clUnit);
        std::cout << "Unidades CL: " << clUnit << std::endl;
        std::cout << "*********************************" << std::endl;

        if(static_cast<int>(clUnit) > maxU){
            indexDev = static_cast<int>(i);
            maxU = static_cast<int>(clUnit);
        }
    }

    // Build program for these specific devices
    cl_int error = program.build(devices, buildOptions.c_str());
    if(error != 0) {
        std::cout << "Build log:" << std::endl
                  << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]) << std::endl;
    }

    std::cout << "Index Dispositino selecionado: " << indexDev << std::endl;
    queue = cl::CommandQueue(context, devices[indexDev]);

    // Split nomeRot on ',' and create one kernel per name. As before, once
    // the last name has been consumed the scan restarts at the beginning.
    std::string::size_type posi = 0;
    for(int i = 0; i < n; i++){
        const std::string::size_type posf = nomeRot.find(",", posi);
        std::string nomeRoti;
        if(posf != std::string::npos){
            nomeRoti = nomeRot.substr(posi, posf - posi);
            posi = posf + 1;
        }else{
            nomeRoti = nomeRot.substr(posi);
            posi = 0;
        }
        std::cout<<"Nome rotina["<<i<<"]: "<<nomeRoti.c_str()<<std::endl;
        rotina.push_back(cl::Kernel(program, nomeRoti.c_str()));
    }
}
int main(int argc, char **argv) { TS ts; //Time stepper Vec soln; //Holds the solution vector, including all the primitive //variables. DM dmda; //Manages the computational grid and parallelization. int X1Start, X2Start; int X1Size, X2Size; PetscInitialize(&argc, &argv, PETSC_NULL, help); // Create the computational domain. DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_GHOSTED, DM_BOUNDARY_GHOSTED, DMDA_STENCIL_STAR, N1, N2, PETSC_DECIDE, PETSC_DECIDE, DOF, NG, PETSC_NULL, PETSC_NULL, &dmda); // When running in parallel, each process computes from // [X1Start, X1Start+X1Size] x [X2Start, X2Start+X2Size] DMDAGetCorners(dmda, &X1Start, &X2Start, NULL, &X1Size, &X2Size, NULL); // Create the solution vector. DMCreateGlobalVector(dmda, &soln); // Create the time stepper and link it to the computational grid and the // residual evaluation function. TSCreate(PETSC_COMM_WORLD, &ts); TSSetDM(ts, dmda); TSSetIFunction(ts, PETSC_NULL, ComputeResidual, NULL); // OpenCL boilerplate code. clErr = cl::Platform::get(&platforms); CheckCLErrors(clErr, "cl::Platform::get"); // Select computation device here. clErr = platforms.at(1).getDevices(CL_DEVICE_TYPE_CPU, &devices); CheckCLErrors(clErr, "cl::Platform::getDevices"); context = cl::Context(devices, NULL, NULL, NULL, &clErr); CheckCLErrors(clErr, "cl::Context::Context"); queue = cl::CommandQueue(context, devices.at(0), 0, &clErr); CheckCLErrors(clErr, "cl::CommandQueue::CommandQueue"); std::ifstream sourceFile("computeresidual.cl"); std::string sourceCode((std::istreambuf_iterator<char>(sourceFile)), std::istreambuf_iterator<char>()); cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); program = cl::Program(context, source, &clErr); CheckCLErrors(clErr, "cl::Program::Program"); // Pass in constants to the OpenCL kernel as compiler switches. This is an // efficient way to handle constants such as domain sizes in OpenCL. 
std::string BuildOptions("\ -D X1_SIZE=" + std::to_string(X1Size) + " -D X2_SIZE=" + std::to_string(X2Size) + " -D TOTAL_X1_SIZE=" + std::to_string(X1Size+2*NG) + " -D TOTAL_X2_SIZE=" + std::to_string(X2Size+2*NG)); // Compile the OpenCL program and extract the kernel. PetscScalar start = std::clock(); clErr = program.build(devices, BuildOptions.c_str(), NULL, NULL); const char *buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>( devices.at(0), &clErr).c_str(); PetscPrintf(PETSC_COMM_WORLD, "%s\n", buildlog); CheckCLErrors(clErr, "cl::Program::build"); PetscScalar end = std::clock(); PetscScalar time = (end - start)/(PetscScalar)CLOCKS_PER_SEC; PetscPrintf(PETSC_COMM_WORLD, "Time taken for kernel compilation = %f\n", time); kernel = cl::Kernel(program, "ComputeResidual", &clErr); CheckCLErrors(clErr, "cl::Kernel::Kernel"); // How much memory is the kernel using? cl_ulong localMemSize = kernel.getWorkGroupInfo<CL_KERNEL_LOCAL_MEM_SIZE>( devices.at(0), &clErr); cl_ulong privateMemSize = kernel.getWorkGroupInfo<CL_KERNEL_PRIVATE_MEM_SIZE>( devices.at(0), &clErr); printf("Local memory used = %llu\n", (unsigned long long)localMemSize); printf("Private memory used = %llu\n", (unsigned long long)privateMemSize); // Set initial conditions. InitialCondition(ts, soln); TSSetSolution(ts, soln); TSSetType(ts, TSTHETA); TSSetFromOptions(ts); // Finally solve! All time stepping options can be controlled from the // command line. TSSolve(ts, soln); // Delete the data structures in the following order. DMDestroy(&dmda); VecDestroy(&soln); TSDestroy(&ts); PetscFinalize(); return(0); }
int main(int argc, char **argv) { // Create the two input vectors const int LIST_SIZE = 1000; int* A = new int[LIST_SIZE]; int* B = new int[LIST_SIZE]; for (int i = 0; i < LIST_SIZE; i++) { A[i] = i; B[i] = LIST_SIZE - i; } try { // Get available platforms std::vector<cl::Platform> all_platforms; cl::Platform::get(&all_platforms); checkErr(all_platforms.size() != 0 ? CL_SUCCESS : !CL_SUCCESS, "cl::Platform::get returned empty platform list"); std::cerr << "Platform number is: " << all_platforms.size() << '\n'; std::string platformVendor; all_platforms[0].getInfo((cl_platform_info)CL_PLATFORM_VENDOR, &platformVendor); std::cerr << "Platform is by: " << platformVendor << '\n'; cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(all_platforms[0])(), 0 }; cl::Platform default_platform = all_platforms[0]; std::cout << "Using platform: " << default_platform.getInfo<CL_PLATFORM_NAME>() << '\n'; //get default device of the default platform std::vector<cl::Device> all_devices; default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices); checkErr(all_devices.size() != 0 ? CL_SUCCESS : !CL_SUCCESS, "No devices found. 
Check OpenCL installation"); for (auto& device : all_devices) { std::cout << "Available device: " << device.getInfo<CL_DEVICE_NAME>() << '\n'; } cl::Device default_device = all_devices[0]; std::cout << "Using first device: " << default_device.getInfo<CL_DEVICE_NAME>() << '\n'; cl::Context context({ default_device }); //cl_int err; //cl::Context context(CL_DEVICE_TYPE_CPU, cprops, NULL, NULL, &err); //checkErr(err, "Conext::Context() creation failed"); // Get a list of devices on this platform std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); // Create a command queue and use the first device cl::CommandQueue queue = cl::CommandQueue(context, devices[0]); // Read source file std::ifstream sourceFile("vector_add_kernel.cl"); std::string sourceCode( std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>())); cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length() + 1)); // Make program of the source code in the context cl::Program program = cl::Program(context, source); // Build program for these specific devices program.build(devices); // Make kernel cl::Kernel kernel(program, "vector_add"); // Create memory buffers cl::Buffer bufferA = cl::Buffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int)); cl::Buffer bufferB = cl::Buffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int)); cl::Buffer bufferC = cl::Buffer(context, CL_MEM_WRITE_ONLY, LIST_SIZE * sizeof(int)); // Copy lists A and B to the memory buffers queue.enqueueWriteBuffer(bufferA, CL_TRUE, 0, LIST_SIZE * sizeof(int), A); queue.enqueueWriteBuffer(bufferB, CL_TRUE, 0, LIST_SIZE * sizeof(int), B); // Set arguments to kernel kernel.setArg(0, bufferA); kernel.setArg(1, bufferB); kernel.setArg(2, bufferC); // Run the kernel on specific ND range cl::NDRange global(LIST_SIZE); cl::NDRange local(1); queue.enqueueNDRangeKernel(kernel, cl::NullRange, global, local); // Read buffer C into a local list int* C = new int[LIST_SIZE]; 
queue.enqueueReadBuffer(bufferC, CL_TRUE, 0, LIST_SIZE * sizeof(int), C); for (int i = 0; i < LIST_SIZE; i++) std::cout << A[i] << " + " << B[i] << " = " << C[i] << '\n'; } catch (cl::Error error) { std::cout << error.what() << "(" << error.err() << ")" << std::endl; } return 0; }
// Forwards a user script to DumpRenderTreeSupportGtk for the main frame.
// The JSStringRef is first copied into a C string (JSStringCopyUTF8CString);
// the GUniquePtr holds that copy for the duration of the call.
void TestRunner::addUserScript(JSStringRef source, bool runAtStart, bool allFrames)
{
    GUniquePtr<gchar> scriptText(JSStringCopyUTF8CString(source));
    gchar* rawText = scriptText.get();
    DumpRenderTreeSupportGtk::addUserScript(mainFrame, rawText, runAtStart, allFrames);
}
//Main Program int main() { try { std::cout<<CL_DEVICE_MAX_MEM_ALLOC_SIZE<<std::endl; //const unsigned int size_t k=4; //number of clusters to find size_t n=256*1000000; //1024; //number of data points (MUST BE MULTIPLE OF 256) size_t d=2; //dimensionality of each data point i.e. data[n][d] or n vectors of length d float *data=new float[n*d]; float *centroid=new float[k*d]; //float *dist2=new float[n*k]; //distance squared from each centroid int *clusterI=new int[n]; //index of closest cluster centroid for each point (i.e. which cluster does point belong to?) //make some random data //std::srand((unsigned int)std::time(0)); std::srand(123456); //pick a fixed seed for consistency for (int i=0; i<n*d; i++) { data[i]=(float)std::rand()/(float)RAND_MAX; //std::cout<<"data="<<data[i]<<std::endl; } //pick initial cluster points - use first k points in the data for now - need to check Witten for what the best practice is for (int i=0; i<k*d; i++) { centroid[i]=data[i]; //this is really obtuse - both arrays laid out the same way, so only have to copy k vectors of length d //std::cout<<"centroid="<<centroid[i]<<std::endl; } //OpenCL part //query for platforms cl::vector<cl::Platform> platforms; cl::Platform::get(&platforms); //get a list of devices on this platform cl::vector<cl::Device> devices; platforms[0].getDevices(CL_DEVICE_TYPE_GPU,&devices); //create a context for the devices cl::Context context(devices); //create a command queue for the first device cl::CommandQueue queue = cl::CommandQueue(context,devices[0],CL_QUEUE_PROFILING_ENABLE); //PROFILING ENABLED //create memory buffers cl::Buffer bufferD=cl::Buffer(context,CL_MEM_READ_ONLY,n*d*sizeof(float)); //data buffer cl::Buffer bufferC=cl::Buffer(context,CL_MEM_READ_ONLY,k*d*sizeof(float)); //centroid buffer //cl::Buffer bufferDS=cl::Buffer(context,CL_MEM_WRITE_ONLY,n*k*sizeof(float)); //distance squared from centroids cl::Buffer bufferClusterI=cl::Buffer(context,CL_MEM_WRITE_ONLY,n*sizeof(int)); //index of closest 
cluster centroid //copy the input data to the input buffers using the command queue for the first device queue.enqueueWriteBuffer(bufferD,CL_TRUE,0,n*d*sizeof(float),data); queue.enqueueWriteBuffer(bufferC,CL_TRUE,0,d*k*sizeof(float),centroid); //read the program source std::ifstream sourceFile("kmeans_kernel.cl"); std::string sourceCode(std::istreambuf_iterator<char>(sourceFile),(std::istreambuf_iterator<char>())); cl::Program::Sources source(1,std::make_pair(sourceCode.c_str(),sourceCode.length()+1)); //make program from source code cl::Program program=cl::Program(context,source); //build the program for the devices program.build(devices); //make kernel //cl::Kernel vecadd_kernel(program,"kmeans"); cl::Kernel vecadd_kernel(program,"kmeans2"); //set the kernel arguments vecadd_kernel.setArg(0,bufferD); vecadd_kernel.setArg(1,bufferC); //vecadd_kernel.setArg(2,bufferDS); //kmeans vecadd_kernel.setArg(2,bufferClusterI); //kmeans2 vecadd_kernel.setArg(3,n); vecadd_kernel.setArg(4,d); vecadd_kernel.setArg(5,k); //execute the kernel cl::NDRange global(n); cl::NDRange local(256); cl::Event timing_event; //perf //cl_int err_code; //perf queue.enqueueNDRangeKernel(vecadd_kernel,cl::NullRange,global,local,NULL,&timing_event); queue.finish(); cl_ulong gpu_starttime; cl_ulong gpu_endtime; gpu_starttime = timing_event.getProfilingInfo<CL_PROFILING_COMMAND_START>(); gpu_endtime = timing_event.getProfilingInfo<CL_PROFILING_COMMAND_END>(); double gpu_ms = 1e-6 * (gpu_endtime-gpu_starttime); //not sure where the 1e-6 came from, but AMD used 1e-9 for seconds std::cout<<"GPU kmeans time="<<gpu_ms<<" milliseconds"<<std::endl; //copy the output data back to the host //queue.enqueueReadBuffer(bufferDS,CL_TRUE,0,n*k*sizeof(float),dist2); //kmeans queue.enqueueReadBuffer(bufferClusterI,CL_TRUE,0,n*sizeof(int),clusterI); //kmeans2 //check the output - kmeans //for (int i=0; i<n; i++) { //loop through all data lines // std::cout<<"dist2 i="<<i<<" : "; // for (int c=0; c<k; c++) { //loop 
through all centroids and compare this line to each // float sum=0; // for (int j=0; j<d; j++) { //loop through all values on each data line (dimensionality of data points) // sum+=pow(data[i*d+j]-centroid[c*d+j],2); // } // float gpu_value = dist2[i*k+c]; // float error = gpu_value-sum; //error between this data point and centroid c location // std::cout<<error<<" "; // } // std::cout<<std::endl; //} //Do a CPU version of kmeans to check the GPU data against int *cpu_clusterI=new int[n]; LARGE_INTEGER frequency,counter1,counter2; QueryPerformanceFrequency(&frequency); //returns counts per second QueryPerformanceCounter(&counter1); kmeans_cpu(data,centroid,cpu_clusterI,n,d,k); QueryPerformanceCounter(&counter2); float t_ms=((float)(counter2.LowPart-counter1.LowPart))/(float)(frequency.LowPart)*1000; //milliseconds std::cout<<"CPU kmeans time="<<t_ms<<" milliseconds"<<std::endl; //check output - kmeans2 bool result=true; for (int i=0; i<n; i++) { //loop through all data lines int gpu_index = clusterI[i]; //cluster index as calculated by the gpu int cpu_index = cpu_clusterI[i]; //cluster index as calculated by the cpu //std::cout<<"output i="<<i<<" : "<<gpu_index<<" "<<cpu_index<<std::endl; if (gpu_index!=cpu_index) { std::cout<<"Failed: "<<"output i="<<i<<" : "<<gpu_index<<" "<<cpu_index<<std::endl; result=false; break; } } if (result) std::cout<<"Success"<<std::endl; //and don't forget to clean up here delete [] data; delete [] centroid; //delete [] dist2; delete [] clusterI; delete [] cpu_clusterI; } catch (cl::Error error) { std::cout<<error.what()<<"("<<error.err()<<")"<<std::endl; } return 0; }