static void writeElementsKEY(FILE *fp, GEntity *ge, std::vector<T *> &elements, bool saveAll) { if(elements.size() && (saveAll || ge->physicals.size())) { const char *typ = elements[0]->getStringForKEY(); int pid = partID(ge->dim(), ge->tag()); if(typ) { fprintf(fp, "*ELEMENT%s\n$#SET_ELEMENT=%s%d\n", typ, physicalName(ge->model(), ge->dim(), ge->tag()).c_str(), ge->tag()); for(std::size_t i = 0; i < elements.size(); i++) elements[i]->writeKEY(fp, pid, elements[i]->getNum()); } } }
void RTSpheres::init(const std::tr1::shared_ptr<magnet::thread::TaskQueue>& systemQueue) { RTriangles::init(systemQueue); magnet::GL::Context& context = magnet::GL::Context::getContext(); //Build the sort functor now so we can grab the padding sortFunctor.build(context.getCLCommandQueue(), context.getCLContext()); //We must pad the sort data out to a multiple of sortFunctor.padding() cl_uint padding = std::max(sortFunctor.padding(), size_t(1024)); cl_uint paddedN = ((_N + padding - 1) / padding) * padding; { _spherePositions = cl::Buffer(context.getCLContext(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, sizeof(cl_float4) * _N); _sortKeys = cl::Buffer(context.getCLContext(), CL_MEM_READ_WRITE, sizeof(cl_float) * paddedN); _sortData = cl::Buffer(context.getCLContext(), CL_MEM_READ_WRITE, sizeof(cl_uint) * paddedN); _sphereColors = cl::Buffer(context.getCLContext(), CL_MEM_READ_ONLY, sizeof(cl_uchar4) * paddedN); cl_float4* Pos = (cl_float4*)context.getCLCommandQueue().enqueueMapBuffer(_spherePositions, true, CL_MAP_WRITE, 0, _N * sizeof(cl_float4)); const float density = 0.1; cl_float particleDiam = std::pow(1 * density / _N, float(1.0 / 3.0)); //Generates positions on a simple cubic lattice for (size_t partID(0); partID < _N; ++partID) { Pos[partID].x = ((1.0 * rand()) / RAND_MAX) - 0.5; Pos[partID].y = ((1.0 * rand()) / RAND_MAX) - 0.5; Pos[partID].z = ((1.0 * rand()) / RAND_MAX) - 0.5; Pos[partID].w = particleDiam * 0.5; } //Start copying this data to the graphics card context.getCLCommandQueue().enqueueUnmapMemObject(_spherePositions, (void*)Pos); } {//Setup initial vertex positions size_t nVertice = 0; for (std::vector<SphereDetails>::const_iterator iPtr = _renderDetailLevels.begin(); iPtr != _renderDetailLevels.end(); ++iPtr) nVertice += iPtr->_type.getVertexCount() * iPtr->_nSpheres; std::vector<float> VertexPos(3 * nVertice, 0.0); setGLPositions(VertexPos); } {//Setup inital normal vectors size_t nNormals = 0; for (std::vector<SphereDetails>::const_iterator iPtr = _renderDetailLevels.begin(); iPtr != _renderDetailLevels.end(); ++iPtr) nNormals += iPtr->_type.getVertexCount() * iPtr->_nSpheres; std::vector<float> VertexNormals(3 * nNormals, 0.0); nNormals = 0; for (std::vector<SphereDetails>::const_iterator iPtr = _renderDetailLevels.begin(); iPtr != _renderDetailLevels.end(); ++iPtr) { for (size_t i = 0; i < iPtr->_nSpheres; ++i) for (int j = 0; j < 3 * iPtr->_type.getVertexCount(); ++j) VertexNormals[nNormals + 3 * iPtr->_type.getVertexCount() * i + j] = iPtr->_type.getVertices()[j]; nNormals += 3 * iPtr->_nSpheres * iPtr->_type.getVertexCount(); } setGLNormals(VertexNormals); } {//Setup initial Colors size_t nColors = 0; for (std::vector<SphereDetails>::const_iterator iPtr = _renderDetailLevels.begin(); iPtr != _renderDetailLevels.end(); ++iPtr) nColors += iPtr->_type.getVertexCount() * iPtr->_nSpheres; std::vector<GLubyte> VertexColor(nColors * 4); for (size_t icol = 0; icol < nColors; ++icol) { VertexColor[icol * 4 + 0] = 255; VertexColor[icol * 4 + 1] = 255; VertexColor[icol * 4 + 2] = 255; VertexColor[icol * 4 + 3] = 255; } setGLColors(VertexColor); } {//Setup initial element data size_t nElements = 0; for (std::vector<SphereDetails>::const_iterator iPtr = _renderDetailLevels.begin(); iPtr != _renderDetailLevels.end(); ++iPtr) nElements += 3 * iPtr->_type.getFaceCount() * iPtr->_nSpheres; std::vector<GLuint> ElementData(nElements, 0.0); nElements = 0; size_t nSphereVertices = 0; for (std::vector<SphereDetails>::const_iterator iPtr = _renderDetailLevels.begin(); iPtr != _renderDetailLevels.end(); ++iPtr) { for (size_t i = 0; i < iPtr->_nSpheres; ++i) for (int j = 0; j < 3 * iPtr->_type.getFaceCount(); ++j) ElementData[nElements + 3 * iPtr->_type.getFaceCount() * i + j] = i * iPtr->_type.getVertexCount() + iPtr->_type.getFaces()[j] + nSphereVertices; nSphereVertices += iPtr->_type.getVertexCount() * iPtr->_nSpheres; nElements += 3 * iPtr->_type.getFaceCount() * iPtr->_nSpheres; } setGLElements(ElementData); } std::stringstream fullSource; //It is ideal if the workgroup size divides by 3(coords), 64 //(warp/wave) AND the number of vertices per particle (not so important) //An Icosahedron, of order 0 (12), fits exactly into //3x32x2=192=12x16 _workgroupsize = 2*32*3; _globalsize = _workgroupsize * (std::min((_N +_workgroupsize-1) / _workgroupsize, _workgroupsize*(9216 / _workgroupsize))); fullSource << "#define WORKGROUP_SIZE " << _workgroupsize << "\n"; fullSource << sphereKernelSource; //Need to make the c_str() point to a valid data area, so copy the string std::string finalSource = fullSource.str(); cl::Program::Sources kernelSource; kernelSource.push_back(std::pair<const char*, ::size_t> (finalSource.c_str(), finalSource.size())); _program = cl::Program(context.getCLCommandQueue().getInfo<CL_QUEUE_CONTEXT>(), kernelSource); std::string buildOptions; cl::Device clDevice = context.getCLCommandQueue().getInfo<CL_QUEUE_DEVICE>(); try { _program.build(std::vector<cl::Device>(1, clDevice), buildOptions.c_str()); } catch(cl::Error& err) { std::string msg = _program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(context.getCLDevice()); std::cout << "Compilation failed for device " << context.getCLDevice().getInfo<CL_DEVICE_NAME>() << "\nBuild Log:" << msg; throw; } _renderKernel = cl::Kernel(_program, "SphereRenderKernel"); _sortDataKernel = cl::Kernel(_program, "GenerateData"); _colorKernel = cl::Kernel(_program, "SphereColorKernel"); _pickingKernel = cl::Kernel(_program, "SpherePickingKernel"); _sortDataKernelFunc = _sortDataKernel.bind(context.getCLCommandQueue(), cl::NDRange(paddedN), cl::NDRange(256)); _renderKernelFunc = _renderKernel.bind(context.getCLCommandQueue(), cl::NDRange(_globalsize), cl::NDRange(_workgroupsize)); _pickingKernelFunc = _pickingKernel.bind(context.getCLCommandQueue(), cl::NDRange(_globalsize), cl::NDRange(_workgroupsize)); for (std::vector<SphereDetails>::iterator iPtr = _renderDetailLevels.begin(); iPtr != _renderDetailLevels.end(); ++iPtr) iPtr->setupCLBuffers(context); }