/// Sets up video playback: creates the Theora manager, opens the bunny clip
/// through an in-memory data source, enables auto-restart, and allocates a
/// texture whose sides are the clip dimensions rounded up by nextPow2().
void init()
{
    mgr = new TheoraVideoManager();

    // The clip is created from a memory data source with TH_RGB output;
    // the trailing 4 is passed through unchanged as the third argument.
    clip = mgr->createVideoClip(
        new TheoraMemoryFileDataSource("media/bunny" + resourceExtension),
        TH_RGB,
        4);
    clip->setAutoRestart(true);

    tex_id = createTexture(nextPow2(clip->getWidth()),
                           nextPow2(clip->getHeight()));
}
/// Describes a bitmap whose backing memory is square with a power-of-two
/// side: both memW and memH are the larger of nextPow2(w) and nextPow2(h).
/// The pixel pointer is stored as given; nothing is allocated or copied here.
void ZBitmapDesc::initSP2 ( int _w, int _h, int _d, char *_bits )
{
    clear();

    w = _w;
    h = _h;
    d = _d;
    bits = _bits;

    // Both memory dimensions share the same side length.
    memW = max( nextPow2(w), nextPow2(h) );
    memH = memW;
}
/// Sizes the coordinate and contact buffers for the given number of
/// overlapping pairs, then runs the time-of-impact pass followed by the
/// contact squeeze pass. Does nothing when there are no pairs.
void CudaNarrowphase::computeContacts(CUDABuffer * overlappingPairBuf, unsigned numOverlappingPairs)
{
    if(numOverlappingPairs == 0)
        return;

    m_numPairs = numOverlappingPairs;

    // Buffer sizes are rounded up with nextPow2(): 16 per pair for the
    // coordinates, 48 per pair for each of the two contact buffers.
    m_coord->create(nextPow2(numOverlappingPairs * 16));
    for(int side = 0; side < 2; ++side)
        m_contact[side]->create(nextPow2(numOverlappingPairs * 48));

    void * pairsOnDevice = overlappingPairBuf->bufferOnDevice();
    computeTimeOfImpact(pairsOnDevice, numOverlappingPairs);
    squeezeContacts(pairsOnDevice, numOverlappingPairs);
}
void init() { mgr=new TheoraVideoManager(); iface_factory=new OpenAL_AudioInterfaceFactory(); mgr->setAudioInterfaceFactory(iface_factory); clip=mgr->createVideoClip("media/bunny.ogg"); // use this if you want to preload the file into ram and stream from there // clip=mgr->createVideoClip(new TheoraMemoryFileDataSource("../media/short.ogg"),TH_RGB); clip->setAutoRestart(1); tex_id=createTexture(nextPow2(clip->getWidth()),nextPow2(clip->getHeight())); }
/// Zero-initializes `localSplits` and launches "ComputeLocalSplitsKernel"
/// over nextPow2(size) work items.
///
/// @param internalBRTNodes  buffer bound as kernel argument 1
/// @param localSplits       output buffer (obtained from CLFW under the name
///                          "localSplits"); bound as kernel argument 0
/// @param size              element count bound as kernel argument 2
/// @return bitwise OR of every OpenCL status produced along the way
///         (CL_SUCCESS only if every step succeeded)
cl_int ComputeLocalSplits_p(cl::Buffer &internalBRTNodes, cl::Buffer &localSplits, cl_int size)
{
    startBenchmark("ComputeLocalSplits_p");

    cl_int globalSize = nextPow2(size);
    cl::Kernel &kernel = CLFW::Kernels["ComputeLocalSplitsKernel"];
    cl::CommandQueue &queue = CLFW::DefaultQueue;

    // Initialized so that a lookup which does not set the flag is treated as
    // a fresh buffer (and therefore zero-filled) instead of reading garbage.
    bool isOld = false;
    cl::Buffer zeroBuffer;

    cl_int error = CLFW::get(localSplits, "localSplits", sizeof(cl_int) * globalSize);
    error |= CLFW::get(zeroBuffer, "zeroBuffer", sizeof(cl_int) * globalSize, isOld);

    // Fill any new zero buffers with zero. Then initialize localSplits with zero.
    if (!isOld) {
        cl_int zero = 0;
        error |= queue.enqueueFillBuffer<cl_int>(zeroBuffer, { zero }, 0, sizeof(cl_int) * globalSize);
    }
    error |= queue.enqueueCopyBuffer(zeroBuffer, localSplits, 0, 0, sizeof(cl_int) * globalSize);

    error |= kernel.setArg(0, localSplits);
    error |= kernel.setArg(1, internalBRTNodes);
    error |= kernel.setArg(2, size);

    // Accumulate (|=) rather than assign, so earlier failures are not
    // discarded when the launch itself happens to succeed.
    error |= queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(globalSize), cl::NullRange);

    stopBenchmark();
    return error;
}
/// Initializes the parallel sum object to sum num_element entries from a cl_mem buffer. /// allocate_temp_buffers: if true will automatically allocate/deallocate buffers. Otherwise you need to do this elsewhere void CRoutine_Sum::Init(int n) { int err = CL_SUCCESS; mInputSize = n; mBufferSize = n; // The NVidia SDK kernel on which this routine is based is designed only for power-of-two // sized buffers. Because of this, we'll create internal buffers that round up to the // next highest power of two. if(!isPow2(mBufferSize)) mBufferSize = nextPow2(mBufferSize); // TODO: Workaround for issue 32 // https://github.com/bkloppenborg/liboi/issues/32 if(mBufferSize < 128) mBufferSize = 128; BuildKernels(); if(mTempBuffer1 == NULL) { mTempBuffer1 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &err); mTempBuffer2 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &err); COpenCL::CheckOCLError("Could not create parallel sum temporary buffer.", err); } }
/// Sorts `input` with one predicate / scan / compact pass per bit, for bit
/// indices 0 .. mbits-1. After each pass the compacted result becomes the
/// new `input` (the buffer handles are swapped).
///
/// @param input  buffer to sort; on return refers to the buffer holding the
///               result of the last pass
/// @param size   element count; work is sized to nextPow2(size)
/// @param mbits  number of bits to process; values <= 0 perform no passes
/// @return bitwise OR of all OpenCL statuses; returns early (before the
///         benchmark starts) if the scratch buffers cannot be obtained
cl_int RadixSortBigUnsigned(cl::Buffer &input, cl_int size, cl_int mbits)
{
    cl_int error = 0;
    const size_t globalSize = nextPow2(size);

    cl::Buffer predicate, address, bigUnsignedTemp, temp;
    error |= CLFW::get(address, "address", sizeof(cl_int)*(globalSize));
    error |= CLFW::get(bigUnsignedTemp, "bigUnsignedTemp", sizeof(BigUnsigned)*globalSize);
    if (error != CL_SUCCESS)
        return error;

    // Clamp before converting: comparing an unsigned counter directly against
    // a negative cl_int would turn it into a huge bound.
    const unsigned int bitCount = (mbits > 0) ? static_cast<unsigned int>(mbits) : 0u;

    // For each bit
    startBenchmark("RadixSortBigUnsigned");
    for (unsigned int index = 0; index < bitCount; index++) {
        // Predicate the 0's and 1's
        error |= BitPredicate(input, predicate, index, 0, globalSize);

        // Scan the predication buffers.
        error |= StreamScan_p(predicate, address, globalSize);

        // Compacting
        error |= DoubleCompact(input, bigUnsignedTemp, predicate, address, globalSize);

        // Swap result with input.
        temp = input;
        input = bigUnsignedTemp;
        bigUnsignedTemp = temp;
    }
    stopBenchmark();

    return error;
}
/// Initializes the parallel sum object to sum num_element entries from a cl_mem buffer. /// allocate_temp_buffers: if true will automatically allocate/deallocate buffers. Otherwise you need to do this elsewhere void CRoutine_Sum_NVidia::Init(int n) { int status = CL_SUCCESS; mInputSize = n; mBufferSize = n; // The NVidia SDK kernel on which this routine is based is designed only for power-of-two // sized buffers. Because of this, we'll create internal buffers that round up to the // next highest power of two. if(!isPow2(mBufferSize)) mBufferSize = nextPow2(mBufferSize); // TODO: Workaround for issue 32 in which kernel fails to compute sums for N = [33 - 64] // https://github.com/bkloppenborg/liboi/issues/32 if(mBufferSize < 128) mBufferSize = 128; BuildKernels(); if(mTempBuffer1 == NULL) { mTempBuffer1 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed."); } if(mTempBuffer2 == NULL) { mTempBuffer2 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed."); } }
/// Chooses the launch configuration for a reduction kernel.
///
/// Kernels 0-2 process one element per thread, kernels >= 3 process two.
/// `threads` is nextPow2() of the per-thread-adjusted element count when that
/// is below the limit, otherwise `maxThreads`; `blocks` is the ceiling of
/// n / (threads * elementsPerThread). For kernel 6 only, `blocks` is
/// additionally capped at `maxBlocks`.
void CRoutine_Sum::getNumBlocksAndThreads(int whichKernel, int n, int maxBlocks, int maxThreads, int &blocks, int &threads)
{
    const int elementsPerThread = (whichKernel < 3) ? 1 : 2;

    if (n < maxThreads * elementsPerThread)
    {
        // Small input: round the needed thread count up to a power of two.
        threads = nextPow2((n + elementsPerThread - 1) / elementsPerThread);
    }
    else
    {
        threads = maxThreads;
    }

    const int elementsPerBlock = threads * elementsPerThread;
    blocks = (n + (elementsPerBlock - 1)) / elementsPerBlock;

    if (whichKernel == 6)
    {
        blocks = min(maxBlocks, blocks);
    }
}
void init() { printf("---\nUSAGE: press buttons 1,2,3 or 4 to change the number of worker threads\n---\n"); std::string files[] = {"media/bunny" + resourceExtension, "media/konqi" + resourceExtension, "media/room" + resourceExtension, "media/titan" + resourceExtension}; mgr=new TheoraVideoManager(4); mgr->setDefaultNumPrecachedFrames(16); for (int i=0;i<4;i++) { clips[i]=mgr->createVideoClip(new TheoraMemoryFileDataSource(files[i]), outputMode); clips[i]->setAutoRestart(1); textures[i]=createTexture(nextPow2(clips[i]->getWidth()),nextPow2(clips[i]->getHeight()), textureFormat); } }
/// Describes a bitmap whose memory width is rounded up by nextPow2() while
/// the memory height stays equal to the logical height. The pixel pointer is
/// stored as given; nothing is allocated or copied here.
void ZBitmapDesc::initP2W( int _w, int _h, int _d, char *_bits )
{
    clear();

    // Logical dimensions and depth.
    w = _w;
    h = _h;
    d = _d;

    // Only the width is padded.
    memW = nextPow2(w);
    memH = h;

    bits = _bits;
}
/// Obtains the "pointsBuffer" device buffer (sized for nextPow2(points.size())
/// elements) and uploads `points` into it with a blocking write.
///
/// The upload length is computed from sizeof(intn), the element type of the
/// vector and the type used to size the buffer, so the number of bytes read
/// from `points.data()` always matches what the vector actually holds.
///
/// @param points        host-side points to upload
/// @param pointsBuffer  receives the device buffer
/// @return bitwise OR of the OpenCL statuses
cl_int UploadPoints(const vector<intn> &points, cl::Buffer &pointsBuffer)
{
    startBenchmark("Uploading points");

    cl_int error = 0;
    cl_int roundSize = nextPow2(points.size());

    error |= CLFW::get(pointsBuffer, "pointsBuffer", sizeof(intn) * roundSize);
    error |= CLFW::DefaultQueue.enqueueWriteBuffer(pointsBuffer, CL_TRUE, 0, sizeof(intn) * points.size(), points.data());

    stopBenchmark();
    return error;
}
/// Chooses the launch configuration for a reduction in which every thread
/// handles two elements.
///
/// `threads` is nextPow2((n + 1) / 2) when n is below twice `maxThreads`,
/// otherwise `maxThreads`; `blocks` is the ceiling of n / (2 * threads).
/// For kernel 6 only, `blocks` is capped at `maxBlocks`.
void CRoutine_Sum_NVidia::getNumBlocksAndThreads(int whichKernel, int n, int maxBlocks, int maxThreads, int &blocks, int &threads)
{
    if (n < maxThreads * 2)
    {
        threads = nextPow2((n + 1) / 2);
    }
    else
    {
        threads = maxThreads;
    }

    const int elementsPerBlock = threads * 2;
    blocks = (n + (elementsPerBlock - 1)) / elementsPerBlock;

    if (whichKernel == 6)
    {
        blocks = min(maxBlocks, blocks);
    }
}
/** * Evaluate a row of the gram matrix * @param d_xtraindata device pointer to the training set * @param d_dottraindata device pointer to the array containing the dot product of the row with itself * @param d_kernelrow device pointer that will store the array extracted from d_xtraindata. * @param d_kerneldot device pointer that will store the result of the kernel evaluation * @param d_kdata device pointer to the matrix that stores the cached values * @param gid index that points to the point in d_xtraindata to be calculated * @param cacheid index that points to the location in cache that will keep the results * @param ntraining number of training samples in the training set * @param nfeatures number of features in the training samples * @param beta value of the parameter of the RBF kernel * @param a if using polynomial or sigmoid kernel the value of a x_i x_j * @param b if using polynomial or sigmoid kernel the value of b * @param d if using polynomial kernel * @param kernelcode code that indicates the kernel type to run */ void kerneleval ( float* d_xtraindata, float* d_dottraindata, float* d_kernelrow, float* d_kerneldot, float* d_kdata, int gid, int cacheid, int ntraining, int nfeatures, float beta, float a, float b, float d, int kernelcode) { int numThreads = (nfeatures < MAXTHREADS*2) ? 
nextPow2((nfeatures + 1)/ 2) : MAXTHREADS; int numBlocks = (nfeatures + (numThreads * 2 - 1)) / (numThreads * 2); int numBlocksRed = min(MAXBLOCKS, numBlocks); dim3 dimBlockKernelRow(numThreads, 1, 1); dim3 dimGridKernelRow(numBlocksRed, 1, 1); int smemSize = 0; bool isNtrainingPow2=isPow2(nfeatures); if(isNtrainingPow2) { switch (numThreads) { case 512: ExtractKernelRow <512,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 256: ExtractKernelRow <256,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 128: ExtractKernelRow <128,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 64: ExtractKernelRow <64,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 32: ExtractKernelRow <32,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 16: ExtractKernelRow <16,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 8: ExtractKernelRow <8,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 4: ExtractKernelRow <4,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 2: ExtractKernelRow <2,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 1: ExtractKernelRow <1,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; } } else { switch (numThreads)
//////////////////////////////////////////////////////////////////////////////// // Compute the number of threads and blocks to use for the given reduction kernel // For the kernels >= 3, we set threads / block to the minimum of maxThreads and // n/2. For kernels < 3, we set to the minimum of maxThreads and n. For kernel // 6, we observe the maximum specified number of blocks, because each thread in // that kernel can process a variable number of elements. //////////////////////////////////////////////////////////////////////////////// void getNumBlocksAndThreads(int whichKernel, int n, int maxBlocks, int maxThreads, int &blocks, int &threads) { //get device capability, to avoid block/grid size exceed the upper bound cudaDeviceProp prop; int device; checkCudaErrors(cudaGetDevice(&device)); checkCudaErrors(cudaGetDeviceProperties(&prop, device)); if (whichKernel < 3) { threads = (n < maxThreads) ? nextPow2(n) : maxThreads; blocks = (n + threads - 1) / threads; } else { threads = (n < maxThreads*2) ? nextPow2((n + 1)/ 2) : maxThreads; blocks = (n + (threads * 2 - 1)) / (threads * 2); } if ((float)threads*blocks > (float)prop.maxGridSize[0] * prop.maxThreadsPerBlock) { printf("n is too large, please choose a smaller number!\n"); } if (blocks > prop.maxGridSize[0]) { printf("Grid size <%d> exceeds the device capability <%d>, set block size as %d (original %d)\n", blocks, prop.maxGridSize[0], threads*2, threads); blocks /= 2; threads *= 2; } if (whichKernel == 6) { blocks = MIN(maxBlocks, blocks); } }
/// Serial point conversion: fills result[0 .. nextPow2(size)).
/// Entries below `size` are produced by xyz2z() from the matching point;
/// the remaining padding entries are set with initBlkBU(..., 0).
/// Always returns 0.
cl_int PointsToMorton_s(cl_int size, cl_int bits, cl_int2* points, BigUnsigned* result)
{
    startBenchmark("PointsToMorton_s");

    const int paddedCount = nextPow2(size);
    int slot = 0;

    // Real points first.
    while (slot < paddedCount && slot < size)
    {
        xyz2z(&result[slot], points[slot], bits);
        ++slot;
    }

    // Then the padding up to the rounded size.
    while (slot < paddedCount)
    {
        initBlkBU(&result[slot], 0);
        ++slot;
    }

    stopBenchmark();
    return 0;
}
/// Parallel point conversion: obtains the "zpoints" buffer sized for
/// nextPow2(size) BigUnsigned entries and launches "PointsToMortonKernel"
/// over that many work items. Only the kernel launch is benchmarked.
/// Returns the bitwise OR of all OpenCL statuses.
cl_int PointsToMorton_p(cl::Buffer &points, cl::Buffer &zpoints, cl_int size, cl_int bits)
{
    cl_int status = 0;
    size_t workItems = nextPow2(size);

    status |= CLFW::get(zpoints, "zpoints", workItems * sizeof(BigUnsigned));

    cl::Kernel kernel = CLFW::Kernels["PointsToMortonKernel"];
    status |= kernel.setArg(0, zpoints);
    status |= kernel.setArg(1, points);
    status |= kernel.setArg(2, size);
    status |= kernel.setArg(3, bits);

    startBenchmark("PointsToMorton_p");
    status |= CLFW::DefaultQueue.enqueueNDRangeKernel(
        kernel,
        cl::NullRange,
        cl::NDRange(nextPow2(size)),
        cl::NullRange);
    stopBenchmark();

    return status;
}
void draw() { glBindTexture(GL_TEXTURE_2D,tex_id); if (!needsSeek) { TheoraVideoFrame* f=clip->getNextFrame(); if (f) { glTexSubImage2D(GL_TEXTURE_2D,0,0,0,clip->getWidth(),f->getHeight(),GL_RGB,GL_UNSIGNED_BYTE,f->getBuffer()); needsSeek = 1; if (f->getFrameNumber() != cFrame) nWrongSeeks++; cFrame++; if (cFrame >= clip->getNumFrames()) cFrame = 0; printf("Displayed frame %d\n", f->getFrameNumber()); clip->popFrame(); } } float w=clip->getWidth(),h=clip->getHeight(); float tw=nextPow2(w),th=nextPow2(h); glEnable(GL_TEXTURE_2D); if (shader_on) enable_shader(); drawTexturedQuad(tex_id,0,0,800,600,w/tw,h/th); if (shader_on) disable_shader(); glDisable(GL_TEXTURE_2D); drawColoredQuad(0,570,800,30,0,0,0,1); drawWiredQuad(0,570,800,30,1,1,1,1); float x=clip->getTimePosition()/clip->getDuration(); drawColoredQuad(3,573,794*x,24,1,1,1,1); }
/**
 * @brief LauraLogger::LauraLogger
 *        Stores the logging configuration and sets up the six log names.
 * @param pathToDir
 *        Destination path for the saved logs
 * @param flag
 *        Types of logs
 * @param frameSize
 *        Size of an audio frame; the buffer size is nextPow2(frameSize)
 * @param samplingRate
 *        Sampling frequency
 */
LauraLogger::LauraLogger(std::string pathToDir, unsigned int flag, unsigned int frameSize, unsigned int samplingRate)
{
    // The parameter shadows the member, so `this->` is required here.
    this->pathToDir = pathToDir;
    flags = flag;
    BUFF_SIZE = nextPow2(frameSize);
    firstRunning = true;
    SAMPLING_RATE = samplingRate;

    // One name per log kind, in fixed order.
    fileNames = new std::string[6];
    fileNames[0] = "TDOA";
    fileNames[1] = "ITD";
    fileNames[2] = "ILD";
    fileNames[3] = "CORRELATION";
    fileNames[4] = "SPECTRUM";
    fileNames[5] = "STREAM";
}
/// Builds an octree from the binary radix tree nodes and copies it to the host.
///
/// Steps: compute local splits, scan them, read the required octree size back
/// from the scanned buffer (element index size - 2), obtain an "octree"
/// buffer rounded up with nextPow2(), run InitOctree() and then
/// "BRT2OctreeKernel", and finally read the nodes into `octree_vec`.
///
/// @param internalBRTNodes  device buffer of radix tree nodes
/// @param octree_vec        resized to the octree size and filled on success
/// @param size              element count passed to the kernels
/// @return bitwise OR of all OpenCL statuses, including the one from
///         InitOctree(). If the octree size cannot be read back the function
///         returns early and leaves `octree_vec` untouched.
cl_int BinaryRadixToOctree_p(cl::Buffer &internalBRTNodes, vector<OctNode> &octree_vec, cl_int size)
{
    startBenchmark("BinaryRadixToOctree_p");

    int globalSize = nextPow2(size);
    cl::Kernel &kernel = CLFW::Kernels["BRT2OctreeKernel"];
    cl::CommandQueue &queue = CLFW::DefaultQueue;
    cl::Buffer localSplits, scannedSplits, octree;

    cl_int error = CLFW::get(scannedSplits, "scannedSplits", sizeof(cl_int) * globalSize);
    error |= ComputeLocalSplits_p(internalBRTNodes, localSplits, size);
    error |= StreamScan_p(localSplits, scannedSplits, globalSize);

    // Read in the required octree size.
    cl_int octreeSize = 0;
    error |= CLFW::DefaultQueue.enqueueReadBuffer(scannedSplits, CL_TRUE, sizeof(int)*(size - 2), sizeof(int), &octreeSize);

    // Everything below sizes buffers from octreeSize, so stop here if any
    // step so far failed and the value cannot be trusted.
    if (error != CL_SUCCESS) {
        stopBenchmark();
        return error;
    }

    cl_int roundOctreeSize = nextPow2(octreeSize);

    // Create an octree buffer.
    error |= CLFW::get(octree, "octree", sizeof(OctNode) * roundOctreeSize);

    // Use the scanned splits & brt to create the octree. The status is
    // accumulated so an initialization failure is reported to the caller.
    error |= InitOctree(internalBRTNodes, octree, localSplits, scannedSplits, size, octreeSize);

    error |= kernel.setArg(0, internalBRTNodes);
    error |= kernel.setArg(1, octree);
    error |= kernel.setArg(2, localSplits);
    error |= kernel.setArg(3, scannedSplits);
    error |= kernel.setArg(4, size);
    error |= queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(globalSize), cl::NullRange);

    octree_vec.resize(octreeSize);
    error |= queue.enqueueReadBuffer(octree, CL_TRUE, 0, sizeof(OctNode)*octreeSize, octree_vec.data());

    stopBenchmark();
    return error;
}
/// Obtains the "internalBRTNodes" buffer (nextPow2(size) BrtNode entries)
/// and launches "BuildBinaryRadixTreeKernel" over that many work items.
/// Kernel arguments, in order: output nodes, zpoints, mbits, size.
/// Returns the bitwise OR of all OpenCL statuses.
cl_int BuildBinaryRadixTree_p(cl::Buffer &zpoints, cl::Buffer &internalBRTNodes, cl_int size, cl_int mbits)
{
    startBenchmark("BuildBinaryRadixTree_p");

    cl::Kernel &brtKernel = CLFW::Kernels["BuildBinaryRadixTreeKernel"];
    cl::CommandQueue &defaultQueue = CLFW::DefaultQueue;
    cl_int workItems = nextPow2(size);

    cl_int status = CLFW::get(internalBRTNodes, "internalBRTNodes", sizeof(BrtNode) * (workItems));

    status |= brtKernel.setArg(0, internalBRTNodes);
    status |= brtKernel.setArg(1, zpoints);
    status |= brtKernel.setArg(2, mbits);
    status |= brtKernel.setArg(3, size);

    status |= defaultQueue.enqueueNDRangeKernel(
        brtKernel,
        cl::NullRange,
        cl::NDRange(workItems),
        cl::NullRange);

    stopBenchmark();
    return status;
}
/// Launches "BRT2OctreeKernel_init" over nextPow2(octreeSize) work items.
/// Kernel arguments, in order: radix tree nodes, octree, local splits,
/// scanned splits, size. Returns the bitwise OR of all OpenCL statuses.
cl_int InitOctree(cl::Buffer &internalBRTNodes, cl::Buffer &octree, cl::Buffer &localSplits, cl::Buffer &scannedSplits, cl_int size, cl_int octreeSize)
{
    startBenchmark("InitOctree");

    cl_int workItems = nextPow2(octreeSize);
    cl::Kernel &initKernel = CLFW::Kernels["BRT2OctreeKernel_init"];
    cl::CommandQueue &defaultQueue = CLFW::DefaultQueue;

    cl_int status = 0;
    status |= initKernel.setArg(0, internalBRTNodes);
    status |= initKernel.setArg(1, octree);
    status |= initKernel.setArg(2, localSplits);
    status |= initKernel.setArg(3, scannedSplits);
    status |= initKernel.setArg(4, size);

    status |= defaultQueue.enqueueNDRangeKernel(
        initKernel,
        cl::NullRange,
        cl::NDRange(workItems),
        cl::NullRange);

    stopBenchmark();
    return status;
}
unsigned char* RecastTileBuilder::build(float x, float y, const AABB& lastTileBounds, int& dataSize) { int gw = 0, gh = 0; float bmin[3]; float bmax[3]; bmin[0] = bounds.getXMin(); bmin[1] = bounds.getYMin(); bmin[2] = bounds.getZMin(); bmax[0] = bounds.getXMax(); bmax[1] = bounds.getYMax(); bmax[2] = bounds.getZMax(); rcCalcGridSize(bmin, bmax, settings.m_cellSize, &gw, &gh); const int ts = (int) settings.m_tileSize; const int tw = (gw + ts - 1) / ts; const int th = (gh + ts - 1) / ts; // Max tiles and max polys affect how the tile IDs are caculated. // There are 22 bits available for identifying a tile and a polygon. int tileBits = rcMin((int) ilog2(nextPow2(tw * th)), 14); int polyBits = 22 - tileBits; m_maxTiles = 1<<tileBits; m_maxPolysPerTile = 1<<polyBits; dtNavMeshParams params; params.orig[0] = bounds.getXMin(); params.orig[1] = bounds.getYMin(); params.orig[2] = bounds.getZMin(); //rcVcopy(params.orig, m_geom->getNavMeshBoundsMin()); params.tileWidth = settings.m_tileSize * settings.m_cellSize; params.tileHeight = settings.m_tileSize * settings.m_cellSize; params.maxTiles = m_maxTiles; params.maxPolys = m_maxPolysPerTile; dtStatus status; this->lastTileBounds = lastTileBounds; return buildTileMesh(x, y, dataSize); }
/// Runs a predicate / scan / compact sequence over `input` and replaces it
/// with the compacted "result" buffer. The last element of the scanned
/// address buffer is read back into `size`.
///
/// @param input  buffer to process; refers to the "result" buffer on return
/// @param size   in: element count (work is sized to nextPow2(size));
///               out: value read from the final address entry
/// @return bitwise OR of all OpenCL statuses
cl_int UniqueSorted(cl::Buffer &input, cl_int &size)
{
    startBenchmark("UniqueSorted");

    int paddedCount = nextPow2(size);
    cl::Buffer predicate, address, intermediate, result;

    cl_int status = 0;
    status = CLFW::get(predicate, "predicate", sizeof(cl_int) * (paddedCount));
    status |= CLFW::get(address, "address", sizeof(cl_int) * (paddedCount));
    status |= CLFW::get(result, "result", sizeof(BigUnsigned) * paddedCount);

    status |= UniquePredicate(input, predicate, paddedCount);
    status |= StreamScan_p(predicate, address, paddedCount);
    status |= SingleCompact(input, result, predicate, address, paddedCount);

    input = result;

    // Blocking read of the final cl_int in the address buffer.
    status |= CLFW::DefaultQueue.enqueueReadBuffer(
        address,
        CL_TRUE,
        (sizeof(cl_int) * paddedCount - (sizeof(cl_int))),
        sizeof(cl_int),
        &size);

    stopBenchmark();
    return status;
}
/// Fills the color layer of `img` with layered noise.
///
/// A stack of random-noise images is generated, starting at side length
/// 2^levels_.x and doubling per level, for at most (levels_.y - levels_.x + 1)
/// levels and never beyond nextPow2 of the larger output dimension. The
/// amplitude of each level is the previous one multiplied by persistence_.
/// Every output pixel is the sum of all levels sampled at its normalized
/// position, remapped with (v + 1) / 2 and clamped to [0, 1]. Rows are
/// written flipped vertically.
void NoiseProcessor::perlinNoise(Image *img)
{
    auto size = nextPow2(std::max(size_.get().x, size_.get().y));

    std::vector<std::unique_ptr<Image>> levels;
    std::vector<TemplateImageSampler<float,float>> samplers;

    auto currentSize = std::pow(2, levels_.get().x);
    auto iterations = levels_.get().y - levels_.get().x + 1;
    float currentPersistance = 1;

    // One noise image plus sampler per level; the size doubles each step.
    for (; currentSize <= size && iterations--; currentSize *= 2)
    {
        size2_t imgsize{static_cast<size_t>(currentSize)};
        auto levelImage = util::make_unique<Image>(imgsize, DataFLOAT32::get());
        randomNoise(levelImage.get(), -currentPersistance, currentPersistance);

        samplers.push_back(TemplateImageSampler<float,float>(levelImage.get()));
        levels.push_back(std::move(levelImage));

        currentPersistance *= persistence_.get();
    }

    auto data = static_cast<float *>(
        img->getColorLayer()->getEditableRepresentation<LayerRAM>()->getData());

    // Scale from pixel index to sampling coordinate.
    float repri = 1.0 / size;
    util::IndexMapper2D index(size_.get());

#pragma omp parallel for
    for (long long y = 0; y < size_.get().y; y++)
    {
        for (long long x = 0; x < size_.get().x; x++)
        {
            float X = x * repri;
            float Y = y * repri;

            float v = 0;
            for (auto &sampler : samplers)
            {
                v += sampler.sample(X, Y);
            }

            v = (v + 1.0f) / 2.0f;
            data[index(x, size_.get().y - 1 - y)] = glm::clamp(v, 0.0f, 1.0f);
        }
    }
}
void Sample_TileMesh::handleSettings() { Sample::handleCommonSettings(); if (imguiCheck("Keep Itermediate Results", m_keepInterResults)) m_keepInterResults = !m_keepInterResults; if (imguiCheck("Build All Tiles", m_buildAll)) m_buildAll = !m_buildAll; imguiLabel("Tiling"); imguiSlider("TileSize", &m_tileSize, 16.0f, 1024.0f, 16.0f); if (m_geom) { char text[64]; int gw = 0, gh = 0; const float* bmin = m_geom->getNavMeshBoundsMin(); const float* bmax = m_geom->getNavMeshBoundsMax(); rcCalcGridSize(bmin, bmax, m_cellSize, &gw, &gh); const int ts = (int)m_tileSize; const int tw = (gw + ts-1) / ts; const int th = (gh + ts-1) / ts; snprintf(text, 64, "Tiles %d x %d", tw, th); imguiValue(text); // Max tiles and max polys affect how the tile IDs are caculated. // There are 22 bits available for identifying a tile and a polygon. int tileBits = rcMin((int)ilog2(nextPow2(tw*th)), 14); if (tileBits > 14) tileBits = 14; int polyBits = 22 - tileBits; m_maxTiles = 1 << tileBits; m_maxPolysPerTile = 1 << polyBits; snprintf(text, 64, "Max Tiles %d", m_maxTiles); imguiValue(text); snprintf(text, 64, "Max Polys %d", m_maxPolysPerTile); imguiValue(text); } else { m_maxTiles = 0; m_maxPolysPerTile = 0; } imguiSeparator(); imguiIndent(); imguiIndent(); if (imguiButton("Save")) { Sample::saveAll("all_tiles_navmesh.bin", m_navMesh); } if (imguiButton("Load")) { dtFreeNavMesh(m_navMesh); m_navMesh = Sample::loadAll("all_tiles_navmesh.bin"); m_navQuery->init(m_navMesh, 2048); } imguiUnindent(); imguiUnindent(); char msg[64]; snprintf(msg, 64, "Build Time: %.1fms", m_totalBuildTimeMs); imguiLabel(msg); imguiSeparator(); imguiSeparator(); }
// Internal function - Called to load up a texture map - file is known to exist Texture *textureFromBitmap(bitmap *loadbm, Texture *tex) { bitmap *swizzleBm, *scaleBm, *resizeCanvasSrc; bool mustSwizzle = false; bool mustScale = false; // Step 1: Check for a need to swizzle (if the video card doesn't support this texture mode) uintf dataType = loadbm->flags & (bitmap_DataTypeMask | bitmap_DataInfoMask); // ### This code is not yet complete, must leave this block with 'swizzleBM' pointing to swizzled bitmap data // If Video card only handles 'Power-of-2' texture dimensions bool resizeCanvas=false; // if (GLESWarnings) //!(videoFeatures & videodriver_nonP2Tex)) { uintf newCanvasWidth = loadbm->width; uintf newCanvasHeight = loadbm->height; if (!isPow2(loadbm->width)) { resizeCanvas=true; newCanvasWidth=nextPow2(loadbm->width); } if (!isPow2(loadbm->height)) { resizeCanvas=true; newCanvasHeight=nextPow2(loadbm->height); } if (resizeCanvas) { resizeCanvasSrc = loadbm; loadbm = newbitmap("resizeCanvasP2Tex",newCanvasWidth,newCanvasHeight,bitmap_ARGB32); uintf x,y; uint32 *src32 = (uint32 *)resizeCanvasSrc->pixel; uint32 *dst32 = (uint32 *)loadbm->pixel; for (y=0; y<resizeCanvasSrc->height; y++) { uint32 *src = &src32[y*(resizeCanvasSrc->width)]; uint32 *dst = &dst32[y*newCanvasWidth]; for (x=0; x<resizeCanvasSrc->width; x++) *dst++ = *src++; for (;x<newCanvasWidth; x++) *dst++ = 0; } for (;y<newCanvasHeight; y++) { uint32 *dst = &dst32[y*newCanvasWidth]; for (x=0; x<newCanvasWidth; x++) *dst++=0; } } /* // Work out X scale uintf size = 1; while (size<=maxtexwidth) { if (newx<=size) break; size <<=1; } if (size>maxtexwidth) size = maxtexwidth; newx = size; // Work out Y scale size = 1; while (size<=maxtexheight) { if (newy<=size) break; size <<=1; } if (size>maxtexheight) size = maxtexheight; newy = size; */ } // Step 2: Check if we need to resize - this may change swizzle mode uintf newx = loadbm->width; uintf newy = loadbm->height; if (newx>maxtexwidth) newx = 
maxtexwidth; if (newy>maxtexheight) newy = maxtexheight; if (newx!=loadbm->width || newy!=loadbm->height) { dataType = bitmap_DataTypeRGB | bitmap_RGB_32bit; mustSwizzle = true; mustScale = true; } if (mustSwizzle) { dataType |= loadbm->flags & bitmap_AlphaMask; swizzleBm = SwizzleBitmap(loadbm, dataType); } else swizzleBm = loadbm; if (mustScale) { // Bitmap needs to be resized before hardware will accept it scaleBm = scalebitmap(swizzleBm,newx,newy); } else scaleBm = swizzleBm; // If we don't have a texture provided, create a new one if (!tex) tex = newTexture(NULL, 0, 0); downloadbitmaptex(tex, scaleBm, 0); estimatedtexmemused += tex->texmemused; if (mustScale) deleteBitmap(scaleBm); if (mustSwizzle) deleteBitmap(swizzleBm); if (resizeCanvas) { deleteBitmap(loadbm); loadbm = resizeCanvasSrc; tex->flags |= texture_canvasSize; tex->UVscale.x = (float)loadbm->width / (float)tex->width; tex->UVscale.y = (float)loadbm->height/ (float)tex->height; } return tex; }
/// Builds a tiled navigation mesh for `geom`.
///
/// Fills an rcConfig with fixed agent/cell parameters, derives the tile grid
/// and the tile/poly bit split from the mesh bounds, initializes a freshly
/// allocated dtNavMesh, then starts up to 8 QuadrantTiler worker threads plus
/// one TileAdder thread and waits for them.
///
/// @param geom          input geometry; must be non-null and have a mesh
/// @param wcellContext  build context used for logging and timing; a copy is
///                      handed to every worker
/// @param numCores      worker count is min(2 * numCores, 8)
/// @return the navmesh, or 0 on failure (an error is logged and
///         CleanupAfterBuild() is called)
dtNavMesh* buildMesh(InputGeom* geom, WCellBuildContext* wcellContext, int numCores)
{
    if (!geom || !geom->getMesh())
    {
        CleanupAfterBuild();
        wcellContext->log(RC_LOG_ERROR, "buildTiledNavigation: No vertices and triangles.");
        return 0;
    }

    dtNavMesh* mesh = dtAllocNavMesh();
    if (!mesh)
    {
        CleanupAfterBuild();
        wcellContext->log(RC_LOG_ERROR, "buildTiledNavigation: Could not allocate navmesh.");
        return 0;
    }

    // setup some default parameters
    rcConfig cfg;
    memset(&cfg, 0, sizeof(rcConfig));
    const float agentHeight = 2.1f;             // most character toons are about this tall
    const float agentRadius = 0.6f;             // most character toons are about this big around
    const float agentClimb = 1.0f;              // character toons can step up this far. Seems ridiculously high ...
    const float tileSize = 1600.0f/3.0f/16.0f;  // The size of one chunk

    cfg.cs = 0.1f;                                        // cell size is a sort of resolution -> the bigger the faster
    cfg.ch = 0.05f;                                       // cell height -> distance from mesh to ground, if too low, recast will not build essential parts of the mesh for some reason
    cfg.walkableSlopeAngle = 50.0f;                       // max climbable slope, bigger values won't make much of a change
    cfg.walkableHeight = (int)ceilf(agentHeight/cfg.ch);  // minimum space to ceiling
    cfg.walkableClimb = (int)floorf(agentClimb/cfg.ch);   // how high the agent can climb in one step
    cfg.walkableRadius = (int)ceilf(agentRadius/cfg.cs);  // minimum distance to objects
    cfg.tileSize = (int)(tileSize/cfg.cs + 0.5f);
    cfg.maxEdgeLen = cfg.tileSize/2;
    cfg.borderSize = cfg.walkableRadius + 3;
    cfg.width = cfg.tileSize + cfg.borderSize*2;
    cfg.height = cfg.tileSize + cfg.borderSize*2;
    cfg.maxSimplificationError = 1.3f;
    cfg.minRegionArea = (int)rcSqr(8);        // Note: area = size*size
    cfg.mergeRegionArea = (int)rcSqr(20);     // Note: area = size*size
    cfg.maxVertsPerPoly = 3;
    cfg.detailSampleDist = cfg.cs * 9;
    cfg.detailSampleMaxError = cfg.ch * 1.0f;

    // default calculations - for some reason not included in basic recast
    const float* bmin = geom->getMeshBoundsMin();
    const float* bmax = geom->getMeshBoundsMax();

    int gw = 0, gh = 0;
    rcCalcGridSize(bmin, bmax, cfg.cs, &gw, &gh);
    const int ts = cfg.tileSize;
    const int tw = (gw + ts-1) / ts;
    const int th = (gh + ts-1) / ts;

    // Max tiles and max polys affect how the tile IDs are calculated.
    // There are 22 bits available for identifying a tile and a polygon.
    // rcMin already limits tileBits to 14.
    const int tileBits = rcMin((int)ilog2(nextPow2(tw*th)), 14);
    const int polyBits = 22 - tileBits;
    const int maxTiles = 1 << tileBits;
    const int maxPolysPerTile = 1 << polyBits;

    dtNavMeshParams params;
    rcVcopy(params.orig, geom->getMeshBoundsMin());
    params.tileWidth = cfg.tileSize * cfg.cs;
    params.tileHeight = cfg.tileSize * cfg.cs;
    params.maxTiles = maxTiles;
    params.maxPolys = maxPolysPerTile;

    dtStatus status = mesh->init(&params);
    if (dtStatusFailed(status))
    {
        // The mesh was allocated above and is not returned, so release it.
        dtFreeNavMesh(mesh);
        CleanupAfterBuild();
        wcellContext->log(RC_LOG_ERROR, "buildTiledNavigation: Could not init navmesh.");
        return 0;
    }

    // start building
    wcellContext->startTimer(RC_TIMER_TEMP);
    TileAdder Adder;

    dispatcher.Reset();
    dispatcher.maxHeight = th;
    dispatcher.maxWidth = tw;

    const int numThreads = std::min(2*numCores, 8);

    // The workers run on references to these tilers, so both the tilers and
    // the thread objects must stay alive until the workers have been joined.
    QuadrantTiler tilers[8];
    boost::thread* threads[8];

    for (int i = 0; i < numThreads; ++i)
    {
        tilers[i].geom = geom;
        tilers[i].cfg = cfg;
        tilers[i].ctx = *wcellContext;
        threads[i] = new boost::thread(boost::ref(tilers[i]));
    }

    Adder.mesh = mesh;
    Adder.numThreads = numThreads;

    boost::thread AdderThread(boost::ref(Adder));
    AdderThread.join();

    // NOTE(review): assumes each QuadrantTiler returns once the dispatcher has
    // no more tiles - confirm against QuadrantTiler::operator().
    for (int i = 0; i < numThreads; ++i)
    {
        threads[i]->join();
        delete threads[i];
    }

    // Build finished.
    wcellContext->stopTimer(RC_TIMER_TEMP);

    return mesh;
}
/// Builds the OpenGL texture and display list for one character.
///
/// The glyph is rendered to a bitmap, copied into a power-of-two
/// luminance+alpha image (padding is zero), uploaded to m_textures[ch], and a
/// display list at m_displayLists + ch is compiled that draws the textured
/// quad and then advances by the glyph's advance width.
///
/// @param face  FreeType face to take the glyph from
/// @param ch    character to build; treated as an unsigned byte value so that
///              characters above 127 index the arrays correctly
/// @throws std::runtime_error if loading, extracting, or rendering the glyph fails
void Font::makeDisplayList(FT_Face face, char ch)
{
    // A plain char may be signed; never use a negative value as an index
    // or as a character code.
    const unsigned char index = static_cast<unsigned char>(ch);

    // Load Glyph for character
    if (FT_Load_Glyph(face, FT_Get_Char_Index(face, index), FT_LOAD_DEFAULT))
        throw std::runtime_error("FT_Load_Glyph failed");

    // Move Glyph into object
    FT_Glyph glyph;
    if (FT_Get_Glyph(face->glyph, &glyph))
        throw std::runtime_error("FT_Get_Glyph failed");

    // Convert Glyph to Bitmap. Only after success may the glyph be viewed as
    // an FT_BitmapGlyph; on failure we still own the glyph and must release it.
    if (FT_Glyph_To_Bitmap(&glyph, ft_render_mode_normal, 0, 1))
    {
        FT_Done_Glyph(glyph);
        throw std::runtime_error("FT_Glyph_To_Bitmap failed");
    }
    FT_BitmapGlyph bitmap_glyph = (FT_BitmapGlyph)glyph;
    FT_Bitmap &bitmap = bitmap_glyph->bitmap;

    // Resize to OpenGL power of 2 and two channels (luminosity and alpha)
    const int srcWidth = static_cast<int>(bitmap.width);
    const int srcHeight = static_cast<int>(bitmap.rows);
    int width = nextPow2(bitmap.width);
    int height = nextPow2(bitmap.rows);
    GLubyte *expandedData = new GLubyte[2 * width * height];
    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            int pos = 2 * (x + y*width);
            // Outside the glyph the padding is zero; inside, both channels
            // carry the glyph coverage value.
            expandedData[pos] = expandedData[pos+1] =
                (x >= srcWidth || y >= srcHeight) ? 0 : bitmap.buffer[x + srcWidth * y];
        }
    }

    // Create OpenGL texture
    glBindTexture(GL_TEXTURE_2D, m_textures[index]);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE, expandedData);
    delete[] expandedData;

    // Now we create the Display List
    glNewList(m_displayLists + index, GL_COMPILE);
    glBindTexture(GL_TEXTURE_2D, m_textures[index]);
    glPushMatrix();

    // Center character correctly
    glTranslatef(bitmap_glyph->left, 0, 0);
    glTranslatef(0, bitmap_glyph->top - srcHeight, 0);

    // Calculate real size versus padding space
    float x = float(bitmap.width) / float(width);
    float y = float(bitmap.rows) / float(height);

    // Draw the quad
    glBegin(GL_QUADS);
    glTexCoord2d(0, 0); glVertex2f(0, bitmap.rows);
    glTexCoord2d(0, y); glVertex2f(0, 0);
    glTexCoord2d(x, y); glVertex2f(bitmap.width, 0);
    glTexCoord2d(x, 0); glVertex2f(bitmap.width, bitmap.rows);
    glEnd();
    glPopMatrix();
    glTranslatef(face->glyph->advance.x >> 6, 0, 0);

    // Increment the raster position as if it were a bitmap font
    // glBitmap(0, 0, 0, 0, face->glyph->advance.x >> 6, 0, NULL);
    glEndList();

    // The bitmap glyph is no longer referenced past this point; release it.
    FT_Done_Glyph(glyph);
}
void SimpleContactSolver::solveContacts(unsigned numContacts, CUDABuffer * contactBuf, CUDABuffer * pairBuf, void * objectData) { #if DISABLE_COLLISION_RESOLUTION return; #endif if(numContacts < 1) return; m_numContacts = numContacts; const unsigned indBufLength = iRound1024(numContacts * 2); m_sortedInd[0]->create(indBufLength * 8); m_sortedInd[1]->create(indBufLength * 8); void * bodyContactHash = m_sortedInd[0]->bufferOnDevice(); void * pairs = pairBuf->bufferOnDevice(); simpleContactSolverWriteContactIndex((KeyValuePair *)bodyContactHash, (uint *)pairs, numContacts * 2, indBufLength); void * tmp = m_sortedInd[1]->bufferOnDevice(); RadixSort((KeyValuePair *)bodyContactHash, (KeyValuePair *)tmp, indBufLength, 30); m_splitPair->create(numContacts * 8); void * splits = m_splitPair->bufferOnDevice(); const unsigned splitBufLength = numContacts * 2; simpleContactSolverComputeSplitBufLoc((uint2 *)splits, (uint2 *)pairs, (KeyValuePair *)bodyContactHash, splitBufLength); m_bodyCount->create(splitBufLength * 4); void * bodyCount = m_bodyCount->bufferOnDevice(); simpleContactSolverCountBody((uint *)bodyCount, (KeyValuePair *)bodyContactHash, splitBufLength); int mxcount = 0; max<int>(mxcount, (int *)bodyCount, splitBufLength); // if(mxcount>9) std::cout<<" max count per contact "<<mxcount; int numiterations = mxcount + 3; m_splitInverseMass->create(splitBufLength * 4); void * splitMass = m_splitInverseMass->bufferOnDevice(); CudaNarrowphase::CombinedObjectBuffer * objectBuf = (CudaNarrowphase::CombinedObjectBuffer *)objectData; void * pos = objectBuf->m_pos->bufferOnDevice(); void * vel = objectBuf->m_vel->bufferOnDevice(); void * mass = objectBuf->m_mass->bufferOnDevice(); void * ind = objectBuf->m_ind->bufferOnDevice(); void * perObjPointStart = objectBuf->m_pointCacheLoc->bufferOnDevice(); void * perObjectIndexStart = objectBuf->m_indexCacheLoc->bufferOnDevice(); simpleContactSolverComputeSplitInverseMass((float *)splitMass, (uint2 *)splits, (uint2 *)pairs, (float 
*)mass, (uint4 *)ind, (uint * )perObjPointStart, (uint * )perObjectIndexStart, (uint *)bodyCount, splitBufLength); m_constraint->create(numContacts * 64); void * constraint = m_constraint->bufferOnDevice(); void * contacts = contactBuf->bufferOnDevice(); simpleContactSolverSetContactConstraint((ContactConstraint *)constraint, (uint2 *)splits, (uint2 *)pairs, (float3 *)pos, (float3 *)vel, (uint4 *)ind, (uint * )perObjPointStart, (uint * )perObjectIndexStart, (float *)splitMass, (ContactData *)contacts, numContacts * 2); CudaBase::CheckCudaError("jacobi solver set constraint"); m_deltaLinearVelocity->create(nextPow2(splitBufLength * 12)); m_deltaAngularVelocity->create(nextPow2(splitBufLength * 12)); void * deltaLinVel = m_deltaLinearVelocity->bufferOnDevice(); void * deltaAngVel = m_deltaAngularVelocity->bufferOnDevice(); simpleContactSolverClearDeltaVelocity((float3 *)deltaLinVel, (float3 *)deltaAngVel, splitBufLength); /* const unsigned scanBufLength = iRound1024(numContacts * 2); m_bodyCount->create(scanBufLength * 4); m_scanBodyCount[0]->create(scanBufLength * 4); m_scanBodyCount[1]->create(scanBufLength * 4); void * scanResult = m_scanBodyCount[0]->bufferOnDevice(); void * scanIntermediate = m_scanBodyCount[1]->bufferOnDevice(); scanExclusive((uint *)scanResult, (uint *)bodyCount, (uint *)scanIntermediate, scanBufLength / 1024, 1024); const unsigned numSplitBodies = ScanUtil::getScanResult(m_bodyCount, m_scanBodyCount[0], scanBufLength); */ int i; for(i=0; i< numiterations; i++) { // compute impulse and velocity changes per contact simpleContactSolverSolveContactWoJ((ContactConstraint *)constraint, (float3 *)deltaLinVel, (float3 *)deltaAngVel, (uint2 *)pairs, (uint2 *)splits, (float *)splitMass, (ContactData *)contacts, (float3 *)pos, (float3 *)vel, (uint4 *)ind, (uint * )perObjPointStart, (uint * )perObjectIndexStart, numContacts * 2); CudaBase::CheckCudaError("jacobi solver solve impulse"); simpleContactSolverAverageVelocities((float3 *)deltaLinVel, (float3 
*)deltaAngVel, (uint *)bodyCount, (KeyValuePair *)bodyContactHash, splitBufLength); CudaBase::CheckCudaError("jacobi solver average velocity"); } // 2 tet per contact, 4 pnt per tet, key is pnt index, value is tet index in split const unsigned pntHashBufLength = iRound1024(numContacts * 2 * 4); // std::cout<<"\n pntHashBufLength"<<pntHashBufLength // <<" numContact"<<numContacts; m_pntTetHash[0]->create(pntHashBufLength * 8); m_pntTetHash[1]->create(pntHashBufLength * 8); void * pntTetHash = m_pntTetHash[0]->bufferOnDevice(); simpleContactSolverWritePointTetHash((KeyValuePair *)pntTetHash, (uint2 *)pairs, (uint2 *)splits, (uint *)bodyCount, (uint4 *)ind, (uint * )perObjPointStart, (uint * )perObjectIndexStart, numContacts * 2, pntHashBufLength); CudaBase::CheckCudaError(CudaBase::Synchronize(), "jacobi solver point-tetra hash"); void * intermediate = m_pntTetHash[1]->bufferOnDevice(); RadixSort((KeyValuePair *)pntTetHash, (KeyValuePair *)intermediate, pntHashBufLength, 24); #if 0 svlg.writeHash(m_pntTetHash[1], numContacts * 2, "pnttet_hash", CudaDbgLog::FAlways); #endif simpleContactSolverUpdateVelocity((float3 *)vel, (float3 *)deltaLinVel, (float3 *)deltaAngVel, (KeyValuePair *)pntTetHash, (uint2 *)pairs, (uint2 *)splits, (ContactConstraint *)constraint, (ContactData *)contacts, (float3 *)pos, (uint4 *)ind, (uint * )perObjPointStart, (uint * )perObjectIndexStart, numContacts * 2 * 4); CudaBase::CheckCudaError(CudaBase::Synchronize(), "jacobi solver update velocity"); }