//-------------------------------------------------------------------------- void GBitmap::allocateBitmap(const U32 in_width, const U32 in_height, const bool in_extrudeMipLevels, const GFXFormat in_format ) { //-------------------------------------- Some debug checks... U32 svByteSize = mByteSize; U8 *svBits = mBits; AssertFatal(in_width != 0 && in_height != 0, "GBitmap::allocateBitmap: width or height is 0"); if (in_extrudeMipLevels == true) { AssertFatal(isPow2(in_width) == true && isPow2(in_height) == true, "GBitmap::GBitmap: in order to extrude mip levels, bitmap w/h must be pow2"); } mInternalFormat = in_format; mWidth = in_width; mHeight = in_height; mBytesPerPixel = 1; switch (mInternalFormat) { case GFXFormatA8: case GFXFormatL8: mBytesPerPixel = 1; break; case GFXFormatR8G8B8: mBytesPerPixel = 3; break; case GFXFormatR8G8B8X8: case GFXFormatR8G8B8A8: mBytesPerPixel = 4; break; case GFXFormatR5G6B5: case GFXFormatR5G5B5A1: mBytesPerPixel = 2; break; default: AssertFatal(false, "GBitmap::GBitmap: misunderstood format specifier"); break; } // Set up the mip levels, if necessary... mNumMipLevels = 1; U32 allocPixels = in_width * in_height * mBytesPerPixel; mMipLevelOffsets[0] = 0; if (in_extrudeMipLevels == true) { U32 currWidth = in_width; U32 currHeight = in_height; do { mMipLevelOffsets[mNumMipLevels] = mMipLevelOffsets[mNumMipLevels - 1] + (currWidth * currHeight * mBytesPerPixel); currWidth >>= 1; currHeight >>= 1; if (currWidth == 0) currWidth = 1; if (currHeight == 0) currHeight = 1; mNumMipLevels++; allocPixels += currWidth * currHeight * mBytesPerPixel; } while (currWidth != 1 || currHeight != 1); }
PrecomputedRandom::PrecomputedRandom(int dataSize, uint32 seed) : Random((void*)NULL), m_hemiUniform(NULL), m_sphereBits(NULL), m_modMask(dataSize - 1), m_freeData(true) { alwaysAssertM(isPow2(dataSize), "dataSize must be a power of 2"); m_index = seed & m_modMask; HemiUniformData* h; SphereBitsData* s; m_hemiUniform = h = (HemiUniformData*) System::malloc(sizeof(HemiUniformData) * dataSize); m_sphereBits = s = (SphereBitsData*) System::malloc(sizeof(SphereBitsData) * dataSize); Random r; for (int i = 0; i < dataSize; ++i) { h[i].uniform = r.uniform(); r.cosHemi(h[i].cosHemiX, h[i].cosHemiY, h[i].cosHemiZ); s[i].bits = r.bits(); r.sphere(s[i].sphereX, s[i].sphereY, s[i].sphereZ); } }
GLsizei ComputePitch(GLsizei width, GLint internalformat, GLint alignment) { ASSERT(alignment > 0 && isPow2(alignment)); GLsizei rawPitch = ComputePixelSize(internalformat) * width; return (rawPitch + alignment - 1) & ~(alignment - 1); }
/// Initializes the parallel sum object to sum num_element entries from a cl_mem buffer. /// allocate_temp_buffers: if true will automatically allocate/deallocate buffers. Otherwise you need to do this elsewhere void CRoutine_Sum_NVidia::Init(int n) { int status = CL_SUCCESS; mInputSize = n; mBufferSize = n; // The NVidia SDK kernel on which this routine is based is designed only for power-of-two // sized buffers. Because of this, we'll create internal buffers that round up to the // next highest power of two. if(!isPow2(mBufferSize)) mBufferSize = nextPow2(mBufferSize); // TODO: Workaround for issue 32 in which kernel fails to compute sums for N = [33 - 64] // https://github.com/bkloppenborg/liboi/issues/32 if(mBufferSize < 128) mBufferSize = 128; BuildKernels(); if(mTempBuffer1 == NULL) { mTempBuffer1 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed."); } if(mTempBuffer2 == NULL) { mTempBuffer2 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed."); } }
int main() { uint64 a; int k,l; scanf("%d",&k); while(k>0) { l=0; scanf("%llu",&a); while(a>1) { if( isPow2(a) ) { a/=2; } else { a=a-(1LLU<<msb(a)); } l++; // printf("%llu\n",a); } if(l%2) { printf("Louise\n"); } else { printf("Richard\n"); } k--; } return 0; }
msg getCodeFromHam (msg t) { msg h;//unde o sa extrag codul initial int i, j, fromPos; int pNo = 0; while (pow2 (pNo) < t.len){ pNo ++; } h.len = t.len - pNo; //nr de biti de paritate for (i = 0; i < h.len; i++){ h.payload[i] = 0; } for (i = 0; i < 8; i++){ fromPos = 1; for (j = 0; j < h.len; j++){ fromPos ++; while (isPow2(fromPos) == 1){ fromPos ++; } if (getBit (t.payload [fromPos - 1], i) == 1){ setBit (&h.payload [j], i); } } } return h; }
GLsizei ComputePitch(GLsizei width, GLenum format, GLenum type, GLint alignment) { ASSERT(alignment > 0 && isPow2(alignment)); GLsizei rawPitch = ComputePixelSize(format, type) * width; return (rawPitch + alignment - 1) & ~(alignment - 1); }
/// Initializes the parallel sum object to sum num_element entries from a cl_mem buffer. /// allocate_temp_buffers: if true will automatically allocate/deallocate buffers. Otherwise you need to do this elsewhere void CRoutine_Sum::Init(int n) { int err = CL_SUCCESS; mInputSize = n; mBufferSize = n; // The NVidia SDK kernel on which this routine is based is designed only for power-of-two // sized buffers. Because of this, we'll create internal buffers that round up to the // next highest power of two. if(!isPow2(mBufferSize)) mBufferSize = nextPow2(mBufferSize); // TODO: Workaround for issue 32 // https://github.com/bkloppenborg/liboi/issues/32 if(mBufferSize < 128) mBufferSize = 128; BuildKernels(); if(mTempBuffer1 == NULL) { mTempBuffer1 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &err); mTempBuffer2 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &err); COpenCL::CheckOCLError("Could not create parallel sum temporary buffer.", err); } }
/** * Evaluate a row of the gram matrix * @param d_xtraindata device pointer to the training set * @param d_dottraindata device pointer to the array containing the dot product of the row with itself * @param d_kernelrow device pointer that will store the array extracted from d_xtraindata. * @param d_kerneldot device pointer that will store the result of the kernel evaluation * @param d_kdata device pointer to the matrix that stores the cached values * @param gid index that points to the point in d_xtraindata to be calculated * @param cacheid index that points to the location in cache that will keep the results * @param ntraining number of training samples in the training set * @param nfeatures number of features in the training samples * @param beta value of the parameter of the RBF kernel * @param a if using polynomial or sigmoid kernel the value of a x_i x_j * @param b if using polynomial or sigmoid kernel the value of b * @param d if using polynomial kernel * @param kernelcode code that indicates the kernel type to run */ void kerneleval ( float* d_xtraindata, float* d_dottraindata, float* d_kernelrow, float* d_kerneldot, float* d_kdata, int gid, int cacheid, int ntraining, int nfeatures, float beta, float a, float b, float d, int kernelcode) { int numThreads = (nfeatures < MAXTHREADS*2) ? nextPow2((nfeatures + 1)/ 2) : MAXTHREADS; int numBlocks = (nfeatures + (numThreads * 2 - 1)) / (numThreads * 2); int numBlocksRed = min(MAXBLOCKS, numBlocks); dim3 dimBlockKernelRow(numThreads, 1, 1); dim3 dimGridKernelRow(numBlocksRed, 1, 1); int smemSize = 0; bool isNtrainingPow2=isPow2(nfeatures); if(isNtrainingPow2) { switch (numThreads) { case 512: ExtractKernelRow <512,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 256: ExtractKernelRow <256,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 128: ExtractKernelRow <128,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 64: ExtractKernelRow <64,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 32: ExtractKernelRow <32,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 16: ExtractKernelRow <16,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 8: ExtractKernelRow <8,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 4: ExtractKernelRow <4,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 2: ExtractKernelRow <2,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; case 1: ExtractKernelRow <1,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow, gid,ntraining,nfeatures); break; } } else { switch (numThreads)
PrecomputedRandom::PrecomputedRandom(const HemiUniformData* data1, const SphereBitsData* data2, int dataSize, uint32 seed) : Random((void*)NULL), m_hemiUniform(data1), m_sphereBits(data2), m_modMask(dataSize - 1), m_freeData(false) { m_index = seed & m_modMask; alwaysAssertM(isPow2(dataSize), "dataSize must be a power of 2"); }
void* System::alignedMalloc(size_t bytes, size_t alignment) { alwaysAssertM(isPow2(alignment), "alignment must be a power of 2"); // We must align to at least a word boundary. alignment = iMax((int)alignment, sizeof(void *)); // Pad the allocation size with the alignment size and the // size of the redirect pointer. size_t totalBytes = bytes + alignment + sizeof(intptr_t); void* truePtr = System::malloc(totalBytes); if (!truePtr) { // malloc returned NULL return NULL; } debugAssert(isValidHeapPointer(truePtr)); #ifdef G3D_WIN32 // The blocks we return will not be valid Win32 debug heap // pointers because they are offset // debugAssert(_CrtIsValidPointer(truePtr, totalBytes, TRUE) ); #endif // The return pointer will be the next aligned location (we must at least // leave space for the redirect pointer, however). char* alignedPtr = ((char*)truePtr)+ sizeof(intptr_t); #if 0 // 2^n - 1 has the form 1111... in binary. uint32 bitMask = (alignment - 1); // Advance forward until we reach an aligned location. while ((((intptr_t)alignedPtr) & bitMask) != 0) { alignedPtr += sizeof(void*); } #else alignedPtr += alignment - (((intptr_t)alignedPtr) & (alignment - 1)); // assert((alignedPtr - truePtr) + bytes <= totalBytes); #endif debugAssert((alignedPtr - truePtr) + bytes <= totalBytes); // Immediately before the aligned location, write the true array location // so that we can free it correctly. intptr_t* redirectPtr = (intptr_t*)(alignedPtr - sizeof(intptr_t)); redirectPtr[0] = (intptr_t)truePtr; debugAssert(isValidHeapPointer(truePtr)); #ifdef G3D_WIN32 debugAssert( _CrtIsValidPointer(alignedPtr, bytes, TRUE) ); #endif return (void*)alignedPtr; }
void Boot::setBootBlock(BootBlock *bootBlock) { _bootBlock = bootBlock; _clusterSize = _bootBlock->bytePerSector * _bootBlock->sectorPerCluster; if (isPow2(_bootBlock->clusterIndexRecord)) _indexRecordSize = _bootBlock->clusterIndexRecord * _clusterSize; else { DEBUG(INFO, "Invalid index record size in BootSector\n"); ; } }
static bool getGDGT(SparseGTInfo *gtInfo, const SparseExtentHeader *hdr) { if (hdr->grainSize < 1 || hdr->grainSize > 128 || !isPow2(hdr->grainSize)) { return false; } /* disklib supports only 512 GTEs per GT (=> 4KB GT size). Streaming is more flexible. */ if (hdr->numGTEsPerGT < VMDK_SECTOR_SIZE / sizeof(uint32_t) || !isPow2(hdr->numGTEsPerGT)) { return false; } gtInfo->lastGrainNr = hdr->capacity / hdr->grainSize; gtInfo->lastGrainSize = (hdr->capacity & (hdr->grainSize - 1)) * VMDK_SECTOR_SIZE; { uint64_t GTEs = gtInfo->lastGrainNr + (gtInfo->lastGrainSize != 0); /* Number of GTEs must be less than 2^32. Actually capacity must be less than 2^32 (2TB) for everything except streamOptimized format... */ uint32_t GTs = CEILING(GTEs, hdr->numGTEsPerGT); uint32_t GDsectors = CEILING(GTs * sizeof(uint32_t), VMDK_SECTOR_SIZE); uint32_t GTsectors = CEILING(hdr->numGTEsPerGT * sizeof(uint32_t), VMDK_SECTOR_SIZE); uint32_t *gd = calloc(GDsectors + GTsectors * GTs, VMDK_SECTOR_SIZE); uint32_t *gt; if (!gd) { return false; } gt = gd + GDsectors * VMDK_SECTOR_SIZE / sizeof(uint32_t); gtInfo->GTEs = GTEs; gtInfo->GTs = GTs; gtInfo->GDsectors = GDsectors; gtInfo->gd = gd; gtInfo->GTsectors = GTsectors; gtInfo->gt = gt; } return true; }
void CRoutine_Sum_NVidia::BuildKernels() { int whichKernel = 6; int numBlocks = 0; int numThreads = 0; #ifdef __APPLE__ int maxThreads = 64; #else int maxThreads = 128; #endif int maxBlocks = 64; int cpuFinalThreshold = 1; getNumBlocksAndThreads(whichKernel, mBufferSize, maxBlocks, maxThreads, numBlocks, numThreads); BuildReductionKernel(whichKernel, numThreads, isPow2(mBufferSize) ); mBlocks.push_back(numBlocks); mThreads.push_back(numThreads); mReductionPasses += 1; int s = numBlocks; int threads = 0, blocks = 0; int kernel = (whichKernel == 6) ? 5 : whichKernel; while(s > cpuFinalThreshold) { getNumBlocksAndThreads(kernel, s, maxBlocks, maxThreads, blocks, threads); BuildReductionKernel(kernel, threads, isPow2(s) ); mBlocks.push_back(blocks); mThreads.push_back(threads); s = (s + (threads*2-1)) / (threads*2); mReductionPasses += 1; } mFinalS = s; }
ImageBuffer::ImageBuffer(const ImageFormat* format, int width, int height, int depth, int rowAlignment) : m_buffer(NULL) , m_format(format) , m_rowAlignment(rowAlignment) , m_rowStride(0) , m_width(width) , m_height(height) , m_memoryManager(NULL) , m_depth(depth) { debugAssert(m_format); debugAssert(isPow2(m_rowAlignment)); debugAssert(m_width > 0); debugAssert(m_height > 0); debugAssert(m_depth > 0); }
/** * Check * - BOOT_MEDIA_DESCRIPTOR_ID in mediaDescriptorId * - BOOT_FAT_NTFS_SIGNATURE in signature */ bool Boot::isBootBlock(uint64_t offset) { std::ostringstream expectedMediaID; BootBlock *bootBlock = new BootBlock; _vfile->seek(offset); _vfile->read(bootBlock, BOOT_BLOCK_SIZE); expectedMediaID << BOOT_MEDIA_DESCRIPTOR_ID; if ((expectedMediaID.str() == std::string(bootBlock->mediaDescriptorId)) && (bootBlock->signature == BOOT_FAT_NTFS_SIGNATURE)) { setBootBlock(bootBlock); #if __WORDSIZE == 64 DEBUG(VERBOSE, "NTFS Boot block found at offset 0x%lx in vfile %s\n", offset, _vfile->node()->absolute().c_str()); #else DEBUG(VERBOSE, "NTFS Boot block found at offset 0x%llx in vfile %s\n", offset, _vfile->node()->absolute().c_str()); #endif DEBUG(VERBOSE, "Byte per sector: %u\n", bootBlock->bytePerSector); DEBUG(VERBOSE, "Sector per cluster: %u\n", bootBlock->sectorPerCluster); #if __WORDSIZE == 64 DEBUG(VERBOSE, "Number of sector: %lx\n", bootBlock->numberOfSector); DEBUG(VERBOSE, "Start Mft: 0x%lx\n", bootBlock->startMft); DEBUG(VERBOSE, "Start Mft 16b Mirror: 0x%lx\n", bootBlock->startMftMirr); #else DEBUG(VERBOSE, "Number of sector: %llx\n", bootBlock->numberOfSector); DEBUG(VERBOSE, "Start Mft: 0x%llx\n", bootBlock->startMft); DEBUG(VERBOSE, "Start Mft 16b Mirror: 0x%llx\n", bootBlock->startMftMirr); #endif DEBUG(VERBOSE, "Cluster Mft record: %u\n", bootBlock->clusterMftRecord); if (isPow2(bootBlock->clusterMftRecord)) { _mftEntrySize = bootBlock->clusterMftRecord * _clusterSize; DEBUG(VERBOSE,"MFT Entry size: %u\n", _mftEntrySize); } else { DEBUG(VERBOSE, "MFT Entry size not valid in bootblock"); DEBUG(VERBOSE, "Will search for it"); ; } DEBUG(VERBOSE, "Cluster Index record: %u\n", bootBlock->clusterIndexRecord); DEBUG(VERBOSE, "Cluster Size: %u\n", _clusterSize); } else { delete bootBlock;; return false; } return true; }
ZArray::ZArray(uint32_t _numLines, uint32_t _ways, uint32_t _candidates, ReplPolicy* _rp, HashFamily* _hf) //(int _size, int _lineSize, int _assoc, int _zassoc, ReplacementPolicy<T>* _rp, int _hashType) : rp(_rp), hf(_hf), numLines(_numLines), ways(_ways), cands(_candidates) { assert_msg(ways > 1, "zcaches need >=2 ways to work"); assert_msg(cands >= ways, "candidates < ways does not make sense in a zcache"); assert_msg(numLines % ways == 0, "number of lines is not a multiple of ways"); //Populate secondary parameters numSets = numLines/ways; assert_msg(isPow2(numSets), "must have a power of 2 # sets, but you specified %d", numSets); setMask = numSets - 1; lookupArray = gm_calloc<uint32_t>(numLines); array = gm_calloc<Address>(numLines); for (uint32_t i = 0; i < numLines; i++) { lookupArray[i] = i; // start with a linear mapping; with swaps, it'll get progressively scrambled } swapArray = gm_calloc<uint32_t>(cands/ways + 2); // conservative upper bound (tight within 2 ways) }
//----------------------------------------------------------------------------- // _innerCreateTexture //----------------------------------------------------------------------------- void GFXD3D9TextureManager::_innerCreateTexture( GFXD3D9TextureObject *retTex, U32 height, U32 width, U32 depth, GFXFormat format, GFXTextureProfile *profile, U32 numMipLevels, bool forceMips, S32 antialiasLevel) { GFXD3D9Device* d3d = static_cast<GFXD3D9Device*>(GFX); // Some relevant helper information... bool supportsAutoMips = GFX->getCardProfiler()->queryProfile("autoMipMapLevel", true); DWORD usage = 0; // 0, D3DUSAGE_RENDERTARGET, or D3DUSAGE_DYNAMIC D3DPOOL pool = D3DPOOL_DEFAULT; retTex->mProfile = profile; D3DFORMAT d3dTextureFormat = GFXD3D9TextureFormat[format]; #ifndef TORQUE_OS_XENON if( retTex->mProfile->isDynamic() ) { usage = D3DUSAGE_DYNAMIC; } else { usage = 0; pool = d3d->isD3D9Ex() ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED; } if( retTex->mProfile->isRenderTarget() ) { pool = D3DPOOL_DEFAULT; usage |= D3DUSAGE_RENDERTARGET; } if(retTex->mProfile->isZTarget()) { usage |= D3DUSAGE_DEPTHSTENCIL; pool = D3DPOOL_DEFAULT; } if( retTex->mProfile->isSystemMemory() ) { pool = D3DPOOL_SYSTEMMEM; } if( supportsAutoMips && !forceMips && !retTex->mProfile->isSystemMemory() && numMipLevels == 0 && !(depth > 0) ) { usage |= D3DUSAGE_AUTOGENMIPMAP; } #else if(retTex->mProfile->isRenderTarget()) { d3dTextureFormat = (D3DFORMAT)MAKELEFMT(d3dTextureFormat); } #endif // Set the managed flag... retTex->isManaged = (pool == D3DPOOL_MANAGED) || d3d->isD3D9Ex(); if( depth > 0 ) { #ifdef TORQUE_OS_XENON D3D9Assert( mD3DDevice->CreateVolumeTexture( width, height, depth, numMipLevels, 0 /* usage ignored on the 360 */, d3dTextureFormat, pool, retTex->get3DTexPtr(), NULL), "Failed to create volume texture" ); #else D3D9Assert( GFXD3DX.D3DXCreateVolumeTexture( mD3DDevice, width, height, depth, numMipLevels, usage, d3dTextureFormat, pool, retTex->get3DTexPtr() ), "GFXD3D9TextureManager::_createTexture - failed to create volume texture!" ); #endif retTex->mTextureSize.set( width, height, depth ); retTex->mMipLevels = retTex->get3DTex()->GetLevelCount(); // required for 3D texture support - John Kabus retTex->mFormat = format; } else { #ifdef TORQUE_OS_XENON D3D9Assert( mD3DDevice->CreateTexture(width, height, numMipLevels, usage, d3dTextureFormat, pool, retTex->get2DTexPtr(), NULL), "Failed to create texture" ); retTex->mMipLevels = retTex->get2DTex()->GetLevelCount(); #else // Figure out AA settings for depth and render targets D3DMULTISAMPLE_TYPE mstype; DWORD mslevel; switch (antialiasLevel) { case 0 : mstype = D3DMULTISAMPLE_NONE; mslevel = 0; break; case AA_MATCH_BACKBUFFER : mstype = d3d->getMultisampleType(); mslevel = d3d->getMultisampleLevel(); break; default : { mstype = D3DMULTISAMPLE_NONMASKABLE; mslevel = antialiasLevel; #ifdef TORQUE_DEBUG DWORD MaxSampleQualities; d3d->getD3D()->CheckDeviceMultiSampleType(mAdapterIndex, D3DDEVTYPE_HAL, d3dTextureFormat, FALSE, D3DMULTISAMPLE_NONMASKABLE, &MaxSampleQualities); AssertFatal(mslevel < MaxSampleQualities, "Invalid AA level!"); #endif } break; } bool fastCreate = true; // Check for power of 2 textures - this is a problem with FX 5xxx cards // with current drivers - 3/2/05 if( !isPow2(width) || !isPow2(height) ) { fastCreate = false; } if(retTex->mProfile->isZTarget()) { D3D9Assert(mD3DDevice->CreateDepthStencilSurface(width, height, d3dTextureFormat, mstype, mslevel, retTex->mProfile->canDiscard(), retTex->getSurfacePtr(), NULL), "Failed to create Z surface" ); retTex->mFormat = format; // Assigning format like this should be fine. } else { // Try to create the texture directly - should gain us a bit in high // performance cases where we know we're creating good stuff and we // don't want to bother with D3DX - slow function. HRESULT res = D3DERR_INVALIDCALL; if( fastCreate ) { res = mD3DDevice->CreateTexture(width, height, numMipLevels, usage, d3dTextureFormat, pool, retTex->get2DTexPtr(), NULL); } if( !fastCreate || (res != D3D_OK) ) { D3D9Assert( GFXD3DX.D3DXCreateTexture( mD3DDevice, width, height, numMipLevels, usage, d3dTextureFormat, pool, retTex->get2DTexPtr() ), "GFXD3D9TextureManager::_createTexture - failed to create texture!" ); } // If this is a render target, and it wants AA or wants to match the backbuffer (for example, to share the z) // Check the caps though, if we can't stretchrect between textures, use the old RT method. (Which hopefully means // that they can't force AA on us as well.) if (retTex->mProfile->isRenderTarget() && mslevel != 0 && (mDeviceCaps.Caps2 && D3DDEVCAPS2_CAN_STRETCHRECT_FROM_TEXTURES)) { D3D9Assert(mD3DDevice->CreateRenderTarget(width, height, d3dTextureFormat, mstype, mslevel, false, retTex->getSurfacePtr(), NULL), "GFXD3D9TextureManager::_createTexture - unable to create render target"); } // All done! retTex->mMipLevels = retTex->get2DTex()->GetLevelCount(); } #endif // Get the actual size of the texture... D3DSURFACE_DESC probeDesc; ZeroMemory(&probeDesc, sizeof probeDesc); if( retTex->get2DTex() != NULL ) D3D9Assert( retTex->get2DTex()->GetLevelDesc( 0, &probeDesc ), "Failed to get surface description"); else if( retTex->getSurface() != NULL ) D3D9Assert( retTex->getSurface()->GetDesc( &probeDesc ), "Failed to get surface description"); retTex->mTextureSize.set(probeDesc.Width, probeDesc.Height, 0); int fmt = probeDesc.Format; #if !defined(TORQUE_OS_XENON) GFXREVERSE_LOOKUP( GFXD3D9TextureFormat, GFXFormat, fmt ); retTex->mFormat = (GFXFormat)fmt; #else retTex->mFormat = format; #endif } }
SetAssocArray::SetAssocArray(uint32_t _numLines, uint32_t _assoc, ReplPolicy* _rp, HashFamily* _hf) : rp(_rp), hf(_hf), numLines(_numLines), assoc(_assoc) { array = gm_calloc<Address>(numLines); numSets = numLines/assoc; setMask = numSets - 1; assert(isPow2(numSets)); }
void ClipMap::recenter(Point2F center) { bool wantCompleteRefill = false; if(mNeedRefill || mForceClipmapPurge || Con::getBoolVariable("$forceFullClipmapPurgeEveryFrame", false)) wantCompleteRefill = true; PROFILE_START(ClipMap_recenter); // Reset our budget. mMaxTexelUploadPerRecenter = mClipMapSize * mClipMapSize * 2; AssertFatal(isPow2(mClipMapSize), "ClipMap::recenter - require pow2 clipmap size!"); // Clamp the center to the unit square. /* if(!mTile) { center.x = mClampF(center.x, 0.f, 1.f); center.y = mClampF(center.y, 0.f, 1.f); } */ // Ok, we're going to do toroidal updates on each entry of the clipstack // (except for the cap, which covers the whole texture), based on this // new center point. if( !wantCompleteRefill ) { // Calculate the new texel at most detailed level. Point2F texelCenterF = center * F32(mClipMapSize) * mLevels[0].mScale; Point2I texelCenter((S32)mFloor(texelCenterF.y), (S32)mFloor(texelCenterF.x)); // Update interest region. mImageCache->setInterestCenter(texelCenter); } // Note how many we were at so we can cut off at the right time. S32 lastTexelsUpdated = mTexelsUpdated; // For each texture... for(S32 i=mClipStackDepth-2; i>=0; i--) { ClipStackEntry &cse = mLevels[i]; // Calculate new center point for this texture. Point2F texelCenterF = center * F32(mClipMapSize) * cse.mScale; const S32 texelMin = mClipMapSize/2; //const S32 texelMax = S32(F32(mClipMapSize) * cse.mScale) - texelMin; Point2I texelTopLeft; //if(mTile) //{ texelTopLeft.x = S32(mFloor(texelCenterF.y)) - texelMin; texelTopLeft.y = S32(mFloor(texelCenterF.x)) - texelMin; //} //else //{ // texelTopLeft.x = mClamp(S32(mFloor(texelCenterF.y)), texelMin, texelMax) - texelMin; // texelTopLeft.y = mClamp(S32(mFloor(texelCenterF.x)), texelMin, texelMax) - texelMin; //} // Also, prevent very small updates - the RT changes are costly. Point2I d = cse.mToroidalOffset - texelTopLeft; if(mAbs(d.x) <= 2 && mAbs(d.y) <= 2) { // Update the center; otherwise we get some weird conditions around // edges of the clipmap space. cse.mClipCenter = center; continue; } // This + current toroid offset tells us what regions have to be blasted. RectI oldData(cse.mToroidalOffset, Point2I(mClipMapSize, mClipMapSize)); RectI newData(texelTopLeft, Point2I(mClipMapSize, mClipMapSize)); // Update clipstack level. cse.mClipCenter = center; cse.mToroidalOffset = texelTopLeft; // If we're refilling, that's all we want; continue with next level. if( wantCompleteRefill ) continue; // Make sure we have available data... if(!mImageCache->isDataAvailable(getMipLevel(cse.mScale), newData)) continue; // Alright, determine the set of data we actually need to upload. S32 rectCount = 0; RectI buffer[8]; calculateModuloDeltaBounds(oldData, newData, buffer, &rectCount); AssertFatal(rectCount < 8, "ClipMap::recenter - got too many rects back!"); /*if(rectCount) Con::printf(" issuing %d updates to clipmap level %d (offset=%dx%d)", rectCount, i, texelTopLeft.x, texelTopLeft.y); */ if(rectCount) { if (!mImageCache->beginRectUpdates(cse)) { mForceClipmapPurge = true; return; } //Con::errorf("layer %x, %d updates", &cse, rectCount); // And GO! for(S32 j=0; j<rectCount; j++) { PROFILE_START(ClipMap_recenter_upload); AssertFatal(buffer[j].isValidRect(),"ClipMap::recenter - got invalid rect!"); // Note the rect, so we can then wrap and let the image cache do its thing. RectI srcRegion = buffer[j]; buffer[j].point.x = srcRegion.point.x % mClipMapSize; buffer[j].point.y = srcRegion.point.y % mClipMapSize; AssertFatal(newData.contains(srcRegion), "ClipMap::recenter - got update buffer outside of expected new data bounds."); mTotalUpdates++; mTexelsUpdated += srcRegion.extent.x * srcRegion.extent.y; //Con::printf("updating (%d %d %d %d)", // buffer[j].point.x, buffer[j].point.y, buffer[j].extent.x, buffer[j].extent.y); mImageCache->doRectUpdate(getMipLevel(cse.mScale), cse, srcRegion, buffer[j]); PROFILE_END(); } mImageCache->finishRectUpdates(cse); } // Check if we've overrun our budget. if((mTexelsUpdated - lastTexelsUpdated) > mMaxTexelUploadPerRecenter) { //Con::warnf("ClipMap::recenter - exceeded budget for this frame, deferring till next frame."); break; } } if( wantCompleteRefill ) { fillWithTextureData(); mNeedRefill = false; } PROFILE_END(); }
//------------------------------------------------------------------------ // IsContainableImmed: Is an immediate encodable in-place? // // Return Value: // True if the immediate can be folded into an instruction, // for example small enough and non-relocatable. bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) { if (varTypeIsFloating(parentNode->TypeGet())) { // We can contain a floating point 0.0 constant in a compare instruction switch (parentNode->OperGet()) { default: return false; case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: if (childNode->IsIntegralConst(0)) { // TODO-ARM-Cleanup: not tested yet. NYI_ARM("ARM IsContainableImmed for floating point type"); return true; } break; } } else { // Make sure we have an actual immediate if (!childNode->IsCnsIntOrI()) return false; if (childNode->IsIconHandle() && comp->opts.compReloc) return false; ssize_t immVal = childNode->gtIntCon.gtIconVal; emitAttr attr = emitActualTypeSize(childNode->TypeGet()); emitAttr size = EA_SIZE(attr); #ifdef _TARGET_ARM_ insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; #endif switch (parentNode->OperGet()) { default: return false; case GT_ADD: case GT_SUB: #ifdef _TARGET_ARM64_ case GT_CMPXCHG: case GT_LOCKADD: case GT_XADD: return emitter::emitIns_valid_imm_for_add(immVal, size); #elif defined(_TARGET_ARM_) return emitter::emitIns_valid_imm_for_add(immVal, flags); #endif break; #ifdef _TARGET_ARM64_ case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: return emitter::emitIns_valid_imm_for_cmp(immVal, size); break; case GT_AND: case GT_OR: case GT_XOR: case GT_TEST_EQ: case GT_TEST_NE: return emitter::emitIns_valid_imm_for_alu(immVal, size); break; case GT_JCMP: assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); return true; break; #elif defined(_TARGET_ARM_) case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: case GT_CMP: case GT_AND: case GT_OR: case GT_XOR: return emitter::emitIns_valid_imm_for_alu(immVal); break; #endif // _TARGET_ARM_ #ifdef _TARGET_ARM64_ case GT_STORE_LCL_VAR: if (immVal == 0) return true; break; #endif } } return false; }
//----------------------------------------------------------------------------- // innerCreateTexture //----------------------------------------------------------------------------- // This just creates the texture, no info is actually loaded to it. We do that later. void GFXGLTextureManager::innerCreateTexture( GFXGLTextureObject *retTex, U32 height, U32 width, U32 depth, GFXFormat format, GFXTextureProfile *profile, U32 numMipLevels, bool forceMips) { // No 24 bit formats. They trigger various oddities because hardware (and Apple's drivers apparently...) don't natively support them. if(format == GFXFormatR8G8B8) format = GFXFormatR8G8B8A8; retTex->mFormat = format; retTex->mIsZombie = false; retTex->mIsNPoT2 = false; GLenum binding = ( (height == 1 || width == 1) && ( height != width ) ) ? GL_TEXTURE_1D : ( (depth == 0) ? GL_TEXTURE_2D : GL_TEXTURE_3D ); if((profile->testFlag(GFXTextureProfile::RenderTarget) || profile->testFlag(GFXTextureProfile::ZTarget)) && (!isPow2(width) || !isPow2(height)) && !depth) retTex->mIsNPoT2 = true; retTex->mBinding = binding; // Bind it PRESERVE_TEXTURE(binding); glBindTexture(retTex->getBinding(), retTex->getHandle()); // Create it // TODO: Reenable mipmaps on render targets when Apple fixes their drivers if(forceMips && !retTex->mIsNPoT2) { retTex->mMipLevels = numMipLevels > 1 ? numMipLevels : 0; } else if(profile->testFlag(GFXTextureProfile::NoMipmap) || profile->testFlag(GFXTextureProfile::RenderTarget) || numMipLevels == 1 || retTex->mIsNPoT2) { retTex->mMipLevels = 1; } else { retTex->mMipLevels = numMipLevels; } if(!retTex->mIsNPoT2) { if(!isPow2(width)) width = getNextPow2(width); if(!isPow2(height)) height = getNextPow2(height); if(depth && !isPow2(depth)) depth = getNextPow2(depth); } AssertFatal(GFXGLTextureInternalFormat[format] != GL_ZERO, "GFXGLTextureManager::innerCreateTexture - invalid internal format"); AssertFatal(GFXGLTextureFormat[format] != GL_ZERO, "GFXGLTextureManager::innerCreateTexture - invalid format"); AssertFatal(GFXGLTextureType[format] != GL_ZERO, "GFXGLTextureManager::innerCreateTexture - invalid type"); //calculate num mipmaps if(retTex->mMipLevels == 0) retTex->mMipLevels = getMaxMipmaps(width, height, 1); glTexParameteri(binding, GL_TEXTURE_MAX_LEVEL, retTex->mMipLevels-1 ); //If it wasn't for problems on amd drivers this next part could be really simplified and we wouldn't need to go through manually creating our //mipmap pyramid and instead just use glGenerateMipmap if(isCompressedFormat(format)) { AssertFatal(binding == GL_TEXTURE_2D, "GFXGLTextureManager::innerCreateTexture - Only compressed 2D textures are supported"); U32 tempWidth = width; U32 tempHeight = height; U32 size = getCompressedSurfaceSize(format,height,width); //Fill compressed images with 0's U8 *pTemp = (U8*)dMalloc(sizeof(U8)*size); dMemset(pTemp,0,size); for(U32 i=0; i< retTex->mMipLevels; i++) { tempWidth = getMax( U32(1), width >> i ); tempHeight = getMax( U32(1), height >> i ); size = getCompressedSurfaceSize(format,width,height,i); glCompressedTexImage2D(binding,i,GFXGLTextureInternalFormat[format],tempWidth,tempHeight,0,size,pTemp); } dFree(pTemp); } else { if(binding == GL_TEXTURE_2D)
int main(int argc, char* argv[]) { int sum, limit; scanf("%d %d", &sum, &limit); std::vector<int> lbArr(limit+1); std::vector<num> lbEvenArr; std::vector<int> set; int countElements = 0, countLb = 0; int lastPow = 0; for (int i = 2; i <=limit; i+=2){ if (i % 2 == 1){lbArr[i] = 1;} else if (isPow2(i)){ lbArr[i] = i; lastPow = i; } else { lbArr[i] = lbArr[i - lastPow]; num tmp; tmp.x = i; tmp.lb = lbArr[i]; lbEvenArr.push_back(tmp); countLb++; } } for (int i = pow(2, floor(log2((double)min(sum,limit)))); i>1; i/=2) { if (sum > i) { sum -= i; set.push_back(i); countElements++; } else break; } quickSort(lbEvenArr, 0, countLb); for (int i = countLb-1; i > 0; i--) { if (sum > lbEvenArr[i].lb) { sum -= lbEvenArr[i].lb; set.push_back(lbEvenArr[i].x); countElements++; } } for (int i = 1; i <= limit; i+=2) { if (sum > 0) { sum -= 1; set.push_back(i); countElements++; }else break; } if (sum == 0) { printf("%d\n", countElements); for (int i = 0; i < countElements; i++) printf("%d ", set[i]); } else printf("%d\n", -1); return 0; }
//------------------------------------------------------------------------ // IsContainableImmed: Is an immediate encodable in-place? // // Return Value: // True if the immediate can be folded into an instruction, // for example small enough and non-relocatable. // // TODO-CQ: we can contain a floating point 0.0 constant in a compare instruction // (vcmp on arm, fcmp on arm64). // bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) { if (!varTypeIsFloating(parentNode->TypeGet())) { // Make sure we have an actual immediate if (!childNode->IsCnsIntOrI()) return false; if (childNode->gtIntCon.ImmedValNeedsReloc(comp)) return false; // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. target_ssize_t immVal = (target_ssize_t)childNode->gtIntCon.gtIconVal; emitAttr attr = emitActualTypeSize(childNode->TypeGet()); emitAttr size = EA_SIZE(attr); #ifdef _TARGET_ARM_ insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; #endif switch (parentNode->OperGet()) { case GT_ADD: case GT_SUB: #ifdef _TARGET_ARM64_ case GT_CMPXCHG: case GT_LOCKADD: case GT_XADD: return comp->compSupports(InstructionSet_Atomics) ? false : emitter::emitIns_valid_imm_for_add(immVal, size); #elif defined(_TARGET_ARM_) return emitter::emitIns_valid_imm_for_add(immVal, flags); #endif break; #ifdef _TARGET_ARM64_ case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: return emitter::emitIns_valid_imm_for_cmp(immVal, size); case GT_AND: case GT_OR: case GT_XOR: case GT_TEST_EQ: case GT_TEST_NE: return emitter::emitIns_valid_imm_for_alu(immVal, size); case GT_JCMP: assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); return true; #elif defined(_TARGET_ARM_) case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: case GT_CMP: case GT_AND: case GT_OR: case GT_XOR: return emitter::emitIns_valid_imm_for_alu(immVal); #endif // _TARGET_ARM_ #ifdef _TARGET_ARM64_ case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: if (immVal == 0) return true; break; #endif default: break; } } return false; }
void TerrainFile::import( const GBitmap &heightMap, F32 heightScale, const Vector<U8> &layerMap, const Vector<String> &materials, bool flipYAxis ) { AssertFatal( heightMap.getWidth() == heightMap.getHeight(), "TerrainFile::import - Height map is not square!" ); AssertFatal( isPow2( heightMap.getWidth() ), "TerrainFile::import - Height map is not power of two!" ); const U32 newSize = heightMap.getWidth(); if ( newSize != mSize ) { mHeightMap.setSize( newSize * newSize ); mHeightMap.compact(); mSize = newSize; } // Convert the height map to heights. U16 *oBits = mHeightMap.address(); if ( heightMap.getFormat() == GFXFormatR5G6B5 ) { const F32 toFixedPoint = ( 1.0f / (F32)U16_MAX ) * floatToFixed( heightScale ); const U16 *iBits = (const U16*)heightMap.getBits(); if ( flipYAxis ) { for ( U32 i = 0; i < mSize * mSize; i++ ) { U16 height = convertBEndianToHost( *iBits ); *oBits = (U16)mCeil( (F32)height * toFixedPoint ); ++oBits; ++iBits; } } else { for(S32 y = mSize - 1; y >= 0; y--) { for(U32 x = 0; x < mSize; x++) { U16 height = convertBEndianToHost( *iBits ); mHeightMap[x + y * mSize] = (U16)mCeil( (F32)height * toFixedPoint ); ++iBits; } } } } else { const F32 toFixedPoint = ( 1.0f / (F32)U8_MAX ) * floatToFixed( heightScale ); const U8 *iBits = heightMap.getBits(); if ( flipYAxis ) { for ( U32 i = 0; i < mSize * mSize; i++ ) { *oBits = (U16)mCeil( ((F32)*iBits) * toFixedPoint ); ++oBits; iBits += heightMap.getBytesPerPixel(); } } else { for(S32 y = mSize - 1; y >= 0; y--) { for(U32 x = 0; x < mSize; x++) { mHeightMap[x + y * mSize] = (U16)mCeil( ((F32)*iBits) * toFixedPoint ); iBits += heightMap.getBytesPerPixel(); } } } } // Copy over the layer map. AssertFatal( layerMap.size() == mHeightMap.size(), "TerrainFile::import - Layer map is the wrong size!" ); mLayerMap = layerMap; mLayerMap.compact(); // Resolve the materials. _resolveMaterials( materials ); // Rebuild the collision grid map. _buildGridMap(); }
BBTexture* BBTexture_create (const char* name, GLsizei width, GLsizei height, GLenum type, GLenum channels, const void** data) { BBTexture* texture; BB_ASSERT(data); if (!(isPow2(width) && isPow2(height))) { #ifdef BB_DEVEL printf("Error: Invalid texture size.\n"); #endif return NULL; } texture = malloc(sizeof(BBTexture)); if (!texture) return NULL; texture->name = NULL; texture->width = width; texture->height = height; texture->type = type; texture->handle = 0; texture->next = NULL; texture->name = strdup(name); if (!texture->name) { BBTexture_destroy(texture); return NULL; } switch (type) { case GL_TEXTURE_2D: glGenTextures(1, &texture->handle); glBindTexture(GL_TEXTURE_2D, texture->handle); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); { // GLubyte* origBitmap = (GLubyte*)data[0]; // GLubyte* scaledBitmap = NULL; GLuint level = 0; GLuint w = texture->width; GLuint h = texture->height; GLboolean end = GL_FALSE; while (!end) { // if (level > 0) // { // if (!scaledBitmap) // { // GLsizei bpp = 4; // if (channels == GL_RGB) // bpp = 3; // scaledBitmap = (GLubyte*)malloc(w * h * bpp * sizeof(GLubyte)); // if (!scaledBitmap) // break; // } // generateMipMap(origBitmap, texture->width, texture->height, channels, level, scaledBitmap); // glTexImage2D(GL_TEXTURE_2D, level, channels, w, h, 0, channels, GL_UNSIGNED_BYTE, scaledBitmap); // } // else // { // glTexImage2D(GL_TEXTURE_2D, level, channels, w, h, 0, channels, GL_UNSIGNED_BYTE, origBitmap); // } BB_ASSERT(level < MAX_MIPMAPS && data[level]); glTexImage2D(GL_TEXTURE_2D, level, channels, w, h, 0, channels, GL_UNSIGNED_BYTE, data[level]); w >>= 1; h >>= 1; if (w == 0 && h == 0) { end = GL_TRUE; } else { if (w < 1) w = 1; if (h < 1) h = 1; level++; } } // free(scaledBitmap); } break; case GL_TEXTURE_CUBE_MAP: glGenTextures(1, &texture->handle); glBindTexture(GL_TEXTURE_CUBE_MAP, texture->handle); glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[0]); glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[1]); glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[2]); glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[3]); glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[4]); glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[5]); break; default: BBTexture_destroy(texture); return NULL; } return texture; }
// Internal function - Called to load up a texture map - file is known to exist Texture *textureFromBitmap(bitmap *loadbm, Texture *tex) { bitmap *swizzleBm, *scaleBm, *resizeCanvasSrc; bool mustSwizzle = false; bool mustScale = false; // Step 1: Check for a need to swizzle (if the video card doesn't support this texture mode) uintf dataType = loadbm->flags & (bitmap_DataTypeMask | bitmap_DataInfoMask); // ### This code is not yet complete, must leave this block with 'swizzleBM' pointing to swizzled bitmap data // If Video card only handles 'Power-of-2' texture dimensions bool resizeCanvas=false; // if (GLESWarnings) //!(videoFeatures & videodriver_nonP2Tex)) { uintf newCanvasWidth = loadbm->width; uintf newCanvasHeight = loadbm->height; if (!isPow2(loadbm->width)) { resizeCanvas=true; newCanvasWidth=nextPow2(loadbm->width); } if (!isPow2(loadbm->height)) { resizeCanvas=true; newCanvasHeight=nextPow2(loadbm->height); } if (resizeCanvas) { resizeCanvasSrc = loadbm; loadbm = newbitmap("resizeCanvasP2Tex",newCanvasWidth,newCanvasHeight,bitmap_ARGB32); uintf x,y; uint32 *src32 = (uint32 *)resizeCanvasSrc->pixel; uint32 *dst32 = (uint32 *)loadbm->pixel; for (y=0; y<resizeCanvasSrc->height; y++) { uint32 *src = &src32[y*(resizeCanvasSrc->width)]; uint32 *dst = &dst32[y*newCanvasWidth]; for (x=0; x<resizeCanvasSrc->width; x++) *dst++ = *src++; for (;x<newCanvasWidth; x++) *dst++ = 0; } for (;y<newCanvasHeight; y++) { uint32 *dst = &dst32[y*newCanvasWidth]; for (x=0; x<newCanvasWidth; x++) *dst++=0; } } /* // Work out X scale uintf size = 1; while (size<=maxtexwidth) { if (newx<=size) break; size <<=1; } if (size>maxtexwidth) size = maxtexwidth; newx = size; // Work out Y scale size = 1; while (size<=maxtexheight) { if (newy<=size) break; size <<=1; } if (size>maxtexheight) size = maxtexheight; newy = size; */ } // Step 2: Check if we need to resize - this may change swizzle mode uintf newx = loadbm->width; uintf newy = loadbm->height; if (newx>maxtexwidth) newx = maxtexwidth; if (newy>maxtexheight) newy = maxtexheight; if (newx!=loadbm->width || newy!=loadbm->height) { dataType = bitmap_DataTypeRGB | bitmap_RGB_32bit; mustSwizzle = true; mustScale = true; } if (mustSwizzle) { dataType |= loadbm->flags & bitmap_AlphaMask; swizzleBm = SwizzleBitmap(loadbm, dataType); } else swizzleBm = loadbm; if (mustScale) { // Bitmap needs to be resized before hardware will accept it scaleBm = scalebitmap(swizzleBm,newx,newy); } else scaleBm = swizzleBm; // If we don't have a texture provided, create a new one if (!tex) tex = newTexture(NULL, 0, 0); downloadbitmaptex(tex, scaleBm, 0); estimatedtexmemused += tex->texmemused; if (mustScale) deleteBitmap(scaleBm); if (mustSwizzle) deleteBitmap(swizzleBm); if (resizeCanvas) { deleteBitmap(loadbm); loadbm = resizeCanvasSrc; tex->flags |= texture_canvasSize; tex->UVscale.x = (float)loadbm->width / (float)tex->width; tex->UVscale.y = (float)loadbm->height/ (float)tex->height; } return tex; }
/* Isotropic/anisotropic EWA mip-map texture map class based on PBRT */ MIPMap::MIPMap(int width, int height, Spectrum *pixels, EFilterType filterType, EWrapMode wrapMode, Float maxAnisotropy) : m_width(width), m_height(height), m_filterType(filterType), m_wrapMode(wrapMode), m_maxAnisotropy(maxAnisotropy) { Spectrum *texture = pixels; if (filterType != ENone && (!isPow2(width) || !isPow2(height))) { m_width = (int) roundToPow2((uint32_t) width); m_height = (int) roundToPow2((uint32_t) height); /* The texture needs to be up-sampled */ Spectrum *texture1 = new Spectrum[m_width*height]; /* Re-sample into the X direction */ ResampleWeight *weights = resampleWeights(width, m_width); for (int y=0; y<height; y++) { for (int x=0; x<m_width; x++) { texture1[x+m_width*y] = Spectrum(0.0f); for (int j=0; j<4; j++) { int pos = weights[x].firstTexel + j; if (pos < 0 || pos >= height) { if (wrapMode == ERepeat) pos = modulo(pos, width); else if (wrapMode == EClamp) pos = clamp(pos, 0, width-1); } if (pos >= 0 && pos < width) texture1[x+m_width*y] += pixels[pos+y*width] * weights[x].weight[j]; } } } delete[] weights; delete[] pixels; /* Re-sample into the Y direction */ texture = new Spectrum[m_width*m_height]; weights = resampleWeights(height, m_height); memset(texture, 0, sizeof(Spectrum)*m_width*m_height); for (int x=0; x<m_width; x++) { for (int y=0; y<m_height; y++) { for (int j=0; j<4; j++) { int pos = weights[y].firstTexel + j; if (pos < 0 || pos >= height) { if (wrapMode == ERepeat) pos = modulo(pos, height); else if (wrapMode == EClamp) pos = clamp(pos, 0, height-1); } if (pos >= 0 && pos < height) texture[x+m_width*y] += texture1[x+pos*m_width] * weights[y].weight[j]; } } } for (int y=0; y<m_height; y++) for (int x=0; x<m_width; x++) texture[x+m_width*y].clampNegative(); delete[] weights; delete[] texture1; } if (m_filterType != ENone) m_levels = 1 + log2i((uint32_t) std::max(width, height)); else m_levels = 1; m_pyramid = new Spectrum*[m_levels]; m_pyramid[0] = texture; m_levelWidth = new int[m_levels]; m_levelHeight= new int[m_levels]; m_levelWidth[0] = m_width; m_levelHeight[0] = m_height; /* Generate the mip-map hierarchy */ for (int i=1; i<m_levels; i++) { m_levelWidth[i] = std::max(1, m_levelWidth[i-1]/2); m_levelHeight[i] = std::max(1, m_levelHeight[i-1]/2); m_pyramid[i] = new Spectrum[m_levelWidth[i] * m_levelHeight[i]]; for (int y = 0; y < m_levelHeight[i]; y++) { for (int x = 0; x < m_levelWidth[i]; x++) { m_pyramid[i][x+y*m_levelWidth[i]] = ( getTexel(i-1, 2*x, 2*y) + getTexel(i-1, 2*x+1, 2*y) + getTexel(i-1, 2*x, 2*y+1) + getTexel(i-1, 2*x+1, 2*y+1)) * 0.25f; } } } if (m_filterType == EEWA) { m_weightLut = static_cast<Float *>(allocAligned(sizeof(Float)*MIPMAP_LUTSIZE)); for (int i=0; i<MIPMAP_LUTSIZE; ++i) { Float pos = (Float) i / (Float) (MIPMAP_LUTSIZE-1); m_weightLut[i] = std::exp(-2.0f * pos) - std::exp(-2.0f); } } }
void GFont::makeFont(int charsetSize, const String& infileBase, String outfile) { debugAssert(FileSystem::exists(infileBase + ".tga")); debugAssert(FileSystem::exists(infileBase + ".ini")); debugAssert(charsetSize == 128 || charsetSize == 256); if (outfile == "") { outfile = infileBase + ".fnt"; } TextInput ini(infileBase + ".ini"); BinaryOutput out(outfile, G3D_LITTLE_ENDIAN); ini.readSymbol("["); ini.readSymbol("Char"); ini.readSymbol("Widths"); ini.readSymbol("]"); // Version out.writeInt32(2); // Number of characters out.writeInt32(charsetSize); // Character widths for (int i = 0; i < charsetSize; ++i) { int n = (int)ini.readNumber(); (void)n; debugAssert(n == i); ini.readSymbol("="); int cw = (int)ini.readNumber(); out.writeInt16(cw); } // Load provided source image shared_ptr<Image> image = Image::fromFile(infileBase + ".tga"); debugAssert(isPow2(image->width())); debugAssert(isPow2(image->height())); image->convertToR8(); // Convert to image buffer so can access bytes directly shared_ptr<PixelTransferBuffer> imageBuffer = image->toPixelTransferBuffer(); debugAssert(imageBuffer->format() == ImageFormat::R8()); // Autodetect baseline from capital E const uint8* p = static_cast<const uint8*>(imageBuffer->mapRead()); { // Size of a character, in texels int w = imageBuffer->width() / 16; int x0 = ('E' % 16) * w; int y0 = ('E' / 16) * w; int baseline = w * 2 / 3; bool done = false; // Search up from the bottom for the first pixel for (int y = y0 + w - 1; (y >= y0) && ! done; --y) { for (int x = x0; (x < x0 + w) && ! done; ++x) { if (p[x + y * w * 16] != 0) { baseline = y - y0 + 1; done = true; } } } out.writeUInt16(baseline); } // Texture width out.writeUInt16(imageBuffer->width()); out.writeBytes(p, square(imageBuffer->width()) * 256 / charsetSize); imageBuffer->unmap(); out.compress(); out.commit(false); }
T profileReduce(ReduceType datatype, cl_int n, int numThreads, int numBlocks, int maxThreads, int maxBlocks, int whichKernel, int testIterations, bool cpuFinalReduction, int cpuFinalThreshold, double* dTotalTime, T* h_odata, cl_mem d_idata, cl_mem d_odata) { T gpu_result = 0; bool needReadBack = true; cl_kernel finalReductionKernel[10]; int finalReductionIterations=0; //shrLog("Profile Kernel %d\n", whichKernel); cl_kernel reductionKernel = getReductionKernel(datatype, whichKernel, numThreads, isPow2(n) ); clSetKernelArg(reductionKernel, 0, sizeof(cl_mem), (void *) &d_idata); clSetKernelArg(reductionKernel, 1, sizeof(cl_mem), (void *) &d_odata); clSetKernelArg(reductionKernel, 2, sizeof(cl_int), &n); clSetKernelArg(reductionKernel, 3, sizeof(T) * numThreads, NULL); if( !cpuFinalReduction ) { int s=numBlocks; int threads = 0, blocks = 0; int kernel = (whichKernel == 6) ? 5 : whichKernel; while(s > cpuFinalThreshold) { getNumBlocksAndThreads(kernel, s, maxBlocks, maxThreads, blocks, threads); finalReductionKernel[finalReductionIterations] = getReductionKernel(datatype, kernel, threads, isPow2(s) ); clSetKernelArg(finalReductionKernel[finalReductionIterations], 0, sizeof(cl_mem), (void *) &d_odata); clSetKernelArg(finalReductionKernel[finalReductionIterations], 1, sizeof(cl_mem), (void *) &d_odata); clSetKernelArg(finalReductionKernel[finalReductionIterations], 2, sizeof(cl_int), &n); clSetKernelArg(finalReductionKernel[finalReductionIterations], 3, sizeof(T) * numThreads, NULL); if (kernel < 3) s = (s + threads - 1) / threads; else s = (s + (threads*2-1)) / (threads*2); finalReductionIterations++; } } size_t globalWorkSize[1]; size_t localWorkSize[1]; for (int i = 0; i < testIterations; ++i) { gpu_result = 0; clFinish(cqCommandQueue); if(i>0) shrDeltaT(1); // execute the kernel globalWorkSize[0] = numBlocks * numThreads; localWorkSize[0] = numThreads; ciErrNum = clEnqueueNDRangeKernel(cqCommandQueue,reductionKernel, 1, 0, globalWorkSize, localWorkSize, 0, NULL, NULL); // check if kernel execution generated an error oclCheckError(ciErrNum, CL_SUCCESS); if (cpuFinalReduction) { // sum partial sums from each block on CPU // copy result from device to host clEnqueueReadBuffer(cqCommandQueue, d_odata, CL_TRUE, 0, numBlocks * sizeof(T), h_odata, 0, NULL, NULL); for(int i=0; i<numBlocks; i++) { gpu_result += h_odata[i]; } needReadBack = false; } else { // sum partial block sums on GPU int s=numBlocks; int kernel = (whichKernel == 6) ? 5 : whichKernel; int it = 0; while(s > cpuFinalThreshold) { int threads = 0, blocks = 0; getNumBlocksAndThreads(kernel, s, maxBlocks, maxThreads, blocks, threads); globalWorkSize[0] = threads * blocks; localWorkSize[0] = threads; ciErrNum = clEnqueueNDRangeKernel(cqCommandQueue, finalReductionKernel[it], 1, 0, globalWorkSize, localWorkSize, 0, NULL, NULL); oclCheckError(ciErrNum, CL_SUCCESS); if (kernel < 3) s = (s + threads - 1) / threads; else s = (s + (threads*2-1)) / (threads*2); it++; } if (s > 1) { // copy result from device to host clEnqueueReadBuffer(cqCommandQueue, d_odata, CL_TRUE, 0, s * sizeof(T), h_odata, 0, NULL, NULL); for(int i=0; i < s; i++) { gpu_result += h_odata[i]; } needReadBack = false; } } clFinish(cqCommandQueue); if(i>0) *dTotalTime += shrDeltaT(1); } if (needReadBack) { // copy final sum from device to host clEnqueueReadBuffer(cqCommandQueue, d_odata, CL_TRUE, 0, sizeof(T), &gpu_result, 0, NULL, NULL); } // Release the kernels clReleaseKernel(reductionKernel); if( !cpuFinalReduction ) { for(int it=0; it<finalReductionIterations; ++it) { clReleaseKernel(finalReductionKernel[it]); } } return gpu_result; }