Example #1
//--------------------------------------------------------------------------
void GBitmap::allocateBitmap(const U32 in_width, const U32 in_height, const bool in_extrudeMipLevels, const GFXFormat in_format )
{
   //-------------------------------------- Some debug checks...
   U32 svByteSize = mByteSize;
   U8 *svBits = mBits;

   AssertFatal(in_width != 0 && in_height != 0, "GBitmap::allocateBitmap: width or height is 0");

   if (in_extrudeMipLevels == true) 
   {
      AssertFatal(isPow2(in_width) == true && isPow2(in_height) == true, "GBitmap::GBitmap: in order to extrude mip levels, bitmap w/h must be pow2");
   }

   mInternalFormat = in_format;
   mWidth          = in_width;
   mHeight         = in_height;

   mBytesPerPixel = 1;
   switch (mInternalFormat) 
   {
     case GFXFormatA8:
     case GFXFormatL8:           mBytesPerPixel = 1;
      break;
     case GFXFormatR8G8B8:       mBytesPerPixel = 3;
      break;
     case GFXFormatR8G8B8X8:
     case GFXFormatR8G8B8A8:     mBytesPerPixel = 4;
      break;
     case GFXFormatR5G6B5:
     case GFXFormatR5G5B5A1:     mBytesPerPixel = 2;
      break;
     default:
      AssertFatal(false, "GBitmap::GBitmap: misunderstood format specifier");
      break;
   }

   // Set up the mip levels, if necessary...
   mNumMipLevels       = 1;
   U32 allocPixels = in_width * in_height * mBytesPerPixel;
   mMipLevelOffsets[0] = 0;


   if (in_extrudeMipLevels == true) 
   {
      U32 currWidth  = in_width;
      U32 currHeight = in_height;

      do 
      {
         mMipLevelOffsets[mNumMipLevels] = mMipLevelOffsets[mNumMipLevels - 1] +
                                         (currWidth * currHeight * mBytesPerPixel);
         currWidth  >>= 1;
         currHeight >>= 1;
         if (currWidth  == 0) currWidth  = 1;
         if (currHeight == 0) currHeight = 1;

         mNumMipLevels++;
         allocPixels += currWidth * currHeight * mBytesPerPixel;
      } while (currWidth != 1 || currHeight != 1);
   }

   // Set up the memory; restore any contents that were saved above.
   mByteSize = allocPixels;
   mBits     = new U8[mByteSize];

   if (svBits != NULL)
   {
      dMemcpy(mBits, svBits, getMin(mByteSize, svByteSize));
      delete [] svBits;
   }
}
Example #2
PrecomputedRandom::PrecomputedRandom(int dataSize, uint32 seed) : 
 Random((void*)NULL),
 m_hemiUniform(NULL),
 m_sphereBits(NULL),
 m_modMask(dataSize - 1),
 m_freeData(true) {

    alwaysAssertM(isPow2(dataSize), "dataSize must be a power of 2");
    m_index = seed & m_modMask;

    HemiUniformData* h;
    SphereBitsData* s;
    m_hemiUniform = h = (HemiUniformData*) System::malloc(sizeof(HemiUniformData) * dataSize);
    m_sphereBits = s = (SphereBitsData*) System::malloc(sizeof(SphereBitsData) * dataSize);

    Random r;

    for (int i = 0; i < dataSize; ++i) {
        h[i].uniform = r.uniform();
        r.cosHemi(h[i].cosHemiX, h[i].cosHemiY, h[i].cosHemiZ); 

        s[i].bits = r.bits();
        r.sphere(s[i].sphereX, s[i].sphereY, s[i].sphereZ);         
    }

}
Example #3
GLsizei ComputePitch(GLsizei width, GLint internalformat, GLint alignment)
{
    ASSERT(alignment > 0 && isPow2(alignment));

    GLsizei rawPitch = ComputePixelSize(internalformat) * width;
    return (rawPitch + alignment - 1) & ~(alignment - 1);
}
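Aside: the alignment trick above only works because the assert guarantees a power-of-two alignment. For reference, here is a minimal sketch of the two helpers these listings keep calling; each project above ships its own version, so treat these as illustrative, not canonical:

#include <cstdint>

// A power of two has exactly one bit set, so x & (x - 1) clears it to zero.
static inline bool isPow2(uint64_t x)
{
    return x != 0 && (x & (x - 1)) == 0;
}

// Round x up to the next power of two by smearing the top set bit rightward.
static inline uint32_t nextPow2(uint32_t x)
{
    --x;
    x |= x >> 1;  x |= x >> 2;  x |= x >> 4;
    x |= x >> 8;  x |= x >> 16;
    return x + 1;
}

// With a power-of-two alignment a, (n + a - 1) & ~(a - 1) rounds n up to the
// next multiple of a, which is exactly what ComputePitch does with the pitch.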
Example #4
/// Initializes the parallel sum object to sum n entries from a cl_mem buffer.
/// Temporary buffers are allocated here if they have not been created yet.
void CRoutine_Sum_NVidia::Init(int n)
{
	int status = CL_SUCCESS;

	mInputSize = n;
	mBufferSize = n;

	// The NVidia SDK kernel on which this routine is based is designed only for power-of-two
	// sized buffers. Because of this, we'll create internal buffers that round up to the
	// next highest power of two.
	if(!isPow2(mBufferSize))
		mBufferSize = nextPow2(mBufferSize);

	// TODO: Workaround for issue 32 in which kernel fails to compute sums for N = [33 - 64]
	// https://github.com/bkloppenborg/liboi/issues/32
	if(mBufferSize < 128)
		mBufferSize = 128;

	BuildKernels();

	if(mTempBuffer1 == NULL)
	{
		mTempBuffer1 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &status);
		CHECK_OPENCL_ERROR(status, "clCreateBuffer failed.");
	}

	if(mTempBuffer2 == NULL)
	{
		mTempBuffer2 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &status);
		CHECK_OPENCL_ERROR(status, "clCreateBuffer failed.");
	}
}
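Traced for a concrete input (n = 1000, chosen only for illustration), the sizing logic above gives:

// mInputSize  = 1000
// isPow2(1000) is false, so mBufferSize = nextPow2(1000) = 1024
// 1024 >= 128, so the issue-32 floor leaves mBufferSize unchanged
// => both temporary cl_mem buffers are created with room for 1024 cl_floats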
Example #5
int main() {
	uint64 a;
	int k,l;
	scanf("%d",&k); 
	while(k>0) {
		l=0;
		scanf("%llu",&a);
		while(a>1) {
			if( isPow2(a) ) {
				a/=2;
			} else {
				a=a-(1LLU<<msb(a));
			}
			l++;
//			printf("%llu\n",a);
		}
		if(l%2) {
			printf("Louise\n");
		} else {
			printf("Richard\n");
		}
		k--;
	}
	return 0;
}
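Besides isPow2, this listing assumes an msb() helper that returns the index of the highest set bit of its argument. A portable sketch (real code would more likely use an intrinsic such as GCC/Clang's __builtin_clzll):

static inline int msb(unsigned long long x)
{
    int i = -1;
    while (x) {   // the loop count ends up being the index of the top set bit
        x >>= 1;
        ++i;
    }
    return i;
}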
Example #6
	msg getCodeFromHam (msg t) {
		msg h; // message that will receive the extracted original code
		int i, j, fromPos;
		int pNo = 0;

		while (pow2 (pNo) < t.len){
			pNo ++;
		}
		h.len = t.len - pNo; // data bits = total length minus parity bits
		for (i = 0; i < h.len; i++){
			h.payload[i] = 0;
		}

		for (i = 0; i < 8; i++){
			fromPos = 1;
			for (j = 0; j < h.len; j++){
				fromPos ++;
				while (isPow2(fromPos) == 1){
					fromPos ++;
				}
				if (getBit (t.payload [fromPos - 1], i) == 1){
					setBit (&h.payload [j], i);
				}
			}
		}

		return h;
	}
Example #7
GLsizei ComputePitch(GLsizei width, GLenum format, GLenum type, GLint alignment)
{
    ASSERT(alignment > 0 && isPow2(alignment));

    GLsizei rawPitch = ComputePixelSize(format, type) * width;
    return (rawPitch + alignment - 1) & ~(alignment - 1);
}
Example #8
/// Initializes the parallel sum object to sum n entries from a cl_mem buffer.
/// Temporary buffers are allocated here if they have not been created yet.
void CRoutine_Sum::Init(int n)
{
	int err = CL_SUCCESS;

	mInputSize = n;
	mBufferSize = n;

	// The NVidia SDK kernel on which this routine is based is designed only for power-of-two
	// sized buffers. Because of this, we'll create internal buffers that round up to the
	// next highest power of two.
	if(!isPow2(mBufferSize))
		mBufferSize = nextPow2(mBufferSize);

	// TODO: Workaround for issue 32
	// https://github.com/bkloppenborg/liboi/issues/32
	if(mBufferSize < 128)
		mBufferSize = 128;

	BuildKernels();

	if(mTempBuffer1 == NULL)
	{
		mTempBuffer1 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &err);
		mTempBuffer2 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &err);
		COpenCL::CheckOCLError("Could not create parallel sum temporary buffer.", err);
	}
}
Example #9
/**
 * Evaluate a row of the gram matrix
 * @param d_xtraindata device pointer to the training set
 * @param d_dottraindata device pointer to the array containing the dot product of the row with itself
 * @param d_kernelrow device pointer that will store the array extracted from d_xtraindata.
 * @param d_kerneldot device pointer that will store the result of the kernel evaluation
 * @param d_kdata device pointer to the matrix that stores the cached values
 * @param gid index that points to the point in d_xtraindata to be  calculated
 * @param cacheid index that points to the location in cache that will keep the results
 * @param ntraining number of training samples in the training set
 * @param nfeatures number of features in the training samples
 * @param beta value of the parameter of the RBF kernel
 * @param a for polynomial or sigmoid kernels, the coefficient a in a*(x_i . x_j) + b
 * @param b for polynomial or sigmoid kernels, the offset b
 * @param d for polynomial kernels, the degree d
 * @param kernelcode code that indicates the kernel type to run
 */
void kerneleval ( 	float* d_xtraindata,
					float* d_dottraindata,
					float* d_kernelrow,
					float* d_kerneldot,
					float* d_kdata,
					int gid,
					int cacheid,
					int ntraining,
					int nfeatures,
					float beta,
					float a,
					float b,
					float d,
					int kernelcode)
{

	int numThreads = (nfeatures < MAXTHREADS*2) ? nextPow2((nfeatures + 1)/ 2) : MAXTHREADS;
	int numBlocks = (nfeatures + (numThreads * 2 - 1)) / (numThreads * 2);
	int numBlocksRed = min(MAXBLOCKS, numBlocks);

	dim3 dimBlockKernelRow(numThreads, 1, 1);
	dim3 dimGridKernelRow(numBlocksRed, 1, 1);

	int smemSize = 0;
	bool isNfeaturesPow2 = isPow2(nfeatures); // true when the feature count is a power of two

	if(isNfeaturesPow2)
	{
		switch (numThreads)
		{
			case 512:
				ExtractKernelRow <512,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case 256:
				ExtractKernelRow <256,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case 128:
				ExtractKernelRow <128,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case 64:
				ExtractKernelRow <64,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case 32:
				ExtractKernelRow <32,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case 16:
				ExtractKernelRow <16,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case  8:
				ExtractKernelRow <8,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case  4:
				ExtractKernelRow <4,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case  2:
				ExtractKernelRow <2,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case  1:
				ExtractKernelRow <1,true><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
		}
	}
	else
	{
		switch (numThreads)
		{
			// Mirrors the power-of-two branch above with the template flag set to false.
			case 512:
				ExtractKernelRow <512,false><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case 256:
				ExtractKernelRow <256,false><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case 128:
				ExtractKernelRow <128,false><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case 64:
				ExtractKernelRow <64,false><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case 32:
				ExtractKernelRow <32,false><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case 16:
				ExtractKernelRow <16,false><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case  8:
				ExtractKernelRow <8,false><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case  4:
				ExtractKernelRow <4,false><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case  2:
				ExtractKernelRow <2,false><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
			case  1:
				ExtractKernelRow <1,false><<< dimGridKernelRow, dimBlockKernelRow, smemSize >>>(d_xtraindata,d_kernelrow,	gid,ntraining,nfeatures); break;
		}
	}
}
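For a sense of the launch shape computed at the top of kerneleval, traced with assumed values nfeatures = 300 and MAXTHREADS = 128 (neither is given in the listing):

// numThreads   = (300 < 256) ? nextPow2((300 + 1) / 2) : 128   -> 128
// numBlocks    = (300 + 128*2 - 1) / (128*2)                   -> 2
// numBlocksRed = min(MAXBLOCKS, 2)                             -> 2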
Example #10
PrecomputedRandom::PrecomputedRandom(const HemiUniformData* data1, const SphereBitsData* data2, int dataSize, uint32 seed) : 
 Random((void*)NULL),
 m_hemiUniform(data1),
 m_sphereBits(data2),
 m_modMask(dataSize - 1),
 m_freeData(false) {

    m_index = seed & m_modMask;
    alwaysAssertM(isPow2(dataSize), "dataSize must be a power of 2");
}
Example #11
void* System::alignedMalloc(size_t bytes, size_t alignment) {
    alwaysAssertM(isPow2(alignment), "alignment must be a power of 2");

    // We must align to at least a word boundary.
    alignment = iMax((int)alignment, sizeof(void *));

    // Pad the allocation size with the alignment size and the
    // size of the redirect pointer.
    size_t totalBytes = bytes + alignment + sizeof(intptr_t);

    void* truePtr = System::malloc(totalBytes);

    if (!truePtr) {
        // malloc returned NULL
        return NULL;
    }

    debugAssert(isValidHeapPointer(truePtr));
    #ifdef G3D_WIN32
    // The blocks we return will not be valid Win32 debug heap
    // pointers because they are offset 
    //  debugAssert(_CrtIsValidPointer(truePtr, totalBytes, TRUE) );
    #endif

    // The return pointer will be the next aligned location (we must at least
    // leave space for the redirect pointer, however).
    char* alignedPtr = ((char*)truePtr)+ sizeof(intptr_t);

#if 0
    // 2^n - 1 has the form 1111... in binary.
    uint32 bitMask = (alignment - 1);

    // Advance forward until we reach an aligned location.
    while ((((intptr_t)alignedPtr) & bitMask) != 0) {
        alignedPtr += sizeof(void*);
    }
#else
    alignedPtr += alignment - (((intptr_t)alignedPtr) & (alignment - 1));
    // assert((alignedPtr - truePtr) + bytes <= totalBytes);
#endif

    debugAssert((alignedPtr - truePtr) + bytes <= totalBytes);

    // Immediately before the aligned location, write the true array location
    // so that we can free it correctly.
    intptr_t* redirectPtr = (intptr_t*)(alignedPtr - sizeof(intptr_t));
    redirectPtr[0] = (intptr_t)truePtr;

    debugAssert(isValidHeapPointer(truePtr));

    #ifdef G3D_WIN32
        debugAssert( _CrtIsValidPointer(alignedPtr, bytes, TRUE) );
    #endif
    return (void*)alignedPtr;
}
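A matching release routine for the redirect-pointer scheme above would read back the true allocation address stored just before the aligned block. This is only a sketch; G3D's actual System::alignedFree may differ in detail:

void alignedFree(void* ptr)
{
    if (ptr == NULL)
        return;

    // alignedMalloc wrote the true allocation address into the word
    // immediately preceding the pointer it handed out; recover and free it.
    intptr_t* redirectPtr = (intptr_t*)((char*)ptr - sizeof(intptr_t));
    System::free((void*)redirectPtr[0]);
}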
Example #12
void	Boot::setBootBlock(BootBlock *bootBlock)
{
  _bootBlock = bootBlock;
  _clusterSize = _bootBlock->bytePerSector * _bootBlock->sectorPerCluster;
  if (isPow2(_bootBlock->clusterIndexRecord))
    _indexRecordSize = _bootBlock->clusterIndexRecord * _clusterSize;
  else {
    DEBUG(INFO, "Invalid index record size in BootSector\n");
  }
}
Example #13
static bool
getGDGT(SparseGTInfo *gtInfo,
        const SparseExtentHeader *hdr)
{
	if (hdr->grainSize < 1 || hdr->grainSize > 128 || !isPow2(hdr->grainSize)) {
		return false;
	}
	/* disklib supports only 512 GTEs per GT (=> 4KB GT size).  Streaming is more flexible. */
	if (hdr->numGTEsPerGT < VMDK_SECTOR_SIZE / sizeof(uint32_t) || !isPow2(hdr->numGTEsPerGT)) {
		return false;
	}
	gtInfo->lastGrainNr = hdr->capacity / hdr->grainSize;
	gtInfo->lastGrainSize = (hdr->capacity & (hdr->grainSize - 1)) * VMDK_SECTOR_SIZE;

	{
		uint64_t GTEs = gtInfo->lastGrainNr + (gtInfo->lastGrainSize != 0);
		/* Number of GTEs must be less than 2^32.  Actually capacity must be less than 2^32 (2TB) for everything except streamOptimized format... */
		uint32_t GTs = CEILING(GTEs, hdr->numGTEsPerGT);
		uint32_t GDsectors = CEILING(GTs * sizeof(uint32_t), VMDK_SECTOR_SIZE);
		uint32_t GTsectors = CEILING(hdr->numGTEsPerGT * sizeof(uint32_t), VMDK_SECTOR_SIZE);
		uint32_t *gd = calloc(GDsectors + GTsectors * GTs, VMDK_SECTOR_SIZE);
		uint32_t *gt;

		if (!gd) {
			return false;
		}
		gt = gd + GDsectors * VMDK_SECTOR_SIZE / sizeof(uint32_t);
		gtInfo->GTEs = GTEs;
		gtInfo->GTs = GTs;
		gtInfo->GDsectors = GDsectors;
		gtInfo->gd = gd;
		gtInfo->GTsectors = GTsectors;
		gtInfo->gt = gt;
	}
	return true;
}
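Worked through for an assumed 1 GiB disk (capacity = 2097152 sectors, grainSize = 128, numGTEsPerGT = 512; values chosen for illustration), the sizing block above yields:

// lastGrainNr = 2097152 / 128                        = 16384
// GTEs        = 16384 (capacity divides evenly, so lastGrainSize == 0)
// GTs         = CEILING(16384, 512)                  = 32
// GDsectors   = CEILING(32 * sizeof(uint32_t), 512)  = 1
// GTsectors   = CEILING(512 * sizeof(uint32_t), 512) = 4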
Example #14
void CRoutine_Sum_NVidia::BuildKernels()
{
	int whichKernel = 6;
	int numBlocks = 0;
	int numThreads = 0;
#ifdef __APPLE__
	int maxThreads = 64;
#else
	int maxThreads = 128;
#endif
	int maxBlocks = 64;
	int cpuFinalThreshold = 1;

	getNumBlocksAndThreads(whichKernel, mBufferSize, maxBlocks, maxThreads, numBlocks, numThreads);
	BuildReductionKernel(whichKernel, numThreads, isPow2(mBufferSize) );
	mBlocks.push_back(numBlocks);
	mThreads.push_back(numThreads);
	mReductionPasses += 1;

	int s = numBlocks;
	int threads = 0, blocks = 0;
	int kernel = (whichKernel == 6) ? 5 : whichKernel;

	while(s > cpuFinalThreshold)
	{
		getNumBlocksAndThreads(kernel, s, maxBlocks, maxThreads, blocks, threads);
		BuildReductionKernel(kernel, threads, isPow2(s) );
		mBlocks.push_back(blocks);
		mThreads.push_back(threads);

		s = (s + (threads*2-1)) / (threads*2);
		mReductionPasses += 1;
	}

	mFinalS = s;
}
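Traced with an assumed mBufferSize of 65536 and the non-Apple limits above (maxThreads = 128, maxBlocks = 64), BuildKernels produces this schedule:

// pass 1: 64 blocks x 128 threads; each block strides over 65536/64 = 1024 elements -> s = 64
// pass 2: the 64 partial sums fold in a single block                                -> s = 1
// mFinalS = 1 and mReductionPasses = 2, so no CPU-side finishing sum is required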
Example #15
ImageBuffer::ImageBuffer(const ImageFormat* format, int width, int height, int depth, int rowAlignment) 
    : m_buffer(NULL)
    , m_format(format)
    , m_rowAlignment(rowAlignment)
    , m_rowStride(0)
    , m_width(width)
    , m_height(height)
    , m_memoryManager(NULL)
    , m_depth(depth) {

    debugAssert(m_format);
    debugAssert(isPow2(m_rowAlignment));
    debugAssert(m_width > 0);
    debugAssert(m_height > 0);
    debugAssert(m_depth > 0);
}
Example #16
/**
 * Check
 *  - BOOT_MEDIA_DESCRIPTOR_ID in mediaDescriptorId
 *  - BOOT_FAT_NTFS_SIGNATURE in signature
 */
bool			Boot::isBootBlock(uint64_t offset)
{
  std::ostringstream	expectedMediaID;
  BootBlock		*bootBlock = new BootBlock;

  _vfile->seek(offset);
  _vfile->read(bootBlock, BOOT_BLOCK_SIZE);
  expectedMediaID << BOOT_MEDIA_DESCRIPTOR_ID;
  if ((expectedMediaID.str() == std::string(bootBlock->mediaDescriptorId))
      && (bootBlock->signature == BOOT_FAT_NTFS_SIGNATURE)) {
    setBootBlock(bootBlock);

#if __WORDSIZE == 64
    DEBUG(VERBOSE, "NTFS Boot block found at offset 0x%lx in vfile %s\n", offset, _vfile->node()->absolute().c_str());
#else
    DEBUG(VERBOSE, "NTFS Boot block found at offset 0x%llx in vfile %s\n", offset, _vfile->node()->absolute().c_str());
#endif
    DEBUG(VERBOSE, "Byte per sector: %u\n", bootBlock->bytePerSector);
    DEBUG(VERBOSE, "Sector per cluster: %u\n", bootBlock->sectorPerCluster);
#if __WORDSIZE == 64
    DEBUG(VERBOSE, "Number of sector: %lx\n", bootBlock->numberOfSector);
    DEBUG(VERBOSE, "Start Mft: 0x%lx\n", bootBlock->startMft);
    DEBUG(VERBOSE, "Start Mft 16b Mirror: 0x%lx\n", bootBlock->startMftMirr);
#else
    DEBUG(VERBOSE, "Number of sector: %llx\n", bootBlock->numberOfSector);
    DEBUG(VERBOSE, "Start Mft: 0x%llx\n", bootBlock->startMft);
    DEBUG(VERBOSE, "Start Mft 16b Mirror: 0x%llx\n", bootBlock->startMftMirr);
#endif
    DEBUG(VERBOSE, "Cluster Mft record: %u\n", bootBlock->clusterMftRecord);
    if (isPow2(bootBlock->clusterMftRecord)) {
      _mftEntrySize = bootBlock->clusterMftRecord * _clusterSize;
      DEBUG(VERBOSE,"MFT Entry size: %u\n", _mftEntrySize);
    }
    else {
      DEBUG(VERBOSE, "MFT Entry size not valid in bootblock\n");
      DEBUG(VERBOSE, "Will search for it\n");
    }
    DEBUG(VERBOSE, "Cluster Index record: %u\n", bootBlock->clusterIndexRecord);
    DEBUG(VERBOSE, "Cluster Size: %u\n", _clusterSize);
  }
  else {
    delete bootBlock;
    return false;
  }
  return true;
}
Example #17
ZArray::ZArray(uint32_t _numLines, uint32_t _ways, uint32_t _candidates, ReplPolicy* _rp, HashFamily* _hf) //(int _size, int _lineSize, int _assoc, int _zassoc, ReplacementPolicy<T>* _rp, int _hashType)
    : rp(_rp), hf(_hf), numLines(_numLines), ways(_ways), cands(_candidates)
{
    assert_msg(ways > 1, "zcaches need >=2 ways to work");
    assert_msg(cands >= ways, "candidates < ways does not make sense in a zcache");
    assert_msg(numLines % ways == 0, "number of lines is not a multiple of ways");

    //Populate secondary parameters
    numSets = numLines/ways;
    assert_msg(isPow2(numSets), "must have a power of 2 # sets, but you specified %d", numSets);
    setMask = numSets - 1;

    lookupArray = gm_calloc<uint32_t>(numLines);
    array = gm_calloc<Address>(numLines);
    for (uint32_t i = 0; i < numLines; i++) {
        lookupArray[i] = i;  // start with a linear mapping; with swaps, it'll get progressively scrambled
    }
    swapArray = gm_calloc<uint32_t>(cands/ways + 2);  // conservative upper bound (tight within 2 ways)
}
Example #18
//-----------------------------------------------------------------------------
// _innerCreateTexture
//-----------------------------------------------------------------------------
void GFXD3D9TextureManager::_innerCreateTexture( GFXD3D9TextureObject *retTex, 
                                               U32 height, 
                                               U32 width, 
                                               U32 depth,
                                               GFXFormat format, 
                                               GFXTextureProfile *profile, 
                                               U32 numMipLevels,
                                               bool forceMips,
                                               S32 antialiasLevel)
{
   GFXD3D9Device* d3d = static_cast<GFXD3D9Device*>(GFX);

   // Some relevant helper information...
   bool supportsAutoMips = GFX->getCardProfiler()->queryProfile("autoMipMapLevel", true);
   
   DWORD usage = 0;   // 0, D3DUSAGE_RENDERTARGET, or D3DUSAGE_DYNAMIC
   D3DPOOL pool = D3DPOOL_DEFAULT;

   retTex->mProfile = profile;

   D3DFORMAT d3dTextureFormat = GFXD3D9TextureFormat[format];

#ifndef TORQUE_OS_XENON
   if( retTex->mProfile->isDynamic() )
   {
      usage = D3DUSAGE_DYNAMIC;
   }
   else
   {
      usage = 0;
      pool = d3d->isD3D9Ex() ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED;
   }

   if( retTex->mProfile->isRenderTarget() )
   {
      pool = D3DPOOL_DEFAULT;
      usage |= D3DUSAGE_RENDERTARGET;
   }

   if(retTex->mProfile->isZTarget())
   {
      usage |= D3DUSAGE_DEPTHSTENCIL;
      pool = D3DPOOL_DEFAULT;
   }

   if( retTex->mProfile->isSystemMemory() )
   {
      pool = D3DPOOL_SYSTEMMEM;
   }

   if( supportsAutoMips && 
       !forceMips &&
       !retTex->mProfile->isSystemMemory() &&
       numMipLevels == 0 &&
       !(depth > 0) )
   {
      usage |= D3DUSAGE_AUTOGENMIPMAP;
   }
#else
   if(retTex->mProfile->isRenderTarget())
   {
      d3dTextureFormat = (D3DFORMAT)MAKELEFMT(d3dTextureFormat);
   }
#endif

   // Set the managed flag...
   retTex->isManaged = (pool == D3DPOOL_MANAGED) || d3d->isD3D9Ex();
   
   if( depth > 0 )
   {
#ifdef TORQUE_OS_XENON
      D3D9Assert( mD3DDevice->CreateVolumeTexture( width, height, depth, numMipLevels, 0 /* usage ignored on the 360 */, 
         d3dTextureFormat, pool, retTex->get3DTexPtr(), NULL), "Failed to create volume texture" );
#else
      D3D9Assert(
         GFXD3DX.D3DXCreateVolumeTexture(
            mD3DDevice,
            width,
            height,
            depth,
            numMipLevels,
            usage,
            d3dTextureFormat,
            pool,
            retTex->get3DTexPtr()
         ), "GFXD3D9TextureManager::_createTexture - failed to create volume texture!"
      );
#endif

      retTex->mTextureSize.set( width, height, depth );
      retTex->mMipLevels = retTex->get3DTex()->GetLevelCount();
      // Required for 3D texture support - John Kabus.
      retTex->mFormat = format;
   }
   else
   {
#ifdef TORQUE_OS_XENON
      D3D9Assert( mD3DDevice->CreateTexture(width, height, numMipLevels, usage, d3dTextureFormat, pool, retTex->get2DTexPtr(), NULL), "Failed to create texture" );
      retTex->mMipLevels = retTex->get2DTex()->GetLevelCount();
#else
      // Figure out AA settings for depth and render targets
      D3DMULTISAMPLE_TYPE mstype;
      DWORD mslevel;

      switch (antialiasLevel)
      {
         case 0 :
            mstype = D3DMULTISAMPLE_NONE;
            mslevel = 0;
            break;
         case AA_MATCH_BACKBUFFER :
            mstype = d3d->getMultisampleType();
            mslevel = d3d->getMultisampleLevel();
            break;
         default :
            {
               mstype = D3DMULTISAMPLE_NONMASKABLE;
               mslevel = antialiasLevel;
#ifdef TORQUE_DEBUG
               DWORD MaxSampleQualities;      
               d3d->getD3D()->CheckDeviceMultiSampleType(mAdapterIndex, D3DDEVTYPE_HAL, d3dTextureFormat, FALSE, D3DMULTISAMPLE_NONMASKABLE, &MaxSampleQualities);
               AssertFatal(mslevel < MaxSampleQualities, "Invalid AA level!");
#endif
            }
            break;
      }
     
      bool fastCreate = true;
      // Check for power of 2 textures - this is a problem with FX 5xxx cards
      // with current drivers - 3/2/05
      if( !isPow2(width) || !isPow2(height) )
      {
         fastCreate = false;
      }

      if(retTex->mProfile->isZTarget())
      {
         D3D9Assert(mD3DDevice->CreateDepthStencilSurface(width, height, d3dTextureFormat,
            mstype, mslevel, retTex->mProfile->canDiscard(), retTex->getSurfacePtr(), NULL), "Failed to create Z surface" );

         retTex->mFormat = format; // Assigning format like this should be fine.
      }
      else
      {
         // Try to create the texture directly - should gain us a bit in high
         // performance cases where we know we're creating good stuff and we
         // don't want to bother with D3DX - slow function.
         HRESULT res = D3DERR_INVALIDCALL;
         if( fastCreate )
         {
            res = mD3DDevice->CreateTexture(width, height, numMipLevels, usage, d3dTextureFormat, pool, retTex->get2DTexPtr(), NULL);
         }

         if( !fastCreate || (res != D3D_OK) )
         {
            D3D9Assert(
               GFXD3DX.D3DXCreateTexture(
               mD3DDevice,
               width,
               height,
               numMipLevels,
               usage,
               d3dTextureFormat,
               pool,
               retTex->get2DTexPtr()
               ), "GFXD3D9TextureManager::_createTexture - failed to create texture!"
               );
         }

         // If this is a render target, and it wants AA or wants to match the backbuffer (for example, to share the z)
         // Check the caps though, if we can't stretchrect between textures, use the old RT method.  (Which hopefully means
         // that they can't force AA on us as well.)
         if (retTex->mProfile->isRenderTarget() && mslevel != 0 && (mDeviceCaps.Caps2 & D3DDEVCAPS2_CAN_STRETCHRECT_FROM_TEXTURES))
         {
            D3D9Assert(mD3DDevice->CreateRenderTarget(width, height, d3dTextureFormat, 
               mstype, mslevel, false, retTex->getSurfacePtr(), NULL),
               "GFXD3D9TextureManager::_createTexture - unable to create render target");
         }

         // All done!
         retTex->mMipLevels = retTex->get2DTex()->GetLevelCount();
      }
#endif

      // Get the actual size of the texture...
      D3DSURFACE_DESC probeDesc;
      ZeroMemory(&probeDesc, sizeof probeDesc);

      if( retTex->get2DTex() != NULL )
         D3D9Assert( retTex->get2DTex()->GetLevelDesc( 0, &probeDesc ), "Failed to get surface description");
      else if( retTex->getSurface() != NULL )
         D3D9Assert( retTex->getSurface()->GetDesc( &probeDesc ), "Failed to get surface description");

      retTex->mTextureSize.set(probeDesc.Width, probeDesc.Height, 0);
      
      int fmt = probeDesc.Format;

#if !defined(TORQUE_OS_XENON)
      GFXREVERSE_LOOKUP( GFXD3D9TextureFormat, GFXFormat, fmt );
      retTex->mFormat = (GFXFormat)fmt;
#else
      retTex->mFormat = format;
#endif
   }
}
Example #19
SetAssocArray::SetAssocArray(uint32_t _numLines, uint32_t _assoc, ReplPolicy* _rp, HashFamily* _hf) : rp(_rp), hf(_hf), numLines(_numLines), assoc(_assoc)  {
    array = gm_calloc<Address>(numLines);
    numSets = numLines/assoc;
    setMask = numSets - 1;
    assert(isPow2(numSets));
}
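Both cache arrays assert a power-of-two set count because the set index is derived with a mask rather than a modulo. In isolation:

// With numSets a power of two, setMask = numSets - 1 has all ones below the
// set bits, so for any hash value h:
//   h & setMask == h % numSets
// e.g. numSets = 1024: setMask = 0x3FF, and h & 0x3FF selects the set.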
Example #20
void ClipMap::recenter(Point2F center)
{
   bool wantCompleteRefill = false;
   if(mNeedRefill || mForceClipmapPurge || Con::getBoolVariable("$forceFullClipmapPurgeEveryFrame", false))
      wantCompleteRefill = true;

   PROFILE_START(ClipMap_recenter);

   // Reset our budget.
   mMaxTexelUploadPerRecenter = mClipMapSize * mClipMapSize * 2;

   AssertFatal(isPow2(mClipMapSize), 
      "ClipMap::recenter - require pow2 clipmap size!");

   // Clamp the center to the unit square.

   /*
   if(!mTile)
   {
      center.x = mClampF(center.x, 0.f, 1.f);
      center.y = mClampF(center.y, 0.f, 1.f);
   }
   */

   // Ok, we're going to do toroidal updates on each entry of the clipstack
   // (except for the cap, which covers the whole texture), based on this
   // new center point.
   if( !wantCompleteRefill )
   {
      // Calculate the new texel at most detailed level.
      Point2F texelCenterF = center * F32(mClipMapSize) * mLevels[0].mScale;
      Point2I texelCenter((S32)mFloor(texelCenterF.y), (S32)mFloor(texelCenterF.x));
      
      // Update interest region.
      mImageCache->setInterestCenter(texelCenter);
   }

   // Note how many we were at so we can cut off at the right time.
   S32 lastTexelsUpdated = mTexelsUpdated;

   // For each texture...
   for(S32 i=mClipStackDepth-2; i>=0; i--)
   {
      ClipStackEntry &cse = mLevels[i];

      // Calculate new center point for this texture.
      Point2F texelCenterF = center * F32(mClipMapSize) * cse.mScale;

      const S32 texelMin = mClipMapSize/2;
      //const S32 texelMax = S32(F32(mClipMapSize) * cse.mScale) - texelMin;

      Point2I texelTopLeft;

      //if(mTile)
      //{
         texelTopLeft.x = S32(mFloor(texelCenterF.y)) - texelMin;
         texelTopLeft.y = S32(mFloor(texelCenterF.x)) - texelMin;
      //}
      //else
      //{
      //   texelTopLeft.x = mClamp(S32(mFloor(texelCenterF.y)), texelMin, texelMax) - texelMin;
      //   texelTopLeft.y = mClamp(S32(mFloor(texelCenterF.x)), texelMin, texelMax) - texelMin;
      //}

      // Also, prevent very small updates - the RT changes are costly.
      Point2I d = cse.mToroidalOffset - texelTopLeft;
      if(mAbs(d.x) <= 2 && mAbs(d.y) <= 2)
      {
         // Update the center; otherwise we get some weird conditions around
         // edges of the clipmap space.
         cse.mClipCenter = center;
         continue;
      }

      // This + current toroid offset tells us what regions have to be blasted.
      RectI oldData(cse.mToroidalOffset,  Point2I(mClipMapSize, mClipMapSize));
      RectI newData(texelTopLeft,         Point2I(mClipMapSize, mClipMapSize));

      // Update clipstack level.
      cse.mClipCenter      = center;
      cse.mToroidalOffset  = texelTopLeft;

      // If we're refilling, that's all we want; continue with next level.
      if( wantCompleteRefill )
         continue;

      // Make sure we have available data...
      if(!mImageCache->isDataAvailable(getMipLevel(cse.mScale), newData))
         continue;

      // Alright, determine the set of data we actually need to upload.
      S32   rectCount = 0;
      RectI buffer[8];

      calculateModuloDeltaBounds(oldData, newData, buffer, &rectCount);
      AssertFatal(rectCount < 8, "ClipMap::recenter - got too many rects back!");

      /*if(rectCount)
         Con::printf("    issuing %d updates to clipmap level %d (offset=%dx%d)", 
                        rectCount, i, texelTopLeft.x, texelTopLeft.y); */

      if(rectCount)
      {
         if (!mImageCache->beginRectUpdates(cse))
         {
            mForceClipmapPurge = true;
            return;
         }
         //Con::errorf("layer %x, %d updates", &cse,  rectCount);

         // And GO!
         for(S32 j=0; j<rectCount; j++)
         {
            PROFILE_START(ClipMap_recenter_upload);

            AssertFatal(buffer[j].isValidRect(),"ClipMap::recenter - got invalid rect!");

            // Note the rect, so we can then wrap and let the image cache do its thing.
            RectI srcRegion = buffer[j];
            buffer[j].point.x = srcRegion.point.x % mClipMapSize;
            buffer[j].point.y = srcRegion.point.y % mClipMapSize;

            AssertFatal(newData.contains(srcRegion), 
               "ClipMap::recenter - got update buffer outside of expected new data bounds.");

            mTotalUpdates++;
            mTexelsUpdated += srcRegion.extent.x  * srcRegion.extent.y;

            //Con::printf("updating (%d %d %d %d)",
            //   buffer[j].point.x, buffer[j].point.y, buffer[j].extent.x, buffer[j].extent.y);

            mImageCache->doRectUpdate(getMipLevel(cse.mScale), cse, srcRegion, buffer[j]);

            PROFILE_END();
         }

         mImageCache->finishRectUpdates(cse);
      }

      // Check if we've overrun our budget.
      if((mTexelsUpdated - lastTexelsUpdated) > mMaxTexelUploadPerRecenter)
      {
         //Con::warnf("ClipMap::recenter - exceeded budget for this frame, deferring till next frame.");
         break;
      }

   }

   if( wantCompleteRefill )
   {
      fillWithTextureData();
      mNeedRefill = false;
   }

   PROFILE_END();
}
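The recenter loop wraps update rectangles into the clipmap texture with a modulo; because mClipMapSize is asserted to be a power of two, the same wrap can be written as a mask (a sketch, not code from ClipMap itself):

static inline S32 wrapToroidal(S32 texel, S32 clipMapSize)
{
    // Equivalent to texel % clipMapSize for non-negative texel values
    // whenever clipMapSize is a power of two.
    return texel & (clipMapSize - 1);
}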
Example #21
//------------------------------------------------------------------------
// IsContainableImmed: Is an immediate encodable in-place?
//
// Return Value:
//    True if the immediate can be folded into an instruction,
//    for example small enough and non-relocatable.
bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
{
    if (varTypeIsFloating(parentNode->TypeGet()))
    {
        // We can contain a floating point 0.0 constant in a compare instruction
        switch (parentNode->OperGet())
        {
            default:
                return false;

            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
                if (childNode->IsIntegralConst(0))
                {
                    // TODO-ARM-Cleanup: not tested yet.
                    NYI_ARM("ARM IsContainableImmed for floating point type");

                    return true;
                }
                break;
        }
    }
    else
    {
        // Make sure we have an actual immediate
        if (!childNode->IsCnsIntOrI())
            return false;
        if (childNode->IsIconHandle() && comp->opts.compReloc)
            return false;

        ssize_t  immVal = childNode->gtIntCon.gtIconVal;
        emitAttr attr   = emitActualTypeSize(childNode->TypeGet());
        emitAttr size   = EA_SIZE(attr);
#ifdef _TARGET_ARM_
        insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
#endif

        switch (parentNode->OperGet())
        {
            default:
                return false;

            case GT_ADD:
            case GT_SUB:
#ifdef _TARGET_ARM64_
            case GT_CMPXCHG:
            case GT_LOCKADD:
            case GT_XADD:
                return emitter::emitIns_valid_imm_for_add(immVal, size);
#elif defined(_TARGET_ARM_)
                return emitter::emitIns_valid_imm_for_add(immVal, flags);
#endif
                break;

#ifdef _TARGET_ARM64_
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
                return emitter::emitIns_valid_imm_for_cmp(immVal, size);
                break;
            case GT_AND:
            case GT_OR:
            case GT_XOR:
            case GT_TEST_EQ:
            case GT_TEST_NE:
                return emitter::emitIns_valid_imm_for_alu(immVal, size);
                break;
            case GT_JCMP:
                assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal));
                return true;
                break;
#elif defined(_TARGET_ARM_)
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
            case GT_CMP:
            case GT_AND:
            case GT_OR:
            case GT_XOR:
                return emitter::emitIns_valid_imm_for_alu(immVal);
                break;
#endif // _TARGET_ARM_

#ifdef _TARGET_ARM64_
            case GT_STORE_LCL_VAR:
                if (immVal == 0)
                    return true;
                break;
#endif
        }
    }

    return false;
}
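The GT_JCMP assert above encodes the ARM64 test-and-branch constraint: tbz/tbnz examine a single bit, so a contained test immediate must have exactly one bit set, i.e. satisfy isPow2:

// isPow2(0x40) == true   -> encodable, e.g. "tbz x0, #6, label"
// isPow2(0x41) == false  -> two bits set; not a valid test-and-branch immediate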
Example #22
//-----------------------------------------------------------------------------
// innerCreateTexture
//-----------------------------------------------------------------------------
// This just creates the texture, no info is actually loaded to it.  We do that later.
void GFXGLTextureManager::innerCreateTexture( GFXGLTextureObject *retTex,
        U32 height,
        U32 width,
        U32 depth,
        GFXFormat format,
        GFXTextureProfile *profile,
        U32 numMipLevels,
        bool forceMips)
{
    // No 24 bit formats.  They trigger various oddities because hardware (and Apple's drivers apparently...) don't natively support them.
    if(format == GFXFormatR8G8B8)
        format = GFXFormatR8G8B8A8;

    retTex->mFormat = format;
    retTex->mIsZombie = false;
    retTex->mIsNPoT2 = false;

    GLenum binding = ( (height == 1 || width == 1) && ( height != width ) ) ? GL_TEXTURE_1D : ( (depth == 0) ? GL_TEXTURE_2D : GL_TEXTURE_3D );
    if((profile->testFlag(GFXTextureProfile::RenderTarget) || profile->testFlag(GFXTextureProfile::ZTarget)) && (!isPow2(width) || !isPow2(height)) && !depth)
        retTex->mIsNPoT2 = true;
    retTex->mBinding = binding;

    // Bind it
    PRESERVE_TEXTURE(binding);
    glBindTexture(retTex->getBinding(), retTex->getHandle());

    // Create it
    // TODO: Reenable mipmaps on render targets when Apple fixes their drivers
    if(forceMips && !retTex->mIsNPoT2)
    {
        retTex->mMipLevels = numMipLevels > 1 ? numMipLevels : 0;
    }
    else if(profile->testFlag(GFXTextureProfile::NoMipmap) || profile->testFlag(GFXTextureProfile::RenderTarget) || numMipLevels == 1 || retTex->mIsNPoT2)
    {
        retTex->mMipLevels = 1;
    }
    else
    {
        retTex->mMipLevels = numMipLevels;
    }

    if(!retTex->mIsNPoT2)
    {
        if(!isPow2(width))
            width = getNextPow2(width);
        if(!isPow2(height))
            height = getNextPow2(height);
        if(depth && !isPow2(depth))
            depth = getNextPow2(depth);
    }

    AssertFatal(GFXGLTextureInternalFormat[format] != GL_ZERO, "GFXGLTextureManager::innerCreateTexture - invalid internal format");
    AssertFatal(GFXGLTextureFormat[format] != GL_ZERO, "GFXGLTextureManager::innerCreateTexture - invalid format");
    AssertFatal(GFXGLTextureType[format] != GL_ZERO, "GFXGLTextureManager::innerCreateTexture - invalid type");

    //calculate num mipmaps
    if(retTex->mMipLevels == 0)
        retTex->mMipLevels = getMaxMipmaps(width, height, 1);

    glTexParameteri(binding, GL_TEXTURE_MAX_LEVEL, retTex->mMipLevels-1 );

    // If it weren't for problems in AMD's drivers, this next part could be much
    // simpler: we could call glGenerateMipmap instead of building the mipmap
    // pyramid manually.
    if(isCompressedFormat(format))
    {
        AssertFatal(binding == GL_TEXTURE_2D,
                    "GFXGLTextureManager::innerCreateTexture - Only compressed 2D textures are supported");

        U32 tempWidth = width;
        U32 tempHeight = height;
        U32 size = getCompressedSurfaceSize(format,width,height);
        //Fill compressed images with 0's
        U8 *pTemp = (U8*)dMalloc(sizeof(U8)*size);
        dMemset(pTemp,0,size);

        for(U32 i=0; i< retTex->mMipLevels; i++)
        {
            tempWidth = getMax( U32(1), width >> i );
            tempHeight = getMax( U32(1), height >> i );
            size = getCompressedSurfaceSize(format,width,height,i);
            glCompressedTexImage2D(binding,i,GFXGLTextureInternalFormat[format],tempWidth,tempHeight,0,size,pTemp);
        }

        dFree(pTemp);
    }
    else
    {
        if(binding == GL_TEXTURE_2D)
        {
            // Allocate each level with no data; the actual texels are uploaded later.
            for(U32 i = 0; i < retTex->mMipLevels; i++)
                glTexImage2D(binding, i, GFXGLTextureInternalFormat[format], getMax(U32(1), width >> i),
                             getMax(U32(1), height >> i), 0, GFXGLTextureFormat[format], GFXGLTextureType[format], NULL);
        }
        else if(binding == GL_TEXTURE_1D)
        {
            glTexImage1D(binding, 0, GFXGLTextureInternalFormat[format], (width > 1 ? width : height), 0,
                         GFXGLTextureFormat[format], GFXGLTextureType[format], NULL);
        }
        else
        {
            glTexImage3D(binding, 0, GFXGLTextureInternalFormat[format], width, height, depth, 0,
                         GFXGLTextureFormat[format], GFXGLTextureType[format], NULL);
        }
    }
}
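When mMipLevels starts out as zero, the mip count used above comes from a helper along these lines (a sketch of the assumed getMaxMipmaps, reusing the listing's getMax):

static U32 getMaxMipmaps(U32 width, U32 height, U32 depth)
{
    U32 m = getMax(width, getMax(height, depth));
    U32 levels = 1;
    while (m > 1)   // 1 + floor(log2(largest dimension))
    {
        m >>= 1;
        ++levels;
    }
    return levels;
}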
Example #23
int main(int argc, char* argv[])
{
	int sum, limit;
	scanf("%d %d", &sum, &limit);
	std::vector<int> lbArr(limit+1);
	std::vector<num> lbEvenArr;
	std::vector<int> set;
	int countElements = 0,
		countLb = 0;
	
	int lastPow = 0;
	for (int i = 2; i <=limit; i+=2){
		// i is always even here, so the odd-number case can never occur.
		if (isPow2(i)){ lbArr[i] = i; lastPow = i; }
		else
		{
			lbArr[i] = lbArr[i - lastPow];
			num tmp;
			tmp.x = i;
			tmp.lb = lbArr[i];
			lbEvenArr.push_back(tmp);
			countLb++;
		}
	}
	
	for (int i = pow(2, floor(log2((double)min(sum,limit)))); i>1; i/=2)
	{
		if (sum > i)
		{
			sum -= i;
			set.push_back(i);
			countElements++;
		}
		else break;
	}

	quickSort(lbEvenArr, 0, countLb);

	for (int i = countLb-1; i > 0; i--)
	{
		if (sum > lbEvenArr[i].lb)
		{
			sum -= lbEvenArr[i].lb;
			set.push_back(lbEvenArr[i].x);
			countElements++;
		}
	}

	for (int i = 1; i <= limit; i+=2)
	{
		if (sum > 0)
		{
			sum -= 1;
			set.push_back(i);
			countElements++;
		}else break;
	}

	if (sum == 0)
	{
		printf("%d\n", countElements);
		for (int i = 0; i < countElements; i++)
			printf("%d ", set[i]);
	}
	else printf("%d\n", -1);


	return 0;
}
Example #24
//------------------------------------------------------------------------
// IsContainableImmed: Is an immediate encodable in-place?
//
// Return Value:
//    True if the immediate can be folded into an instruction,
//    for example small enough and non-relocatable.
//
// TODO-CQ: we can contain a floating point 0.0 constant in a compare instruction
// (vcmp on arm, fcmp on arm64).
//
bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
{
    if (!varTypeIsFloating(parentNode->TypeGet()))
    {
        // Make sure we have an actual immediate
        if (!childNode->IsCnsIntOrI())
            return false;
        if (childNode->gtIntCon.ImmedValNeedsReloc(comp))
            return false;

        // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type.
        target_ssize_t immVal = (target_ssize_t)childNode->gtIntCon.gtIconVal;
        emitAttr       attr   = emitActualTypeSize(childNode->TypeGet());
        emitAttr       size   = EA_SIZE(attr);
#ifdef _TARGET_ARM_
        insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
#endif

        switch (parentNode->OperGet())
        {
            case GT_ADD:
            case GT_SUB:
#ifdef _TARGET_ARM64_
            case GT_CMPXCHG:
            case GT_LOCKADD:
            case GT_XADD:
                return comp->compSupports(InstructionSet_Atomics) ? false
                                                                  : emitter::emitIns_valid_imm_for_add(immVal, size);
#elif defined(_TARGET_ARM_)
                return emitter::emitIns_valid_imm_for_add(immVal, flags);
#endif
                break;

#ifdef _TARGET_ARM64_
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
                return emitter::emitIns_valid_imm_for_cmp(immVal, size);
            case GT_AND:
            case GT_OR:
            case GT_XOR:
            case GT_TEST_EQ:
            case GT_TEST_NE:
                return emitter::emitIns_valid_imm_for_alu(immVal, size);
            case GT_JCMP:
                assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal));
                return true;
#elif defined(_TARGET_ARM_)
            case GT_EQ:
            case GT_NE:
            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:
            case GT_CMP:
            case GT_AND:
            case GT_OR:
            case GT_XOR:
                return emitter::emitIns_valid_imm_for_alu(immVal);
#endif // _TARGET_ARM_

#ifdef _TARGET_ARM64_
            case GT_STORE_LCL_FLD:
            case GT_STORE_LCL_VAR:
                if (immVal == 0)
                    return true;
                break;
#endif

            default:
                break;
        }
    }

    return false;
}
Example #25
void TerrainFile::import(  const GBitmap &heightMap, 
                           F32 heightScale,
                           const Vector<U8> &layerMap, 
                           const Vector<String> &materials,
                           bool flipYAxis )
{
   AssertFatal( heightMap.getWidth() == heightMap.getHeight(), "TerrainFile::import - Height map is not square!" );
   AssertFatal( isPow2( heightMap.getWidth() ), "TerrainFile::import - Height map is not power of two!" );

   const U32 newSize = heightMap.getWidth();
   if ( newSize != mSize )
   {
      mHeightMap.setSize( newSize * newSize );
      mHeightMap.compact();
      mSize = newSize;
   }

   // Convert the height map to heights.
   U16 *oBits = mHeightMap.address();
   if ( heightMap.getFormat() == GFXFormatR5G6B5 )
   {
      const F32 toFixedPoint = ( 1.0f / (F32)U16_MAX ) * floatToFixed( heightScale );
      const U16 *iBits = (const U16*)heightMap.getBits();
      if ( flipYAxis )
      {
         for ( U32 i = 0; i < mSize * mSize; i++ )
         {
            U16 height = convertBEndianToHost( *iBits );
            *oBits = (U16)mCeil( (F32)height * toFixedPoint );
            ++oBits;
            ++iBits;
         }
      }
      else
      {
         for(S32 y = mSize - 1; y >= 0; y--) {
            for(U32 x = 0; x < mSize; x++) {
               U16 height = convertBEndianToHost( *iBits );
               mHeightMap[x + y * mSize] = (U16)mCeil( (F32)height * toFixedPoint );
               ++iBits;
            }
         }
      }
   }
   else
   {
      const F32 toFixedPoint = ( 1.0f / (F32)U8_MAX ) * floatToFixed( heightScale );
      const U8 *iBits = heightMap.getBits();
      if ( flipYAxis )
      {
         for ( U32 i = 0; i < mSize * mSize; i++ )
         {
            *oBits = (U16)mCeil( ((F32)*iBits) * toFixedPoint );
            ++oBits;
            iBits += heightMap.getBytesPerPixel();
         }
      }
      else
      {
         for(S32 y = mSize - 1; y >= 0; y--) {
            for(U32 x = 0; x < mSize; x++) {
               mHeightMap[x + y * mSize] = (U16)mCeil( ((F32)*iBits) * toFixedPoint );
               iBits += heightMap.getBytesPerPixel();
            }
         }
      }
   }

   // Copy over the layer map.
   AssertFatal( layerMap.size() == mHeightMap.size(), "TerrainFile::import - Layer map is the wrong size!" );
   mLayerMap = layerMap;
   mLayerMap.compact();

   // Resolve the materials.
   _resolveMaterials( materials );

   // Rebuild the collision grid map.
   _buildGridMap();
}
Example #26
BBTexture* BBTexture_create (const char* name, GLsizei width, GLsizei height,
							 GLenum type, GLenum channels, const void** data)
{
	BBTexture* texture;
	BB_ASSERT(data);

	if (!(isPow2(width) && isPow2(height)))
	{
#ifdef BB_DEVEL
		printf("Error: Invalid texture size.\n");
#endif
		return NULL;
	}

	texture = malloc(sizeof(BBTexture));
	if (!texture)
		return NULL;

	texture->name = NULL;
	texture->width = width;
	texture->height = height;
	texture->type = type;
	texture->handle = 0;
	texture->next = NULL;

	texture->name = strdup(name);
	if (!texture->name)
	{
		BBTexture_destroy(texture);
		return NULL;
	}

	switch (type)
	{
	case GL_TEXTURE_2D:
		glGenTextures(1, &texture->handle);
		glBindTexture(GL_TEXTURE_2D, texture->handle);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
//		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
		{
//			GLubyte* origBitmap = (GLubyte*)data[0];
//			GLubyte* scaledBitmap = NULL;
			GLuint level = 0;
			GLuint w = texture->width;
			GLuint h = texture->height;
			GLboolean end = GL_FALSE;
			while (!end)
			{
//				if (level > 0)
//				{
//					if (!scaledBitmap)
//					{
//						GLsizei bpp = 4;
//						if (channels == GL_RGB)
//							bpp = 3;
//						scaledBitmap = (GLubyte*)malloc(w * h * bpp * sizeof(GLubyte));
//						if (!scaledBitmap)
//							break;
//					}
//					generateMipMap(origBitmap, texture->width, texture->height, channels, level, scaledBitmap);
//					glTexImage2D(GL_TEXTURE_2D, level, channels, w, h, 0, channels, GL_UNSIGNED_BYTE, scaledBitmap);
//				}
//				else
//				{
//					glTexImage2D(GL_TEXTURE_2D, level, channels, w, h, 0, channels, GL_UNSIGNED_BYTE, origBitmap);
//				}

				BB_ASSERT(level < MAX_MIPMAPS && data[level]);
				glTexImage2D(GL_TEXTURE_2D, level, channels, w, h, 0, channels, GL_UNSIGNED_BYTE, data[level]);

				w >>= 1;
				h >>= 1;
				if (w == 0 && h == 0)
				{
					end = GL_TRUE;
				}
				else
				{
					if (w < 1)
						w = 1;
					if (h < 1)
						h = 1;
					level++;
				}
			}
//			free(scaledBitmap);
		}
		break;
	case GL_TEXTURE_CUBE_MAP:
		glGenTextures(1, &texture->handle);
		glBindTexture(GL_TEXTURE_CUBE_MAP, texture->handle);
		glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
		glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
		glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[0]);
		glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[1]);
		glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[2]);
		glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[3]);
		glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[4]);
		glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, channels, texture->width, texture->height, 0, channels, GL_UNSIGNED_BYTE, data[5]);
		break;
	default:
		BBTexture_destroy(texture);
		return NULL;
	}

	return texture;
}
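The upload loop above walks the full mip chain, so the number of levels is driven by the larger axis:

// levels = 1 + floor(log2(max(width, height)))
// e.g. 256x64: 256x64, 128x32, 64x16, 32x8, 16x4, 8x2, 4x1, 2x1, 1x1 = 9 levels,
// matching the loop's halving of w and h with each clamped at 1.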
Example #27
// Internal function - Called to load up a texture map - file is known to exist
Texture *textureFromBitmap(bitmap *loadbm, Texture *tex)
{	bitmap *swizzleBm, *scaleBm, *resizeCanvasSrc;
	bool mustSwizzle = false;
	bool mustScale = false;

	// Step 1: Check for a need to swizzle (if the video card doesn't support this texture mode)
	uintf dataType = loadbm->flags & (bitmap_DataTypeMask | bitmap_DataInfoMask);
	// ### This code is not yet complete, must leave this block with 'swizzleBM' pointing to swizzled bitmap data

	// If Video card only handles 'Power-of-2' texture dimensions
	bool resizeCanvas=false;
	// if (GLESWarnings) //!(videoFeatures & videodriver_nonP2Tex))
	{	uintf newCanvasWidth = loadbm->width;
		uintf newCanvasHeight = loadbm->height;

		if (!isPow2(loadbm->width))
		{	resizeCanvas=true;
			newCanvasWidth=nextPow2(loadbm->width);
		}
		if (!isPow2(loadbm->height))
		{	resizeCanvas=true;
			newCanvasHeight=nextPow2(loadbm->height);
		}
		if (resizeCanvas)
		{	resizeCanvasSrc = loadbm;
			loadbm = newbitmap("resizeCanvasP2Tex",newCanvasWidth,newCanvasHeight,bitmap_ARGB32);
			uintf x,y;
			uint32 *src32 = (uint32 *)resizeCanvasSrc->pixel;
			uint32 *dst32 = (uint32 *)loadbm->pixel;
			for (y=0; y<resizeCanvasSrc->height; y++)
			{	uint32 *src = &src32[y*(resizeCanvasSrc->width)];
				uint32 *dst = &dst32[y*newCanvasWidth];
				for (x=0; x<resizeCanvasSrc->width; x++)
					*dst++ = *src++;
				for (;x<newCanvasWidth; x++)
					*dst++ = 0;
			}
			for (;y<newCanvasHeight; y++)
			{	uint32 *dst = &dst32[y*newCanvasWidth];
				for (x=0; x<newCanvasWidth; x++)
					*dst++=0;
			}
		}

/*		// Work out X scale
		uintf size = 1;
		while (size<=maxtexwidth)
		{	if (newx<=size) break;
			size <<=1;
		}
		if (size>maxtexwidth) size = maxtexwidth;
		newx = size;

		// Work out Y scale
		size = 1;
		while (size<=maxtexheight)
		{	if (newy<=size) break;
			size <<=1;
		}
		if (size>maxtexheight) size = maxtexheight;
		newy = size;
*/
	}

	// Step 2: Check if we need to resize - this may change swizzle mode
	uintf newx = loadbm->width;
	uintf newy = loadbm->height;
	if (newx>maxtexwidth)
		newx = maxtexwidth;
	if (newy>maxtexheight)
		newy = maxtexheight;

	if (newx!=loadbm->width || newy!=loadbm->height)
	{	dataType = bitmap_DataTypeRGB | bitmap_RGB_32bit;
		mustSwizzle = true;
		mustScale = true;
	}

	if (mustSwizzle)
	{	dataType |= loadbm->flags & bitmap_AlphaMask;
		swizzleBm = SwizzleBitmap(loadbm, dataType);
	}	else
		swizzleBm = loadbm;

	if (mustScale)
	{	// Bitmap needs to be resized before hardware will accept it
		scaleBm = scalebitmap(swizzleBm,newx,newy);
	}	else
		scaleBm = swizzleBm;

	// If we don't have a texture provided, create a new one
	if (!tex)
		tex = newTexture(NULL, 0, 0);
	downloadbitmaptex(tex, scaleBm, 0);
	estimatedtexmemused += tex->texmemused;
	if (mustScale)
		deleteBitmap(scaleBm);
	if (mustSwizzle)
		deleteBitmap(swizzleBm);
	if (resizeCanvas)
	{	deleteBitmap(loadbm);
		loadbm = resizeCanvasSrc;
		tex->flags |= texture_canvasSize;
		tex->UVscale.x = (float)loadbm->width / (float)tex->width;
		tex->UVscale.y = (float)loadbm->height/ (float)tex->height;
	}
	return tex;
}
Example #28
/* Isotropic/anisotropic EWA mip-map texture map class based on PBRT */
MIPMap::MIPMap(int width, int height, Spectrum *pixels, 
	EFilterType filterType, EWrapMode wrapMode, Float maxAnisotropy) 
		: m_width(width), m_height(height), m_filterType(filterType), 
		  m_wrapMode(wrapMode), m_maxAnisotropy(maxAnisotropy) {
	Spectrum *texture = pixels;

	if (filterType != ENone && (!isPow2(width) || !isPow2(height))) {
		m_width = (int) roundToPow2((uint32_t) width);
		m_height = (int) roundToPow2((uint32_t) height);

		/* The texture needs to be up-sampled */
		Spectrum *texture1 = new Spectrum[m_width*height];

		/* Re-sample into the X direction */
		ResampleWeight *weights = resampleWeights(width, m_width);
		for (int y=0; y<height; y++) {
			for (int x=0; x<m_width; x++) {
				texture1[x+m_width*y] = Spectrum(0.0f);
				for (int j=0; j<4; j++) {
					int pos = weights[x].firstTexel + j;
					if (pos < 0 || pos >= width) {
						if (wrapMode == ERepeat) 
							pos = modulo(pos, width);
						else if (wrapMode == EClamp)
							pos = clamp(pos, 0, width-1);
					}
					if (pos >= 0 && pos < width)
						texture1[x+m_width*y] += pixels[pos+y*width] 
							* weights[x].weight[j];
				}
			}
		}
		delete[] weights;
		delete[] pixels;

		/* Re-sample into the Y direction */
		texture = new Spectrum[m_width*m_height];
		weights = resampleWeights(height, m_height);
		memset(texture, 0, sizeof(Spectrum)*m_width*m_height);
		for (int x=0; x<m_width; x++) {
			for (int y=0; y<m_height; y++) {
				for (int j=0; j<4; j++) {
					int pos = weights[y].firstTexel + j;
					if (pos < 0 || pos >= height) {
						if (wrapMode == ERepeat) 
							pos = modulo(pos, height);
						else if (wrapMode == EClamp)
							pos = clamp(pos, 0, height-1);
					}
					if (pos >= 0 && pos < height)
						texture[x+m_width*y] += texture1[x+pos*m_width]
							* weights[y].weight[j];
				}
			}
		}
		for (int y=0; y<m_height; y++)
			for (int x=0; x<m_width; x++)
				texture[x+m_width*y].clampNegative();
		delete[] weights;
		delete[] texture1;
	}

	if (m_filterType != ENone)
		m_levels = 1 + log2i((uint32_t) std::max(width, height));
	else
		m_levels = 1;

	m_pyramid = new Spectrum*[m_levels];
	m_pyramid[0] = texture;
	m_levelWidth = new int[m_levels];
	m_levelHeight= new int[m_levels];
	m_levelWidth[0] = m_width;
	m_levelHeight[0] = m_height;

	/* Generate the mip-map hierarchy */
	for (int i=1; i<m_levels; i++) {
		m_levelWidth[i]  = std::max(1, m_levelWidth[i-1]/2);
		m_levelHeight[i] = std::max(1, m_levelHeight[i-1]/2);
		m_pyramid[i] = new Spectrum[m_levelWidth[i] * m_levelHeight[i]];
		for (int y = 0; y < m_levelHeight[i]; y++) {
			for (int x = 0; x < m_levelWidth[i]; x++) {
				m_pyramid[i][x+y*m_levelWidth[i]] = (
					getTexel(i-1, 2*x, 2*y) + 
					getTexel(i-1, 2*x+1, 2*y) + 
					getTexel(i-1, 2*x, 2*y+1) + 
					getTexel(i-1, 2*x+1, 2*y+1)) * 0.25f;
			}
		}
	}

	if (m_filterType == EEWA) {
		m_weightLut = static_cast<Float *>(allocAligned(sizeof(Float)*MIPMAP_LUTSIZE));
		for (int i=0; i<MIPMAP_LUTSIZE; ++i) {
			Float pos = (Float) i / (Float) (MIPMAP_LUTSIZE-1);
			m_weightLut[i] = std::exp(-2.0f * pos) - std::exp(-2.0f);
		}
	}
}
Example #29
void GFont::makeFont(int charsetSize, const String& infileBase, String outfile) {

    debugAssert(FileSystem::exists(infileBase + ".tga"));
    debugAssert(FileSystem::exists(infileBase + ".ini"));
    debugAssert(charsetSize == 128 || charsetSize == 256);

    if (outfile == "") {
        outfile = infileBase + ".fnt";
    }

    TextInput    ini(infileBase + ".ini");
    BinaryOutput out(outfile, G3D_LITTLE_ENDIAN);

    ini.readSymbol("[");
    ini.readSymbol("Char");
    ini.readSymbol("Widths");
    ini.readSymbol("]");

    // Version
    out.writeInt32(2);

    // Number of characters
    out.writeInt32(charsetSize);

    // Character widths
    for (int i = 0; i < charsetSize; ++i) {
        int n = (int)ini.readNumber();
        (void)n;
        debugAssert(n == i);
        ini.readSymbol("=");
        int cw = (int)ini.readNumber();
        out.writeInt16(cw);
    }

    // Load provided source image
    shared_ptr<Image> image = Image::fromFile(infileBase + ".tga");
    debugAssert(isPow2(image->width()));
    debugAssert(isPow2(image->height()));
    image->convertToR8();

    // Convert to image buffer so can access bytes directly
    shared_ptr<PixelTransferBuffer> imageBuffer = image->toPixelTransferBuffer();
    debugAssert(imageBuffer->format() == ImageFormat::R8());

    // Autodetect baseline from capital E
    const uint8* p        = static_cast<const uint8*>(imageBuffer->mapRead());
    {
        // Size of a character, in texels
        int          w        = imageBuffer->width() / 16;

        int          x0       = ('E' % 16) * w;
        int          y0       = ('E' / 16) * w;
        
        int          baseline = w * 2 / 3;

        bool         done     = false;
    
        // Search up from the bottom for the first pixel
        for (int y = y0 + w - 1; (y >= y0) && ! done; --y) {
            for (int x = x0; (x < x0 + w) && ! done; ++x) {
                if (p[x + y * w * 16] != 0) {
                    baseline = y - y0 + 1;
                    done = true;
                }
            }
        }

        out.writeUInt16(baseline);
    }

    // Texture width
    out.writeUInt16(imageBuffer->width());
    out.writeBytes(p, square(imageBuffer->width()) * 256 / charsetSize);
    imageBuffer->unmap();
 
    out.compress();
    out.commit(false);
}
Example #30
T profileReduce(ReduceType datatype,
                  cl_int  n, 
                  int  numThreads,
                  int  numBlocks,
                  int  maxThreads,
                  int  maxBlocks,
                  int  whichKernel, 
                  int  testIterations,
                  bool cpuFinalReduction,
                  int  cpuFinalThreshold,
                  double* dTotalTime,
                  T* h_odata,
                  cl_mem d_idata, 
                  cl_mem d_odata)
{


    T gpu_result = 0;
    bool needReadBack = true;
    cl_kernel finalReductionKernel[10];
    int finalReductionIterations=0;

    //shrLog("Profile Kernel %d\n", whichKernel);

    cl_kernel reductionKernel = getReductionKernel(datatype, whichKernel, numThreads, isPow2(n) );
    clSetKernelArg(reductionKernel, 0, sizeof(cl_mem), (void *) &d_idata);
    clSetKernelArg(reductionKernel, 1, sizeof(cl_mem), (void *) &d_odata);
    clSetKernelArg(reductionKernel, 2, sizeof(cl_int), &n);
    clSetKernelArg(reductionKernel, 3, sizeof(T) * numThreads, NULL);

    if( !cpuFinalReduction ) {
        int s=numBlocks;
        int threads = 0, blocks = 0;
        int kernel = (whichKernel == 6) ? 5 : whichKernel;
        
        while(s > cpuFinalThreshold) 
        {
            getNumBlocksAndThreads(kernel, s, maxBlocks, maxThreads, blocks, threads);

            finalReductionKernel[finalReductionIterations] = getReductionKernel(datatype, kernel, threads, isPow2(s) );
            clSetKernelArg(finalReductionKernel[finalReductionIterations], 0, sizeof(cl_mem), (void *) &d_odata);
            clSetKernelArg(finalReductionKernel[finalReductionIterations], 1, sizeof(cl_mem), (void *) &d_odata);
            clSetKernelArg(finalReductionKernel[finalReductionIterations], 2, sizeof(cl_int), &n);
            clSetKernelArg(finalReductionKernel[finalReductionIterations], 3, sizeof(T) * numThreads, NULL);
            
            if (kernel < 3)
                s = (s + threads - 1) / threads;
            else
                s = (s + (threads*2-1)) / (threads*2);

            finalReductionIterations++;
        }
    }
    
    size_t globalWorkSize[1];
    size_t localWorkSize[1];

    for (int i = 0; i < testIterations; ++i)
    {
        gpu_result = 0;

        clFinish(cqCommandQueue);
        if(i>0) shrDeltaT(1);

        // execute the kernel
        globalWorkSize[0] = numBlocks * numThreads;
        localWorkSize[0] = numThreads;
	
        ciErrNum = clEnqueueNDRangeKernel(cqCommandQueue,reductionKernel, 1, 0, globalWorkSize, localWorkSize,
                                          0, NULL, NULL);               

        // check if kernel execution generated an error        
        oclCheckError(ciErrNum, CL_SUCCESS);

        if (cpuFinalReduction)
        {
            // sum partial sums from each block on CPU        
            // copy result from device to host
            clEnqueueReadBuffer(cqCommandQueue, d_odata, CL_TRUE, 0, numBlocks * sizeof(T), 
                                h_odata, 0, NULL, NULL);

            for(int i=0; i<numBlocks; i++) 
            {
                gpu_result += h_odata[i];
            }

            needReadBack = false;
        }
        else
        {
            // sum partial block sums on GPU
            int s=numBlocks;
            int kernel = (whichKernel == 6) ? 5 : whichKernel;
            int it = 0;
            

            while(s > cpuFinalThreshold) 
            {
                int threads = 0, blocks = 0;
                getNumBlocksAndThreads(kernel, s, maxBlocks, maxThreads, blocks, threads);

                globalWorkSize[0] = threads * blocks;
                localWorkSize[0] = threads;
                
                ciErrNum = clEnqueueNDRangeKernel(cqCommandQueue, finalReductionKernel[it], 1, 0,
                                                  globalWorkSize, localWorkSize, 0, NULL, NULL);               
                oclCheckError(ciErrNum, CL_SUCCESS);
                
                if (kernel < 3)
                    s = (s + threads - 1) / threads;
                else
                    s = (s + (threads*2-1)) / (threads*2);

                it++;
            }

            if (s > 1)
            {
                // copy result from device to host
                clEnqueueReadBuffer(cqCommandQueue, d_odata, CL_TRUE, 0, s * sizeof(T), 
                                    h_odata, 0, NULL, NULL);

                for(int i=0; i < s; i++) 
                {
                    gpu_result += h_odata[i];
                }

                needReadBack = false;
            }
        }

        clFinish(cqCommandQueue);
        if(i>0) *dTotalTime += shrDeltaT(1); 
    }

    if (needReadBack)
    {
        // copy final sum from device to host
        clEnqueueReadBuffer(cqCommandQueue, d_odata, CL_TRUE, 0, sizeof(T), 
                            &gpu_result, 0, NULL, NULL);
    }

    // Release the kernels
    clReleaseKernel(reductionKernel);
    if( !cpuFinalReduction ) {
        for(int it=0; it<finalReductionIterations; ++it) {
            clReleaseKernel(finalReductionKernel[it]);
        }
        
    }

    return gpu_result;
}