virtual void copy(DataSpace<DIM3> &devCurrentSize)
        {

            cudaMemcpy3DParms params;

            //  assert(this->source->getDataSpace().productOfComponents() <= this->destination->getDataSpace().productOfComponents());

            params.srcArray = NULL;
            params.srcPos = make_cudaPos(
                                         this->source->getOffset()[0] * sizeof (TYPE),
                                         this->source->getOffset()[1],
                                         this->source->getOffset()[2]);
            params.srcPtr = this->source->getCudaPitched();

            params.dstArray = NULL;
            params.dstPos = make_cudaPos(
                                         this->destination->getOffset()[0] * sizeof (TYPE),
                                         this->destination->getOffset()[1],
                                         this->destination->getOffset()[2]);
            ;
            params.dstPtr = this->destination->getCudaPitched();

            params.extent = make_cudaExtent(
                                            devCurrentSize[0] * sizeof (TYPE),
                                            devCurrentSize[1],
                                            devCurrentSize[2]);
            params.kind = cudaMemcpyDeviceToDevice;
            CUDA_CHECK(cudaMemcpy3DAsync(&params, this->getCudaStream()));
        }
Exemple #2
0
 void operator()(Type* dest, const math::Size_t<2u> pitchDest, 
                 Type* source, const math::Size_t<2u> pitchSource, const math::Size_t<3>& size,
                 flags::Memcopy::Direction direction)
 {
         const cudaMemcpyKind kind[] = {cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost,
                                  cudaMemcpyHostToHost, cudaMemcpyDeviceToDevice};
                                  
         cudaPitchedPtr pitchedPtrDest;
         pitchedPtrDest.pitch = pitchDest.x(); pitchedPtrDest.ptr = dest;
         pitchedPtrDest.xsize = size.x(); pitchedPtrDest.ysize = size.y();
         cudaPitchedPtr pitchedPtrSource;
         pitchedPtrSource.pitch = pitchSource.x(); pitchedPtrSource.ptr = source;
         pitchedPtrSource.xsize = size.x(); pitchedPtrSource.ysize = size.y();
         
         cudaMemcpy3DParms params;
         params.srcArray = NULL;
         params.srcPos = make_cudaPos(0,0,0);
         params.srcPtr = pitchedPtrSource;
         params.dstArray = NULL;
         params.dstPos = make_cudaPos(0,0,0);
         params.dstPtr = pitchedPtrDest;
         params.extent = make_cudaExtent(size.x() * sizeof(Type), size.y(), size.z());
         params.kind = kind[direction];
         CUDA_CHECK_NO_EXCEP(cudaMemcpy3D(&params));
 }                    
Exemple #3
0
    //! @brief Copy the ND-array device array to host array.
    //! You must allocate the array with the appropriate size.
    __host__
    inline void copy_to_host(T *host_data) const
    {
      if (N == 2)
      {
        SHAKTI_SAFE_CUDA_CALL(cudaMemcpy2D(
          host_data, _sizes[0] * sizeof(T), _data, _pitch, _sizes[0] * sizeof(T), _sizes[1],
          cudaMemcpyDeviceToHost));
      }
      else if (N == 3)
      {
        cudaMemcpy3DParms params = { 0 };

        params.srcPtr = make_cudaPitchedPtr(reinterpret_cast<void *>(_data),
                                            _pitch, _sizes[0], _sizes[1]);
        params.dstPtr = make_cudaPitchedPtr(host_data, _sizes[0] * sizeof(T),
                                            _sizes[0], _sizes[1]);
        params.extent = make_cudaExtent(_sizes[0]*sizeof(T), _sizes[1],
                                        _sizes[2]);
        params.kind = cudaMemcpyDeviceToHost;

        SHAKTI_SAFE_CUDA_CALL(cudaMemcpy3D(&params));
      }
      else
        SHAKTI_SAFE_CUDA_CALL(cudaMemcpy(host_data, _data, sizeof(T) * size(),
                                         cudaMemcpyDeviceToHost));
    }
        virtual void copy(DataSpace<DIM3> &hostCurrentSize)
        {
            cudaPitchedPtr hostPtr;
            hostPtr.pitch = this->host->getDataSpace()[0] * sizeof (TYPE);
            hostPtr.ptr = this->host->getBasePointer();
            hostPtr.xsize = this->host->getDataSpace()[0] * sizeof (TYPE);
            hostPtr.ysize = this->host->getDataSpace()[1];

            cudaMemcpy3DParms params;
            params.dstArray = NULL;
            params.dstPos = make_cudaPos(this->device->getOffset()[0] * sizeof (TYPE),
                                         this->device->getOffset()[1],
                                         this->device->getOffset()[2]);
            params.dstPtr = this->device->getCudaPitched();

            params.srcArray = NULL;
            params.srcPos = make_cudaPos(0, 0, 0);
            params.srcPtr = hostPtr;

            params.extent = make_cudaExtent(
                                            hostCurrentSize[0] * sizeof (TYPE),
                                            hostCurrentSize[1],
                                            hostCurrentSize[2]);
            params.kind = cudaMemcpyHostToDevice;

            CUDA_CHECK(cudaMemcpy3DAsync(&params, this->getCudaStream()));
        }
bool ControlCubeCache::_readElement(NodeLinkedList<index_node_t> * element)
{
	#ifndef NDEBUG
	if ((int)element->element > _maxNumCubes) 
	{
		std::cerr<<"Control Cube CPU Cache, try to write outside reserved memory"<<std::endl;
		throw;
	}
	#endif
	index_node_t idCube = element->id;
	float * cube = (_memory + element->element*_sizeElement);

	if (!checkCubeInside(element->id))
	{
		if (cudaSuccess != cudaMemset((void*)cube, 0, _sizeElement*sizeof(float)))
		{
			std::cout<<"---> "<<idCube<<" "<<_minValue<<" "<<_maxValue<<std::endl;
			LBERROR<<"Control Cube Cache: error copying to a device: "<<cudaGetErrorString(cudaGetLastError()) <<" "<<cube<<" "<<_sizeElement<<std::endl;
			throw;
		}
		return true;
	}

	index_node_t idCubeCPU = idCube >> 3*(_levelCube - _cpuCache->getCubeLevel()); 
	float * pCube = _cpuCache->getAndBlockElement(idCubeCPU);

	if (pCube != 0)
	{
		vmml::vector<3, int> coord = getMinBoxIndex2(idCube, _levelCube, _nLevels);
		vmml::vector<3, int> coordC = getMinBoxIndex2(idCubeCPU, _cpuCache->getCubeLevel(), _nLevels);
		coord -= coordC;
		vmml::vector<3, int> realDimCPU = _cpuCache->getRealCubeDim();

		cudaMemcpy3DParms myParms = {0};
		myParms.srcPtr = make_cudaPitchedPtr((void*)pCube, realDimCPU.z()*sizeof(float), realDimCPU.x(), realDimCPU.y()); 
		//myParms.dstPtr = make_cudaPitchedPtr((void*)cube, _realcubeDim.z()*sizeof(float), _realcubeDim.x(), _realcubeDim.y()); 
		myParms.dstPtr = make_cudaPitchedPtr((void*)cube, _dimCube*sizeof(float), _dimCube, _dimCube); 
		myParms.extent = make_cudaExtent(_dimCube*sizeof(float), _dimCube, _dimCube);
		myParms.dstPos = make_cudaPos(0,0,0);
		myParms.srcPos = make_cudaPos(coord.z()*sizeof(float), coord.y(), coord.x());
		myParms.kind = cudaMemcpyHostToDevice;

		if (cudaSuccess != cudaMemcpy3DAsync(&myParms, _stream) || cudaSuccess != cudaStreamSynchronize(_stream))
		{
			std::cout<<"---> "<<idCube<<" "<<_minValue<<" "<<_maxValue<<std::endl;
			LBERROR<<"Control Cube Cache: error copying to a device: "<<cudaGetErrorString(cudaGetLastError()) <<" "<<cube<<" "<<pCube<<" "<<_sizeElement<<std::endl;
			throw;
		}

		_cpuCache->unlockElement(idCubeCPU);
		return true;
	}
	else
	{
		return false;
	}
}
inline cudaArray* MallocArray3D< uchar4 >( VolumeDescription volumeDescription )
{
    cudaChannelFormatDesc channelDesc  = cudaCreateChannelDesc( 8, 8, 8, 8, cudaChannelFormatKindUnsigned ); 
    cudaExtent            volumeExtent = make_cudaExtent( volumeDescription.numVoxels.x, volumeDescription.numVoxels.y, volumeDescription.numVoxels.z );

    cudaArray* cuArray; 
    MOJO_CUDA_SAFE( cudaMalloc3DArray( &cuArray, &channelDesc, volumeExtent ) ); 

    return cuArray;
}
Exemple #7
0
void SingleParticle2dx::Methods::CUDAProjectionMethod::prepareForProjections(SingleParticle2dx::DataStructures::ParticleContainer& cont)
{
	cudaSetDevice(getMyGPU());
	cudaStreamCreate(&m_stream);
	
	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
	cudaExtent VS = make_cudaExtent(m_size, m_size, m_size);
	
	if( m_alloc_done == false )
	{
		cudaMalloc3DArray(&m_cuArray, &channelDesc, VS);
	}
		
	SingleParticle2dx::real_array3d_type real_data( boost::extents[m_size][m_size][m_size] );
	m_context->getRealSpaceData(real_data);
	unsigned int size = m_size*m_size*m_size*sizeof(float);
	
	if( m_alloc_done == false )
	{
		res_data_h = (float*)malloc(m_size*m_size*sizeof(float));
		cudaMalloc((void**)&res_data_d, m_size*m_size*sizeof(float));
		m_alloc_done = true;
	}
	
	cudaMemcpy3DParms copyParams = {0};
	copyParams.srcPtr = make_cudaPitchedPtr((void*)real_data.origin(), VS.width*sizeof(float), VS.width, VS.height);
	copyParams.dstArray = m_cuArray;
	copyParams.extent = VS;
	copyParams.kind = cudaMemcpyHostToDevice;
	
//	cudaMemcpy3D(&copyParams);
	cudaMemcpy3DAsync(&copyParams, m_stream);
		
	struct cudaResourceDesc resDesc;
	memset(&resDesc, 0, sizeof(resDesc));
	resDesc.resType = cudaResourceTypeArray;
	resDesc.res.array.array = m_cuArray;
	
	struct cudaTextureDesc texDesc;
	memset(&texDesc, 0, sizeof(texDesc));
	texDesc.addressMode[0]   = cudaAddressModeClamp;
	texDesc.addressMode[1]   = cudaAddressModeClamp;
	texDesc.addressMode[2]   = cudaAddressModeClamp;
	texDesc.filterMode       = cudaFilterModeLinear;
	texDesc.readMode         = cudaReadModeElementType;
	texDesc.normalizedCoords = 0;

	if(m_alloc_done == true)
	{
		cudaDestroyTextureObject(m_texObj);
	}

	m_texObj = 0;
	cudaCreateTextureObject(&m_texObj, &resDesc, &texDesc, NULL);
}
Exemple #8
0
void cpyD2H() {
    // D2H“]‘—
    cudaMemcpy3DParms parms = { 0 };
    parms.srcPos = make_cudaPos(0, 0, 0);
    parms.srcPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitch.x, buf.pitch.x, buf.pitch.y);
    parms.dstPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z);
    parms.dstPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y);
    parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitch.x, buf.pitch.y, buf.pitch.z);
    parms.kind = cudaMemcpyDeviceToHost;
    cudaMemcpy3D(&parms);
}
Exemple #9
0
    void Buffer3D::fill( int value, dp::math::Vec3ui const& offset, dp::math::Vec3ui const& extent )
    {
#if !defined(NDEBUG)
      for ( int i=0 ; i<3 ; i++ )
      {
        DP_ASSERT( offset[i] + ( extent[i] ? extent[i] : m_extent[i] ) <= m_extent[i] );
      }
#endif
      DP_ASSERT( ( offset[0] == 0 ) && ( offset[1] == 0 ) && ( offset[2] == 0 ) );
      CUDA_VERIFY( cudaMemset3D( m_pitchedPtr, value, make_cudaExtent( m_elementSize * ( extent[0] ? extent[0] : m_extent[0] ), extent[1] ? extent[1] : m_extent[1], extent[2] ? extent[2] : m_extent[2] ) ) );
    }
TEST(Malloc3DArray, NegativeChannels) {
    struct cudaArray * ary;
    struct cudaChannelFormatDesc dsc;
    dsc.x = dsc.y = dsc.z = 8;
    dsc.w = -8;
    dsc.f = cudaChannelFormatKindSigned;

    cudaError_t ret;

    ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(1, 1, 1), 0);
    EXPECT_EQ(cudaErrorInvalidChannelDescriptor, ret);
}
void CudaImagePyramidHost::copyFromHost(int width, int height, const void* source, int layer)
{
    assert(isInitialized());
    assert(_textureType == cudaTextureType2DLayered);

    cudaMemcpy3DParms myParms = {0};
    myParms.srcPtr = make_cudaPitchedPtr((void*)source,width*_typeSize,width,height);
    myParms.srcPos = make_cudaPos(0,0,0);
    myParms.dstArray = _storage;
    myParms.dstPos = make_cudaPos(0,0,layer);
    myParms.extent = make_cudaExtent(width,height,1);
    myParms.kind = cudaMemcpyHostToDevice;
    cudaMemcpy3D(&myParms);

    checkCUDAError("Memcpy error", _name);
}
Exemple #12
0
void MemManager::cpyD2H(Task task) {
    cudaStream_t stream = task.stream;
    for (auto vdm : vdms) {
        VDMBuffer buf = vdm.bufs[task.n];

        // D2H転送
        cudaMemcpy3DParms parms = { 0 };
        parms.srcPos = make_cudaPos(0, 0, 0);
        parms.srcPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitchx(), buf.pitchx(), buf.pitchy());
        parms.dstPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z);
        parms.dstPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y);
        parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitchx(), buf.pitchy(), buf.pitchz());
        parms.kind = cudaMemcpyDeviceToHost;
        cudaMemcpy3DAsync(&parms, stream);
    }
}
TEST(Malloc3DArray, Attributes) {
    struct cudaArray * ary;
    struct cudaChannelFormatDesc dsc;
    dsc.x = dsc.y = dsc.z = dsc.w = 8;
    dsc.f = cudaChannelFormatKindSigned;

    cudaError_t ret;

    ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(1, 1, 1), 0);
    ASSERT_EQ(cudaSuccess, ret);

    struct cudaPointerAttributes attr;
    ret = cudaPointerGetAttributes(&attr, ary);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
}
Exemple #14
0
    void Buffer3D::setData( void const* srcData, dp::math::Vec3ui const& srcOffset, dp::math::Vec3ui const& srcStride, dp::math::Vec3ui const& srcExtent, dp::math::Vec3ui const& dstOffset )
    {
#if !defined(NDEBUG)
      for ( int i=0 ; i<3 ; i++ )
      {
        DP_ASSERT( srcOffset[i] + ( srcExtent[i] ? srcExtent[i] : m_extent[i] ) <= m_extent[i] );
        DP_ASSERT( dstOffset[i] + ( srcExtent[i] ? srcExtent[i] : m_extent[i] ) <= m_extent[i] );
      }
#endif
      cudaMemcpy3DParms parms = { 0 };
      parms.srcPos = make_cudaPos( m_elementSize * srcOffset[0], srcOffset[1], srcOffset[2] );
      parms.srcPtr = make_cudaPitchedPtr( const_cast<void *>(srcData), m_elementSize * ( srcStride[0] ? srcStride[0] : m_extent[0] ), srcStride[0] ? srcStride[0] : m_extent[0], srcStride[1] ? srcStride[1] : m_extent[1] );
      parms.dstPos = make_cudaPos( m_elementSize * dstOffset[0], dstOffset[1], dstOffset[2] );
      parms.dstPtr = m_pitchedPtr;
      parms.extent = make_cudaExtent( m_elementSize * ( srcExtent[0] ? srcExtent[0] : m_extent[0] ), srcExtent[1] ? srcExtent[1] : m_extent[1], srcExtent[2] ? srcExtent[2] : m_extent[2] );
      parms.kind = cudaMemcpyHostToDevice;
      CUDA_VERIFY( cudaMemcpy3D( &parms ) );
    }
Exemple #15
0
void MemManager::cpyH2D(Task task) {
    cudaStream_t stream = task.stream;
    for (auto vdm : vdms) {
        VDMBuffer buf = vdm.bufs[task.n];

        // H2D転送
        cudaMemcpy3DParms parms = { 0 };
        parms.dstPos = make_cudaPos(0, 0, 0);
        parms.dstPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitchx(), buf.pitchx(), buf.pitchy());
        parms.srcPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z);
        parms.srcPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y);
        parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitchx(), buf.pitchy(), buf.pitchz());
        parms.kind = cudaMemcpyHostToDevice;
        cudaMemcpy3DAsync(&parms, stream);
    }

    cudaMemcpyAsync(confsetDevPtr, confsetHosPtrs[task.n], sizeof(MKConf)*nVdm, cudaMemcpyHostToDevice, stream);
}
Exemple #16
0
    void Buffer3D::getData( dp::cuda::BufferHostSharedPtr const& dstBuffer, dp::math::Vec3ui const& dstOffset, dp::math::Vec3ui const& dstStride, dp::math::Vec3ui const& dstExtent
                          , dp::math::Vec3ui const& srcOffset, dp::cuda::StreamSharedPtr const& stream )
    {
#if !defined(NDEBUG)
      for ( int i=0 ; i<3 ; i++ )
      {
        DP_ASSERT( dstOffset[i] + ( dstExtent[i] ? dstExtent[i] : m_extent[i] ) <= m_extent[i] );
        DP_ASSERT( srcOffset[i] + ( dstExtent[i] ? dstExtent[i] : m_extent[i] ) <= m_extent[i] );
      }
#endif
      cudaMemcpy3DParms parms = { 0 };
      parms.srcPos = make_cudaPos( m_elementSize * srcOffset[0], srcOffset[1], srcOffset[2] );
      parms.srcPtr = m_pitchedPtr;
      parms.dstPos = make_cudaPos( m_elementSize * dstOffset[0], dstOffset[1], dstOffset[2] );
      parms.dstPtr = make_cudaPitchedPtr( dstBuffer->getPointer<void>(), m_elementSize * ( dstStride[0] ? dstStride[0] : m_extent[0] ), dstStride[0] ? dstStride[0] : m_extent[0], dstStride[1] ? dstStride[1] : m_extent[1] );
      parms.extent = make_cudaExtent( m_elementSize * ( dstExtent[0] ? dstExtent[0] : m_extent[0] ), dstExtent[1] ? dstExtent[1] : m_extent[1], dstExtent[2] ? dstExtent[2] : m_extent[2] );
      parms.kind = cudaMemcpyDeviceToHost;
      CUDA_VERIFY( cudaMemcpy3DAsync( &parms ) );
    }
const char *sOriginal[] =
{
    "volume.ppm",
    NULL
};

const char *sReference[] =
{
    "ref_volume.ppm",
    NULL
};

const char *sSDKsample = "CUDA 3D Volume Render";

char *volumeFilename = "Bucky.raw";
cudaExtent volumeSize = make_cudaExtent(32, 32, 32);
typedef unsigned char VolumeType;

//char *volumeFilename = "mrt16_angio.raw";
//cudaExtent volumeSize = make_cudaExtent(416, 512, 112);
//typedef unsigned short VolumeType;

uint width = 512, height = 512;
dim3 blockSize(16, 16);
dim3 gridSize;

float3 viewRotation;
float3 viewTranslation = make_float3(0.0, 0.0, -4.0f);
float invViewMatrix[12];

float density = 0.05f;
Exemple #18
0
 Buffer3D::Buffer3D( size_t elementSize, dp::math::Vec3ui const& extent )
   : m_elementSize( elementSize )
   , m_extent( extent )
 {
   CUDA_VERIFY( cudaMalloc3D( &m_pitchedPtr, make_cudaExtent( m_elementSize * m_extent[0], m_extent[1], m_extent[2] ) ) );
 }
Exemple #19
0
void VolSkin::init( int width, int height, TetMesh *tm )
{
	this->width = width;
	this->height = height;

	tetMesh = tm;

	// TEMP initialize volume data
	cudaExtent volumeSize = make_cudaExtent(128, 128, 128);
	//cudaExtent volumeSize = make_cudaExtent(256, 256, 256);

	// generate raw volume data
	float *h_densityData = (float*)malloc( sizeof(float)*volumeSize.width*volumeSize.height*volumeSize.depth );

	math::PerlinNoise pn;
	pn.setDepth( 4 );
	pn.setFrequency(3.0f);
	//pn.setInflection(true);

	for( int k=0;k<volumeSize.depth;++k )
		for( int j=0;j<volumeSize.height;++j )
			for( int i=0;i<volumeSize.width;++i )
			{
				int index = k*volumeSize.width*volumeSize.height + j*volumeSize.width + i;
				math::Vec3f uvw( (float)(i)/(float)(volumeSize.width),
								(float)(j)/(float)(volumeSize.height),
								(float)(k)/(float)(volumeSize.depth));
				float t = (float)(j)/(float)(volumeSize.height);
				//h_densityData[index] = 0.5f;
				//h_densityData[index] = (1.0f-t)*1.0f;
				h_densityData[index] = std::max( 0.0f, pn.perlinNoise_3D( uvw.x, uvw.y*2.0, uvw.z ) )*1.0f; // cylinder
				//h_densityData[index] = std::max( 0.0f, pn.perlinNoise_3D( uvw.x*2.0f, uvw.y*2.0f, uvw.z*2.0f ))*1.0f; // tetraeder
				//h_densityData[index] = (uvw.getLength() < 0.2f ? 1.0f : 0.0f)*2.0f;
			}



    // create 3D array
	d_densityArray = 0;
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
    cudaMalloc3DArray(&d_densityArray, &channelDesc, volumeSize);
	

	// copy data to 3D array
	cudaMemcpy3DParms copyParams = {0};
	copyParams.srcPtr   = make_cudaPitchedPtr((void*)h_densityData, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height);
	copyParams.dstArray = d_densityArray;
	copyParams.extent   = volumeSize;
	copyParams.kind     = cudaMemcpyHostToDevice;
	cudaMemcpy3D(&copyParams);



	// TMP
	/*
	h_debugVec.resize( 1000.0f );
	d_debugVec = h_debugVec;
	h_debugInfo.samples = convertToKernel(d_debugVec);
	h_debugInfo.numSamples = 0;
	cudaMemcpyToSymbol( d_debugInfo, &h_debugInfo, sizeof(DebugInfo), 0, cudaMemcpyHostToDevice );
	*/

	// setup lighting
	m_light0.cam = base::CameraPtr( new base::Camera() );
	m_light0.cam->m_aspectRatio = 1.0;
	//m_light0.cam->m_transform = math::createLookAtMatrix( math::Vec3f( -2.0f, -2.0f, 2.0f ), math::Vec3f( 0.0f, 0.0f, 0.0f ), math::Vec3f( 0.0f, 1.0f, 0.0f ), false );
	//m_light0.cam->m_transform =  math::Matrix44f::TranslationMatrix( 0.3f, 0.15f, 2.0f );
	//m_light0.cam->m_transform =  math::Matrix44f::TranslationMatrix( -3.0f, 0.0f, 0.0f );
	m_light0.cam->m_transform = math::createLookAtMatrix( math::Vec3f( 4.0f, 0.0f, 0.0f ), math::Vec3f( 0.0f, 0.0f, 0.0f ), math::Vec3f( 0.0f, 1.0f, 0.0f ), false );
	m_light0.cam->update();
	cudaMalloc( &m_light0.d_dctCoefficients, width*height*sizeof(float)*8 );// 8 floats /6 coefficients


		// set defaults
	setTotalCrossSection( 10.0f );
	setAlbedo( 1.0f );
	setAbsorptionColor( math::Vec3f(0.5f,0.5f, 0.5f) );
	setScatteringColor(math::Vec3f(0.5f, 0.5f, 0.5f));
	setLight(0, math::Vec3f(1.0f, 1.0f, 1.0f), 0.0f);
	setTime( 0.0f );
	setStepSize( 0.01f );


	// get tetmesh onto gpu
	gpuUploadTetMesh();

}
TEST(Malloc3DArray, NullArguments) {
    struct cudaArray * ary;
    struct cudaChannelFormatDesc dsc;
    dsc.x = dsc.y = dsc.z = dsc.w = 8;
    dsc.f = cudaChannelFormatKindSigned;

    // Commented out cases segfault.

    cudaError_t ret;
    ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 0, 0), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 0, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 8, 0), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 8, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    // ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 0, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
    ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 0, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);
    // ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 8, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
    // ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 8, 8), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 0, 0), 0);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaFreeArray(ary);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 0, 8), 0);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaFreeArray(ary);
    EXPECT_EQ(cudaSuccess, ret);

    ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 8, 0), 0);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaFreeArray(ary);
    EXPECT_EQ(cudaSuccess, ret);

    ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 8, 8), 0);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaFreeArray(ary);
    EXPECT_EQ(cudaSuccess, ret);

    // ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 0, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);

    /**
     * There's no reason why this should pass...

    ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 0, 8), 0);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaFreeArray(ary);
    EXPECT_EQ(cudaSuccess, ret);
     */

    // ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 8, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
    // ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 8, 8), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 0, 0), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 0, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 8, 0), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 8, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    // ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 0, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
    ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 0, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);
    // ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 8, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
    // ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 8, 8), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
}
TEST(Malloc3DArray, Limits) {
    struct cudaArray * ary;
    struct cudaChannelFormatDesc dsc;
    dsc.x = dsc.y = dsc.z = dsc.w = 8;
    dsc.f = cudaChannelFormatKindSigned;

    cudaError_t ret;

    ret = cudaMalloc3DArray(&ary, &dsc,
        make_cudaExtent(0,         0,          0),        0);
    EXPECT_EQ(cudaSuccess, ret);
    if (ret == cudaSuccess) {
        EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
    }

    int device;
    ret = cudaGetDevice(&device);
    ASSERT_EQ(cudaSuccess, ret);

    struct cudaDeviceProp prop;
    ret = cudaGetDeviceProperties(&prop, device);
    ASSERT_EQ(cudaSuccess, ret);

    /* Adapt to what's available by a safe margin */
    size_t targetable = prop.totalGlobalMem / 8;

    if ((size_t) prop.maxTexture1D < targetable) {
        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture1D, 0, 0), 0);
        EXPECT_EQ(cudaSuccess, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture1D + 1, 0, 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }
    }

    if ((size_t) prop.maxTexture2D[0] < targetable) {
        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0],     1, 0), 0);
        EXPECT_EQ(cudaSuccess, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0] + 1, 1, 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }
    }

    if ((size_t) prop.maxTexture2D[1] < targetable) {
        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(1, prop.maxTexture2D[1],     0), 0);
        EXPECT_EQ(cudaSuccess, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(1, prop.maxTexture2D[1] + 1, 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }
    }

    if ((size_t) prop.maxTexture2D[0] * prop.maxTexture2D[1] < targetable) {
        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0],
                prop.maxTexture2D[1], 0), 0);
        EXPECT_EQ(cudaSuccess, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0],
                prop.maxTexture2D[1] + 1, 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0] + 1,
                prop.maxTexture2D[1], 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0] + 1,
                prop.maxTexture2D[1] + 1, 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }
    } else if ((size_t) prop.maxTexture2D[0] * prop.maxTexture2D[1] >
            prop.totalGlobalMem) {
        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0], prop.maxTexture2D[1], 0), 0);
        EXPECT_EQ(cudaErrorMemoryAllocation, ret);
    }

    ret = cudaMalloc3DArray(&ary, &dsc,
        make_cudaExtent(1,         1,         1),        0);
    EXPECT_EQ(cudaSuccess, ret);
    if (ret == cudaSuccess) {
        EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
    }

    ret = cudaMalloc3DArray(&ary, &dsc,
        make_cudaExtent(64,        64,        64),     0);
    EXPECT_EQ(cudaSuccess, ret);
    if (ret == cudaSuccess) {
        EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
    }

    /* TODO:  More 3D tests. */
}
void CudaImagePyramidHost::initialize(int width, int height, cudaTextureFilterMode filter_mode, int depth)
{
    qDebug() << "pyramid host initializing with params: " << width << height << filter_mode << depth;
    if (isInitialized() && width == _baseWidth && height == _baseHeight && filter_mode == _filterMode) {
        return;
    }

    clear();
    qDebug() << "Clear done.";

    _baseWidth = width;
    _baseHeight = height;
    _filterMode = filter_mode;
    _numLayers = depth;

    // Get the texture and its channel descriptor to allocate the storage.
    const textureReference* constTexRefPtr=NULL;
    cudaGetTextureReference(&constTexRefPtr, _texture_name);
    qDebug() << "Texture Ref got:" << _name;
    if (constTexRefPtr == 0)
    {
        qDebug() << "constTexRefPtr==0";
    }
    checkCUDAError("Can't get tex ref for init TEXTURE_PYRAMID", _name);
    cudaChannelFormatDesc formatDesc = constTexRefPtr->channelDesc;

    if(_textureType == cudaTextureType2DLayered){
        cudaDeviceProp prop;
        qDebug() << "to get CUDA device prop";
        cudaGetDeviceProperties(&prop,0);
        qDebug() << "CUDA Device Prop got";
        if(prop.maxTexture2DLayered[0] < _baseWidth || prop.maxTexture2DLayered[1] < _baseHeight || prop.maxTexture2DLayered[2] < _numLayers){
            qDebug()<< "Max layered texture size:" << prop.maxTexture2DLayered[0] << " x " << prop.maxTexture2DLayered[1] << " x " << prop.maxTexture2DLayered[2];
            assert(0);
        }
        cudaExtent extent = make_cudaExtent(_baseWidth, _baseHeight, _numLayers);
        cudaMalloc3DArray(&_storage, &formatDesc, extent, cudaArrayLayered);
    }else{
        cudaMallocArray(&_storage, &formatDesc, _baseWidth, _baseHeight);
    }
    checkCUDAError("Failure to allocate", _name);

    qDebug() << "allocate done";

    // Set texture parameters.
    // Evil hack to get around an apparent bug in the cuda api:
    // cudaGetTextureReference only returns a const reference, and
    // there is no way to set the parameters with a reference other
    // than cast it to non-const.
    textureReference* texRefPtr=NULL;
    texRefPtr = const_cast<textureReference*>( constTexRefPtr );
    texRefPtr->addressMode[0] = cudaAddressModeClamp;
    texRefPtr->addressMode[1] = cudaAddressModeClamp;
    texRefPtr->filterMode = filter_mode;
    texRefPtr->normalized = false; // Use unnormalized (pixel) coordinates for addressing. This forbids texture mode wrap.

    bindTexture();

    qDebug() << "texture binded";

    bool found = false;
    for (size_t i = 0; i < _instances.size(); i++) {
        if (_instances[i] == this)
            found = true;
    }
    if (!found) {
        qDebug() << "Not found";
        _instances.push_back(this);
    }
    qDebug() << "paramid host initialized.";
}