Beispiel #1
0
    //! @brief Copy the ND-array device array to host array.
    //! You must allocate the array with the appropriate size.
    __host__
    inline void copy_to_host(T *host_data) const
    {
      if (N == 2)
      {
        SHAKTI_SAFE_CUDA_CALL(cudaMemcpy2D(
          host_data, _sizes[0] * sizeof(T), _data, _pitch, _sizes[0] * sizeof(T), _sizes[1],
          cudaMemcpyDeviceToHost));
      }
      else if (N == 3)
      {
        cudaMemcpy3DParms params = { 0 };

        params.srcPtr = make_cudaPitchedPtr(reinterpret_cast<void *>(_data),
                                            _pitch, _sizes[0], _sizes[1]);
        params.dstPtr = make_cudaPitchedPtr(host_data, _sizes[0] * sizeof(T),
                                            _sizes[0], _sizes[1]);
        params.extent = make_cudaExtent(_sizes[0]*sizeof(T), _sizes[1],
                                        _sizes[2]);
        params.kind = cudaMemcpyDeviceToHost;

        SHAKTI_SAFE_CUDA_CALL(cudaMemcpy3D(&params));
      }
      else
        SHAKTI_SAFE_CUDA_CALL(cudaMemcpy(host_data, _data, sizeof(T) * size(),
                                         cudaMemcpyDeviceToHost));
    }
Beispiel #2
0
 void operator()(Type* dest, const math::Size_t<2u> pitchDest, 
                 Type* source, const math::Size_t<2u> pitchSource, const math::Size_t<3>& size,
                 flags::Memcopy::Direction direction)
 {
         const cudaMemcpyKind kind[] = {cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost,
                                  cudaMemcpyHostToHost, cudaMemcpyDeviceToDevice};
                                  
         cudaPitchedPtr pitchedPtrDest;
         pitchedPtrDest.pitch = pitchDest.x(); pitchedPtrDest.ptr = dest;
         pitchedPtrDest.xsize = size.x(); pitchedPtrDest.ysize = size.y();
         cudaPitchedPtr pitchedPtrSource;
         pitchedPtrSource.pitch = pitchSource.x(); pitchedPtrSource.ptr = source;
         pitchedPtrSource.xsize = size.x(); pitchedPtrSource.ysize = size.y();
         
         cudaMemcpy3DParms params;
         params.srcArray = NULL;
         params.srcPos = make_cudaPos(0,0,0);
         params.srcPtr = pitchedPtrSource;
         params.dstArray = NULL;
         params.dstPos = make_cudaPos(0,0,0);
         params.dstPtr = pitchedPtrDest;
         params.extent = make_cudaExtent(size.x() * sizeof(Type), size.y(), size.z());
         params.kind = kind[direction];
         CUDA_CHECK_NO_EXCEP(cudaMemcpy3D(&params));
 }                    
Beispiel #3
0
void cpyD2H() {
    // D2H“]‘—
    cudaMemcpy3DParms parms = { 0 };
    parms.srcPos = make_cudaPos(0, 0, 0);
    parms.srcPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitch.x, buf.pitch.x, buf.pitch.y);
    parms.dstPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z);
    parms.dstPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y);
    parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitch.x, buf.pitch.y, buf.pitch.z);
    parms.kind = cudaMemcpyDeviceToHost;
    cudaMemcpy3D(&parms);
}
Beispiel #4
0
SEXP R_auto_cudaMemcpy3D(SEXP r_p)
{
    SEXP r_ans = R_NilValue;
    const struct cudaMemcpy3DParms * p = GET_REF(r_p, const struct cudaMemcpy3DParms );
    
    cudaError_t ans;
    ans = cudaMemcpy3D(p);
    
    r_ans = Renum_convert_cudaError_t(ans) ;
    
    return(r_ans);
}
Beispiel #5
0
cudaError_t cudaMemcpy3Dfix(const struct cudaMemcpy3DParms *param) {
    const cudaMemcpy3DParms& p = *param;
	// Use cudaMemcpy3D for 3D only
	// But it does not handle 2D or 1D copies well

	if (1<p.extent.depth) {
		return cudaMemcpy3D( &p );

	} else if (1<p.extent.height) {
		// 2D copy

		// Arraycopy
		if (0 != p.srcArray && 0 == p.dstArray) {
			return cudaMemcpy2DFromArray(p.dstPtr.ptr, p.dstPtr.pitch, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.extent.height, p.kind);

		} else if(0 == p.srcArray && 0 != p.dstArray) {
			return cudaMemcpy2DToArray(p.dstArray, p.dstPos.x, p.dstPos.y, p.srcPtr.ptr, p.srcPtr.pitch, p.extent.width, p.extent.height, p.kind);

		} else if(0 != p.srcArray && 0 != p.dstArray) {
			return cudaMemcpy2DArrayToArray( p.dstArray, p.dstPos.x, p.dstPos.y, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.extent.height, p.kind);

		} else {
			return cudaMemcpy2D( p.dstPtr.ptr, p.dstPtr.pitch, p.srcPtr.ptr, p.srcPtr.pitch, p.extent.width, p.extent.height, p.kind );

		}

	} else {
		// 1D copy

        // p.extent.width should not include pitch
        EXCEPTION_ASSERT( p.extent.width == p.dstPtr.xsize );
        EXCEPTION_ASSERT( p.extent.width == p.srcPtr.xsize );

		// Arraycopy
		if (0 != p.srcArray && 0 == p.dstArray) {
			return cudaMemcpyFromArray(p.dstPtr.ptr, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.kind);

		} else if(0 == p.srcArray && 0 != p.dstArray) {
			return cudaMemcpyToArray(p.dstArray, p.dstPos.x, p.dstPos.y, p.srcPtr.ptr, p.extent.width, p.kind);

		} else if(0 != p.srcArray && 0 != p.dstArray) {
			return cudaMemcpyArrayToArray(p.dstArray, p.dstPos.x, p.dstPos.y, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.kind);

        } else {
            return cudaMemcpy( p.dstPtr.ptr, p.srcPtr.ptr, p.extent.width, p.kind );

		}
	}
}
void CudaImagePyramidHost::copyFromHost(int width, int height, const void* source, int layer)
{
    assert(isInitialized());
    assert(_textureType == cudaTextureType2DLayered);

    cudaMemcpy3DParms myParms = {0};
    myParms.srcPtr = make_cudaPitchedPtr((void*)source,width*_typeSize,width,height);
    myParms.srcPos = make_cudaPos(0,0,0);
    myParms.dstArray = _storage;
    myParms.dstPos = make_cudaPos(0,0,layer);
    myParms.extent = make_cudaExtent(width,height,1);
    myParms.kind = cudaMemcpyHostToDevice;
    cudaMemcpy3D(&myParms);

    checkCUDAError("Memcpy error", _name);
}
Beispiel #7
0
    void Buffer3D::setData( void const* srcData, dp::math::Vec3ui const& srcOffset, dp::math::Vec3ui const& srcStride, dp::math::Vec3ui const& srcExtent, dp::math::Vec3ui const& dstOffset )
    {
#if !defined(NDEBUG)
      for ( int i=0 ; i<3 ; i++ )
      {
        DP_ASSERT( srcOffset[i] + ( srcExtent[i] ? srcExtent[i] : m_extent[i] ) <= m_extent[i] );
        DP_ASSERT( dstOffset[i] + ( srcExtent[i] ? srcExtent[i] : m_extent[i] ) <= m_extent[i] );
      }
#endif
      cudaMemcpy3DParms parms = { 0 };
      parms.srcPos = make_cudaPos( m_elementSize * srcOffset[0], srcOffset[1], srcOffset[2] );
      parms.srcPtr = make_cudaPitchedPtr( const_cast<void *>(srcData), m_elementSize * ( srcStride[0] ? srcStride[0] : m_extent[0] ), srcStride[0] ? srcStride[0] : m_extent[0], srcStride[1] ? srcStride[1] : m_extent[1] );
      parms.dstPos = make_cudaPos( m_elementSize * dstOffset[0], dstOffset[1], dstOffset[2] );
      parms.dstPtr = m_pitchedPtr;
      parms.extent = make_cudaExtent( m_elementSize * ( srcExtent[0] ? srcExtent[0] : m_extent[0] ), srcExtent[1] ? srcExtent[1] : m_extent[1], srcExtent[2] ? srcExtent[2] : m_extent[2] );
      parms.kind = cudaMemcpyHostToDevice;
      CUDA_VERIFY( cudaMemcpy3D( &parms ) );
    }
Beispiel #8
0
    CTfactory( const VolumeGPU<T>& src,
               U& texRef,
               const cudaTextureFilterMode fm = cudaFilterModePoint,
               const cudaTextureAddressMode am = cudaAddressModeClamp,
               const int norm = false ) : dca_data(NULL) {

        // Check for valid input
        if( src.d_data.ptr == NULL ) {
            std::cerr << __FUNCTION__
                      << ": Source has no data"
                      << std::endl;
            abort();
        }

        // Allocate memory
        cudaChannelFormatDesc cd = cudaCreateChannelDesc<T>();
        cudaExtent tmpExtent = ExtentFromDims( src.dims );

        CUDA_SAFE_CALL( cudaMalloc3DArray( &(this->dca_data),
                                           &cd,
                                           tmpExtent ) );

        // Do the copy
        cudaMemcpy3DParms cp = {0};

        cp.srcPtr = src.d_data;
        cp.dstArray = this->dca_data;
        cp.extent = tmpExtent;
        cp.kind = cudaMemcpyDeviceToDevice;

        CUDA_SAFE_CALL( cudaMemcpy3D( &cp ) );


        // Bind the texture
        texRef.normalized = norm;
        texRef.addressMode[0] = am;
        texRef.addressMode[1] = am;
        texRef.addressMode[2] = am;
        texRef.filterMode = fm;

        CUDA_SAFE_CALL( cudaBindTextureToArray( texRef, this->dca_data ) );
    }
Beispiel #9
0
void VolSkin::init( int width, int height, TetMesh *tm )
{
	this->width = width;
	this->height = height;

	tetMesh = tm;

	// TEMP initialize volume data
	cudaExtent volumeSize = make_cudaExtent(128, 128, 128);
	//cudaExtent volumeSize = make_cudaExtent(256, 256, 256);

	// generate raw volume data
	float *h_densityData = (float*)malloc( sizeof(float)*volumeSize.width*volumeSize.height*volumeSize.depth );

	math::PerlinNoise pn;
	pn.setDepth( 4 );
	pn.setFrequency(3.0f);
	//pn.setInflection(true);

	for( int k=0;k<volumeSize.depth;++k )
		for( int j=0;j<volumeSize.height;++j )
			for( int i=0;i<volumeSize.width;++i )
			{
				int index = k*volumeSize.width*volumeSize.height + j*volumeSize.width + i;
				math::Vec3f uvw( (float)(i)/(float)(volumeSize.width),
								(float)(j)/(float)(volumeSize.height),
								(float)(k)/(float)(volumeSize.depth));
				float t = (float)(j)/(float)(volumeSize.height);
				//h_densityData[index] = 0.5f;
				//h_densityData[index] = (1.0f-t)*1.0f;
				h_densityData[index] = std::max( 0.0f, pn.perlinNoise_3D( uvw.x, uvw.y*2.0, uvw.z ) )*1.0f; // cylinder
				//h_densityData[index] = std::max( 0.0f, pn.perlinNoise_3D( uvw.x*2.0f, uvw.y*2.0f, uvw.z*2.0f ))*1.0f; // tetraeder
				//h_densityData[index] = (uvw.getLength() < 0.2f ? 1.0f : 0.0f)*2.0f;
			}



    // create 3D array
	d_densityArray = 0;
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
    cudaMalloc3DArray(&d_densityArray, &channelDesc, volumeSize);
	

	// copy data to 3D array
	cudaMemcpy3DParms copyParams = {0};
	copyParams.srcPtr   = make_cudaPitchedPtr((void*)h_densityData, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height);
	copyParams.dstArray = d_densityArray;
	copyParams.extent   = volumeSize;
	copyParams.kind     = cudaMemcpyHostToDevice;
	cudaMemcpy3D(&copyParams);



	// TMP
	/*
	h_debugVec.resize( 1000.0f );
	d_debugVec = h_debugVec;
	h_debugInfo.samples = convertToKernel(d_debugVec);
	h_debugInfo.numSamples = 0;
	cudaMemcpyToSymbol( d_debugInfo, &h_debugInfo, sizeof(DebugInfo), 0, cudaMemcpyHostToDevice );
	*/

	// setup lighting
	m_light0.cam = base::CameraPtr( new base::Camera() );
	m_light0.cam->m_aspectRatio = 1.0;
	//m_light0.cam->m_transform = math::createLookAtMatrix( math::Vec3f( -2.0f, -2.0f, 2.0f ), math::Vec3f( 0.0f, 0.0f, 0.0f ), math::Vec3f( 0.0f, 1.0f, 0.0f ), false );
	//m_light0.cam->m_transform =  math::Matrix44f::TranslationMatrix( 0.3f, 0.15f, 2.0f );
	//m_light0.cam->m_transform =  math::Matrix44f::TranslationMatrix( -3.0f, 0.0f, 0.0f );
	m_light0.cam->m_transform = math::createLookAtMatrix( math::Vec3f( 4.0f, 0.0f, 0.0f ), math::Vec3f( 0.0f, 0.0f, 0.0f ), math::Vec3f( 0.0f, 1.0f, 0.0f ), false );
	m_light0.cam->update();
	cudaMalloc( &m_light0.d_dctCoefficients, width*height*sizeof(float)*8 );// 8 floats /6 coefficients


		// set defaults
	setTotalCrossSection( 10.0f );
	setAlbedo( 1.0f );
	setAbsorptionColor( math::Vec3f(0.5f,0.5f, 0.5f) );
	setScatteringColor(math::Vec3f(0.5f, 0.5f, 0.5f));
	setLight(0, math::Vec3f(1.0f, 1.0f, 1.0f), 0.0f);
	setTime( 0.0f );
	setStepSize( 0.01f );


	// get tetmesh onto gpu
	gpuUploadTetMesh();

}
Beispiel #10
0
////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
////////////////////////////////////////////////////////////////////////////////
void RunKernels()
{
    static float t = 0.0f;

    // populate the 2d texture
    {
        cudaArray *cuArray;
        cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_2d.cudaResource, 0, 0);
        getLastCudaError("cudaGraphicsSubResourceGetMappedArray (cuda_texture_2d) failed");

        // kick off the kernel and send the staging buffer cudaLinearMemory as an argument to allow the kernel to write to it
        cuda_texture_2d(g_texture_2d.cudaLinearMemory, g_texture_2d.width, g_texture_2d.height, g_texture_2d.pitch, t);
        getLastCudaError("cuda_texture_2d failed");

        // then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray
        cudaMemcpy2DToArray(
            cuArray, // dst array
            0, 0,    // offset
            g_texture_2d.cudaLinearMemory, g_texture_2d.pitch,       // src
            g_texture_2d.width*4*sizeof(float), g_texture_2d.height, // extent
            cudaMemcpyDeviceToDevice); // kind
        getLastCudaError("cudaMemcpy2DToArray failed");
    }
    // populate the volume texture
    {
        size_t pitchSlice = g_texture_vol.pitch * g_texture_vol.height;
        cudaArray *cuArray;
        cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_vol.cudaResource, 0, 0);
        getLastCudaError("cudaGraphicsSubResourceGetMappedArray (cuda_texture_3d) failed");

        // kick off the kernel and send the staging buffer cudaLinearMemory as an argument to allow the kernel to write to it
        cuda_texture_volume(g_texture_vol.cudaLinearMemory, g_texture_vol.width, g_texture_vol.height, g_texture_vol.depth, g_texture_vol.pitch, pitchSlice, t);
        getLastCudaError("cuda_texture_3d failed");

        // then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray
        struct cudaMemcpy3DParms memcpyParams = {0};
        memcpyParams.dstArray = cuArray;
        memcpyParams.srcPtr.ptr = g_texture_vol.cudaLinearMemory;
        memcpyParams.srcPtr.pitch = g_texture_vol.pitch;
        memcpyParams.srcPtr.xsize = g_texture_vol.width;
        memcpyParams.srcPtr.ysize = g_texture_vol.height;
        memcpyParams.extent.width = g_texture_vol.width;
        memcpyParams.extent.height = g_texture_vol.height;
        memcpyParams.extent.depth = g_texture_vol.depth;
        memcpyParams.kind = cudaMemcpyDeviceToDevice;
        cudaMemcpy3D(&memcpyParams);
        getLastCudaError("cudaMemcpy3D failed");
    }

    // populate the faces of the cube map
    for (int face = 0; face < 6; ++face)
    {
        cudaArray *cuArray;
        cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_cube.cudaResource, face, 0);
        getLastCudaError("cudaGraphicsSubResourceGetMappedArray (cuda_texture_cube) failed");

        // kick off the kernel and send the staging buffer cudaLinearMemory as an argument to allow the kernel to write to it
        cuda_texture_cube(g_texture_cube.cudaLinearMemory, g_texture_cube.size, g_texture_cube.size, g_texture_cube.pitch, face, t);
        getLastCudaError("cuda_texture_cube failed");

        // then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray
        cudaMemcpy2DToArray(
            cuArray, // dst array
            0, 0,    // offset
            g_texture_cube.cudaLinearMemory, g_texture_cube.pitch, // src
            g_texture_cube.size*4, g_texture_cube.size,            // extent
            cudaMemcpyDeviceToDevice); // kind
        getLastCudaError("cudaMemcpy2DToArray failed");
    }

    t += 0.1f;
}
Beispiel #11
0
cudaError_t WINAPI wine_cudaMemcpy3D( const struct cudaMemcpy3DParms *p ) {
    WINE_TRACE("\n");
    return cudaMemcpy3D( p );
}