Esempio n. 1
0
void TransferFunction::onColorTFChanged()
{
    //std::cout<<"Color changed"<<std::endl;
    if(compositeTex)
    {
        CudaSafeCall(cudaDestroyTextureObject(compositeTex));
        compositeTex = 0;
    }

    colorTF->GetTable(0.0, 1.0, TABLE_SIZE, colorTable);
    size_t j = 0, k = 0;
    for(size_t i = 0; i < TABLE_SIZE; ++i)
    {
        compositeTable[j++] = colorTable[k++];
        compositeTable[j++] = colorTable[k++];
        compositeTable[j++] = colorTable[k++];
        j++;
    }
    //CompositeTable();

    CudaSafeCall(cudaMemcpyToArray(array, 0, 0, compositeTable, sizeof(float) * TABLE_SIZE * 4, cudaMemcpyHostToDevice));
    CudaSafeCall(cudaCreateTextureObject(&compositeTex, &resourceDesc, &texDesc, NULL));

    Changed();
}
Esempio n. 2
0
TransferFunction::TransferFunction(vtkSmartPointer<vtkPiecewiseFunction> otf, vtkSmartPointer<vtkColorTransferFunction> ctf, QObject *parent) : QObject(parent)
{
    opacityTF = otf;
    colorTF = ctf;

    this->otf = QSharedPointer<ctkTransferFunction>(new ctkVTKPiecewiseFunction(opacityTF));
    this->ctf = QSharedPointer<ctkTransferFunction>(new ctkVTKColorTransferFunction(colorTF));

    connect(this->otf.data(), SIGNAL(changed()), this, SLOT(onOpacityTFChanged()));
    connect(this->ctf.data(), SIGNAL(changed()), this, SLOT(onColorTFChanged()));

    compositeTex = 0;

    // initialize each table
    opacityTF->GetTable(0.0, 1.0, TABLE_SIZE, opacityTable);
    colorTF->GetTable(0.0, 1.0, TABLE_SIZE, colorTable);
    CompositeTable();

    channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
    CudaSafeCall(cudaMallocArray(&array, &channelDesc, TABLE_SIZE));
    CudaSafeCall(cudaMemcpyToArray(array, 0, 0, compositeTable, sizeof(float) * TABLE_SIZE * 4, cudaMemcpyHostToDevice));

    memset(&resourceDesc, 0, sizeof(resourceDesc));
    resourceDesc.resType = cudaResourceTypeArray;
    resourceDesc.res.array.array = array;

    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0] = cudaAddressModeClamp;
    texDesc.filterMode = cudaFilterModeLinear;
    texDesc.normalizedCoords = true;
    texDesc.readMode = cudaReadModeElementType;

    CudaSafeCall(cudaCreateTextureObject(&compositeTex, &resourceDesc, &texDesc, NULL));
}
cudaTextureObject_t create_environment_light_texture(const std::string& filename)
{
    int w = 0, h = 0, n = 0;
    float* data = stbi_loadf(filename.c_str(), &w, &h, &n, 0);
    if(!data)
    {
        std::cerr<<"Unable to load environment map: "<<filename<<std::endl;
        exit(0);
    }

    //create channel desc
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>();
    //create cudaArray
    cudaArray* array;
    checkCudaErrors(cudaMallocArray(&array, &channelDesc, w, h));
    if(n == 3)
    {
        uint32_t count = w * h;
        std::vector<float4> ext_data;
        ext_data.reserve(count);
        for(auto i = 0; i < count; ++i)
            ext_data.push_back(make_float4(data[i * 3], data[i * 3 + 1], data[i * 3 + 2], 0.f));

        checkCudaErrors(cudaMemcpyToArray(array, 0, 0, ext_data.data(), sizeof(float4) * w * h, cudaMemcpyHostToDevice));
    }
    else
        checkCudaErrors(cudaMemcpyToArray(array, 0, 0, data, sizeof(float4) * w * h, cudaMemcpyHostToDevice));
    //create resource desc
    cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = array;
    //create texture desc
    cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0] = cudaAddressModeWrap;
    texDesc.addressMode[1] = cudaAddressModeWrap;
    texDesc.filterMode = cudaFilterModeLinear;
    texDesc.readMode = cudaReadModeElementType;
    texDesc.normalizedCoords = true;
    //create cudaTextureObject
    cudaTextureObject_t tex;
    checkCudaErrors(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));

    return tex;
}
Esempio n. 4
0
void CudaImagePyramidHost::copyFromHost(const void* source)
{
    assert(isInitialized());
    assert(_textureType == cudaTextureType2D);

    cudaMemcpyToArray(_storage, 0,0, source, _baseWidth*_baseHeight*_typeSize, cudaMemcpyHostToDevice);

    checkCUDAError("Memcpy error", _name);
}
Esempio n. 5
0
void CudaTexture::copyFrom(void * src, unsigned size)
{
    std::cout<<" cu tex cpy "<<size<<"\n";
	cudaArray * arr;
	cutilSafeCall(cudaGraphicsMapResources(1, &_cuda_tex_resource, 0));
    cutilSafeCall(cudaGraphicsSubResourceGetMappedArray(&arr, _cuda_tex_resource, 0, 0));
	
	cutilSafeCall(cudaMemcpyToArray(arr, 0, 0, src, size, cudaMemcpyDeviceToDevice));

	cudaGraphicsUnmapResources(1, &_cuda_tex_resource, 0);
}
Esempio n. 6
0
cudaError_t cudaMemcpy3Dfix(const struct cudaMemcpy3DParms *param) {
    const cudaMemcpy3DParms& p = *param;
	// Use cudaMemcpy3D for 3D only
	// But it does not handle 2D or 1D copies well

	if (1<p.extent.depth) {
		return cudaMemcpy3D( &p );

	} else if (1<p.extent.height) {
		// 2D copy

		// Arraycopy
		if (0 != p.srcArray && 0 == p.dstArray) {
			return cudaMemcpy2DFromArray(p.dstPtr.ptr, p.dstPtr.pitch, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.extent.height, p.kind);

		} else if(0 == p.srcArray && 0 != p.dstArray) {
			return cudaMemcpy2DToArray(p.dstArray, p.dstPos.x, p.dstPos.y, p.srcPtr.ptr, p.srcPtr.pitch, p.extent.width, p.extent.height, p.kind);

		} else if(0 != p.srcArray && 0 != p.dstArray) {
			return cudaMemcpy2DArrayToArray( p.dstArray, p.dstPos.x, p.dstPos.y, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.extent.height, p.kind);

		} else {
			return cudaMemcpy2D( p.dstPtr.ptr, p.dstPtr.pitch, p.srcPtr.ptr, p.srcPtr.pitch, p.extent.width, p.extent.height, p.kind );

		}

	} else {
		// 1D copy

        // p.extent.width should not include pitch
        EXCEPTION_ASSERT( p.extent.width == p.dstPtr.xsize );
        EXCEPTION_ASSERT( p.extent.width == p.srcPtr.xsize );

		// Arraycopy
		if (0 != p.srcArray && 0 == p.dstArray) {
			return cudaMemcpyFromArray(p.dstPtr.ptr, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.kind);

		} else if(0 == p.srcArray && 0 != p.dstArray) {
			return cudaMemcpyToArray(p.dstArray, p.dstPos.x, p.dstPos.y, p.srcPtr.ptr, p.extent.width, p.kind);

		} else if(0 != p.srcArray && 0 != p.dstArray) {
			return cudaMemcpyArrayToArray(p.dstArray, p.dstPos.x, p.dstPos.y, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.kind);

        } else {
            return cudaMemcpy( p.dstPtr.ptr, p.srcPtr.ptr, p.extent.width, p.kind );

		}
	}
}
	void generateCUDAImage() {
		unsigned int* out_data = cuda_dest_resource;
		dim3 block(16, 16, 1); 
		dim3 grid(image_width / block.x, image_height / block.y, 1);
		launch_cudaProcess(grid, block, 0, out_data, image_width);

		cudaArray *texture_ptr;
		cudaGraphicsMapResources(1, &cuda_tex_result_resource, 0);
		cudaGraphicsSubResourceGetMappedArray(&texture_ptr, cuda_tex_result_resource, 0, 0);

		int num_texels = image_width * image_height;
		int num_values = num_texels * 4;
		int size_tex_data = sizeof(GLubyte) * num_values;
		cudaMemcpyToArray(texture_ptr, 0, 0, cuda_dest_resource, size_tex_data, cudaMemcpyDeviceToDevice);
	}
Esempio n. 8
0
void processImage()
{
  	processLayer(sim_width, sim_height, cuda_dest_resource);

    cudaArray *texture_ptr;
    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_tex_result_resource, 0));
    cutilSafeCall(cudaGraphicsSubResourceGetMappedArray(&texture_ptr, cuda_tex_result_resource, 0, 0));

    int num_texels = sim_width * sim_height;
    int num_values = num_texels * 4;
    int size_tex_data = sizeof(GLubyte) * num_values;
    cutilSafeCall(cudaMemcpyToArray(texture_ptr, 0, 0, cuda_dest_resource, size_tex_data, cudaMemcpyDeviceToDevice));

    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_tex_result_resource, 0));

}
Esempio n. 9
0
SEXP R_auto_cudaMemcpyToArray(SEXP r_dst, SEXP r_wOffset, SEXP r_hOffset, SEXP r_src, SEXP r_count, SEXP r_kind)
{
    SEXP r_ans = R_NilValue;
    cudaArray_t dst = (cudaArray_t) getRReference(r_dst);
    size_t wOffset = REAL(r_wOffset)[0];
    size_t hOffset = REAL(r_hOffset)[0];
    const void * src = GET_REF(r_src, const void );
    size_t count = REAL(r_count)[0];
    enum cudaMemcpyKind kind = (enum cudaMemcpyKind) INTEGER(r_kind)[0];
    
    cudaError_t ans;
    ans = cudaMemcpyToArray(dst, wOffset, hOffset, src, count, kind);
    
    r_ans = Renum_convert_cudaError_t(ans) ;
    
    return(r_ans);
}
Esempio n. 10
0
void costVol_chamo::updataCV(Mat refImg, Mat projMat, float weightPerImg){
	
	
	cudaArray* cuArray;
	cudaTextureObject_t texObj;
	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
	cudaSafeCall(cudaMallocArray(&cuArray, &channelDesc, width, height));

	cudaMemcpyToArray(cuArray, 0, 0, refImg.data, width*height*sizeof(float), cudaMemcpyHostToDevice);
	cudaSafeCall(cudaGetLastError());

	struct cudaResourceDesc resDesc;
	memset(&resDesc, 0, sizeof(resDesc));
	resDesc.resType = cudaResourceTypeArray;
	resDesc.res.array.array = cuArray;
	struct cudaTextureDesc texDesc;
	memset(&texDesc, 0, sizeof(texDesc));
	texDesc.addressMode[0] = cudaAddressModeClamp;
	texDesc.addressMode[1] = cudaAddressModeClamp;
	texDesc.filterMode = cudaFilterModeLinear;
	texDesc.readMode = cudaReadModeNormalizedFloat;
	texDesc.normalizedCoords = 0;
	cudaSafeCall(cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL));

	Mat finalTran = projMat*baseImgProjMat.inv();
	cvInput input;
	input.baseImg = (float *)baseImg.data;
	input.cvData = (float*)cvData.data;
	input.nearZ = nearZ;
	input.farZ = farZ;
	input.height = height;
	input.width = width;
	input.lowInd = (float*)lowInd.data;
	input.lowValue = (float*)bottonVal.data;
	for (int i = 0; i < 12; i++){
		input.transMat[i] = finalTran.at<float>(i);
	}
	input.refImg = texObj;
	input.zStep = (nearZ - farZ) / layers;
	input.stepCount = layers;
	updataCount++;
	input.weightPerImg = 1.0 / updataCount;
	updataCVCaller(input);
}
Esempio n. 11
0
////////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    float
    *h_Kernel,
    *h_Input,
    *h_Buffer,
    *h_OutputCPU,
    *h_OutputGPU;

    cudaArray
    *a_Src;

    cudaChannelFormatDesc floatTex = cudaCreateChannelDesc<float>();

    float
    *d_Output;

    float
    gpuTime;

    StopWatchInterface *hTimer = NULL;

    const int imageW = 3072;
    const int imageH = 3072 / 2;
    const unsigned int iterations = 10;

    printf("[%s] - Starting...\n", argv[0]);

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    findCudaDevice(argc, (const char **)argv);

    sdkCreateTimer(&hTimer);

    printf("Initializing data...\n");
    h_Kernel    = (float *)malloc(KERNEL_LENGTH * sizeof(float));
    h_Input     = (float *)malloc(imageW * imageH * sizeof(float));
    h_Buffer    = (float *)malloc(imageW * imageH * sizeof(float));
    h_OutputCPU = (float *)malloc(imageW * imageH * sizeof(float));
    h_OutputGPU = (float *)malloc(imageW * imageH * sizeof(float));
    checkCudaErrors(cudaMallocArray(&a_Src, &floatTex, imageW, imageH));
    checkCudaErrors(cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)));

    srand(2009);

    for (unsigned int i = 0; i < KERNEL_LENGTH; i++)
    {
        h_Kernel[i] = (float)(rand() % 16);
    }

    for (unsigned int i = 0; i < imageW * imageH; i++)
    {
        h_Input[i] = (float)(rand() % 16);
    }

    setConvolutionKernel(h_Kernel);
    checkCudaErrors(cudaMemcpyToArray(a_Src, 0, 0, h_Input, imageW * imageH * sizeof(float), cudaMemcpyHostToDevice));


    printf("Running GPU rows convolution (%u identical iterations)...\n", iterations);
    checkCudaErrors(cudaDeviceSynchronize());
    sdkResetTimer(&hTimer);
    sdkStartTimer(&hTimer);

    for (unsigned int i = 0; i < iterations; i++)
    {
        convolutionRowsGPU(
            d_Output,
            a_Src,
            imageW,
            imageH
        );
    }

    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&hTimer);
    gpuTime = sdkGetTimerValue(&hTimer) / (float)iterations;
    printf("Average convolutionRowsGPU() time: %f msecs; //%f Mpix/s\n", gpuTime, imageW * imageH * 1e-6 / (0.001 * gpuTime));

    //While CUDA kernels can't write to textures directly, this copy is inevitable
    printf("Copying convolutionRowGPU() output back to the texture...\n");
    checkCudaErrors(cudaDeviceSynchronize());
    sdkResetTimer(&hTimer);
    sdkStartTimer(&hTimer);
    checkCudaErrors(cudaMemcpyToArray(a_Src, 0, 0, d_Output, imageW * imageH * sizeof(float), cudaMemcpyDeviceToDevice));
    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&hTimer);
    gpuTime = sdkGetTimerValue(&hTimer);
    printf("cudaMemcpyToArray() time: %f msecs; //%f Mpix/s\n", gpuTime, imageW * imageH * 1e-6 / (0.001 * gpuTime));

    printf("Running GPU columns convolution (%i iterations)\n", iterations);
    checkCudaErrors(cudaDeviceSynchronize());
    sdkResetTimer(&hTimer);
    sdkStartTimer(&hTimer);

    for (int i = 0; i < iterations; i++)
    {
        convolutionColumnsGPU(
            d_Output,
            a_Src,
            imageW,
            imageH
        );
    }

    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&hTimer);
    gpuTime = sdkGetTimerValue(&hTimer) / (float)iterations;
    printf("Average convolutionColumnsGPU() time: %f msecs; //%f Mpix/s\n", gpuTime, imageW * imageH * 1e-6 / (0.001 * gpuTime));

    printf("Reading back GPU results...\n");
    checkCudaErrors(cudaMemcpy(h_OutputGPU, d_Output, imageW * imageH * sizeof(float), cudaMemcpyDeviceToHost));

    printf("Checking the results...\n");
    printf("...running convolutionRowsCPU()\n");
    convolutionRowsCPU(
        h_Buffer,
        h_Input,
        h_Kernel,
        imageW,
        imageH,
        KERNEL_RADIUS
    );

    printf("...running convolutionColumnsCPU()\n");
    convolutionColumnsCPU(
        h_OutputCPU,
        h_Buffer,
        h_Kernel,
        imageW,
        imageH,
        KERNEL_RADIUS
    );

    double delta = 0;
    double sum = 0;

    for (unsigned int i = 0; i < imageW * imageH; i++)
    {
        sum += h_OutputCPU[i] * h_OutputCPU[i];
        delta += (h_OutputGPU[i] - h_OutputCPU[i]) * (h_OutputGPU[i] - h_OutputCPU[i]);
    }

    double L2norm = sqrt(delta / sum);
    printf("Relative L2 norm: %E\n", L2norm);
    printf("Shutting down...\n");

    checkCudaErrors(cudaFree(d_Output));
    checkCudaErrors(cudaFreeArray(a_Src));
    free(h_OutputGPU);
    free(h_Buffer);
    free(h_Input);
    free(h_Kernel);

    sdkDeleteTimer(&hTimer);

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();

    if (L2norm > 1e-6)
    {
        printf("Test failed!\n");
        exit(EXIT_FAILURE);
    }

    printf("Test passed\n");
    exit(EXIT_SUCCESS);
}
Esempio n. 12
0
HRESULT CUDARGBDSensor::process(ID3D11DeviceContext* context)
{
	HRESULT hr = S_OK;

	if (m_RGBDAdapter->process(context) == S_FALSE)	return S_FALSE;

	////////////////////////////////////////////////////////////////////////////////////
	// Process Color
	////////////////////////////////////////////////////////////////////////////////////

	//Start Timing
	if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); }

	if (m_bFilterIntensityValues)	gaussFilterFloat4Map(m_depthCameraData.d_colorData, m_RGBDAdapter->getColorMapResampledFloat4(), m_fBilateralFilterSigmaDIntensity, m_fBilateralFilterSigmaRIntensity, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight());
	else							copyFloat4Map(m_depthCameraData.d_colorData, m_RGBDAdapter->getColorMapResampledFloat4(), m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight());

	// Stop Timing
	if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeFilterColor += m_timer.getElapsedTimeMS(); TimingLog::countTimeFilterColor++; }

	////////////////////////////////////////////////////////////////////////////////////
	// Process Depth
	////////////////////////////////////////////////////////////////////////////////////

	//Start Timing
	if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); }

	if (m_bFilterDepthValues) gaussFilterFloatMap(d_depthMapFilteredFloat, m_RGBDAdapter->getDepthMapResampledFloat(), m_fBilateralFilterSigmaD, m_fBilateralFilterSigmaR, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight());
	else					 copyFloatMap(d_depthMapFilteredFloat, m_RGBDAdapter->getDepthMapResampledFloat(), m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight());

	//TODO this call seems not needed as the depth map is overwriten later anyway later anyway...
	setInvalidFloatMap(m_depthCameraData.d_depthData, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight());

	// Stop Timing
	if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeFilterDepth += m_timer.getElapsedTimeMS(); TimingLog::countTimeFilterDepth++; }

	////////////////////////////////////////////////////////////////////////////////////
	// Render to Color Space
	////////////////////////////////////////////////////////////////////////////////////

	//Start Timing
	if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); }

	if (GlobalAppState::get().s_bUseCameraCalibration)
	{
		mat4f depthExt = m_RGBDAdapter->getDepthExtrinsics();

		g_CustomRenderTarget.Clear(context);
		g_CustomRenderTarget.Bind(context);
		g_RGBDRenderer.RenderDepthMap(context,
			d_depthMapFilteredFloat, m_depthCameraData.d_colorData, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight(),
			m_RGBDAdapter->getDepthIntrinsicsInv(), depthExt, m_RGBDAdapter->getColorIntrinsics(),
			g_CustomRenderTarget.getWidth(), g_CustomRenderTarget.getHeight(),
			GlobalAppState::get().s_remappingDepthDiscontinuityThresOffset, GlobalAppState::get().s_remappingDepthDiscontinuityThresLin);
		g_CustomRenderTarget.Unbind(context);
		g_CustomRenderTarget.copyToCuda(m_depthCameraData.d_depthData, 0);


		//Util::writeToImage(m_depthCameraData.d_depthData, getDepthWidth(), getDepthHeight(), "depth.png");
		//Util::writeToImage(m_depthCameraData.d_colorData, getDepthWidth(), getDepthHeight(), "color.png");
	}
	else
	{
		copyFloatMap(m_depthCameraData.d_depthData, d_depthMapFilteredFloat, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight());
	}


	bool bErode = false;
	if (bErode) {
		unsigned int numIter = 20;

		numIter = 2 * ((numIter + 1) / 2);
		for (unsigned int i = 0; i < numIter; i++) {
			if (i % 2 == 0) {
				erodeDepthMap(d_depthErodeHelper, m_depthCameraData.d_depthData, 5, getDepthWidth(), getDepthHeight(), 0.05f, 0.3f);
			}
			else {
				erodeDepthMap(m_depthCameraData.d_depthData, d_depthErodeHelper, 5, getDepthWidth(), getDepthHeight(), 0.05f, 0.3f);
			}
		}
	}

	//TODO check whether the intensity is actually used
	convertColorToIntensityFloat(d_intensityMapFilteredFloat, m_depthCameraData.d_colorData, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight());


	float4x4 M((m_RGBDAdapter->getColorIntrinsicsInv()).ptr());
	m_depthCameraData.updateParams(getDepthCameraParams());
	convertDepthFloatToCameraSpaceFloat4(d_cameraSpaceFloat4, m_depthCameraData.d_depthData, M, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight(), m_depthCameraData); // !!! todo
	computeNormals(d_normalMapFloat4, d_cameraSpaceFloat4, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight());

	float4x4 Mintrinsics((m_RGBDAdapter->getColorIntrinsics()).ptr());
	cudaMemcpyToArray(m_depthCameraData.d_depthArray, 0, 0, m_depthCameraData.d_depthData, sizeof(float)*m_depthCameraParams.m_imageHeight*m_depthCameraParams.m_imageWidth, cudaMemcpyDeviceToDevice);
	cudaMemcpyToArray(m_depthCameraData.d_colorArray, 0, 0, m_depthCameraData.d_colorData, sizeof(float4)*m_depthCameraParams.m_imageHeight*m_depthCameraParams.m_imageWidth, cudaMemcpyDeviceToDevice);

	// Stop Timing
	if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeRemapDepth += m_timer.getElapsedTimeMS(); TimingLog::countTimeRemapDepth++; }

	return hr;
}
Esempio n. 13
0
cudaError_t WINAPI wine_cudaMemcpyToArray( struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind ) {
    WINE_TRACE("\n");
    return cudaMemcpyToArray( dst, wOffset, hOffset, src, count, kind );
}
Esempio n. 14
0
int window_loop() {

    GLFWwindow* window;

    window = glfwCreateWindow(640, 480, "Shader test", NULL, NULL);
    if (!window)
    {
        glfwTerminate();
        return 0;
    }

    glfwMakeContextCurrent(window);
    //glfwSetInputMode(window, GLFW_CURSOR, GLFW_CURSOR_DISABLED);
    glfwSetKeyCallback(window, key_callback);

    //glfwSetCursorPosCallback(window, cursor_callback);

    //glEnable(GL_CULL_FACE);
    glEnable(GL_LIGHT0);
    //glEnable(GL_DEPTH_TEST);
//	glEnable(GL_LIGHTING);

//	glEnable(GL_BLEND);
    // glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_ADD);
    // glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO);
//	glEnable(GL_NORMALIZE);
    //glEnable(GL_NORMALIZE);
    glEnable(GL_TEXTURE_2D);
//glPolygonMode( GL_FRONT_AND_BACK, GL_LINE );

    // glPolygonMode( GL_FRONT, GL_LINE );
    // glPolygonMode( GL_BACK, GL_POINT );

//	glEnable(GL_COLOR_MATERIAL);

    cudaGLSetGLDevice(0);


    double ot, nt = glfwGetTime();

    GLuint textureID[6];
    glGenTextures(1, textureID);

    png_bytep* tex1;
    int lw, lh;
    printf("Laddar PNG\n");
    read_png_file("/srv/texturer/Slate Tiles - (Normal Map).png", &tex1, &lw, &lh);

    printf("Laddade textur som är %i x %i pixelitaz stor.\n", lw, lh);

    float3* normal_map = NULL;

    size_t normal_map_bufferSize = 1024 * 1024 * sizeof(float3);
    cudaMalloc( &normal_map, normal_map_bufferSize );
    float3* host_normal_map = calloc(1024*1024, sizeof(float3));



    glBindTexture(GL_TEXTURE_2D, textureID[0]);

    for (int y=0; y<1024; y++) {
        for (int x=0; x<1024; x++) {
            host_normal_map[y*1024+x].x = (float)(tex1[y][x*3+0]-127) / 127;
            host_normal_map[y*1024+x].y = (float)(tex1[y][x*3+1]-127) / 127;
            host_normal_map[y*1024+x].z = (float)(tex1[y][x*3+2]-127) / 127;
        }
    }

    cudaMemcpy(normal_map, host_normal_map, normal_map_bufferSize, cudaMemcpyHostToDevice);

    glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA, 1024, 1024, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

    // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);


    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    double cx, cy;
    glfwGetCursorPos(window, &cx, &cy);


    struct cudaGraphicsResource *test1;
    int r1=cudaGraphicsGLRegisterImage(&test1, textureID[0], GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard);
    printf("r1=%i\n");

    uchar4* g_dstBuffer = NULL;

    size_t bufferSize = 1024 * 1024 * sizeof(uchar4);
    cudaMalloc( &g_dstBuffer, bufferSize );

    cudaMemset(g_dstBuffer, 0x7F, bufferSize);	//Make texture gray to start with

    printf("cuda alloc: %p\n", g_dstBuffer);

    double fps_time =0 ;
    int fps_count=0;

    while (!glfwWindowShouldClose(window))
    {

        ot=nt;
        nt =glfwGetTime();
        float dt = nt - ot;


        fps_time += dt;
        fps_count++;

        if (fps_time > 1) {
            printf("FPS: %f\n", fps_count/fps_time);
            fps_time=0;
            fps_count =0;

        }




        int width, height;
        glfwGetFramebufferSize(window, &width, &height);



        glClearColor(0.0, 0.0, 0.1, 1.0);
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

        glViewport(0, 0, width-1, height-1);
        glMatrixMode(GL_PROJECTION);
        glLoadIdentity();

        glOrtho(0, width-1, height-1, 0,0,1);

        glMatrixMode(GL_MODELVIEW);


        for (int testa_flera=0; testa_flera<16; testa_flera++) {

            glLoadIdentity();
            glTranslatef(testa_flera*150, testa_flera*50+100, 0);
            glRotatef(testa_flera*10, 0,0,1);
            glTranslatef(0, testa_flera*50, 0);
            glScalef(0.5, 0.5, 0.5);


            float ta = fmod(nt+testa_flera*0.2, M_PI*2.0);
            float tb = fmod(nt*0.7+testa_flera*0.4, M_PI*2.0);
            float tc = fmod(nt*0.3+testa_flera*0.1, M_PI*2.0);
            float3 cam_vec = {sin(ta), sin(tb), sin(tc)};

            int res=cudaGraphicsMapResources(1, &test1, 0);
            //printf("res: %i (succ=%i)\n", res, cudaSuccess);
            struct cudaArray* dstArray = 0;
            int r2 = cudaGraphicsSubResourceGetMappedArray( &dstArray, test1, 0, 0 );
            //printf("r2: %i array: %p\n", r2, dstArray);

            first_test(g_dstBuffer, normal_map, cam_vec, 1024, 1024);

            cudaMemcpyToArray( dstArray, 0, 0, g_dstBuffer, bufferSize, cudaMemcpyDeviceToDevice );

            cudaGraphicsUnmapResources(1, &test1, 0);

            glColor3f(1,1,1);
            glBegin(GL_QUADS);
            glTexCoord2f(0,0);
            glVertex3f(0,0,0);

            glTexCoord2f(1,0);
            glVertex3f(511,0,0);

            glTexCoord2f(1,1);
            glVertex3f(511,511,0);

            glTexCoord2f(0,1);
            glVertex3f(0,511,0);
            glEnd();



        }






        glfwSwapBuffers(window);
        glfwPollEvents();
    }


    glfwDestroyWindow(window);

    glfwTerminate();
    return(EXIT_SUCCESS);
}
Esempio n. 15
0
void InitCudaLayers()
{

	mmGridSizeX = sim_width/blockSizex;
	mmGridSizeY = sim_height/blockSizey;
	mmGridSize = mmGridSizeX*mmGridSizeY;
	memset(mmGrid, 0, sizeof(mmGrid));
	memset(mmYGGrid, 0, sizeof(mmYGGrid));

	tempHostData = (float*)malloc(sim_width*sim_height*TEMP_HOST_ELEM*sizeof(float));
	tempHostDataNoCuda = (float*)malloc(sim_width*sim_height*TEMP_HOST_ELEM*sizeof(float));
	grid8ValTick = (float*)malloc(sim_width*sim_height*8*sizeof(float));

	initColors();

	memset(gCudaLayer, 0, sizeof(gCudaLayer));
	memset(gCudaFuncLayer, 0, sizeof(gCudaFuncLayer));
	memset(gPhysLayer, 0, sizeof(gPhysLayer));
	memset(gStateLayer, 0, sizeof(gStateLayer));

	srand(0);
	int seed = rand();

	const cudaChannelFormatDesc desc4 = cudaCreateChannelDesc<float4>();
	cudaMallocArray(&gCudaVectArray, &desc4, sim_width, sim_height);
#if NFLAYERS ==2
	const cudaChannelFormatDesc desc2 = cudaCreateChannelDesc<float2>();
#else if NFLAYERS ==4
	const cudaChannelFormatDesc descF = desc4;
#endif
	cudaMallocArray(&gCudaFlArray, &descF, sim_width, sim_height);

	const cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
	cudaMallocArray(&gCudaFuncWavePack, &desc, sim_width);
	cudaMallocArray(&gCudaFuncSmooth, &desc, sim_width);

	cudaMallocArray(&(gCudaLayer[0]), &desc, sim_width, sim_height);
	cudaMallocArray(&(gCudaLayer[1]), &desc, sim_width, sim_height);

	cudaMallocArray(&(gCudaFuncLayer[0]), &desc, sim_width, sim_height);

	cudaMalloc(&cuTempData, TEMP_SIZE*sizeof(float)*sim_width*sim_height);
	cudaMalloc(&cuRandArr, sizeof(unsigned int)*sim_width*sim_height);

	cudaMalloc(&gStateLayer[0], sim_rect*sizeof(float));
	cudaMemset(gStateLayer[0], 0, sim_rect*sizeof(float));
	cudaMalloc(&gStateLayer[1], sim_rect*sizeof(float));
	cudaMemset(gStateLayer[1], 0, sim_rect*sizeof(float));

	cudaMalloc(&gPhysLayer[0], sim_rect*sizeof(float));
	cudaMemset(gPhysLayer[0], 0, sim_rect*sizeof(float));
	cudaMalloc(&gPhysLayer[1], sim_rect*sizeof(float));
	cudaMemset(gPhysLayer[1], 0, sim_rect*sizeof(float));

	cudaMalloc(&gRedBlueField, NFLAYERS*sim_rect*sizeof(float));
	cudaMemset(gRedBlueField, 0, NFLAYERS*sim_rect*sizeof(float));

	size_t pitch = 4*sim_width*sizeof(float);
	cudaMallocPitch((void**)&gVectorLayer, &pitch, 4*sim_width*sizeof(float), sim_height);

	cudaMemset2D(gVectorLayer, 4*sim_width*sizeof(float), 0, 4*sim_width*sizeof(float), sim_height);	

	InitWavePack(32, 1.f, sim_width, sim_height, cuTempData, gCudaFuncWavePack);

	InitSmooth(1, sim_width, cuTempData, gCudaFuncSmooth);

	InitRnd2DInt(seed, cuRandArr, sim_width, sim_height);

	InitFuncLayer(gCudaFuncLayer[0], cuTempData, sim_width, sim_height);

	InitPhysLayer(gPhysLayer[0], gStateLayer[0], cuRandArr, sim_width, sim_height);

	float* gridIni = cuTempData+3*sim_rect/2;
	float* halfTemp = cuTempData + sim_rect;
	float* out = cuTempData + 2*sim_rect;
	cudaMemset(out, 0, sim_rect*sizeof(float));

	seed = rand();
	int gridx = INTERP_SIZEX;
	int gridy = INTERP_SIZEX;
	InitRnd2DF(seed, gridIni, gridx, gridy);
	float scaleadd = .7f;
	Spline2D(gridIni, gridx, gridy, halfTemp, scaleadd, out, sim_width, sim_height);

	seed = rand();
	gridx = (int)(gridx*2);
	gridy = (int)(gridy*2);
	InitRnd2DF(seed, gridIni, gridx, gridy);
	scaleadd = .3f;
	Spline2D(gridIni, gridx, gridy, halfTemp, scaleadd, out, sim_width, sim_height);


	cudaMemcpyToArray(gCudaLayer[0], 0, 0, out, sizeof(float)*sim_rect, cudaMemcpyDeviceToDevice);

	cudaMemset(out, 0, sim_rect*sizeof(float));
	gridx = INTERP_SIZEX;
	gridy = INTERP_SIZEX;

	seed = rand();
	InitRnd2DF(seed, gridIni, gridx, gridy);
	scaleadd = .7f;
	Spline2D(gridIni, gridx, gridy, halfTemp, scaleadd, out, sim_width, sim_height);

	seed = rand();
	gridx = (int)(gridx*1.5);
	gridy = (int)(gridy*1.5);
	InitRnd2DF(seed, gridIni, gridx, gridy);
	scaleadd = .3f;
	Spline2D(gridIni, gridx, gridy, halfTemp, scaleadd, out, sim_width, sim_height);

	cudaMemcpyToArray(gCudaLayer[1], 0, 0, out, sizeof(float)*sim_rect, cudaMemcpyDeviceToDevice);

	float2 pos0;
	pos0.x = gObj0X;
	pos0.y = gObj0Y;

	float2 pos1;
	pos1.x = gObj1X;
	pos1.y = gObj1Y;

	gObjInertia.Init(pos0, pos1);

	LayerProc(sim_width, sim_height, gCudaLayer[0], gCudaFuncLayer[0], cuTempData, pos0.x , pos0.y, pos1.x , pos1.y);
	ParticleStateInit(cuTempData, cuRandArr, 
					   gStateLayer[0], gPhysLayer[0], gRedBlueField);

	InitBhv();

}