void TransferFunction::onColorTFChanged() { //std::cout<<"Color changed"<<std::endl; if(compositeTex) { CudaSafeCall(cudaDestroyTextureObject(compositeTex)); compositeTex = 0; } colorTF->GetTable(0.0, 1.0, TABLE_SIZE, colorTable); size_t j = 0, k = 0; for(size_t i = 0; i < TABLE_SIZE; ++i) { compositeTable[j++] = colorTable[k++]; compositeTable[j++] = colorTable[k++]; compositeTable[j++] = colorTable[k++]; j++; } //CompositeTable(); CudaSafeCall(cudaMemcpyToArray(array, 0, 0, compositeTable, sizeof(float) * TABLE_SIZE * 4, cudaMemcpyHostToDevice)); CudaSafeCall(cudaCreateTextureObject(&compositeTex, &resourceDesc, &texDesc, NULL)); Changed(); }
TransferFunction::TransferFunction(vtkSmartPointer<vtkPiecewiseFunction> otf, vtkSmartPointer<vtkColorTransferFunction> ctf, QObject *parent) : QObject(parent) { opacityTF = otf; colorTF = ctf; this->otf = QSharedPointer<ctkTransferFunction>(new ctkVTKPiecewiseFunction(opacityTF)); this->ctf = QSharedPointer<ctkTransferFunction>(new ctkVTKColorTransferFunction(colorTF)); connect(this->otf.data(), SIGNAL(changed()), this, SLOT(onOpacityTFChanged())); connect(this->ctf.data(), SIGNAL(changed()), this, SLOT(onColorTFChanged())); compositeTex = 0; // initialize each table opacityTF->GetTable(0.0, 1.0, TABLE_SIZE, opacityTable); colorTF->GetTable(0.0, 1.0, TABLE_SIZE, colorTable); CompositeTable(); channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); CudaSafeCall(cudaMallocArray(&array, &channelDesc, TABLE_SIZE)); CudaSafeCall(cudaMemcpyToArray(array, 0, 0, compositeTable, sizeof(float) * TABLE_SIZE * 4, cudaMemcpyHostToDevice)); memset(&resourceDesc, 0, sizeof(resourceDesc)); resourceDesc.resType = cudaResourceTypeArray; resourceDesc.res.array.array = array; memset(&texDesc, 0, sizeof(texDesc)); texDesc.addressMode[0] = cudaAddressModeClamp; texDesc.filterMode = cudaFilterModeLinear; texDesc.normalizedCoords = true; texDesc.readMode = cudaReadModeElementType; CudaSafeCall(cudaCreateTextureObject(&compositeTex, &resourceDesc, &texDesc, NULL)); }
cudaTextureObject_t create_environment_light_texture(const std::string& filename) { int w = 0, h = 0, n = 0; float* data = stbi_loadf(filename.c_str(), &w, &h, &n, 0); if(!data) { std::cerr<<"Unable to load environment map: "<<filename<<std::endl; exit(0); } //create channel desc cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>(); //create cudaArray cudaArray* array; checkCudaErrors(cudaMallocArray(&array, &channelDesc, w, h)); if(n == 3) { uint32_t count = w * h; std::vector<float4> ext_data; ext_data.reserve(count); for(auto i = 0; i < count; ++i) ext_data.push_back(make_float4(data[i * 3], data[i * 3 + 1], data[i * 3 + 2], 0.f)); checkCudaErrors(cudaMemcpyToArray(array, 0, 0, ext_data.data(), sizeof(float4) * w * h, cudaMemcpyHostToDevice)); } else checkCudaErrors(cudaMemcpyToArray(array, 0, 0, data, sizeof(float4) * w * h, cudaMemcpyHostToDevice)); //create resource desc cudaResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); resDesc.resType = cudaResourceTypeArray; resDesc.res.array.array = array; //create texture desc cudaTextureDesc texDesc; memset(&texDesc, 0, sizeof(texDesc)); texDesc.addressMode[0] = cudaAddressModeWrap; texDesc.addressMode[1] = cudaAddressModeWrap; texDesc.filterMode = cudaFilterModeLinear; texDesc.readMode = cudaReadModeElementType; texDesc.normalizedCoords = true; //create cudaTextureObject cudaTextureObject_t tex; checkCudaErrors(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL)); return tex; }
void CudaImagePyramidHost::copyFromHost(const void* source) { assert(isInitialized()); assert(_textureType == cudaTextureType2D); cudaMemcpyToArray(_storage, 0,0, source, _baseWidth*_baseHeight*_typeSize, cudaMemcpyHostToDevice); checkCUDAError("Memcpy error", _name); }
void CudaTexture::copyFrom(void * src, unsigned size) { std::cout<<" cu tex cpy "<<size<<"\n"; cudaArray * arr; cutilSafeCall(cudaGraphicsMapResources(1, &_cuda_tex_resource, 0)); cutilSafeCall(cudaGraphicsSubResourceGetMappedArray(&arr, _cuda_tex_resource, 0, 0)); cutilSafeCall(cudaMemcpyToArray(arr, 0, 0, src, size, cudaMemcpyDeviceToDevice)); cudaGraphicsUnmapResources(1, &_cuda_tex_resource, 0); }
cudaError_t cudaMemcpy3Dfix(const struct cudaMemcpy3DParms *param) { const cudaMemcpy3DParms& p = *param; // Use cudaMemcpy3D for 3D only // But it does not handle 2D or 1D copies well if (1<p.extent.depth) { return cudaMemcpy3D( &p ); } else if (1<p.extent.height) { // 2D copy // Arraycopy if (0 != p.srcArray && 0 == p.dstArray) { return cudaMemcpy2DFromArray(p.dstPtr.ptr, p.dstPtr.pitch, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.extent.height, p.kind); } else if(0 == p.srcArray && 0 != p.dstArray) { return cudaMemcpy2DToArray(p.dstArray, p.dstPos.x, p.dstPos.y, p.srcPtr.ptr, p.srcPtr.pitch, p.extent.width, p.extent.height, p.kind); } else if(0 != p.srcArray && 0 != p.dstArray) { return cudaMemcpy2DArrayToArray( p.dstArray, p.dstPos.x, p.dstPos.y, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.extent.height, p.kind); } else { return cudaMemcpy2D( p.dstPtr.ptr, p.dstPtr.pitch, p.srcPtr.ptr, p.srcPtr.pitch, p.extent.width, p.extent.height, p.kind ); } } else { // 1D copy // p.extent.width should not include pitch EXCEPTION_ASSERT( p.extent.width == p.dstPtr.xsize ); EXCEPTION_ASSERT( p.extent.width == p.srcPtr.xsize ); // Arraycopy if (0 != p.srcArray && 0 == p.dstArray) { return cudaMemcpyFromArray(p.dstPtr.ptr, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.kind); } else if(0 == p.srcArray && 0 != p.dstArray) { return cudaMemcpyToArray(p.dstArray, p.dstPos.x, p.dstPos.y, p.srcPtr.ptr, p.extent.width, p.kind); } else if(0 != p.srcArray && 0 != p.dstArray) { return cudaMemcpyArrayToArray(p.dstArray, p.dstPos.x, p.dstPos.y, p.srcArray, p.srcPos.x, p.srcPos.y, p.extent.width, p.kind); } else { return cudaMemcpy( p.dstPtr.ptr, p.srcPtr.ptr, p.extent.width, p.kind ); } } }
void generateCUDAImage() { unsigned int* out_data = cuda_dest_resource; dim3 block(16, 16, 1); dim3 grid(image_width / block.x, image_height / block.y, 1); launch_cudaProcess(grid, block, 0, out_data, image_width); cudaArray *texture_ptr; cudaGraphicsMapResources(1, &cuda_tex_result_resource, 0); cudaGraphicsSubResourceGetMappedArray(&texture_ptr, cuda_tex_result_resource, 0, 0); int num_texels = image_width * image_height; int num_values = num_texels * 4; int size_tex_data = sizeof(GLubyte) * num_values; cudaMemcpyToArray(texture_ptr, 0, 0, cuda_dest_resource, size_tex_data, cudaMemcpyDeviceToDevice); }
void processImage() { processLayer(sim_width, sim_height, cuda_dest_resource); cudaArray *texture_ptr; cutilSafeCall(cudaGraphicsMapResources(1, &cuda_tex_result_resource, 0)); cutilSafeCall(cudaGraphicsSubResourceGetMappedArray(&texture_ptr, cuda_tex_result_resource, 0, 0)); int num_texels = sim_width * sim_height; int num_values = num_texels * 4; int size_tex_data = sizeof(GLubyte) * num_values; cutilSafeCall(cudaMemcpyToArray(texture_ptr, 0, 0, cuda_dest_resource, size_tex_data, cudaMemcpyDeviceToDevice)); cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_tex_result_resource, 0)); }
SEXP R_auto_cudaMemcpyToArray(SEXP r_dst, SEXP r_wOffset, SEXP r_hOffset, SEXP r_src, SEXP r_count, SEXP r_kind) { SEXP r_ans = R_NilValue; cudaArray_t dst = (cudaArray_t) getRReference(r_dst); size_t wOffset = REAL(r_wOffset)[0]; size_t hOffset = REAL(r_hOffset)[0]; const void * src = GET_REF(r_src, const void ); size_t count = REAL(r_count)[0]; enum cudaMemcpyKind kind = (enum cudaMemcpyKind) INTEGER(r_kind)[0]; cudaError_t ans; ans = cudaMemcpyToArray(dst, wOffset, hOffset, src, count, kind); r_ans = Renum_convert_cudaError_t(ans) ; return(r_ans); }
void costVol_chamo::updataCV(Mat refImg, Mat projMat, float weightPerImg){ cudaArray* cuArray; cudaTextureObject_t texObj; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned); cudaSafeCall(cudaMallocArray(&cuArray, &channelDesc, width, height)); cudaMemcpyToArray(cuArray, 0, 0, refImg.data, width*height*sizeof(float), cudaMemcpyHostToDevice); cudaSafeCall(cudaGetLastError()); struct cudaResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); resDesc.resType = cudaResourceTypeArray; resDesc.res.array.array = cuArray; struct cudaTextureDesc texDesc; memset(&texDesc, 0, sizeof(texDesc)); texDesc.addressMode[0] = cudaAddressModeClamp; texDesc.addressMode[1] = cudaAddressModeClamp; texDesc.filterMode = cudaFilterModeLinear; texDesc.readMode = cudaReadModeNormalizedFloat; texDesc.normalizedCoords = 0; cudaSafeCall(cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL)); Mat finalTran = projMat*baseImgProjMat.inv(); cvInput input; input.baseImg = (float *)baseImg.data; input.cvData = (float*)cvData.data; input.nearZ = nearZ; input.farZ = farZ; input.height = height; input.width = width; input.lowInd = (float*)lowInd.data; input.lowValue = (float*)bottonVal.data; for (int i = 0; i < 12; i++){ input.transMat[i] = finalTran.at<float>(i); } input.refImg = texObj; input.zStep = (nearZ - farZ) / layers; input.stepCount = layers; updataCount++; input.weightPerImg = 1.0 / updataCount; updataCVCaller(input); }
//////////////////////////////////////////////////////////////////////////////// // Main program //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { float *h_Kernel, *h_Input, *h_Buffer, *h_OutputCPU, *h_OutputGPU; cudaArray *a_Src; cudaChannelFormatDesc floatTex = cudaCreateChannelDesc<float>(); float *d_Output; float gpuTime; StopWatchInterface *hTimer = NULL; const int imageW = 3072; const int imageH = 3072 / 2; const unsigned int iterations = 10; printf("[%s] - Starting...\n", argv[0]); // use command-line specified CUDA device, otherwise use device with highest Gflops/s findCudaDevice(argc, (const char **)argv); sdkCreateTimer(&hTimer); printf("Initializing data...\n"); h_Kernel = (float *)malloc(KERNEL_LENGTH * sizeof(float)); h_Input = (float *)malloc(imageW * imageH * sizeof(float)); h_Buffer = (float *)malloc(imageW * imageH * sizeof(float)); h_OutputCPU = (float *)malloc(imageW * imageH * sizeof(float)); h_OutputGPU = (float *)malloc(imageW * imageH * sizeof(float)); checkCudaErrors(cudaMallocArray(&a_Src, &floatTex, imageW, imageH)); checkCudaErrors(cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float))); srand(2009); for (unsigned int i = 0; i < KERNEL_LENGTH; i++) { h_Kernel[i] = (float)(rand() % 16); } for (unsigned int i = 0; i < imageW * imageH; i++) { h_Input[i] = (float)(rand() % 16); } setConvolutionKernel(h_Kernel); checkCudaErrors(cudaMemcpyToArray(a_Src, 0, 0, h_Input, imageW * imageH * sizeof(float), cudaMemcpyHostToDevice)); printf("Running GPU rows convolution (%u identical iterations)...\n", iterations); checkCudaErrors(cudaDeviceSynchronize()); sdkResetTimer(&hTimer); sdkStartTimer(&hTimer); for (unsigned int i = 0; i < iterations; i++) { convolutionRowsGPU( d_Output, a_Src, imageW, imageH ); } checkCudaErrors(cudaDeviceSynchronize()); sdkStopTimer(&hTimer); gpuTime = sdkGetTimerValue(&hTimer) / (float)iterations; printf("Average convolutionRowsGPU() time: %f msecs; //%f Mpix/s\n", gpuTime, imageW * imageH * 1e-6 / (0.001 * gpuTime)); //While CUDA kernels can't write to textures directly, this copy is inevitable printf("Copying convolutionRowGPU() output back to the texture...\n"); checkCudaErrors(cudaDeviceSynchronize()); sdkResetTimer(&hTimer); sdkStartTimer(&hTimer); checkCudaErrors(cudaMemcpyToArray(a_Src, 0, 0, d_Output, imageW * imageH * sizeof(float), cudaMemcpyDeviceToDevice)); checkCudaErrors(cudaDeviceSynchronize()); sdkStopTimer(&hTimer); gpuTime = sdkGetTimerValue(&hTimer); printf("cudaMemcpyToArray() time: %f msecs; //%f Mpix/s\n", gpuTime, imageW * imageH * 1e-6 / (0.001 * gpuTime)); printf("Running GPU columns convolution (%i iterations)\n", iterations); checkCudaErrors(cudaDeviceSynchronize()); sdkResetTimer(&hTimer); sdkStartTimer(&hTimer); for (int i = 0; i < iterations; i++) { convolutionColumnsGPU( d_Output, a_Src, imageW, imageH ); } checkCudaErrors(cudaDeviceSynchronize()); sdkStopTimer(&hTimer); gpuTime = sdkGetTimerValue(&hTimer) / (float)iterations; printf("Average convolutionColumnsGPU() time: %f msecs; //%f Mpix/s\n", gpuTime, imageW * imageH * 1e-6 / (0.001 * gpuTime)); printf("Reading back GPU results...\n"); checkCudaErrors(cudaMemcpy(h_OutputGPU, d_Output, imageW * imageH * sizeof(float), cudaMemcpyDeviceToHost)); printf("Checking the results...\n"); printf("...running convolutionRowsCPU()\n"); convolutionRowsCPU( h_Buffer, h_Input, h_Kernel, imageW, imageH, KERNEL_RADIUS ); printf("...running convolutionColumnsCPU()\n"); convolutionColumnsCPU( h_OutputCPU, h_Buffer, h_Kernel, imageW, imageH, KERNEL_RADIUS ); double delta = 0; double sum = 0; for (unsigned int i = 0; i < imageW * imageH; i++) { sum += h_OutputCPU[i] * h_OutputCPU[i]; delta += (h_OutputGPU[i] - h_OutputCPU[i]) * (h_OutputGPU[i] - h_OutputCPU[i]); } double L2norm = sqrt(delta / sum); printf("Relative L2 norm: %E\n", L2norm); printf("Shutting down...\n"); checkCudaErrors(cudaFree(d_Output)); checkCudaErrors(cudaFreeArray(a_Src)); free(h_OutputGPU); free(h_Buffer); free(h_Input); free(h_Kernel); sdkDeleteTimer(&hTimer); // cudaDeviceReset causes the driver to clean up all state. While // not mandatory in normal operation, it is good practice. It is also // needed to ensure correct operation when the application is being // profiled. Calling cudaDeviceReset causes all profile data to be // flushed before the application exits cudaDeviceReset(); if (L2norm > 1e-6) { printf("Test failed!\n"); exit(EXIT_FAILURE); } printf("Test passed\n"); exit(EXIT_SUCCESS); }
HRESULT CUDARGBDSensor::process(ID3D11DeviceContext* context) { HRESULT hr = S_OK; if (m_RGBDAdapter->process(context) == S_FALSE) return S_FALSE; //////////////////////////////////////////////////////////////////////////////////// // Process Color //////////////////////////////////////////////////////////////////////////////////// //Start Timing if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); } if (m_bFilterIntensityValues) gaussFilterFloat4Map(m_depthCameraData.d_colorData, m_RGBDAdapter->getColorMapResampledFloat4(), m_fBilateralFilterSigmaDIntensity, m_fBilateralFilterSigmaRIntensity, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight()); else copyFloat4Map(m_depthCameraData.d_colorData, m_RGBDAdapter->getColorMapResampledFloat4(), m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight()); // Stop Timing if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeFilterColor += m_timer.getElapsedTimeMS(); TimingLog::countTimeFilterColor++; } //////////////////////////////////////////////////////////////////////////////////// // Process Depth //////////////////////////////////////////////////////////////////////////////////// //Start Timing if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); } if (m_bFilterDepthValues) gaussFilterFloatMap(d_depthMapFilteredFloat, m_RGBDAdapter->getDepthMapResampledFloat(), m_fBilateralFilterSigmaD, m_fBilateralFilterSigmaR, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight()); else copyFloatMap(d_depthMapFilteredFloat, m_RGBDAdapter->getDepthMapResampledFloat(), m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight()); //TODO this call seems not needed as the depth map is overwriten later anyway later anyway... setInvalidFloatMap(m_depthCameraData.d_depthData, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight()); // Stop Timing if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeFilterDepth += m_timer.getElapsedTimeMS(); TimingLog::countTimeFilterDepth++; } //////////////////////////////////////////////////////////////////////////////////// // Render to Color Space //////////////////////////////////////////////////////////////////////////////////// //Start Timing if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.start(); } if (GlobalAppState::get().s_bUseCameraCalibration) { mat4f depthExt = m_RGBDAdapter->getDepthExtrinsics(); g_CustomRenderTarget.Clear(context); g_CustomRenderTarget.Bind(context); g_RGBDRenderer.RenderDepthMap(context, d_depthMapFilteredFloat, m_depthCameraData.d_colorData, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight(), m_RGBDAdapter->getDepthIntrinsicsInv(), depthExt, m_RGBDAdapter->getColorIntrinsics(), g_CustomRenderTarget.getWidth(), g_CustomRenderTarget.getHeight(), GlobalAppState::get().s_remappingDepthDiscontinuityThresOffset, GlobalAppState::get().s_remappingDepthDiscontinuityThresLin); g_CustomRenderTarget.Unbind(context); g_CustomRenderTarget.copyToCuda(m_depthCameraData.d_depthData, 0); //Util::writeToImage(m_depthCameraData.d_depthData, getDepthWidth(), getDepthHeight(), "depth.png"); //Util::writeToImage(m_depthCameraData.d_colorData, getDepthWidth(), getDepthHeight(), "color.png"); } else { copyFloatMap(m_depthCameraData.d_depthData, d_depthMapFilteredFloat, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight()); } bool bErode = false; if (bErode) { unsigned int numIter = 20; numIter = 2 * ((numIter + 1) / 2); for (unsigned int i = 0; i < numIter; i++) { if (i % 2 == 0) { erodeDepthMap(d_depthErodeHelper, m_depthCameraData.d_depthData, 5, getDepthWidth(), getDepthHeight(), 0.05f, 0.3f); } else { erodeDepthMap(m_depthCameraData.d_depthData, d_depthErodeHelper, 5, getDepthWidth(), getDepthHeight(), 0.05f, 0.3f); } } } //TODO check whether the intensity is actually used convertColorToIntensityFloat(d_intensityMapFilteredFloat, m_depthCameraData.d_colorData, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight()); float4x4 M((m_RGBDAdapter->getColorIntrinsicsInv()).ptr()); m_depthCameraData.updateParams(getDepthCameraParams()); convertDepthFloatToCameraSpaceFloat4(d_cameraSpaceFloat4, m_depthCameraData.d_depthData, M, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight(), m_depthCameraData); // !!! todo computeNormals(d_normalMapFloat4, d_cameraSpaceFloat4, m_RGBDAdapter->getWidth(), m_RGBDAdapter->getHeight()); float4x4 Mintrinsics((m_RGBDAdapter->getColorIntrinsics()).ptr()); cudaMemcpyToArray(m_depthCameraData.d_depthArray, 0, 0, m_depthCameraData.d_depthData, sizeof(float)*m_depthCameraParams.m_imageHeight*m_depthCameraParams.m_imageWidth, cudaMemcpyDeviceToDevice); cudaMemcpyToArray(m_depthCameraData.d_colorArray, 0, 0, m_depthCameraData.d_colorData, sizeof(float4)*m_depthCameraParams.m_imageHeight*m_depthCameraParams.m_imageWidth, cudaMemcpyDeviceToDevice); // Stop Timing if (GlobalAppState::get().s_timingsDetailledEnabled) { cutilSafeCall(cudaDeviceSynchronize()); m_timer.stop(); TimingLog::totalTimeRemapDepth += m_timer.getElapsedTimeMS(); TimingLog::countTimeRemapDepth++; } return hr; }
cudaError_t WINAPI wine_cudaMemcpyToArray( struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind ) { WINE_TRACE("\n"); return cudaMemcpyToArray( dst, wOffset, hOffset, src, count, kind ); }
int window_loop() { GLFWwindow* window; window = glfwCreateWindow(640, 480, "Shader test", NULL, NULL); if (!window) { glfwTerminate(); return 0; } glfwMakeContextCurrent(window); //glfwSetInputMode(window, GLFW_CURSOR, GLFW_CURSOR_DISABLED); glfwSetKeyCallback(window, key_callback); //glfwSetCursorPosCallback(window, cursor_callback); //glEnable(GL_CULL_FACE); glEnable(GL_LIGHT0); //glEnable(GL_DEPTH_TEST); // glEnable(GL_LIGHTING); // glEnable(GL_BLEND); // glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_ADD); // glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO); // glEnable(GL_NORMALIZE); //glEnable(GL_NORMALIZE); glEnable(GL_TEXTURE_2D); //glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); // glPolygonMode( GL_FRONT, GL_LINE ); // glPolygonMode( GL_BACK, GL_POINT ); // glEnable(GL_COLOR_MATERIAL); cudaGLSetGLDevice(0); double ot, nt = glfwGetTime(); GLuint textureID[6]; glGenTextures(1, textureID); png_bytep* tex1; int lw, lh; printf("Laddar PNG\n"); read_png_file("/srv/texturer/Slate Tiles - (Normal Map).png", &tex1, &lw, &lh); printf("Laddade textur som är %i x %i pixelitaz stor.\n", lw, lh); float3* normal_map = NULL; size_t normal_map_bufferSize = 1024 * 1024 * sizeof(float3); cudaMalloc( &normal_map, normal_map_bufferSize ); float3* host_normal_map = calloc(1024*1024, sizeof(float3)); glBindTexture(GL_TEXTURE_2D, textureID[0]); for (int y=0; y<1024; y++) { for (int x=0; x<1024; x++) { host_normal_map[y*1024+x].x = (float)(tex1[y][x*3+0]-127) / 127; host_normal_map[y*1024+x].y = (float)(tex1[y][x*3+1]-127) / 127; host_normal_map[y*1024+x].z = (float)(tex1[y][x*3+2]-127) / 127; } } cudaMemcpy(normal_map, host_normal_map, normal_map_bufferSize, cudaMemcpyHostToDevice); glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA, 1024, 1024, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); double cx, cy; glfwGetCursorPos(window, &cx, &cy); struct cudaGraphicsResource *test1; int r1=cudaGraphicsGLRegisterImage(&test1, textureID[0], GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard); printf("r1=%i\n"); uchar4* g_dstBuffer = NULL; size_t bufferSize = 1024 * 1024 * sizeof(uchar4); cudaMalloc( &g_dstBuffer, bufferSize ); cudaMemset(g_dstBuffer, 0x7F, bufferSize); //Make texture gray to start with printf("cuda alloc: %p\n", g_dstBuffer); double fps_time =0 ; int fps_count=0; while (!glfwWindowShouldClose(window)) { ot=nt; nt =glfwGetTime(); float dt = nt - ot; fps_time += dt; fps_count++; if (fps_time > 1) { printf("FPS: %f\n", fps_count/fps_time); fps_time=0; fps_count =0; } int width, height; glfwGetFramebufferSize(window, &width, &height); glClearColor(0.0, 0.0, 0.1, 1.0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glViewport(0, 0, width-1, height-1); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0, width-1, height-1, 0,0,1); glMatrixMode(GL_MODELVIEW); for (int testa_flera=0; testa_flera<16; testa_flera++) { glLoadIdentity(); glTranslatef(testa_flera*150, testa_flera*50+100, 0); glRotatef(testa_flera*10, 0,0,1); glTranslatef(0, testa_flera*50, 0); glScalef(0.5, 0.5, 0.5); float ta = fmod(nt+testa_flera*0.2, M_PI*2.0); float tb = fmod(nt*0.7+testa_flera*0.4, M_PI*2.0); float tc = fmod(nt*0.3+testa_flera*0.1, M_PI*2.0); float3 cam_vec = {sin(ta), sin(tb), sin(tc)}; int res=cudaGraphicsMapResources(1, &test1, 0); //printf("res: %i (succ=%i)\n", res, cudaSuccess); struct cudaArray* dstArray = 0; int r2 = cudaGraphicsSubResourceGetMappedArray( &dstArray, test1, 0, 0 ); //printf("r2: %i array: %p\n", r2, dstArray); first_test(g_dstBuffer, normal_map, cam_vec, 1024, 1024); cudaMemcpyToArray( dstArray, 0, 0, g_dstBuffer, bufferSize, cudaMemcpyDeviceToDevice ); cudaGraphicsUnmapResources(1, &test1, 0); glColor3f(1,1,1); glBegin(GL_QUADS); glTexCoord2f(0,0); glVertex3f(0,0,0); glTexCoord2f(1,0); glVertex3f(511,0,0); glTexCoord2f(1,1); glVertex3f(511,511,0); glTexCoord2f(0,1); glVertex3f(0,511,0); glEnd(); } glfwSwapBuffers(window); glfwPollEvents(); } glfwDestroyWindow(window); glfwTerminate(); return(EXIT_SUCCESS); }
void InitCudaLayers() { mmGridSizeX = sim_width/blockSizex; mmGridSizeY = sim_height/blockSizey; mmGridSize = mmGridSizeX*mmGridSizeY; memset(mmGrid, 0, sizeof(mmGrid)); memset(mmYGGrid, 0, sizeof(mmYGGrid)); tempHostData = (float*)malloc(sim_width*sim_height*TEMP_HOST_ELEM*sizeof(float)); tempHostDataNoCuda = (float*)malloc(sim_width*sim_height*TEMP_HOST_ELEM*sizeof(float)); grid8ValTick = (float*)malloc(sim_width*sim_height*8*sizeof(float)); initColors(); memset(gCudaLayer, 0, sizeof(gCudaLayer)); memset(gCudaFuncLayer, 0, sizeof(gCudaFuncLayer)); memset(gPhysLayer, 0, sizeof(gPhysLayer)); memset(gStateLayer, 0, sizeof(gStateLayer)); srand(0); int seed = rand(); const cudaChannelFormatDesc desc4 = cudaCreateChannelDesc<float4>(); cudaMallocArray(&gCudaVectArray, &desc4, sim_width, sim_height); #if NFLAYERS ==2 const cudaChannelFormatDesc desc2 = cudaCreateChannelDesc<float2>(); #else if NFLAYERS ==4 const cudaChannelFormatDesc descF = desc4; #endif cudaMallocArray(&gCudaFlArray, &descF, sim_width, sim_height); const cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>(); cudaMallocArray(&gCudaFuncWavePack, &desc, sim_width); cudaMallocArray(&gCudaFuncSmooth, &desc, sim_width); cudaMallocArray(&(gCudaLayer[0]), &desc, sim_width, sim_height); cudaMallocArray(&(gCudaLayer[1]), &desc, sim_width, sim_height); cudaMallocArray(&(gCudaFuncLayer[0]), &desc, sim_width, sim_height); cudaMalloc(&cuTempData, TEMP_SIZE*sizeof(float)*sim_width*sim_height); cudaMalloc(&cuRandArr, sizeof(unsigned int)*sim_width*sim_height); cudaMalloc(&gStateLayer[0], sim_rect*sizeof(float)); cudaMemset(gStateLayer[0], 0, sim_rect*sizeof(float)); cudaMalloc(&gStateLayer[1], sim_rect*sizeof(float)); cudaMemset(gStateLayer[1], 0, sim_rect*sizeof(float)); cudaMalloc(&gPhysLayer[0], sim_rect*sizeof(float)); cudaMemset(gPhysLayer[0], 0, sim_rect*sizeof(float)); cudaMalloc(&gPhysLayer[1], sim_rect*sizeof(float)); cudaMemset(gPhysLayer[1], 0, sim_rect*sizeof(float)); cudaMalloc(&gRedBlueField, NFLAYERS*sim_rect*sizeof(float)); cudaMemset(gRedBlueField, 0, NFLAYERS*sim_rect*sizeof(float)); size_t pitch = 4*sim_width*sizeof(float); cudaMallocPitch((void**)&gVectorLayer, &pitch, 4*sim_width*sizeof(float), sim_height); cudaMemset2D(gVectorLayer, 4*sim_width*sizeof(float), 0, 4*sim_width*sizeof(float), sim_height); InitWavePack(32, 1.f, sim_width, sim_height, cuTempData, gCudaFuncWavePack); InitSmooth(1, sim_width, cuTempData, gCudaFuncSmooth); InitRnd2DInt(seed, cuRandArr, sim_width, sim_height); InitFuncLayer(gCudaFuncLayer[0], cuTempData, sim_width, sim_height); InitPhysLayer(gPhysLayer[0], gStateLayer[0], cuRandArr, sim_width, sim_height); float* gridIni = cuTempData+3*sim_rect/2; float* halfTemp = cuTempData + sim_rect; float* out = cuTempData + 2*sim_rect; cudaMemset(out, 0, sim_rect*sizeof(float)); seed = rand(); int gridx = INTERP_SIZEX; int gridy = INTERP_SIZEX; InitRnd2DF(seed, gridIni, gridx, gridy); float scaleadd = .7f; Spline2D(gridIni, gridx, gridy, halfTemp, scaleadd, out, sim_width, sim_height); seed = rand(); gridx = (int)(gridx*2); gridy = (int)(gridy*2); InitRnd2DF(seed, gridIni, gridx, gridy); scaleadd = .3f; Spline2D(gridIni, gridx, gridy, halfTemp, scaleadd, out, sim_width, sim_height); cudaMemcpyToArray(gCudaLayer[0], 0, 0, out, sizeof(float)*sim_rect, cudaMemcpyDeviceToDevice); cudaMemset(out, 0, sim_rect*sizeof(float)); gridx = INTERP_SIZEX; gridy = INTERP_SIZEX; seed = rand(); InitRnd2DF(seed, gridIni, gridx, gridy); scaleadd = .7f; Spline2D(gridIni, gridx, gridy, halfTemp, scaleadd, out, sim_width, sim_height); seed = rand(); gridx = (int)(gridx*1.5); gridy = (int)(gridy*1.5); InitRnd2DF(seed, gridIni, gridx, gridy); scaleadd = .3f; Spline2D(gridIni, gridx, gridy, halfTemp, scaleadd, out, sim_width, sim_height); cudaMemcpyToArray(gCudaLayer[1], 0, 0, out, sizeof(float)*sim_rect, cudaMemcpyDeviceToDevice); float2 pos0; pos0.x = gObj0X; pos0.y = gObj0Y; float2 pos1; pos1.x = gObj1X; pos1.y = gObj1Y; gObjInertia.Init(pos0, pos1); LayerProc(sim_width, sim_height, gCudaLayer[0], gCudaFuncLayer[0], cuTempData, pos0.x , pos0.y, pos1.x , pos1.y); ParticleStateInit(cuTempData, cuRandArr, gStateLayer[0], gPhysLayer[0], gRedBlueField); InitBhv(); }