TransferFunction::TransferFunction(vtkSmartPointer<vtkPiecewiseFunction> otf, vtkSmartPointer<vtkColorTransferFunction> ctf, QObject *parent) : QObject(parent) { opacityTF = otf; colorTF = ctf; this->otf = QSharedPointer<ctkTransferFunction>(new ctkVTKPiecewiseFunction(opacityTF)); this->ctf = QSharedPointer<ctkTransferFunction>(new ctkVTKColorTransferFunction(colorTF)); connect(this->otf.data(), SIGNAL(changed()), this, SLOT(onOpacityTFChanged())); connect(this->ctf.data(), SIGNAL(changed()), this, SLOT(onColorTFChanged())); compositeTex = 0; // initialize each table opacityTF->GetTable(0.0, 1.0, TABLE_SIZE, opacityTable); colorTF->GetTable(0.0, 1.0, TABLE_SIZE, colorTable); CompositeTable(); channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); CudaSafeCall(cudaMallocArray(&array, &channelDesc, TABLE_SIZE)); CudaSafeCall(cudaMemcpyToArray(array, 0, 0, compositeTable, sizeof(float) * TABLE_SIZE * 4, cudaMemcpyHostToDevice)); memset(&resourceDesc, 0, sizeof(resourceDesc)); resourceDesc.resType = cudaResourceTypeArray; resourceDesc.res.array.array = array; memset(&texDesc, 0, sizeof(texDesc)); texDesc.addressMode[0] = cudaAddressModeClamp; texDesc.filterMode = cudaFilterModeLinear; texDesc.normalizedCoords = true; texDesc.readMode = cudaReadModeElementType; CudaSafeCall(cudaCreateTextureObject(&compositeTex, &resourceDesc, &texDesc, NULL)); }
cudaChannelFormatDesc createChannelDesc(int x, int y, int z, int w, cudaChannelFormatKind f) { if (Ctx->isCreated()) { return cudaCreateChannelDesc(x, y, z, w, f); } return cudaChannelFormatDesc(); // ? Replace with pointer ? }
inline cudaArray* MallocArray2D< float2 >( VolumeDescription volumeDescription ) { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc( 32, 32, 0, 0, cudaChannelFormatKindFloat ); cudaArray* cuArray; MOJO_CUDA_SAFE( cudaMallocArray( &cuArray, &channelDesc, volumeDescription.numVoxels.x, volumeDescription.numVoxels.y ) ); return cuArray; }
inline cudaArray* MallocArray3D< uchar4 >( VolumeDescription volumeDescription ) { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc( 8, 8, 8, 8, cudaChannelFormatKindUnsigned ); cudaExtent volumeExtent = make_cudaExtent( volumeDescription.numVoxels.x, volumeDescription.numVoxels.y, volumeDescription.numVoxels.z ); cudaArray* cuArray; MOJO_CUDA_SAFE( cudaMalloc3DArray( &cuArray, &channelDesc, volumeExtent ) ); return cuArray; }
void SingleParticle2dx::Methods::CUDAProjectionMethod::prepareForProjections(SingleParticle2dx::DataStructures::ParticleContainer& cont) { cudaSetDevice(getMyGPU()); cudaStreamCreate(&m_stream); cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); cudaExtent VS = make_cudaExtent(m_size, m_size, m_size); if( m_alloc_done == false ) { cudaMalloc3DArray(&m_cuArray, &channelDesc, VS); } SingleParticle2dx::real_array3d_type real_data( boost::extents[m_size][m_size][m_size] ); m_context->getRealSpaceData(real_data); unsigned int size = m_size*m_size*m_size*sizeof(float); if( m_alloc_done == false ) { res_data_h = (float*)malloc(m_size*m_size*sizeof(float)); cudaMalloc((void**)&res_data_d, m_size*m_size*sizeof(float)); m_alloc_done = true; } cudaMemcpy3DParms copyParams = {0}; copyParams.srcPtr = make_cudaPitchedPtr((void*)real_data.origin(), VS.width*sizeof(float), VS.width, VS.height); copyParams.dstArray = m_cuArray; copyParams.extent = VS; copyParams.kind = cudaMemcpyHostToDevice; // cudaMemcpy3D(©Params); cudaMemcpy3DAsync(©Params, m_stream); struct cudaResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); resDesc.resType = cudaResourceTypeArray; resDesc.res.array.array = m_cuArray; struct cudaTextureDesc texDesc; memset(&texDesc, 0, sizeof(texDesc)); texDesc.addressMode[0] = cudaAddressModeClamp; texDesc.addressMode[1] = cudaAddressModeClamp; texDesc.addressMode[2] = cudaAddressModeClamp; texDesc.filterMode = cudaFilterModeLinear; texDesc.readMode = cudaReadModeElementType; texDesc.normalizedCoords = 0; if(m_alloc_done == true) { cudaDestroyTextureObject(m_texObj); } m_texObj = 0; cudaCreateTextureObject(&m_texObj, &resDesc, &texDesc, NULL); }
static void addImageToTextureFloatGray (vector<Mat > &imgs, cudaTextureObject_t texs[]) { for (size_t i=0; i<imgs.size(); i++) { int rows = imgs[i].rows; int cols = imgs[i].cols; // Create channel with floating point type cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc (32, 0, 0, 0, cudaChannelFormatKindFloat); // Allocate array with correct size and number of channels cudaArray *cuArray; checkCudaErrors(cudaMallocArray(&cuArray, &channelDesc, cols, rows)); checkCudaErrors (cudaMemcpy2DToArray (cuArray, 0, 0, imgs[i].ptr<float>(), imgs[i].step[0], cols*sizeof(float), rows, cudaMemcpyHostToDevice)); // Specify texture struct cudaResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); resDesc.resType = cudaResourceTypeArray; resDesc.res.array.array = cuArray; // Specify texture object parameters struct cudaTextureDesc texDesc; memset(&texDesc, 0, sizeof(texDesc)); texDesc.addressMode[0] = cudaAddressModeWrap; texDesc.addressMode[1] = cudaAddressModeWrap; texDesc.filterMode = cudaFilterModeLinear; texDesc.readMode = cudaReadModeElementType; texDesc.normalizedCoords = 0; // Create texture object checkCudaErrors(cudaCreateTextureObject(&(texs[i]), &resDesc, &texDesc, NULL)); //texs[i] = texObj; } return; }
Texture::Texture(const float4 *data, int numElements) { cudaResourceDesc desc; memset(&desc, 0, sizeof(desc)); desc.resType = cudaResourceTypeLinear; desc.res.linear.devPtr = const_cast<float4 *>(data); desc.res.linear.desc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); desc.res.linear.sizeInBytes = sizeof(float4)*numElements; cudaTextureDesc tdesc; memset(&tdesc, 0, sizeof(tdesc)); tdesc.addressMode[0] = cudaAddressModeBorder; tdesc.addressMode[1] = cudaAddressModeBorder; tdesc.addressMode[2] = cudaAddressModeBorder; CUDA_CHECK(cudaCreateTextureObject(&_texture, &desc, &tdesc, NULL)); }
CudaFloatTexture1D::CudaFloatTexture1D(int width, const double *data, CudaAction action, cudaStream_t stream, CudaInternalAPI *api) { channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); // Allocate the texture on the GPU... CUDA_SAFE_CALL(cudaMallocArray(&deviceArray, &channelDesc, width, 1)); // ... and in page-locked system memory CUDA_SAFE_CALL(cudaMallocHost((void**)&hostMem, sizeof(float) * width)); // Convert doubles to floats and save them to page-locked system memory std::transform(data, data + width, hostMem, typecast<float, double>); // Copy floats from the page-locked memory to the GPU CUDA_SAFE_CALL(cudaMemcpyToArrayAsync(deviceArray, 0, 0, hostMem, sizeof(float) * width, cudaMemcpyHostToDevice, stream)); if (action == BindToKernel) api->setDistDepDielTexture(deviceArray, &channelDesc); }
void costVol_chamo::updataCV(Mat refImg, Mat projMat, float weightPerImg){ cudaArray* cuArray; cudaTextureObject_t texObj; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned); cudaSafeCall(cudaMallocArray(&cuArray, &channelDesc, width, height)); cudaMemcpyToArray(cuArray, 0, 0, refImg.data, width*height*sizeof(float), cudaMemcpyHostToDevice); cudaSafeCall(cudaGetLastError()); struct cudaResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); resDesc.resType = cudaResourceTypeArray; resDesc.res.array.array = cuArray; struct cudaTextureDesc texDesc; memset(&texDesc, 0, sizeof(texDesc)); texDesc.addressMode[0] = cudaAddressModeClamp; texDesc.addressMode[1] = cudaAddressModeClamp; texDesc.filterMode = cudaFilterModeLinear; texDesc.readMode = cudaReadModeNormalizedFloat; texDesc.normalizedCoords = 0; cudaSafeCall(cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL)); Mat finalTran = projMat*baseImgProjMat.inv(); cvInput input; input.baseImg = (float *)baseImg.data; input.cvData = (float*)cvData.data; input.nearZ = nearZ; input.farZ = farZ; input.height = height; input.width = width; input.lowInd = (float*)lowInd.data; input.lowValue = (float*)bottonVal.data; for (int i = 0; i < 12; i++){ input.transMat[i] = finalTran.at<float>(i); } input.refImg = texObj; input.zStep = (nearZ - farZ) / layers; input.stepCount = layers; updataCount++; input.weightPerImg = 1.0 / updataCount; updataCVCaller(input); }
ErrorCode GpuBinaryImageAlgorithm<InputPixelType, InputBandCount, OutputPixelType, OutputBandCount>::initializeDevice() { /* * Attempts to check the GPU and begin warm up * */ this->lastError = this->setGpuDevice(); if(this->lastError) return this->lastError; if (this->properties.getMajorCompute() < 1) { this->lastError = InitFailNoCUDA; return this->lastError; } /* * Verfies the properities of GPU * */ cudaStreamCreate(&this->stream); cudaError cuer = cudaGetLastError(); if(cuer != cudaSuccess){ this->lastError = InitFailcuStreamCreateErrorcudaErrorInvalidValue; return this->lastError; } //Set descriptor of input data before allocation // Sets is at single channel, 16-bit unsigned integer std::string inTypeIdentifier(typeid(this->tempForTypeTesting).name()); size_t bitDepth = 0; cudaChannelFormatDesc inputDescriptor; if(inTypeIdentifier == "a" || inTypeIdentifier == "s" || inTypeIdentifier == "i" || inTypeIdentifier == "l") { this->channelType = cudaChannelFormatKindSigned; } else if(inTypeIdentifier == "h" || inTypeIdentifier == "t" || inTypeIdentifier == "j" || inTypeIdentifier == "m") { this->channelType = cudaChannelFormatKindUnsigned; } else if(inTypeIdentifier == "f" || inTypeIdentifier == "d") { this->channelType = cudaChannelFormatKindFloat; } else { this->lastError = InitFailUnsupportedInputType; return this->lastError; } bitDepth = sizeof(this->tempForTypeTesting) * 8; inputDescriptor = cudaCreateChannelDesc(bitDepth, 0, 0, 0, this->channelType); cuer = cudaGetLastError(); if (cuer != cudaSuccess) { this->lastError = CudaError; std::cout << "CUDA ERR = " << cuer << std::endl; throw std::runtime_error("GPU WHS INIT FAILED TO CREATE CHANNEL"); } if(cuer != cudaSuccess){ this->lastError = CudaError; return this->lastError; } ////////////////////////////////////////////////////////// // ALLOCATE MEMORY FOR GPU INPUT AND OUTPUT DATA (TILE) // ///////////////////////////////////////////////////////// cuer = cudaGetLastError(); /*Gpu Input Data*/ cudaMallocArray( (cudaArray**)&this->gpuInputDataArray, &inputDescriptor, this->dataSize.width, this->dataSize.height ); this->gpuInput = this->gpuInputDataArray; cuer = cudaGetLastError(); if (cuer != cudaSuccess) { std::cout << "CUDA ERR = " << cuer << std::endl; throw std::runtime_error("GPU WHS INIT FAILED TO ALLOCATE MEMORY"); } this->usingTexture = true; //Gpu Output Data const size_t bytes = this->dataSize.width * this->dataSize.height * OutputBandCount * sizeof(OutputPixelType); this->outputDataSize = bytes; cudaMalloc((void**) &this->gpuOutputData, bytes); cuer = cudaGetLastError(); if (cuer != cudaSuccess) { throw new std::runtime_error("GPU WHS INIT FAILED TO ALLOCATE OUTPUT MEMORY"); } if (cuer == cudaErrorMemoryAllocation) { this->lastError = InitFailcuOutArrayMemErrorcudaErrorMemoryAllocation; return this->lastError; } ////////////////////////////////////////////////////////////////////////////////////// // CALL FUNCTION TO ALLOCATE ADDITIONAL GPU STORAGE - DOES NOTHING IF NOT OVERRIDEN // ///////////////////////////////////////////////////////////////////////////////////// /* Initialize the neighborhood coordinates */ /*uses two ints to store width and height coords by the windowRadius_*/ /* * Allocates the memory needed for the results coming back from the GPU * */ this->lastError = this->allocateAdditionalGpuMemory(); return this->lastError; }
ErrorCode GpuBinaryImageAlgorithm<InputPixelType, InputBandCount, OutputPixelType, OutputBandCount>::operator()(const cvt::cvTile<InputPixelType>& tile, const cvt::cvTile<InputPixelType> &tile2,const cvt::cvTile<OutputPixelType> ** outTile) { //TO-DO Error Check Template Params for Type/Bounds const cv::Size2i tileSize = tile.getSize(); if (tileSize != this->dataSize) { std::stringstream ss; ss << tileSize << " expected of " << this->dataSize << std::endl; throw std::runtime_error(ss.str()); } if (tileSize != tile2.getSize()) { throw std::runtime_error("Both the incoming tiles must have different sizes"); } /* * Copy data down for tile using the parents implementation */ this->lastError = this->copyTileToDevice(tile); if (this->lastError != cvt::Ok) { throw std::runtime_error("Failed to copy tile to device"); } std::string inTypeIdentifier(typeid(this->tempForTypeTesting).name()); size_t bitDepth = 0; cudaChannelFormatDesc inputDescriptor; if(inTypeIdentifier == "a" || inTypeIdentifier == "s" || inTypeIdentifier == "i" || inTypeIdentifier == "l") { this->channelType = cudaChannelFormatKindSigned; } else if(inTypeIdentifier == "h" || inTypeIdentifier == "t" || inTypeIdentifier == "j" || inTypeIdentifier == "m") { this->channelType = cudaChannelFormatKindUnsigned; } else if(inTypeIdentifier == "f" || inTypeIdentifier == "d") { this->channelType = cudaChannelFormatKindFloat; } else { this->lastError = InitFailUnsupportedInputType; return this->lastError; } bitDepth = sizeof(this->tempForTypeTesting) * 8; cudaError cuer; inputDescriptor = cudaCreateChannelDesc(bitDepth, 0, 0, 0, this->channelType); cuer = cudaGetLastError(); if (cuer != cudaSuccess) { this->lastError = CudaError; std::cout << "CUDA ERR = " << cuer << std::endl; throw std::runtime_error("GPU BINARY IMAGE RUN FAILED TO CREATE CHANNEL"); } cudaChannelFormatDesc input_descriptor = cudaCreateChannelDesc(bitDepth, 0, 0, 0, cudaChannelFormatKindUnsigned); cudaMallocArray((cudaArray**)&gpuInputDataArrayTwo_, &input_descriptor, this->dataSize.width, this->dataSize.height); const unsigned int offsetX = 0; const unsigned int offsetY = 0; const unsigned char* tile_data_ptr = &(tile2[0].data[0]); const unsigned int tileArea = tile.getSize().area(); cudaMemcpyToArrayAsync(gpuInputDataArrayTwo_, // the device | destination address offsetX , offsetY, // the destination offsets tile_data_ptr, // the host | source address sizeof(InputPixelType) * tileArea, // the size of the copy in bytes cudaMemcpyHostToDevice, // the type of the copy this->stream); // the device command stream cuer = cudaGetLastError(); if (cuer != cudaSuccess) { std::cout << "CUDA ERR = " << cuer << std::endl; throw std::runtime_error("GPU BINARY IMAGE RUN FAILED TO ALLOCATE MEMORY"); } // Invoke kernel with empirically chosen block size unsigned short bW = 16; unsigned short bH = 16; launchKernel(bW, bH); this->lastError = this->copyTileFromDevice(outTile); if(this->lastError != cvt::Ok) { std::runtime_error("Failed copy off tile from device"); } return Ok; }
struct cudaChannelFormatDesc WINAPI wine_cudaCreateChannelDesc(int x, int y, int z, int w, enum cudaChannelFormatKind f) { WINE_TRACE("\n"); return cudaCreateChannelDesc(x, y, z, w, f); }