/**
 * Releases the resources guarded by mCudaIsAllocated: the NMS texture,
 * the per-octave feature counters and CUDPP scan plans, and the device/host
 * "feature found" buffers.
 *
 * The function is a no-op when mCudaIsAllocated is false. After a successful
 * teardown the flag is cleared, so calling this function again does not
 * unregister/destroy/free the same resources a second time.
 */
void GPUSurfFeatureDetector::freeCudaResource()
{
	// Nothing to do if the resources were never allocated or were already freed.
	if (!mCudaIsAllocated)
		return;

	//Unregistering Texture
	mCudaNMSTexture->unregister();
	//mCudaFeatureCudaTexture->unregister();
	//mCudaFeatureTexture->unregister();

	//Deleting Texture
	mCudaRoot->getTextureManager()->destroyTexture(mCudaNMSTexture);
	//mCudaRoot->getTextureManager()->destroyTexture(mCudaFeatureCudaTexture);
	//mCudaRoot->getTextureManager()->destroyTexture(mCudaFeatureTexture);

	//Deallocating buffers for 2-pass feature location extraction on GPU
	//(one pair of counters and one scan plan per octave)
	for (int i=0; i<mNbOctave; ++i)
	{
		cudaFree(mDeviceFeatureCounterPass1[i]);
		cudaFree(mDeviceFeatureCounterPass2[i]);
		cudppDestroyPlan(mDeviceScanPlan[i]);
	}

	//Deallocating buffer for Feature location on GPU + CPU.
	//The pointers are reset so that a stale use shows up as a null access
	//instead of a use-after-free.
	cudaFree(mDeviceFeatureFound);
	mDeviceFeatureFound = NULL;
	delete[] mHostFeatureFound;
	mHostFeatureFound = NULL;

	// Mark the resources as released; this is what makes a repeated call safe.
	mCudaIsAllocated = false;
}
// Releases everything this application allocated: the position/normal
// storage (plain device buffers in QA-readback mode, VBOs plus the timer
// otherwise), the CUDPP scan plan, the lookup tables, the per-voxel work
// buffers, the optional volume buffer and the two heap-allocated helpers.
void cleanup()
{
    if (!g_bQAReadback) {
        // Interactive path: the timer exists and geometry lives in VBOs.
        cutilCheckError( cutDeleteTimer( timer ));

        deleteVBO(&posVbo,    &cuda_posvbo_resource);
        deleteVBO(&normalVbo, &cuda_normalvbo_resource);
    } else {
        // QA-readback path: geometry lives in plain device allocations.
        cudaFree(d_pos);
        cudaFree(d_normal);
    }

    cudppDestroyPlan(scanplan);

    // Lookup tables.
    cutilSafeCall(cudaFree(d_edgeTable));
    cutilSafeCall(cudaFree(d_triTable));
    cutilSafeCall(cudaFree(d_numVertsTable));

    // Per-voxel work buffers.
    cutilSafeCall(cudaFree(d_voxelVerts));
    cutilSafeCall(cudaFree(d_voxelVertsScan));
    cutilSafeCall(cudaFree(d_voxelOccupied));
    cutilSafeCall(cudaFree(d_voxelOccupiedScan));
    cutilSafeCall(cudaFree(d_compVoxelArray));

    // The volume buffer is only freed when the pointer is non-null.
    if (d_volume) {
        cutilSafeCall(cudaFree(d_volume));
    }

    // Deleting a null pointer is a no-op, so no explicit guard is needed.
    delete g_CheckRender;
    g_CheckRender = NULL;

    delete g_FrameBufferObject;
    g_FrameBufferObject = NULL;
}
Beispiel #3
0
// Destructor: frees the device buffers owned by this object, destroys the
// CUDPP scan plan and finally calls cudaThreadExit().
CMarchingCubes::~CMarchingCubes(void)
{
	// The volume buffer is only freed when the pointer is non-null.
	if (m_pdVolume)
	{
		cutilSafeCall(cudaFree(m_pdVolume));
	}

	// Lookup tables.
	cutilSafeCall(cudaFree(m_pdEdgeTable));
	cutilSafeCall(cudaFree(m_pdTriTable));
	cutilSafeCall(cudaFree(m_pdNumVertsTable));

	// Per-voxel work buffers.
	cutilSafeCall(cudaFree(m_pdVoxelVerts));
	cutilSafeCall(cudaFree(m_pdVoxelVertsScan));
	cutilSafeCall(cudaFree(m_pdVoxelOccupied));
	cutilSafeCall(cudaFree(m_pdVoxelOccupiedScan));
	cutilSafeCall(cudaFree(m_pdCompactedVoxelArray));

	// Scan plan first, then the runtime teardown call as the very last step.
	cudppDestroyPlan(m_Scanplan);
	cudaThreadExit();
}
// Releases the rendering resources held by this object: the CUDPP plan in
// m_sortHandle, five heap-allocated program objects, one texture registered
// with TextureManager, three OpenGL texture names and two heap-allocated
// FBO objects.
// NOTE(review): none of the deleted pointers or handles is reset afterwards,
// so calling this twice would release them twice - confirm callers invoke it
// only once per initialisation.
void VHParticlesRender::clearParticlesRender(){

	// Destroy the CUDPP plan stored in m_sortHandle.
	cudppDestroyPlan(m_sortHandle);

	// Program objects (presumably shader programs, judging by the names -
	// their types are not visible here).
	delete simpleSpriteProg;
	delete shadowedSpriteProg;
	delete shadowMapSpriteProg;
	delete displayTexProg;
	delete blurProg;

	// Texture that was registered with the TextureManager singleton under id1.
	TextureManager::Inst()->UnloadTexture(id1);

	// Two texture names are read from lightTex, one from imageTex.
	glDeleteTextures(2, lightTex);
    glDeleteTextures(1, &imageTex);

	// Framebuffer wrapper objects.
	delete imageFbo;
	delete lightFbo;

}
Beispiel #5
0
// Releases the base-class resources first, then the four device buffers and
// the scan plan owned by this class, leaving every member in its "empty"
// state (NULL pointers, zero plan handle, zero unique-key count).
void CompactingHashTable::Release() {
    HashTable::Release();

    // Free each device buffer and clear its pointer right away.
    CUDA_SAFE_CALL(cudaFree(d_unique_keys_));
    d_unique_keys_ = NULL;

    CUDA_SAFE_CALL(cudaFree(d_scratch_cuckoo_keys_));
    d_scratch_cuckoo_keys_ = NULL;

    CUDA_SAFE_CALL(cudaFree(d_scratch_counts_));
    d_scratch_counts_ = NULL;

    CUDA_SAFE_CALL(cudaFree(d_scratch_unique_ids_));
    d_scratch_unique_ids_ = NULL;

    // A zero handle means there is no plan to destroy.
    if (scanplan_) {
      cudppDestroyPlan(scanplan_);
      scanplan_ = 0;
    }

    unique_keys_size_ = 0;
}
Beispiel #6
0
void MultivalueHashTable::Release() {
    HashTable::Release();

    if (scanplan_) {
      cudppDestroyPlan(scanplan_);
      scanplan_ = 0;
    }

    cudaFree(d_index_counts_);
    cudaFree(d_sorted_values_);
    cudaFree(d_scratch_offsets_);
    cudaFree(d_scratch_is_unique_);
    cudaFree(d_unique_keys_);

    d_index_counts_      = NULL;
    d_sorted_values_     = NULL;
    d_scratch_offsets_   = NULL;
    d_scratch_is_unique_ = NULL;
    d_unique_keys_       = NULL;
}
Beispiel #7
0
  /*
    Sorts numKeys (key, value) pairs of 32-bit integers on the GPU with CUDPP
    and derives per-unique-key bookkeeping from the sorted keys.

    keys          - in: numKeys ints, copied to the GPU with memcpyHtoD.
                    out: the first numUniqueKeys ints are overwritten with the
                    compacted keys produced by the last step.
    vals          - in: numKeys ints. out: overwritten with the sorted values.
    numKeys       - number of pairs. 0 returns immediately; more than
                    32 * 1048576 delegates to executeOnCPUAsync.
    numUniqueKeys - out: first element of the backward exclusive scan, plus 1.
    keyOffsets    - out: set to NULL only on the numKeys == 0 path.
                    NOTE(review): the GPU path never writes *keyOffsets -
                    confirm callers do not read it.
    valOffsets    - out: buffer of numUniqueKeys ints obtained from
                    cudacpp::Runtime::mallocHost (presumably owned by the
                    caller afterwards - confirm).
    numVals       - out: buffer of numUniqueKeys ints obtained from
                    cudacpp::Runtime::mallocHost (same ownership caveat).

    NOTE(review): despite the "Async" suffix the function calls
    cudacpp::Runtime::sync() after every stage.
  */
  void IntIntSorter::executeOnGPUAsync(void * const keys, void * const vals, const int numKeys, int & numUniqueKeys, int ** keyOffsets, int ** valOffsets, int ** numVals)
  {
    // Empty input: report zero unique keys and hand back NULL arrays.
    if (numKeys == 0)
    {
      numUniqueKeys = 0;
      *keyOffsets = *valOffsets = NULL;
      *numVals = NULL;
      return;
    }
    // Inputs above 32M pairs are delegated to the CPU implementation.
    if (numKeys > 32 * 1048576)
    {
      executeOnCPUAsync(keys, vals, numKeys, numUniqueKeys, keyOffsets, valOffsets, numVals);
      return;
    }
    // NOTE(review): commRank is queried but not used anywhere in this function.
    int commRank;
    MPI_Comm_rank(MPI_COMM_WORLD, &commRank);
    CUDPPConfiguration cudppConfig;
    CUDPPHandle planHandle;

    // Four device buffers of numKeys ints each; they are reused for different
    // roles by the steps below (see the per-step comments).
    void * gpuInputKeys   = cudacpp::Runtime::malloc(sizeof(int) * numKeys);
    void * gpuInputVals   = cudacpp::Runtime::malloc(sizeof(int) * numKeys);
    void * gpuUniqueFlags = cudacpp::Runtime::malloc(sizeof(int) * numKeys);
    void * gpuValOffsets  = cudacpp::Runtime::malloc(sizeof(int) * numKeys);

    cudacpp::Runtime::memcpyHtoD(gpuInputKeys, keys, sizeof(int) * numKeys);
    cudacpp::Runtime::memcpyHtoD(gpuInputVals, vals, sizeof(int) * numKeys);

    /*
      what we need to get out of here:

      1 - sorted keys and values
      2 - num unique keys
      3 - number of values for each key
      4 - value offsets
      5 - compacted keys

      to get:
          simply sort
          A = find unique values
          B = reverse exclusive scan of "A"
          C = if A[i] == 1
                C[B[0] - B[i]] = i
          D = [0] = C[0] + 1
              [N] = #keys - C[#keys - 1]
              [i] = C[i + 1] - C[i]
          E = forward exclusive scan D
          F = keys[E[i]]

      1 = result of sort (only copy the values)
      2 = B[0]
      3 = D
      4 = E
      5 = F
    */

    // 1 - key/value radix sort over all 32 key bits; only the sorted values
    //     are copied back to the host here.
    // NOTE(review): the data is sorted as CUDPP_UINT although the buffers are
    // sized and described as int - confirm negative keys are not expected.
    cudppConfig.algorithm  = CUDPP_SORT_RADIX;
    cudppConfig.op         = CUDPP_ADD; // ignored
    cudppConfig.datatype   = CUDPP_UINT;
    cudppConfig.options    = CUDPP_OPTION_KEY_VALUE_PAIRS;
    cudppPlan(&planHandle, cudppConfig, numKeys, 1, numKeys * sizeof(int));
    cudppSort(planHandle, gpuInputKeys, gpuInputVals, sizeof(int) * 8, numKeys);
    cudppDestroyPlan(planHandle);
    cudacpp::Runtime::sync();
    // cudacpp::Runtime::memcpyDtoH(keys, gpuInputKeys, sizeof(int) * numKeys);
    cudacpp::Runtime::memcpyDtoH(vals, gpuInputVals, sizeof(int) * numKeys);

    // 2 - A = gpuUniqueFlags (flags derived from the sorted keys, per the plan above)
    gpmrIntIntSorterMarkUnique(gpuInputKeys, gpuUniqueFlags, numKeys);

    // 2 - B = gpuValOffsets (backward exclusive sum scan of A)
    cudppConfig.algorithm  = CUDPP_SCAN;
    cudppConfig.op         = CUDPP_ADD; // scan operator: addition
    cudppConfig.datatype   = CUDPP_INT;
    cudppConfig.options    = CUDPP_OPTION_EXCLUSIVE | CUDPP_OPTION_BACKWARD;
    cudppPlan(&planHandle, cudppConfig, numKeys, 1, numKeys * sizeof(int));
    cudppScan(planHandle, gpuValOffsets, gpuUniqueFlags, numKeys);
    cudppDestroyPlan(planHandle);
    cudacpp::Runtime::sync();
    // Only the first scan element (B[0]) is read back. Being exclusive, it
    // does not include A[0], hence the increment (presumably A[0] is always
    // flagged - confirm against gpmrIntIntSorterMarkUnique).
    cudacpp::Runtime::memcpyDtoH(&numUniqueKeys, gpuValOffsets, sizeof(int));
    ++numUniqueKeys;

    // 2 - C = gpuInputVals and
    // 3 - D = gpuValOffsets
    // gpuInputVals is reused as scratch from here on (its sorted contents were
    // already copied to the host), and gpuValOffsets is passed twice to the
    // helper, so B is replaced by D in the same buffer.
    cudacpp::Runtime::sync();
    gpmrIntIntSorterFindOffsets(gpuInputKeys, gpuUniqueFlags, gpuValOffsets, gpuInputVals, gpuValOffsets, numKeys, numUniqueKeys);
    *numVals = reinterpret_cast<int * >(cudacpp::Runtime::mallocHost(numUniqueKeys * sizeof(int)));
    cudacpp::Runtime::sync();
    cudacpp::Runtime::memcpyDtoH(*numVals, gpuValOffsets, sizeof(int) * numUniqueKeys);
    cudacpp::Runtime::sync();

    // 4 - E = gpuUniqueFlags (forward exclusive sum scan of D; overwrites A)
    // The scan runs over all numKeys elements although only the first
    // numUniqueKeys results are copied out.
    cudppConfig.algorithm  = CUDPP_SCAN;
    cudppConfig.op         = CUDPP_ADD; // scan operator: addition
    cudppConfig.datatype   = CUDPP_INT;
    cudppConfig.options    = CUDPP_OPTION_EXCLUSIVE | CUDPP_OPTION_FORWARD;
    cudppPlan(&planHandle, cudppConfig, numKeys, 1, numKeys * sizeof(int));
    cudppScan(planHandle, gpuUniqueFlags, gpuValOffsets, numKeys);
    cudppDestroyPlan(planHandle);
    cudacpp::Runtime::sync();
    *valOffsets = reinterpret_cast<int * >(cudacpp::Runtime::mallocHost(numUniqueKeys * sizeof(int)));
    cudacpp::Runtime::memcpyDtoH(*valOffsets, gpuUniqueFlags, sizeof(int) * numUniqueKeys);

    // 5 - F = gpuInputVals (compacted keys), written back over the start of
    //     the caller's keys buffer.
    gpmrIntIntSorterSetCompactedKeys(gpuInputKeys, gpuUniqueFlags, gpuInputVals, numUniqueKeys);
    cudacpp::Runtime::memcpyDtoH(keys, gpuInputVals, sizeof(int) * numUniqueKeys);

    // Release the four device work buffers.
    cudacpp::Runtime::free(gpuInputKeys);
    cudacpp::Runtime::free(gpuInputVals);
    cudacpp::Runtime::free(gpuUniqueFlags);
    cudacpp::Runtime::free(gpuValOffsets);
  }
Beispiel #8
0
// Builds the multi-value hash table from n key/value pairs.
//
// Parameters:
//   n      - number of key/value pairs. Must be greater than zero: the
//            unique-key count is read from element n - 1 of the scan output.
//   d_keys - device pointer to n keys (copied device-to-device, not modified).
//   d_vals - device pointer to n values (copied device-to-device, not modified).
//
// Returns the result of HashTable::Build on the compacted unique keys, or
// false when the CUDPP sort plan cannot be created or the sort itself fails.
//
// Resource handling: the sort plan is created and destroyed inside this call;
// on every early-exit path the temporary key/value copies are freed. On
// success the sorted value copy becomes d_sorted_values_ and the temporary
// key copy is freed before returning.
bool MultivalueHashTable::Build(const unsigned  n,
                                const unsigned *d_keys,
                                const unsigned *d_vals)
{
    CUDA_CHECK_ERROR("Failed before build.");

    // Work on private copies so the caller's arrays stay untouched by the sort.
    unsigned *d_sorted_keys = NULL;
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_sorted_keys, sizeof(unsigned) * n));
    CUDA_SAFE_CALL(cudaMemcpy(d_sorted_keys, d_keys, sizeof(unsigned) * n,
                              cudaMemcpyDeviceToDevice));

    unsigned *d_sorted_vals = NULL;
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_sorted_vals, sizeof(unsigned) * n));
    CUDA_SAFE_CALL(cudaMemcpy(d_sorted_vals, d_vals, sizeof(unsigned) * n,
                              cudaMemcpyDeviceToDevice));
    CUDA_CHECK_ERROR("Failed to allocate.");

    CUDPPConfiguration sort_config;
    sort_config.algorithm = CUDPP_SORT_RADIX;
    sort_config.datatype = CUDPP_UINT;
    sort_config.options = CUDPP_OPTION_KEY_VALUE_PAIRS;

    CUDPPHandle sort_plan;
    CUDPPResult sort_result = cudppPlan(theCudpp, &sort_plan, sort_config, n,
                                        1, 0);

    // Check plan creation BEFORE using the plan. On failure there is no valid
    // plan to sort with or to destroy; only the temporary copies need freeing.
    if (sort_result != CUDPP_SUCCESS)
    {
        printf("Error in plan creation in MultivalueHashTable::build\n");
        CUDA_SAFE_CALL(cudaFree(d_sorted_keys));
        CUDA_SAFE_CALL(cudaFree(d_sorted_vals));
        return false;
    }

    sort_result = cudppRadixSort(sort_plan, d_sorted_keys,
                                 (void*)d_sorted_vals, n);

    // The plan is only needed for this one sort; destroy it on every path so
    // repeated builds do not accumulate plans.
    cudppDestroyPlan(sort_plan);

    if (sort_result != CUDPP_SUCCESS)
    {
        printf("Error in radix sort in MultivalueHashTable::build\n");
        CUDA_SAFE_CALL(cudaFree(d_sorted_keys));
        CUDA_SAFE_CALL(cudaFree(d_sorted_vals));
        return false;
    }
    CUDA_CHECK_ERROR("Failed to sort");

    // Find the first key-value pair for each key.
    CUDAWrapper::CallCheckIfUnique(d_sorted_keys, n, d_scratch_is_unique_);

    // Assign a unique index from 0 to k-1 for each of the keys.
    cudppScan(scanplan_, d_scratch_offsets_, d_scratch_is_unique_, n);
    CUDA_CHECK_ERROR("Failed to scan");

    // Check how many unique keys were found: the count is taken from the last
    // element of the scan output (this is where n > 0 is required).
    unsigned num_unique_keys;
    CUDA_SAFE_CALL(cudaMemcpy(&num_unique_keys, d_scratch_offsets_ + n - 1,
                              sizeof(unsigned), cudaMemcpyDeviceToHost));
    CUDA_CHECK_ERROR("Failed to get # unique keys");

    // Keep a list of the unique keys, and store info on each key's data
    // (location in the values array, how many there are).
    unsigned *d_compacted_keys = NULL;
    uint2 *d_index_counts_tmp = NULL;
    CUDA_SAFE_CALL(cudaMalloc((void**) &d_compacted_keys,
                              sizeof(unsigned) * num_unique_keys));
    CUDA_SAFE_CALL(cudaMalloc((void**) &d_index_counts_tmp,
                              sizeof(uint2) * num_unique_keys));
    CUDAWrapper::CallCompactKeys(d_sorted_keys,
                                 d_scratch_is_unique_,
                                 d_scratch_offsets_,
                                 n,
                                 d_index_counts_tmp,
                                 d_compacted_keys);

    // Determine the counts.
    CUDAWrapper::CallCountValues(d_index_counts_tmp, n, num_unique_keys);

    // Reinitialize the cuckoo hash table using the information we discovered.
    HashTable::Initialize(num_unique_keys,
                          target_space_usage_,
                          num_hash_functions_);

    // The new buffers are installed only AFTER Initialize().
    // NOTE(review): presumably Initialize() releases the previously held
    // buffers, which would make this ordering mandatory - confirm before
    // moving these assignments.
    d_index_counts_  = d_index_counts_tmp;
    d_unique_keys_   = d_compacted_keys;
    d_sorted_values_ = d_sorted_vals;
    sorted_values_size_ = n;

    // Build the cuckoo hash table with each key assigned a unique index.
    // Re-uses the sorted key memory as an array of values from 0 to k-1.
    CUDAWrapper::CallPrepareIndices(num_unique_keys, d_sorted_keys);
    bool success = HashTable::Build(num_unique_keys, d_unique_keys_,
                                    d_sorted_keys);
    CUDA_SAFE_CALL(cudaFree(d_sorted_keys));
    return success;
}