Example #1
void CUDAResourceManager::deallocUSG(GPUUsg *usg)
{
    CUDA_SAFE_CALL(cudaFree(usg->getElemList()));
    CUDA_SAFE_CALL(cudaFree(usg->getTypeList()));
    CUDA_SAFE_CALL(cudaFree(usg->getConnList()));
    CUDA_SAFE_CALL(cudaFree(usg->getVertices()));
}
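All of these examples wrap runtime calls in CUDA_SAFE_CALL, but none of them defines it; the macro comes from each project's own headers (historically the CUDA SDK's cutil). Below is a minimal sketch of the usual shape, as an assumption rather than any one project's actual definition:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Sketch: abort with file/line context if a CUDA runtime call fails.
#define CUDA_SAFE_CALL(call)                                          \
    do {                                                              \
        cudaError_t err__ = (call);                                   \
        if (err__ != cudaSuccess) {                                   \
            fprintf(stderr, "CUDA error at %s:%d: %s\n",              \
                    __FILE__, __LINE__, cudaGetErrorString(err__));   \
            exit(EXIT_FAILURE);                                       \
        }                                                             \
    } while (0)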
Example #2
void
LiGL2D::setVbo(int spaceVect)
{
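	// Swap pattern: stash the current VBO, create and register a
	// replacement, then unregister and delete the old one only at the end.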
	GLuint oldVbo = 0;
	GLuint newVbo = 0;
	if(vbo != 0){
		oldVbo = vbo;
		vbo = 0;
	}
	if(iw != 0 && ih != 0){
		GLint bsize;
		// create buffer object
		unsigned int size = ((int)iw/(spaceVect+1)) * ((int)ih/(spaceVect+1)) * 6 * sizeof(float2);
		glGenBuffers( 1, &newVbo);
		glBindBuffer( GL_ARRAY_BUFFER, newVbo);
		// initialize buffer object
		glBufferData( GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
		glGetBufferParameterivARB(GL_ARRAY_BUFFER_ARB, GL_BUFFER_SIZE_ARB, &bsize); // sanity query of the allocated size (bsize is not otherwise used)
		glBindBuffer( GL_ARRAY_BUFFER, 0);
		// register buffer object with CUDA
		CUDA_SAFE_CALL(cudaGLRegisterBufferObject(newVbo));
		sVbo = ((int)iw/(spaceVect+1))*((int)ih/(spaceVect+1))*6;
		vbo = newVbo;
		emit sendVbo(vbo);
	}
	if(oldVbo != 0){
		CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(oldVbo));
		glDeleteBuffers(1, &oldVbo);
	}
}
Example #3
	void CpuSNN::printSimSummary(FILE *fp)
	{
		DBG(2, fpLog, AT, "printSimSummary()");
		float etime;
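		// In GPU mode the spike counters live in device globals and
		// must be copied back before they can be printed.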
		if(currentMode == GPU_MODE)	 {
			stopGPUTiming();
			etime = gpuExecutionTime;
			CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD2, "secD2fireCnt", sizeof(int), 0, cudaMemcpyDeviceToHost));
			CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD1, "secD1fireCnt", sizeof(int), 0, cudaMemcpyDeviceToHost));
			spikeCountAll1sec = spikeCountD1 + spikeCountD2;
			CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD2, "spikeCountD2", sizeof(int), 0, cudaMemcpyDeviceToHost));
			CUDA_SAFE_CALL( cudaMemcpyFromSymbol( &spikeCountD1, "spikeCountD1", sizeof(int), 0, cudaMemcpyDeviceToHost));
			spikeCountAll      = spikeCountD1 + spikeCountD2;
		}
		else {
			stopCPUTiming();
			etime = cpuExecutionTime;
		}

		fprintf(fp, "\n*** Network configuration dumped in %s.dot file...\n\
				Use graphViz to see the network connectivity...\n\n", networkName.c_str());
		fprintf(fp, "*********** %s Simulation Summary **********\n", (currentMode == GPU_MODE)?("GPU"):"CPU");
		fprintf(fp, "Network Parameters: \n\tN = %d (numNExcReg:numNInhReg=%2.1f:%2.1f), numPostSynapses = %d, D = %d\n", numN, 100.0*numNExcReg/numN, 100.0*numNInhReg/numN, numPostSynapses, D);
		fprintf(fp, "Random Seed: %d\n", randSeed);
		fprintf(fp, "Timing: \n\tModel Simulation Time = %lld sec \n\tActual Execution Time = %4.2f sec\n",  (unsigned long long)simTimeSec, etime/1000.0);
		fprintf(fp, "Average Firing Rate \n\t2+ms delay = %3.3f Hz \n\t1ms delay = %3.3f Hz \n\tOverall = %3.3f Hz\n",
			spikeCountD2/(1.0*simTimeSec*numNExcReg), spikeCountD1/(1.0*simTimeSec*numNInhReg), spikeCountAll/(1.0*simTimeSec*numN));
		fprintf(fp, "Overall Firing Count: \n\t2+ms delay = %d \n\t1ms delay = %d \n\tTotal = %d\n",
			spikeCountD2, spikeCountD1, spikeCountAll );
		fprintf(fp, "**************************************\n\n");

		fflush(fp);
	}
Example #4
bool MultivalueHashTable::Initialize(const unsigned   max_table_entries,
                                     const float      space_usage,
                                     const unsigned   num_hash_functions)
{                                    
    bool success = HashTable::Initialize(max_table_entries, space_usage,
                                         num_hash_functions);
    target_space_usage_ = space_usage;

    // + 2N 32-bit entries
    CUDA_SAFE_CALL(cudaMalloc( (void**)&d_scratch_offsets_, 
                               sizeof(unsigned) * max_table_entries ));
    CUDA_SAFE_CALL(cudaMalloc( (void**)&d_scratch_is_unique_,
                               sizeof(unsigned) * max_table_entries ));

    success &= (d_scratch_offsets_ != NULL);
    success &= (d_scratch_is_unique_ != NULL);

    // Allocate memory for the scan.
    // + Unknown memory usage
    CUDPPConfiguration config;
    config.op            = CUDPP_ADD;
    config.datatype      = CUDPP_UINT;
    config.algorithm     = CUDPP_SCAN;
    config.options       = CUDPP_OPTION_FORWARD | CUDPP_OPTION_INCLUSIVE;
    CUDPPResult result   = cudppPlan(theCudpp, &scanplan_, config, 
                                     max_table_entries, 1, 0);
    if (CUDPP_SUCCESS != result) {
        fprintf(stderr, "Failed to create plan.");
        return false;
    }
    return success;
}
Example #5
void
LiGL2D::setPbo(int image_width, int image_height)
{
	makeCurrent();
	iw = image_width;
	ih = image_height;
	GLuint oldPbo = 0;
	GLuint newPbo = 0;
	GLuint oldTex = 0;

	if(pbo != 0){
		oldPbo = pbo;
		pbo = 0;
		oldTex = tex;
	}
	if(iw != 0 && ih != 0){
		glGenBuffers(1, &newPbo);
		glBindBuffer(GL_ARRAY_BUFFER, newPbo);
		glBufferData(GL_ARRAY_BUFFER, image_height * image_width * 4 * sizeof(GLubyte), NULL, GL_DYNAMIC_DRAW);
		glBindBuffer(GL_ARRAY_BUFFER, 0);
		CUDA_SAFE_CALL(cudaGLRegisterBufferObject(newPbo));
		createTexture(&tex, iw, ih);
		glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);	
		pbo = newPbo;
		emit sendPbo(pbo);
	}
	if(oldPbo != 0){
		CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(oldPbo));
		glDeleteBuffers(1, &oldPbo);
	}
	if(oldTex != 0){
		glDeleteTextures(1, &oldTex);
	}
}
Example #6
void
FiringBuffer::sync(cudaStream_t stream)
{
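	// Record and wait on an event so the asynchronous copy has
	// finished before the host reads the buffer below.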
	memcpyFromDeviceAsync(mh_buffer.get(), md_buffer.get(),
			m_mapper.partitionCount() * m_pitch, stream);
	CUDA_SAFE_CALL(cudaEventRecord(m_copyDone, stream));
	CUDA_SAFE_CALL(cudaEventSynchronize(m_copyDone));
	populateSparse(mh_buffer.get());
}
Example #7
void runbench_warmup(double *cd, long size){
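	// Single small launch to absorb one-time startup costs so the
	// timed benchmark runs that follow are not skewed.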
	const long reduced_grid_size = size/(UNROLLED_MEMORY_ACCESSES)/32;
	const int BLOCK_SIZE = 256;
	const int TOTAL_REDUCED_BLOCKS = reduced_grid_size/BLOCK_SIZE;

	dim3 dimBlock(BLOCK_SIZE, 1, 1);
	dim3 dimReducedGrid(TOTAL_REDUCED_BLOCKS, 1, 1);

	hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< short, BLOCK_SIZE, 0 >), dim3(dimReducedGrid), dim3(dimBlock), 0, 0, (short)1, (short*)cd);
	CUDA_SAFE_CALL( hipGetLastError() );
	CUDA_SAFE_CALL( hipDeviceSynchronize() );
}
Example #8
//---------------------------------------------
//GPU memory operations
//---------------------------------------------
char *D_MALLOC(size_t size)
{	
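	// Device-side calloc: allocate and zero-fill in one helper.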
	char *buf = NULL;
	CUDA_SAFE_CALL(cudaMalloc((void**)&buf, size));
	CUDA_SAFE_CALL(cudaMemset(buf, 0, size));
#ifdef __DEBUG__
#	ifdef __ALLOC__
	BenLog("+d%d bytes\n", size);
#	endif //__ALLOC__
	d_dmemUsage += size;
#endif
	return buf;
}
Example #9
void ParticleListCPUSorted::copy_from(const ParticleList* list_in)
{
	ispecies = list_in -> ispecies;
	// Copy realkind arrays

	if(list_in -> device_type == 0){
		for(int i=0;i<ParticleList_nfloats;i++)
		{
			memcpy(*get_float(i),*(list_in->get_float(i)),nptcls*sizeof(realkind));
		}

		// Copy int arrays
		for(int i=0;i<ParticleList_nints;i++)
		{
			memcpy(*get_int(i),*(list_in->get_int(i)),nptcls*sizeof(int));
		}

		// Copy cluster ids (stored as ints)
		memcpy(cluster_id,list_in->cluster_id,nptcls*sizeof(int));

//		memcpy(num_subcycles,list_in->num_subcycles,nptcls*sizeof(int));
//
//		memcpy(num_piccard,list_in->num_piccard,nptcls*sizeof(double));
//		memcpy(num_piccard2,list_in->num_piccard2,nptcls*sizeof(double));
	}
	else if(list_in->device_type == 1)
	{

#ifndef NO_CUDA
		enum cudaMemcpyKind kind = cudaMemcpyDeviceToHost;
		// Copy realkind arrays from the device
		for(int i=0;i<ParticleList_nfloats;i++)
		{
			CUDA_SAFE_CALL(cudaMemcpyAsync(*get_float(i),*(list_in->get_float(i)),nptcls*sizeof(realkind),kind));
		}

		// Copy int arrays
		for(int i=0;i<ParticleList_nints;i++)
		{
			CUDA_SAFE_CALL(cudaMemcpyAsync(*get_int(i),*(list_in->get_int(i)),nptcls*sizeof(int),kind));
		}

		// Copy cluster ids (stored as ints)
		CUDA_SAFE_CALL(cudaMemcpyAsync(cluster_id,(list_in->cluster_id),nptcls*sizeof(int),kind));

		CUDA_SAFE_CALL(cudaDeviceSynchronize());
#endif
	}
}
Example #10
CudaGridMap::CudaGridMap(const Vec3i &numGridPoints, const Vec3i &numGridPointsPadded, const double *inputEnergies, cudaStream_t stream)
    : stream(stream), numGridPoints(numGridPoints), numGridPointsPadded(numGridPointsPadded)
{
    // Allocate the padded grid in global memory
    CUDA_SAFE_CALL(cudaMalloc((void**)&energiesDevice, sizeof(float) * numGridPointsPadded.Cube()));

    // Convert doubles to floats and save them in page-locked memory
    int numGridPointsPerMap = numGridPoints.Cube();
    CUDA_SAFE_CALL(cudaMallocHost((void**)&energiesHost, sizeof(float) * numGridPointsPerMap));
    std::transform(inputEnergies, inputEnergies + numGridPointsPerMap, energiesHost, typecast<float, double>);

    // Copy the initial energies from the original grid to the padded one in global memory
    // Elements in the area of padding will stay uninitialized
    copyGridMapPadded(energiesDevice, numGridPointsPadded, energiesHost, numGridPoints, cudaMemcpyHostToDevice);
}
Example #11
void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
  try {
    Kokkos::Impl::num_uvm_allocations -= 1;
    CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
  } catch(...) {}
}
Example #12
::cudaTextureObject_t
SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( const unsigned sizeof_alias
                     , void *   const alloc_ptr
                     , size_t   const alloc_size )
{
  // Only valid for 300 <= __CUDA_ARCH__
  // otherwise return zero.

  ::cudaTextureObject_t tex_obj ;

  struct cudaResourceDesc resDesc ;
  struct cudaTextureDesc  texDesc ;

  memset( & resDesc , 0 , sizeof(resDesc) );
  memset( & texDesc , 0 , sizeof(texDesc) );

  resDesc.resType                = cudaResourceTypeLinear ;
  resDesc.res.linear.desc        = ( sizeof_alias ==  4 ?  cudaCreateChannelDesc< int >() :
                                   ( sizeof_alias ==  8 ?  cudaCreateChannelDesc< ::int2 >() :
                                  /* sizeof_alias == 16 */ cudaCreateChannelDesc< ::int4 >() ) );
  resDesc.res.linear.sizeInBytes = alloc_size ;
  resDesc.res.linear.devPtr      = alloc_ptr ;

  CUDA_SAFE_CALL( cudaCreateTextureObject( & tex_obj , & resDesc, & texDesc, NULL ) );

  return tex_obj ;
}
Example #13
int main( int, char ** )
{
  do_main();

  CUDA_SAFE_CALL( cudaDeviceReset() );
  return 0;
}
Example #14
void CudaGridMap::copyGridMapPadded(float *dst,       const Vec3i &numGridPointsDst,
                                    const float *src, const Vec3i &numGridPointsSrc,
                                    cudaMemcpyKind kind)
{
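    // The two grids differ only in extent, so the overlapping region is
    // copied row by row: each X row is contiguous in both layouts.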
    Vec3i numGridPointsMin = Vec3i(Mathi::Min(numGridPointsDst.x, numGridPointsSrc.x),
                                   Mathi::Min(numGridPointsDst.y, numGridPointsSrc.y),
                                   Mathi::Min(numGridPointsDst.z, numGridPointsSrc.z));
    int numGridPointsDstXMulY = numGridPointsDst.x * numGridPointsDst.y;
    int numGridPointsSrcXMulY = numGridPointsSrc.x * numGridPointsSrc.y;

    for (int z = 0; z < numGridPointsMin.z; z++)
    {
        // Set the base of output indices from z
        int outputIndexZBaseDst = z * numGridPointsDstXMulY;
        int outputIndexZBaseSrc = z * numGridPointsSrcXMulY;

        for (int y = 0; y < numGridPointsMin.y; y++)
        {
            // Set the base of output indices from (z,y)
            int outputIndexZYBaseDst = outputIndexZBaseDst + y * numGridPointsDst.x;
            int outputIndexZYBaseSrc = outputIndexZBaseSrc + y * numGridPointsSrc.x;

            // Copy one row in axis X
            CUDA_SAFE_CALL(cudaMemcpyAsync(dst + outputIndexZYBaseDst, src + outputIndexZYBaseSrc, sizeof(float) * numGridPointsMin.x, kind, stream));
        }
    }
}
Example #15
void NodeFieldData::allocate(PlasmaData* _pdata)
{
	pdata = _pdata;

	nx = pdata->nx;
	ny = pdata->ny;
	nz = pdata->nz;


	cpu_fields = new FieldDataCPU();
	cpu_fields -> allocate(pdata);


	if(pdata->node_info->nGPU > 0)
	{
		gpu_fields = (FieldDataGPU*)malloc(pdata->node_info->nGPU * sizeof(FieldDataGPU));
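		// One FieldDataGPU per device; each OpenMP thread binds to its
		// assigned GPU (cudaSetDevice) before allocating on it.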
#pragma omp parallel for
		for(int i=0;i<pdata->node_info->nGPU;i++)
		{
			CUDA_SAFE_CALL(cudaSetDevice(pdata->thread_info[pdata->node_info->nspecies+i]->gpu_info->igpu));
			gpu_fields[i] = FieldDataGPU(); // assign a default-constructed value; avoids leaking a heap temporary
			gpu_fields[i].allocate(pdata);
		}
	}

	if(pdata->node_info->nMIC > 0)
	{
		mic_fields = new FieldDataMIC();
		mic_fields -> allocate(pdata);
	}
	bcast_timer = new CPUTimer();
}
Example #16
std::vector<int> host::QueryDevices() {
    int device_count = 0;
    CUDA_SAFE_CALL(cudaGetDeviceCount(&device_count));
    if (device_count < 1) {
        fprintf(stderr, "No suitable CUDA devices found!\n");
        exit(EXIT_FAILURE);
    }

    std::vector<int> device_ids;

    for (int i = 0; i < device_count; i++) {
        cudaDeviceProp device_prop;
        CUDA_SAFE_CALL(cudaGetDeviceProperties(&device_prop, i));

        int compute_cap_major = device_prop.major;
        int compute_cap_minor = device_prop.minor;
        int core_count = ConvertSMVer2Cores(compute_cap_major, compute_cap_minor) * device_prop.multiProcessorCount;
        float clock_speed = device_prop.clockRate * 1e-6f;

        float mem_size = device_prop.totalGlobalMem / 1024.0f / 1024.0f;

        if (compute_cap_major >= 2) {
            device_ids.push_back(i);
            printf("\t[%d] %s (%d.%d, %d cores, %.2f GHz, %.2f MB)\n",
                i,
                device_prop.name,
                compute_cap_major,
                compute_cap_minor,
                core_count,
                clock_speed,
                mem_size);
        } else {
            printf("\t[%d] %s (%d.%d not usable)\n",
                i,
                device_prop.name,
                compute_cap_major,
                compute_cap_minor);
        }
    }

    if (device_ids.size() == 0) {
        fprintf(stderr, "No suitable CUDA devices found!\n");
        exit(EXIT_FAILURE);
    }

    return device_ids;
}
Example #17
void Matrix::allocate_rows(int num_rows)
{
  deallocate_rows();
  set_num_rows(num_rows);
  if( num_rows == 0 )
    return;
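  // num_rows+1 entries is consistent with CSR-style row offsets,
  // which need one trailing end marker.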
  CUDA_SAFE_CALL( cudaMalloc((void**) &m_rows, (num_rows+1)*sizeof(int)) );
}
Example #18
void Matrix::allocate_vals(int num_vals)
{
  deallocate_vals();
  set_num_vals(num_vals);
  if( num_vals == 0 )
    return;
  CUDA_SAFE_CALL( cudaMalloc((void**) &m_vals, 16*num_vals*sizeof(double)) );
}
Example #19
void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const
{
  void * ptr = NULL;

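  // Managed (UVM) allocation: migrates between host and device on demand.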
  CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) );

  return ptr ;
}
Example #20
void * CudaHostPinnedSpace::allocate( const size_t arg_alloc_size ) const
{
  void * ptr = NULL;

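  // Pinned (page-locked) host allocation: enables fast asynchronous transfers.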
  CUDA_SAFE_CALL( cudaHostAlloc( &ptr, arg_alloc_size , cudaHostAllocDefault ) );

  return ptr ;
}
Example #21
void * CudaSpace::allocate( const size_t arg_alloc_size ) const
{
  void * ptr = NULL;

  CUDA_SAFE_CALL( cudaMalloc( &ptr, arg_alloc_size ) );

  return ptr ;
}
Example #22
void Matrix::allocate_cols(int num_cols)
{
  deallocate_cols();
  set_num_cols(num_cols);
  if( num_cols == 0 )
    return;
  CUDA_SAFE_CALL( cudaMalloc((void**) &m_cols, num_cols*sizeof(int)) );
}
Example #23
CudaFloatTexture1D::CudaFloatTexture1D(int width, const double *data, CudaAction action, cudaStream_t stream, CudaInternalAPI *api)
{
    channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

    // Allocate the texture on the GPU...
    CUDA_SAFE_CALL(cudaMallocArray(&deviceArray, &channelDesc, width, 1));
    // ... and in page-locked system memory
    CUDA_SAFE_CALL(cudaMallocHost((void**)&hostMem, sizeof(float) * width));

    // Convert doubles to floats and save them to page-locked system memory
    std::transform(data, data + width, hostMem, typecast<float, double>);

    // Copy floats from the page-locked memory to the GPU
    CUDA_SAFE_CALL(cudaMemcpyToArrayAsync(deviceArray, 0, 0, hostMem, sizeof(float) * width, cudaMemcpyHostToDevice, stream));

    if (action == BindToKernel)
        api->setDistDepDielTexture(deviceArray, &channelDesc);
}
Example #24
void InitCUDA(int device)
{
    ///////////////////////////
    // CUDA initialisation
    ///////////////////////////

    int deviceCount;
    CUDA_SAFE_CALL(cudaGetDeviceCount(&deviceCount));

    if (deviceCount == 0)
    {
        std::cout << "There is no device supporting CUDA" << std::endl;
        return;
    }

    CUDA_SAFE_CALL(cudaSetDevice(device));
    cudaDeviceProp deviceProp;
    CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, device));
    std::cout << "Device " << device << ": " << deviceProp.name << std::endl;

    // or
    // CUT_DEVICE_INIT(); // with --device=1 (num device chosen)
}
Example #25
void CompactingHashTable::Release() {
    HashTable::Release();

    CUDA_SAFE_CALL(cudaFree(d_unique_keys_));
    CUDA_SAFE_CALL(cudaFree(d_scratch_cuckoo_keys_));
    CUDA_SAFE_CALL(cudaFree(d_scratch_counts_));
    CUDA_SAFE_CALL(cudaFree(d_scratch_unique_ids_));

    d_unique_keys_         = NULL;
    d_scratch_cuckoo_keys_ = NULL;
    d_scratch_counts_      = NULL;
    d_scratch_unique_ids_  = NULL;

    if (scanplan_) {
      cudppDestroyPlan(scanplan_);
    }
    scanplan_         = 0;
    unique_keys_size_ = 0;
}
Example #26
    CTfactory( const VolumeGPU<T>& src,
               U& texRef,
               const cudaTextureFilterMode fm = cudaFilterModePoint,
               const cudaTextureAddressMode am = cudaAddressModeClamp,
               const int norm = false ) : dca_data(NULL) {

        // Check for valid input
        if( src.d_data.ptr == NULL ) {
            std::cerr << __FUNCTION__
                      << ": Source has no data"
                      << std::endl;
            abort();
        }

        // Allocate memory
        cudaChannelFormatDesc cd = cudaCreateChannelDesc<T>();
        cudaExtent tmpExtent = ExtentFromDims( src.dims );

        CUDA_SAFE_CALL( cudaMalloc3DArray( &(this->dca_data),
                                           &cd,
                                           tmpExtent ) );

        // Do the copy
        cudaMemcpy3DParms cp = {0};

        cp.srcPtr = src.d_data;
        cp.dstArray = this->dca_data;
        cp.extent = tmpExtent;
        cp.kind = cudaMemcpyDeviceToDevice;

        CUDA_SAFE_CALL( cudaMemcpy3D( &cp ) );


        // Bind the texture
        texRef.normalized = norm;
        texRef.addressMode[0] = am;
        texRef.addressMode[1] = am;
        texRef.addressMode[2] = am;
        texRef.filterMode = fm;

        CUDA_SAFE_CALL( cudaBindTextureToArray( texRef, this->dca_data ) );
    }
Example #27
//------------------------------------------------
//free memory on device
//
//param	: buf, size
//note	: buf is passed by value; the NULL assignment
//        below only clears the local copy, so callers
//        must reset their own pointer
//------------------------------------------------
void D_FREE(void *buf, size_t size)
{
	CUDA_SAFE_CALL(cudaFree(buf));
	buf = NULL;
#ifdef __DEBUG__
#	ifdef __ALLOC__
	BenLog("-d%d bytes\n", size);
#	endif //__ALLOC__
	d_dmemUsage -= size;
#endif
}
Example #28
bool CompactingHashTable::Initialize(const unsigned   max_table_entries,
                                     const float      space_usage,
                                     const unsigned   num_functions)
{                                    
    bool success = HashTable::Initialize(max_table_entries, space_usage, 
                                         num_functions);

    unsigned slots_to_allocate = table_size_ + kStashSize;
    CUDA_SAFE_CALL(cudaMalloc( (void**)&d_scratch_cuckoo_keys_, 
                               sizeof(unsigned) * slots_to_allocate ));
    CUDA_SAFE_CALL(cudaMalloc( (void**)&d_scratch_counts_,      
                               sizeof(unsigned) * slots_to_allocate ));
    CUDA_SAFE_CALL(cudaMalloc( (void**)&d_scratch_unique_ids_,  
                               sizeof(unsigned) * slots_to_allocate ));

    success &= d_scratch_cuckoo_keys_ != NULL;
    success &= d_scratch_counts_      != NULL;
    success &= d_scratch_unique_ids_  != NULL;

    return success;
}
Example #29
unsigned int cSystem::getNumGPUs(void)
{
    int     nGPU;

#ifdef __GEM_USE_CUDA__
    CUDA_SAFE_CALL(cudaGetDeviceCount(&nGPU));
#else
    nGPU = 0;
#endif

    return (unsigned int) nGPU;
}
Example #30
float finalizeEvents(hipEvent_t start, hipEvent_t stop){
	CUDA_SAFE_CALL( hipGetLastError() );
	CUDA_SAFE_CALL( hipEventRecord(stop, 0) );
	CUDA_SAFE_CALL( hipEventSynchronize(stop) );
	float kernel_time;
	CUDA_SAFE_CALL( hipEventElapsedTime(&kernel_time, start, stop) );
	CUDA_SAFE_CALL( hipEventDestroy(start) );
	CUDA_SAFE_CALL( hipEventDestroy(stop) );
	return kernel_time;
}
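Note on Examples #7 and #30: they pass hipError_t results (hipGetLastError, hipEventSynchronize, ...) through the same CUDA_SAFE_CALL name, so those projects presumably define the macro over HIP's error API (hipSuccess, hipGetErrorString) rather than CUDA's.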