Exemple #1
0
/**
 * One-time CUDA setup for the application: makes the device returned by
 * cutGetMaxGflopsDeviceId() current and requests the shared-memory-biased
 * cache configuration. Both runtime calls are wrapped in CudaSafeCall.
 */
void Application::_init()
{
    // Make the device chosen by cutGetMaxGflopsDeviceId() the current one.
    const int bestDevice = cutGetMaxGflopsDeviceId();
    CudaSafeCall( cudaSetDevice( bestDevice ) );

    // Prefer a larger shared-memory partition over L1 on this device.
    CudaSafeCall( cudaDeviceSetCacheConfig( cudaFuncCachePreferShared ) );
}
Exemple #2
0
void initializeCUDARuntime(int device) {
  cudaSetDevice(device);
  cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);

  // dummy memcpy to init cuda runtime
  util::Device1D<float> d_dummy(1);
  std::vector<float> h_dummy(1);
  d_dummy.copyFrom(h_dummy);

  if (cudaGetLastError() != cudaSuccess)
    throw std::runtime_error(
        std::string("initializeCUDARuntime: CUDA initialization problem\n"));
}
Exemple #3
0
/**
 * Uploads everything the GPU gridding pass needs and configures the device.
 *
 * Builds the trajectory blocks from `trajPoints`, copies kernel data and the
 * trajectory blocks, allocates GPU memory, requests the shared-memory-biased
 * cache configuration and computes the shared-memory size used later
 * (m_sharedSize).
 *
 * @param trajPoints  trajectory points handed to createTrajBlocks()
 * @return the last CUDA runtime error recorded during preparation, or
 *         cudaSuccess when none was recorded. A failure is also logged via
 *         qWarning().
 */
cudaError_t GridGpu::prepareGPU(QVector<TrajPoint> &trajPoints)
{
    createTrajBlocks(trajPoints);
    copyKernelData();
    copyTrajBlocks();
    mallocGpu();
    cudaDeviceSetCacheConfig(cudaFuncCachePreferShared);

    // Square of ceil(m_gridSize / m_gpuGridSize), in units of complexGpu.
    m_sharedSize = powf(ceilf((float)m_gridSize / m_gpuGridSize), 2) * sizeof(complexGpu);
    qWarning() << "Shared mem size:" << m_sharedSize;

    // cudaGetLastError() reports (and clears) the last error recorded by the
    // runtime calls made above, including the unchecked ones.
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess)
        qWarning() << cudaGetErrorString(status);

    // The function is declared to return cudaError_t; falling off the end
    // without a return statement was undefined behaviour.
    return status;
}
Exemple #4
0
// Picks the CUDA device named by the CUDA_DEVICE environment variable
// (device 0 when the variable is unset) and makes it current.
// Returns the selected device index, or -1 when that index is outside
// [0, device count), in which case the CPU is used instead.
int init_cuda()
{
    // Requested device index; defaults to 0 without CUDA_DEVICE.
    const char* requested = getenv("CUDA_DEVICE");
    int dev = 0;
    if (requested != NULL)
        dev = atoi(requested);

    int available;
    ebf::cudaErrCheck( cudaGetDeviceCount(&available) );

    // Guard clause: an out-of-range index means no usable GPU.
    if (dev < 0 || dev >= available)
    {
        std::cerr << "# Cannot select the CUDA device. Using CPU!" << std::endl;
        return -1;
    }

    ebf::cudaErrCheck( cudaSetDevice(dev) );
    // Cache preference only; its result is intentionally not checked here.
    cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
    return dev;
}
Exemple #5
0
/**
 * The intent is to tell CUDA driver to use more cache. But it does
 * not improve performance all the time. 
 * 
 */
void set_more_cache(){
	cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
}
/**
 * Test driver: selects the best CUDA device, initialises and starts the
 * resources manager, runs test2() and reports the outcome.
 *
 * Command line (positional):
 *   argv[1], argv[2]  forwarded to rM.init() as the `parameters` list
 *   argv[3]           forwarded to rM.init() as its second argument
 *   argv[4]           (argc == 5) parsed as a double into mO; if that fails it
 *                     is used as colorF instead
 *   argv[5]           (argc == 6) parsed as a double into mO; if that fails
 *                     argv[4] is used as colorF
 *
 * Returns 1 when fewer than three arguments are supplied. All other paths
 * keep their original exit status of 0, including the pre-existing error
 * paths.
 */
int main(int argc, char ** argv)
{
	// argv[1..3] are read unconditionally below; without this check a short
	// command line dereferenced the argv terminator (undefined behaviour).
	if (argc < 4)
	{
		std::cerr<<"Error: expected at least 3 arguments, got "<<(argc > 0 ? argc - 1 : 0)<<std::endl;
		return 1;
	}

	std::vector<std::string> parameters;
	parameters.push_back(std::string(argv[1]));
	parameters.push_back(std::string(argv[2]));

	device = eqMivt::getBestDevice();

	cudaFuncCache cacheConfig = cudaFuncCachePreferL1;
	if (cudaSuccess != cudaSetDevice(device) || cudaSuccess != cudaDeviceSetCacheConfig(cacheConfig))
	{
		std::cerr<<"Error setting up best device"<<std::endl;
		return 0;
	}

	std::string colorF = "";

	// With 5 or 6 arguments the last one is tried as a numeric mO value; when
	// it is not a number, argv[4] is taken as colorF instead.
	// NOTE(review): for argc == 6, argv[4] is ignored whenever argv[5] parses
	// as a number - confirm this is intended.
	if (argc == 5 || argc == 6)
	{
		try
		{
			std::string n(argv[argc - 1]);
			mO = boost::lexical_cast<double>(n);
		}
		catch(...)
		{
			colorF = argv[4];
		}
	}

	if (!rM.init(parameters, argv[3], colorF, mO))
	{
		std::cerr<<"Error init resources manager"<<std::endl;
		return 0;
	}

	if (!rM.start())
	{
		std::cerr<<"Error start resources manager"<<std::endl;
		return 0;
	}
	
	std::cout<<"============ Creating pictures ============"<<std::endl;

	if (test2())
	{
		std::cout<<"Test ok"<<std::endl;
	}
	else
	{
		std::cout<<"Test Fail"<<std::endl;
	}

	rM.destroy();

	std::cout<<"End test"<<std::endl;
}
Exemple #7
0
/*
 * Scans a range of nonces for the Groestlcoin hash on the GPU mapped to
 * thr_id, starting at pdata[19] and advancing by `throughput` per iteration.
 *
 * Parameters:
 *   thr_id       index used to look up the device (device_map), stream
 *                (gpustream) and restart flag (work_restart) for this call
 *   pdata        block header words; pdata[19] is read as the start nonce and
 *                updated, pdata[21] receives a second nonce when one validates
 *   ptarget      target words; ptarget[7] is overwritten with 0x0000000f when
 *                opt_benchmark is set
 *   max_nonce    upper bound of the nonce range
 *   hashes_done  out: number of nonces covered by this call
 *
 * Returns the number of nonces that validated on the CPU (1 or 2), or 0 when
 * the loop ends without a validated nonce.
 *
 * Side effects visible here: on stop_mining the stream is destroyed and the
 * thread exits via pthread_exit(); a CUDA error reported by cudaGetLastError()
 * terminates the process with exit(EXIT_FAILURE).
 */
extern int scanhash_groestlcoin(int thr_id, uint32_t *pdata, uint32_t *ptarget,
    uint32_t max_nonce, uint32_t *hashes_done)
{
	// Host buffer for up to two candidate nonces; allocated once in the init
	// block below. NOTE(review): THREAD is presumably a thread-local storage
	// qualifier macro - confirm.
	static THREAD uint32_t *foundNounce = nullptr;

    uint32_t start_nonce = pdata[19];
	// Intensity 24 when device_sm for this device is above 500, otherwise 23.
	unsigned int intensity = (device_sm[device_map[thr_id]] > 500) ? 24 : 23;
	uint32_t throughputmax = device_intensity(device_map[thr_id], __func__, 1U << intensity);
	// Cap at the remaining nonce range; the mask clears the low 10 bits so the
	// value is a multiple of 1024.
	uint32_t throughput = min(throughputmax, max_nonce - start_nonce) & 0xfffffc00;

    if (opt_benchmark)
        ptarget[7] = 0x0000000f;

    // init
	// Runs only while `init` is still false: selects the device, sets device
	// flags and cache preference, creates the stream, initialises the hashing
	// state and allocates the 2 * 4 byte result buffer.
	static THREAD volatile bool init = false;
	if(!init)
    {
		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));

		groestlcoin_cpu_init(thr_id, throughputmax);
		CUDA_SAFE_CALL(cudaMallocHost(&foundNounce, 2 * 4));
		init = true;
    }

    // Endian swap is required
    uint32_t endiandata[32];
    for (int kk=0; kk < 32; kk++)
        be32enc(&endiandata[kk], pdata[kk]);

    // Prepare the context with the endian-swapped block header (the nonce is replaced later)
    groestlcoin_cpu_setBlock(thr_id, endiandata);

	do
	{
		// GPU
		const uint32_t Htarg = ptarget[7];

		groestlcoin_cpu_hash(thr_id, throughput, pdata[19], foundNounce, ptarget[7]);

		// Shutdown request: flag this thread as stopped, release the stream and exit the thread.
		if(stop_mining) {mining_has_stopped[thr_id] = true; cudaStreamDestroy(gpustream[thr_id]); pthread_exit(nullptr);}
		// Any value below 0xffffffff in slot 0 is treated as a candidate nonce.
		if(foundNounce[0] < 0xffffffff)
		{
			// Recompute the hash on the CPU for the candidate before accepting it.
			uint32_t tmpHash[8];
			endiandata[19] = SWAP32(foundNounce[0]);
			groestlhash(tmpHash, endiandata);

			if(tmpHash[7] <= Htarg && fulltest(tmpHash, ptarget))
			{
				int res = 1;
				if(opt_benchmark)
					applog(LOG_INFO, "GPU #%d Found nounce %08x", device_map[thr_id], foundNounce[0]);
				// Count the batch just processed as done as well.
				*hashes_done = pdata[19] - start_nonce + throughput;
				// Slot 1 holds an optional second candidate; 0xffffffff means none.
				if(foundNounce[1] != 0xffffffff)
				{
					endiandata[19] = SWAP32(foundNounce[1]);
					groestlhash(tmpHash, endiandata);
					if(tmpHash[7] <= Htarg && fulltest(tmpHash, ptarget))
					{
						pdata[21] = foundNounce[1];
						res++;
						if(opt_benchmark)
							applog(LOG_INFO, "GPU #%d Found second nounce %08x", device_map[thr_id], foundNounce[1]);
					}
					else
					{
						// No warning when the top hash word equals Htarg exactly.
						if(tmpHash[7] != Htarg)
						{
							applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNounce[1]);
						}
					}
				}
				pdata[19] = foundNounce[0];
				return res;
			}
			else
			{
				// No warning when the top hash word equals Htarg exactly.
				if(tmpHash[7] != Htarg)
				{
					applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNounce[0]);
				}
			}
		}

		pdata[19] += throughput;
		// Any CUDA error recorded during this iteration is fatal for the process.
		cudaError_t err = cudaGetLastError();
		if(err != cudaSuccess)
		{
			applog(LOG_ERR, "GPU #%d: %s", device_map[thr_id], cudaGetErrorString(err));
			exit(EXIT_FAILURE);
		}
	// Continue until a restart is requested or the next batch would pass
	// max_nonce; the comparison is done in 64 bits so the sum cannot wrap.
	} while(!work_restart[thr_id].restart && ((uint64_t)max_nonce > ((uint64_t)(pdata[19]) + (uint64_t)throughput)));

    *hashes_done = pdata[19] - start_nonce;
    return 0;
}