예제 #1
0
// Allocates the host and device buffers for a launch of numb blocks by numt
// threads.
//
// Anything previously held is released first through DeallocateResources().
// Buffers created (host / device):
//   m_in       / m_devin        - a single remote_cuda_in
//   m_out      / m_devout       - numb*numt remote_cuda_out entries
//   m_metahash / m_devmetahash  - numb*numt*GetStepIterations() bytes
//
// numb: first launch dimension the buffers are sized for
// numt: second launch dimension the buffers are sized for
void RemoteCUDARunner::AllocateResources(const int numb, const int numt)
{
	DeallocateResources();

	// Size arithmetic is done in size_t: multiplying the ints directly can
	// overflow before the result is widened for malloc/cudaMalloc, which
	// would silently request a buffer of the wrong size.
	const size_t threadcount=static_cast<size_t>(numb)*static_cast<size_t>(numt);
	const size_t outbytes=threadcount*sizeof(remote_cuda_out);
	// GetStepIterations() is read once so host and device sizes always agree.
	const size_t metahashbytes=threadcount*static_cast<size_t>(GetStepIterations());

	// NOTE(review): the malloc results are not checked here; a failed host
	// allocation leaves the corresponding member null.
	m_in=(remote_cuda_in *)malloc(sizeof(remote_cuda_in));
	m_out=(remote_cuda_out *)malloc(outbytes);
	m_metahash=(unsigned char *)malloc(metahashbytes);

	// Device-side mirrors of the three host buffers.
	cutilSafeCall(cudaMalloc((void **)&m_devin,sizeof(remote_cuda_in)));
	cutilSafeCall(cudaMalloc((void **)&m_devout,outbytes));
	cutilSafeCall(cudaMalloc((void **)&m_devmetahash,metahashbytes));

	std::cout << "Done allocating CUDA resources for (" << numb << "," << numt << ")" << std::endl;
}
예제 #2
0
// Runs one step on the device and picks a result from the per-thread output.
//
// The input block m_in is uploaded, cuda_process_helper is invoked with the
// current step iterations / bit shift and the (m_numb, m_numt) launch
// dimensions, and all m_numb*m_numt cuda_out entries are copied back.
//
// Returns CryptoPP::ByteReverse() of the m_bestnonce belonging to the entry
// with the smallest m_bestg among entries whose m_bestnonce is non-zero (the
// first such entry wins ties). If no entry qualifies, the value passed to
// ByteReverse is 0.
const unsigned long CUDARunner::RunStep()
{
	// Create the host/device buffers on first use, or if any one is missing.
	const bool buffersready=(m_in!=0 && m_out!=0 && m_devin!=0 && m_devout!=0);
	if(!buffersready)
	{
		AllocateResources(m_numb,m_numt);
	}

	// Host -> device: the single input structure.
	cutilSafeCall(cudaMemcpy(m_devin,m_in,sizeof(cuda_in),cudaMemcpyHostToDevice));

	cuda_process_helper(m_devin,m_devout,GetStepIterations(),GetStepBitShift(),m_numb,m_numt);

	// Device -> host: one cuda_out per launched thread.
	cutilSafeCall(cudaMemcpy(m_out,m_devout,m_numb*m_numt*sizeof(cuda_out),cudaMemcpyDeviceToHost));

	unsigned int winningnonce=0;
	unsigned int lowestg=~0u;

	for(int slot=0; slot<m_numb*m_numt; ++slot)
	{
		// A zero nonce marks a slot that reported nothing.
		if(m_out[slot].m_bestnonce==0)
		{
			continue;
		}

		// Strict comparison: an equal g never displaces an earlier slot.
		if(m_out[slot].m_bestg<lowestg)
		{
			winningnonce=m_out[slot].m_bestnonce;
			lowestg=m_out[slot].m_bestg;
		}
	}

	return CryptoPP::ByteReverse(winningnonce);

}
예제 #3
0
const unsigned long CUDARunner::RunStep()
{
	//unsigned int best=0;
	//unsigned int bestg=~0;
	int offset=0;

	if(m_in==0 || m_out==0 || m_devin==0 || m_devout==0)
	{
		AllocateResources(m_numb,m_numt);
	}
	m_out[0].m_bestnonce=0;
	cuMemcpyHtoD(m_devout,m_out,/*m_numb*m_numt*/sizeof(cuda_out));

	cuMemcpyHtoD(m_devin,m_in,sizeof(cuda_in));

	int loops=GetStepIterations();
	int bits=GetStepBitShift()-1;

	void *ptr=(void *)(size_t)m_devin;
	ALIGN_UP(offset, __alignof(ptr));
	cuParamSetv(m_function,offset,&ptr,sizeof(ptr));
	offset+=sizeof(ptr);

	ptr=(void *)(size_t)m_devout;
	ALIGN_UP(offset, __alignof(ptr));
	cuParamSetv(m_function,offset,&ptr,sizeof(ptr));
	offset+=sizeof(ptr);

	ALIGN_UP(offset, __alignof(loops));
	cuParamSeti(m_function,offset,loops);
	offset+=sizeof(loops);

	ALIGN_UP(offset, __alignof(bits));
	cuParamSeti(m_function,offset,bits);
	offset+=sizeof(bits);

	cuParamSetSize(m_function,offset);

	cuFuncSetBlockShape(m_function,m_numt,1,1);
	cuLaunchGrid(m_function,m_numb,1);

	cuMemcpyDtoH(m_out,m_devout,/*m_numb*m_numt*/sizeof(cuda_out));

	// very unlikely that we will find more than 1 hash with H=0
	// so we'll just return the first one and not even worry about G
	for(int i=0; i<1/*m_numb*m_numt*/; i++)
	{
		if(m_out[i].m_bestnonce!=0)// && m_out[i].m_bestg<bestg)
		{
			return CryptoPP::ByteReverse(m_out[i].m_bestnonce);
			//best=m_out[i].m_bestnonce;
			//bestg=m_out[i].m_bestg;
		}
	}

	return 0;

}
예제 #4
0
// Runs one step on the device and copies the raw results back to the host.
//
// The input block m_in is uploaded, remote_cuda_process_helper is invoked
// with the current step iterations / bit shift and the (m_numb, m_numt)
// launch dimensions, then both m_out (m_numb*m_numt remote_cuda_out entries)
// and m_metahash (m_numb*m_numt*GetStepIterations() bytes) are refreshed
// from their device counterparts.
//
// Always returns 0; the results are left in m_out and m_metahash.
const unsigned long RemoteCUDARunner::RunStep()
{

	// Create the host/device buffers on first use, or if any one is missing.
	const bool buffersready=(m_in!=0 && m_out!=0 && m_devin!=0 && m_devout!=0);
	if(!buffersready)
	{
		AllocateResources(m_numb,m_numt);
	}

	// Host -> device: the single input structure.
	cutilSafeCall(cudaMemcpy(
		m_devin,
		m_in,
		sizeof(remote_cuda_in),
		cudaMemcpyHostToDevice));

	remote_cuda_process_helper(
		m_devin,
		m_devout,
		m_devmetahash,
		GetStepIterations(),
		GetStepBitShift(),
		m_numb,
		m_numt);

	// Device -> host: per-thread output entries.
	const size_t outbytes=m_numb*m_numt*sizeof(remote_cuda_out);
	cutilSafeCall(cudaMemcpy(m_out,m_devout,outbytes,cudaMemcpyDeviceToHost));

	// Device -> host: the metahash bytes produced by the step.
	const size_t metahashbytes=m_numb*m_numt*GetStepIterations();
	cutilSafeCall(cudaMemcpy(m_metahash,m_devmetahash,metahashbytes,cudaMemcpyDeviceToHost));

	return 0;

}