void RemoteCUDARunner::AllocateResources(const int numb, const int numt) { DeallocateResources(); m_in=(remote_cuda_in *)malloc(sizeof(remote_cuda_in)); m_out=(remote_cuda_out *)malloc(numb*numt*sizeof(remote_cuda_out)); m_metahash=(unsigned char *)malloc(numb*numt*GetStepIterations()); cutilSafeCall(cudaMalloc((void **)&m_devin,sizeof(remote_cuda_in))); cutilSafeCall(cudaMalloc((void **)&m_devout,numb*numt*sizeof(remote_cuda_out))); cutilSafeCall(cudaMalloc((void **)&m_devmetahash,numb*numt*GetStepIterations())); std::cout << "Done allocating CUDA resources for (" << numb << "," << numt << ")" << std::endl; }
const unsigned long CUDARunner::RunStep() { unsigned int best=0; unsigned int bestg=~0; if(m_in==0 || m_out==0 || m_devin==0 || m_devout==0) { AllocateResources(m_numb,m_numt); } cutilSafeCall(cudaMemcpy(m_devin,m_in,sizeof(cuda_in),cudaMemcpyHostToDevice)); cuda_process_helper(m_devin,m_devout,GetStepIterations(),GetStepBitShift(),m_numb,m_numt); cutilSafeCall(cudaMemcpy(m_out,m_devout,m_numb*m_numt*sizeof(cuda_out),cudaMemcpyDeviceToHost)); for(int i=0; i<m_numb*m_numt; i++) { if(m_out[i].m_bestnonce!=0 && m_out[i].m_bestg<bestg) { best=m_out[i].m_bestnonce; bestg=m_out[i].m_bestg; } } return CryptoPP::ByteReverse(best); }
const unsigned long CUDARunner::RunStep() { //unsigned int best=0; //unsigned int bestg=~0; int offset=0; if(m_in==0 || m_out==0 || m_devin==0 || m_devout==0) { AllocateResources(m_numb,m_numt); } m_out[0].m_bestnonce=0; cuMemcpyHtoD(m_devout,m_out,/*m_numb*m_numt*/sizeof(cuda_out)); cuMemcpyHtoD(m_devin,m_in,sizeof(cuda_in)); int loops=GetStepIterations(); int bits=GetStepBitShift()-1; void *ptr=(void *)(size_t)m_devin; ALIGN_UP(offset, __alignof(ptr)); cuParamSetv(m_function,offset,&ptr,sizeof(ptr)); offset+=sizeof(ptr); ptr=(void *)(size_t)m_devout; ALIGN_UP(offset, __alignof(ptr)); cuParamSetv(m_function,offset,&ptr,sizeof(ptr)); offset+=sizeof(ptr); ALIGN_UP(offset, __alignof(loops)); cuParamSeti(m_function,offset,loops); offset+=sizeof(loops); ALIGN_UP(offset, __alignof(bits)); cuParamSeti(m_function,offset,bits); offset+=sizeof(bits); cuParamSetSize(m_function,offset); cuFuncSetBlockShape(m_function,m_numt,1,1); cuLaunchGrid(m_function,m_numb,1); cuMemcpyDtoH(m_out,m_devout,/*m_numb*m_numt*/sizeof(cuda_out)); // very unlikely that we will find more than 1 hash with H=0 // so we'll just return the first one and not even worry about G for(int i=0; i<1/*m_numb*m_numt*/; i++) { if(m_out[i].m_bestnonce!=0)// && m_out[i].m_bestg<bestg) { return CryptoPP::ByteReverse(m_out[i].m_bestnonce); //best=m_out[i].m_bestnonce; //bestg=m_out[i].m_bestg; } } return 0; }
const unsigned long RemoteCUDARunner::RunStep() { if(m_in==0 || m_out==0 || m_devin==0 || m_devout==0) { AllocateResources(m_numb,m_numt); } cutilSafeCall(cudaMemcpy(m_devin,m_in,sizeof(remote_cuda_in),cudaMemcpyHostToDevice)); remote_cuda_process_helper(m_devin,m_devout,m_devmetahash,GetStepIterations(),GetStepBitShift(),m_numb,m_numt); cutilSafeCall(cudaMemcpy(m_out,m_devout,m_numb*m_numt*sizeof(remote_cuda_out),cudaMemcpyDeviceToHost)); cutilSafeCall(cudaMemcpy(m_metahash,m_devmetahash,m_numb*m_numt*GetStepIterations(),cudaMemcpyDeviceToHost)); return 0; }