C++ (Cpp) cudaInit примеры использования

Пример #1

0

Показать файл

Файл: particles.cpp Проект: AnkurAnandapu/ocelot-fork

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char** argv) 
{
    numParticles = 1024;
    uint gridDim = 64;
    numIterations = 1;

    cutGetCmdLineArgumenti( argc, (const char**) argv, "n", (int *) &numParticles);
    cutGetCmdLineArgumenti( argc, (const char**) argv, "grid", (int *) &gridDim);
    gridSize.x = gridSize.y = gridSize.z = gridDim;
    printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);

    bool benchmark = !cutCheckCmdLineFlag(argc, (const char**) argv, "noqatest") != 0;
    cutGetCmdLineArgumenti( argc, (const char**) argv, "i", &numIterations);
    
    cudaInit(argc, argv);

    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
    glutInitWindowSize(640, 480);
    glutCreateWindow("CUDA particles");

    initGL();
    init(numParticles, gridSize);
    initParams();
    initMenus();

    if (benchmark)
    {
        if (numIterations <= 0) 
            numIterations = 300;
        runBenchmark(numIterations);
    }
    else
    {
        glutDisplayFunc(display);
        glutReshapeFunc(reshape);
        glutMouseFunc(mouse);
        glutMotionFunc(motion);
        glutKeyboardFunc(key);
        glutSpecialFunc(special);
        glutIdleFunc(idle);

        glutMainLoop();
    }

    if (psystem)
        delete psystem;

    cudaThreadExit();

    return 0;
}

Пример #2

0

Показать файл

Файл: bw.c Проект: CJacky/qCUDA

void bw(uint64_t size)
{
	uint8_t *H, *D;
	int i;

	printf("%"PRIu64" ", size);
	
	cudaInit();

	printf("0 "); // reg func

	H = (uint8_t*)malloc(sizeof(uint8_t)*size);
	
	time_begin();
	cudaMalloc((void**)&D, sizeof(uint8_t)*size);
	printf("%u ", time_end());

	for(i=0; i<size; i++)
	{
		H[i]=i%255;
	}

	time_begin();
	cudaMemcpy(D, H, size*sizeof(uint8_t), cudaMemcpyHostToDevice);
	printf("%u ", time_end());

	printf("0 "); // exec kernel

	for(i=0; i<size; i++)
	{
		H[i]=0;
	}

	time_begin();
	cudaMemcpy(H, D, size*sizeof(uint8_t), cudaMemcpyDeviceToHost);
	printf("%u ", time_end());


	for(i=0; i<size; i++)
	{
		if(H[i]!=i%255)
			printf("error %d\n", i);
	}

	free(H);
	
	time_begin();
	cudaFree(D);
	printf("%u ", time_end());
	
	cudaFini();
	printf("\n");
}

Пример #3

0

Показать файл

Файл: particles.cpp Проект: abb58/DEM-CUDA

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    // Write the xyz file
    char outfile[256];
    sprintf(outfile, "outfile.xyz");
    if ((fpout = fopen(outfile, "w")) == NULL) {
        printf("Cannot Open File\n");
        exit(1);
    }


    printf("%s Starting...\n\n", sSDKsample);

    numParticles = NUM_PARTICLES;
    uint gridDim = GRID_SIZE;
    numIterations = 0;

    gridSize.x = gridSize.y = gridSize.z = gridDim;

    printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);
    printf("particles: %d\n", numParticles);

    cudaInit(argc, argv);

    initParticleSystem(numParticles, gridSize);

    printf("%e\n", timestep);
    //psystem->dumpParticles(0, numParticles-1, 0);


    if (numIterations <= 0)
    {
        numIterations = (int)(2.0/timestep);
    }
    std::cout << "1. I am here \n";
    runBenchmark(numIterations, argv[0]);
    std::cout << "2. I am here \n";

    if (psystem) {
        delete psystem;
        cleanup();
    }

    cudaDeviceReset();
    exit(g_TotalErrors > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
}

Пример #4

0

Показать файл

Файл: particles.cpp Проект: andemi02/orkid

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char** argv) 
{
//    numParticles = 65536*2;
//    numParticles = 65536;
//    numParticles = 32768;
//    numParticles = 8192;
//    numParticles = 4096;
    numParticles = 2048;
//    numParticles = 1024;
//    numParticles = 256;
//    numParticles = 32;
//    numParticles = 2;
    uint gridDim = 64;
    numIterations = 0;

    gridSize.x = gridSize.y = gridSize.z = gridDim;
    printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);

    cudaInit(argc, argv);

    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
    glutInitWindowSize(640, 480);
    glutCreateWindow("CUDA particles");

    initGL();
    init(numParticles, gridSize);
    initParams();
    initMenus();

    glutDisplayFunc(display);
    glutReshapeFunc(reshape);
    glutMouseFunc(mouse);
    glutMotionFunc(motion);
    glutKeyboardFunc(key);
    glutSpecialFunc(special);
    glutIdleFunc(idle);

    glutMainLoop();

    if (psystem)
        delete psystem;

    return 0;
}

Пример #5

0

Показать файл

Файл: main.cpp Проект: uDeviceX/uDeviceX

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    numParticles = NUM_PARTICLES;
    uint gridDim = GRID_SIZE;
    numIterations = 0;

    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **) argv, "n"))
        {
            numParticles = getCmdLineArgumentInt(argc, (const char **)argv, "n");
        }

        if (checkCmdLineFlag(argc, (const char **) argv, "grid"))
        {
            gridDim = getCmdLineArgumentInt(argc, (const char **) argv, "grid");
        }
    }

    gridSize.x = gridSize.y = gridSize.z = gridDim;
    printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);
    printf("particles: %d\n", numParticles);


    if (checkCmdLineFlag(argc, (const char **) argv, "i"))
    {
        numIterations = getCmdLineArgumentInt(argc, (const char **) argv, "i");
    }

    cudaInit(argc, argv);

    initParticleSystem(numParticles, gridSize);
    initParams();

    if (numIterations <= 0) numIterations = 300;
    runBenchmark(numIterations, argv[0]);

    if (psystem)
    {
        delete psystem;
    }

    exit(g_TotalErrors > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
}

Пример #6

0

Показать файл

Файл: main.cpp Проект: macsyums92/screen-space-fluid-rendering

int main ( int argc, char **argv )
{
	#ifdef BUILD_CUDA
		// Initialize CUDA
		cudaInit( argc, argv );
	#endif

	// set up the window
	glutInit( &argc, &argv[0] ); 
	glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH );
	glutInitWindowPosition( 100, 100 );
	glutInitWindowSize( (int) window_width, (int) window_height );
	glutCreateWindow ( "Fluids v.1 (c) 2008, R. Hoetzlein (ZLib)" );

//	glutFullScreen ();
 
	// initialize parameters
	init();

	// wait for something to happen
	glutMainLoop();

  return 0;
}

Пример #7

0

Показать файл

Файл: particles.cpp Проект: ryankeedy/BIFF

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
    printf("%s Starting...\n\n", sSDKsample);

    numParticles = NUM_PARTICLES;
    maxNumParticles = MAX_NUM_PARTICLES;
    uint gridDim = GRID_SIZE;
    numIterations = 0;
    printf("Surely I can get this far\n");

    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **) argv, "n"))
        {
            numParticles = getCmdLineArgumentInt(argc, (const char **)argv, "n");
        }

        if (checkCmdLineFlag(argc, (const char **) argv, "grid"))
        {
            gridDim = getCmdLineArgumentInt(argc, (const char **) argv, "grid");
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "file", &g_refFile);
            fpsLimit = frameCheckNumber;
            numIterations = 1;
        }
    }

    //*******************************************************
    // RMK Hard code for cylindrical coords (y=theta=1)
    // DomainSize
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot-big/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot-big/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-big/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-big/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot-big-refine/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-nothot-big-refine/RVert.txt";
	char Zfile[] = "/home/rkeedy/CFD/BuoyantStrumJet85-big-refine-lighter/ZVert.txt";
	char Rfile[] = "/home/rkeedy/CFD/BuoyantStrumJet85-big-refine-lighter/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet62-big-refine-lighter/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet62-big-refine-lighter/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-big-refine/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet85-big-refine/RVert.txt";
	//char Zfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet63-big-refine/ZVert.txt";
	//char Rfile[] = "/home/rkeedy/Dropbox/CFD/BuoyantStrumJet63-big-refine/RVert.txt";
	numVelNodes.x = filecount(Rfile); //-1;
	numVelNodes.z = filecount(Zfile); //-1;
	numVelNodes.y = 1;
	numCells.x = 80; //47; //24; //29;
	numCells.y = 1;
	numCells.z = 160; //188; //95; //88;
	numParticles = numCells.x*numCells.z*20; //avgnumparticles = 40
	srand( time( NULL ) );
	//numParticles = numCells.x*numCells.z*40;
	printf("vel grid: %d x %d x %d = %d cells\n", numVelNodes.x, numVelNodes.y, numVelNodes.z, numVelNodes.x*numVelNodes.y*numVelNodes.z);
	printf("    grid: %d x %d x %d = %d cells\n", numCells.x, numCells.y, numCells.z, numCells.x*numCells.y*numCells.z);
	//printf("vel grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);

    bool benchmark = checkCmdLineFlag(argc, (const char **) argv, "benchmark") != 0;

    if (checkCmdLineFlag(argc, (const char **) argv, "i"))
    {
        numIterations = getCmdLineArgumentInt(argc, (const char **) argv, "i");
    }
    if (g_refFile)
    {
        cudaInit(argc, argv);
    }
    else
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "device"))
        {
            printf("[%s]\n", argv[0]);
            printf("   Does not explicitly support -device=n in OpenGL mode\n");
            printf("   To use -device=n, the sample must be running w/o OpenGL\n\n");
            printf(" > %s -device=n -file=<*.bin>\n", argv[0]);
            printf("exiting...\n");
            exit(EXIT_SUCCESS);
        }

        initGL(&argc, argv);
        cudaGLInit(argc, argv);
    }

    // Moved code snippet to CellSystem
    //initCellSystem(gridSize);
    // now moved to particlesystem

    printf("Begin initialization\n");

    //initParticleSystem(numParticles, gridSize, g_refFile==NULL);
    initParticleSystem(maxNumParticles, numParticles, numVelNodes, numCells, g_refFile==NULL);
    //printf("Finished with initParticleSystem, %d\n",g_refFile==NULL);
    //cin.ignore();
    initParams();

    printf("Finished with initialization\n");

    if (!g_refFile)
    {
        initMenus();
    }

    if (benchmark || g_refFile)
    {
        if (numIterations <= 0)
        {
            numIterations = 300;
        }

        runBenchmark(numIterations, argv[0]);
    }
    else
    {
        glutDisplayFunc(display);
        glutReshapeFunc(reshape);
        glutMouseFunc(mouse);
        glutMotionFunc(motion);
        glutKeyboardFunc(key);
        glutSpecialFunc(special);
        glutIdleFunc(idle);

        atexit(cleanup);

        glutMainLoop();
    }

    if (psystem)
    {
        delete psystem;
    }

    cudaDeviceReset();
    exit(g_TotalErrors > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
}

Пример #8

0

Показать файл

Файл: particles.cpp Проект: drolfe00/CUDAVerificationkernels

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
#if defined(__linux__)
    setenv ("DISPLAY", ":0", 0);
#endif

    printf("%s Starting...\n\n", sSDKsample);

    printf("NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n");

    numParticles = NUM_PARTICLES;
    uint gridDim = GRID_SIZE;
    numIterations = 0;

    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **) argv, "n"))
        {
            numParticles = getCmdLineArgumentInt(argc, (const char **)argv, "n");
        }

        if (checkCmdLineFlag(argc, (const char **) argv, "grid"))
        {
            gridDim = getCmdLineArgumentInt(argc, (const char **) argv, "grid");
        }

        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "file", &g_refFile);
            fpsLimit = frameCheckNumber;
            numIterations = 1;
        }
    }

    gridSize.x = gridSize.y = gridSize.z = gridDim;
    printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);
    printf("particles: %d\n", numParticles);

    bool benchmark = checkCmdLineFlag(argc, (const char **) argv, "benchmark") != 0;

    if (checkCmdLineFlag(argc, (const char **) argv, "i"))
    {
        numIterations = getCmdLineArgumentInt(argc, (const char **) argv, "i");
    }

    if (g_refFile)
    {
        cudaInit(argc, argv);
    }
    else
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "device"))
        {
            printf("[%s]\n", argv[0]);
            printf("   Does not explicitly support -device=n in OpenGL mode\n");
            printf("   To use -device=n, the sample must be running w/o OpenGL\n\n");
            printf(" > %s -device=n -file=<*.bin>\n", argv[0]);
            printf("exiting...\n");
            exit(EXIT_SUCCESS);
        }

        initGL(&argc, argv);
        cudaGLInit(argc, argv);
    }

    initParticleSystem(numParticles, gridSize, g_refFile==NULL);
    initParams();

    if (!g_refFile)
    {
        initMenus();
    }

    if (benchmark || g_refFile)
    {
        if (numIterations <= 0)
        {
            numIterations = 300;
        }

        runBenchmark(numIterations, argv[0]);
    }
    else
    {
        glutDisplayFunc(display);
        glutReshapeFunc(reshape);
        glutMouseFunc(mouse);
        glutMotionFunc(motion);
        glutKeyboardFunc(key);
        glutSpecialFunc(special);
        glutIdleFunc(idle);

        glutCloseFunc(cleanup);

        glutMainLoop();
    }

    if (psystem)
    {
        delete psystem;
    }

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
    exit(g_TotalErrors > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
}

Пример #9

0

Показать файл

Файл: bitecoin_miner.cpp Проект: o0rebelious0o/IC_HPCE_6b

    void MakeBid(
        std::shared_ptr<Packet_ServerBeginRound> roundInfo,   // Information about this particular round
        const std::shared_ptr<Packet_ServerRequestBid> request,     // The specific request we received
        double period,                                                                          // How long this bidding period will last
        double skewEstimate,                                                                // An estimate of the time difference between us and the server (positive -> we are ahead)
        std::vector<uint32_t> &solution,                                                // Our vector of indices describing the solution
        uint32_t *pProof                                                                        // Will contain the "proof", which is just the value
    )
    {
        double tSafetyMargin = 0.5; // accounts for uncertainty in network conditions
        /* This is when the server has said all bids must be produced by, plus the
            adjustment for clock skew, and the safety margin
        */
        double tFinish = request->timeStampReceiveBids * 1e-9 + skewEstimate - tSafetyMargin;

        Log(Log_Verbose, "MakeBid - start, total period=%lg.", period);

        /*
            We will use this to track the best solution we have created so far.
        */
        roundInfo->maxIndices = 4;
        std::vector<uint32_t> bestSolution(roundInfo->maxIndices);
        std::vector<uint32_t> gpuBestSolution(roundInfo->maxIndices);
        bigint_t bestProof, gpuBestProof;

        wide_ones(BIGINT_WORDS, bestProof.limbs);

        // Incorporate the existing block chain data - in a real system this is the
        // list of transactions we are signing. This is the FNV hash:
        // http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
        hash::fnv<64> hasher;
        uint64_t chainHash = hasher((const char *)&roundInfo->chainData[0], roundInfo->chainData.size());

        bigint_t x;
        wide_x_init(&x.limbs[0], uint32_t(0), roundInfo->roundId, roundInfo->roundSalt, chainHash);

        std::vector<uint32_t> indices(roundInfo->maxIndices);

        //Define TBB arrays
        uint32_t *parallel_Indices = (uint32_t *)malloc(sizeof(uint32_t) * TBB_PARALLEL_COUNT);
        uint32_t *parallel_BestSolutions = (uint32_t *)malloc(sizeof(uint32_t) * TBB_PARALLEL_COUNT * roundInfo->maxIndices);
        uint32_t *parallel_Proofs = (uint32_t *)malloc(sizeof(uint32_t) * 8 * TBB_PARALLEL_COUNT);
        uint32_t *parallel_BestProofs = (uint32_t *)malloc(sizeof(uint32_t) * 8 * TBB_PARALLEL_COUNT);

        //Define GPU arrays
        uint32_t *d_ParallelBestSolutions;

        checkCudaErrors(cudaMalloc((void **)&d_ParallelBestSolutions, sizeof(uint32_t) * CUDA_DIM * CUDA_DIM * roundInfo->maxIndices));

        checkCudaErrors(cudaMemcpy(d_hashConstant, &roundInfo->c[0], sizeof(uint32_t) * 4, cudaMemcpyHostToDevice));

        unsigned gpuTrials = 0;
        unsigned cpuTrials = 0;

        unsigned maxNum = uint32_t(0xFFFFFFFF);

        auto runGPU = [ = , &gpuTrials]
        {
            cudaInit(CUDA_DIM, d_ParallelBestProofs);

            do
            {
                cudaIteration(d_ParallelIndices, d_ParallelProofs, d_ParallelBestProofs, d_ParallelBestSolutions, x, d_hashConstant, roundInfo->hashSteps, CUDA_DIM, gpuTrials, CUDA_TRIALS, roundInfo->maxIndices);

                gpuTrials += CUDA_TRIALS;
            }
            while ((tFinish - now() * 1e-9) > 0);
        };

        std::thread runGPUThread(runGPU);

        auto tbbInitial = [ = ](unsigned i)
        {
            bigint_t ones;
            wide_ones(8, ones.limbs);
            wide_copy(8, &parallel_BestProofs[i * 8], ones.limbs);
        };

        tbb::parallel_for<unsigned>(0, TBB_PARALLEL_COUNT, tbbInitial);

        do
        {
            auto tbbIteration = [ = ](unsigned i)
            {
                uint32_t index = maxNum - (TBB_PARALLEL_COUNT<<2) - cpuTrials + (i<<1);

                bigint_t proof = tbbHash(roundInfo.get(),
                                         index,
                                         x);

                wide_copy(8, &parallel_Proofs[i * 8], proof.limbs);
                parallel_Indices[i] = index;
            };

            tbb::parallel_for<unsigned>(0, TBB_PARALLEL_COUNT, tbbIteration);

            auto tbbCrossHash = [ = ](unsigned i)
            {
                for (unsigned xorStride = 1; xorStride < TBB_PARALLEL_COUNT >> 2; xorStride++)
                {
                    if (i + (roundInfo->maxIndices * xorStride) < TBB_PARALLEL_COUNT)
                    {
                        bigint_t candidateBestProof;
                        wide_copy(8, candidateBestProof.limbs, &parallel_Proofs[i * 8]);

                        for (unsigned indexNum = 1; indexNum < roundInfo->maxIndices; indexNum++)
                        {
                            wide_xor(8, candidateBestProof.limbs, candidateBestProof.limbs, &parallel_Proofs[(i + (indexNum * xorStride)) * 8]);
                        }

                        if (wide_compare(8, candidateBestProof.limbs, &parallel_BestProofs[i * 8]) < 0)
                        {
                            wide_copy(8, &parallel_BestProofs[i * 8], candidateBestProof.limbs);
                            for (unsigned ID = 0; ID < roundInfo->maxIndices; ID++)
                            {
                                parallel_BestSolutions[(i * roundInfo->maxIndices) + ID] = parallel_Indices[i + (ID * xorStride)];
                            }
                        }
                    }
                }
            };

            tbb::parallel_for<unsigned>(0, TBB_PARALLEL_COUNT, tbbCrossHash);

            cpuTrials += TBB_PARALLEL_COUNT;
        }
        while ((tFinish - now() * 1e-9) > 0);

        runGPUThread.join();

        auto reduceGPU = [ = , &gpuBestSolution, &gpuBestProof]
        {
            cudaParallelReduce(CUDA_DIM, roundInfo->maxIndices, d_ParallelBestProofs, d_ParallelBestSolutions, &gpuBestSolution[0], gpuBestProof.limbs);
        };

        std::thread reduceThread(reduceGPU);

        //TBB
        for (int toDo = TBB_PARALLEL_COUNT / 2; toDo >= 1; toDo >>= 1)
        {
            auto tbbReduce = [ = ](unsigned i)
            {
                if (wide_compare(BIGINT_WORDS, &parallel_BestProofs[(i + toDo) * 8], &parallel_BestProofs[i * 8]) < 0)
                {
                    wide_copy(8, &parallel_BestProofs[i * 8], &parallel_BestProofs[(i + toDo) * 8]);
                    wide_copy(roundInfo->maxIndices, &parallel_BestSolutions[i * roundInfo->maxIndices], &parallel_BestSolutions[(i + toDo) * roundInfo->maxIndices]);
                }
            };

            tbb::parallel_for<unsigned>(0, toDo, tbbReduce);
        }

        wide_copy(BIGINT_WORDS, bestProof.limbs, &parallel_BestProofs[0]);
        wide_copy(roundInfo->maxIndices, &bestSolution[0], &parallel_BestSolutions[0]);

        reduceThread.join();

        if (wide_compare(BIGINT_WORDS, gpuBestProof.limbs, bestProof.limbs) < 0)
        {
            Log(Log_Verbose, "Accepting GPU Solution");
            wide_copy(8, bestProof.limbs, gpuBestProof.limbs);
            wide_copy(roundInfo->maxIndices, &bestSolution[0], &gpuBestSolution[0]);
        }

        solution = bestSolution;
        wide_copy(BIGINT_WORDS, pProof, bestProof.limbs);

        free(parallel_Indices);
        free(parallel_BestSolutions);
        free(parallel_Proofs);
        free(parallel_BestProofs);

        checkCudaErrors(cudaFree(d_ParallelBestSolutions));

        Log(Log_Verbose, "MakeBid - finish. Total trials %d, cpu: %d, gpu %d", cpuTrials + gpuTrials, cpuTrials, gpuTrials);
    }