Beispiel #1
0
BpdeMainWindow::BpdeMainWindow(RInside& R, const QString& sourceFile, QObject *parent)
    : R(R),
    sourceFile(sourceFile), x(), y(), H(), solver(NULL)
{
    tempfile = QString::fromStdString(Rcpp::as<std::string>(R.parseEval("tfile <- tempfile()")));
    svgfile = QString::fromStdString(Rcpp::as<std::string>(R.parseEval("sfile <- tempfile()")));

    QWidget *window = new QWidget;
    window->setWindowTitle("BpdeGUI");
    setCentralWidget(window);

    QGroupBox *runParameters = new QGroupBox("Параметры запуска");
    openMPEnabled = new QRadioButton("&OpenMP");
    openClEnabled = new QRadioButton("&OpenCL");

    openMPEnabled->setChecked(true);

    connect(openMPEnabled, SIGNAL(clicked()), this, SLOT(loadSource()));
    connect(openClEnabled, SIGNAL(clicked()), this, SLOT(loadSource()));

    QVBoxLayout *vbox = new QVBoxLayout;
    vbox->addWidget(openMPEnabled);
    vbox->addWidget(openClEnabled);


    QLabel *threadsLabel = new QLabel("Количество потоков");
    threadsLineEdit = new QLineEdit("4");
    QHBoxLayout *threadNumber = new QHBoxLayout;
    threadNumber->addWidget(threadsLabel);
    threadNumber->addWidget(threadsLineEdit);

    QHBoxLayout *deviceLayout = new QHBoxLayout;
    QLabel *deviceLabel = new QLabel("Устройство");
    deviceComboBox = new QComboBox();

    scanDevices();
    for (std::vector<cl::Device>::iterator it = devices.begin(); it != devices.end(); it++)
        deviceComboBox->addItem((*it).getInfo<CL_DEVICE_NAME>().c_str());

    connect(deviceComboBox, SIGNAL(currentIndexChanged(int)), this, SLOT(loadSource()));

    deviceLayout->addWidget(deviceLabel);
    deviceLayout->addWidget(deviceComboBox);

    QHBoxLayout* runLayout = new QHBoxLayout;
    runButton = new QPushButton("Начать вычисления", this);
    qDebug() << "Connect : " <<
    connect(runButton, SIGNAL(clicked()), this, SLOT(solve()));
    runLayout->addWidget(runButton);

    QVBoxLayout* ulLayout = new QVBoxLayout;
    ulLayout->addLayout(vbox);
    ulLayout->addLayout(threadNumber);
    ulLayout->addLayout(deviceLayout);
    ulLayout->addLayout(runLayout);

    runParameters->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed);
    runParameters->setLayout(ulLayout);

    QButtonGroup *kernelGroup = new QButtonGroup;
    kernelGroup->addButton(openMPEnabled, 0);
    kernelGroup->addButton(openClEnabled, 1);

    QGroupBox *solveParamBox = new QGroupBox("Настройки вычислительного метода");

    QHBoxLayout *iterationsLayout = new QHBoxLayout;
    QHBoxLayout *stepLayout = new QHBoxLayout;
    QLabel *sourceFileLabel = new QLabel("SourceFile");
    sourceFileEdit = new QLineEdit(sourceFile);
    iterationsEdit = new QLineEdit("10000");
    stepEdit = new QLineEdit("3600");
    QLabel *iterationsLabel = new QLabel("Итерации");
    QLabel *stepLabel = new QLabel("Шаг            ");
    QHBoxLayout *exportLayout = new QHBoxLayout;
    exportImage = new QPushButton("Экспорт изотерм");
    export3D = new QPushButton("Экспорт 3D модели");
    connect(exportImage, SIGNAL(clicked()), this, SLOT(exportIsoterms()));
    connect(export3D, SIGNAL(clicked()), this, SLOT(export3DModel()));

    iterationsLayout->addWidget(iterationsLabel);
    iterationsLayout->addWidget(iterationsEdit);
    stepLayout->addWidget(stepLabel);
    stepLayout->addWidget(stepEdit);

    exportLayout->addWidget(exportImage);
    exportLayout->addWidget(export3D);

    svg = new QSvgWidget();
    loadSource();

    QVBoxLayout *solveParamLayout = new QVBoxLayout;
    solveParamLayout->addWidget(sourceFileLabel);
    solveParamLayout->addWidget(sourceFileEdit);
    solveParamLayout->addLayout(iterationsLayout);
    solveParamLayout->addLayout(stepLayout);
    solveParamLayout->addLayout(exportLayout);

    solveParamBox->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed);
    solveParamBox->setLayout(solveParamLayout);

    QHBoxLayout *upperlayout = new QHBoxLayout;
    upperlayout->addWidget(runParameters);
    upperlayout->addWidget(solveParamBox);

    QHBoxLayout *lowerlayout = new QHBoxLayout;
    lowerlayout->addWidget(svg);

    QVBoxLayout *outer = new QVBoxLayout;
    outer->addLayout(upperlayout);
    outer->addLayout(lowerlayout);
    window->setLayout(outer);
}
Beispiel #2
0
void SplatRenderer::init(QGLWidget *qglw)
{
  mIsSupported = true;
  if(qglw)
    qglw->makeCurrent();
  glewInit();

  const char* rs = (const char*)glGetString(GL_RENDERER);
  QString rendererString("");
  if(rs)
    rendererString = QString(rs);
  mWorkaroundATI = rendererString.startsWith("ATI") || rendererString.startsWith("AMD");
  // FIXME: maybe some recent HW correctly supports floating point blending...
  mBuggedAtiBlending = rendererString.startsWith("ATI") || rendererString.startsWith("AMD");

  if (mWorkaroundATI && mDummyTexId==0)
  {
    glActiveTexture(GL_TEXTURE0);
    glGenTextures(1,&mDummyTexId);
    glBindTexture(GL_TEXTURE_2D, mDummyTexId);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, 4, 4, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, 0);
  }

  // let's check the GPU capabilities
  mSupportedMask = DEPTH_CORRECTION_BIT | BACKFACE_SHADING_BIT;
  if (!QGLFramebufferObject::hasOpenGLFramebufferObjects ())
  {
    std::cout << "SplatRenderer: error OpenGL frame buffer objects are not supported. (please, try to update your drivers)\n";
    mIsSupported = false;
    return;
  }
  if (GLEW_ARB_texture_float)
    mSupportedMask |= FLOAT_BUFFER_BIT;
  else
    std::cout << "SplatRenderer: warning floating point textures are not supported.\n";

  if (GLEW_ARB_draw_buffers && (!mBuggedAtiBlending))
    mSupportedMask |= DEFERRED_SHADING_BIT;
  else
    std::cout << "SplatRenderer: warning deferred shading is not supported.\n";

  if (GLEW_ARB_shadow)
    mSupportedMask |= OUTPUT_DEPTH_BIT;
  else
    std::cerr << "SplatRenderer: warning copy of the depth buffer is not supported.\n";

  mFlags = mFlags & mSupportedMask;

  // load shader source
  mShaderSrcs[0] = loadSource("VisibilityVP","Raycasting.glsl");
  mShaderSrcs[1] = loadSource("VisibilityFP","Raycasting.glsl");
  mShaderSrcs[2] = loadSource("AttributeVP","Raycasting.glsl");
  mShaderSrcs[3] = loadSource("AttributeFP","Raycasting.glsl");
  mShaderSrcs[4] = "";
  mShaderSrcs[5] = loadSource("Finalization","Finalization.glsl");

  mCurrentPass = 2;
  mBindedPass = -1;
  mIsInitialized = true;
  GL_TEST_ERR
}
Beispiel #3
0
int main(void)
{
    GLFWwindow* window;

    /* Initialize the library */
    if (!glfwInit())
        return -1;

    /* Create a windowed mode window and its OpenGL context */
    window = glfwCreateWindow(800, 450, "Mah Base Project", NULL, NULL);
    if (!window)
    {
        glfwTerminate();
        return -1;
    }

    /* Make the window's context current */
    glfwMakeContextCurrent(window);

    glewExperimental = GL_TRUE;
    glewInit();
    // Create Vertex Array Object
    GLuint vao;
    glGenVertexArrays(1, &vao);
    glBindVertexArray(vao);
    // Create a Vertex Buffer Object and copy the vertex data to it
    GLuint vbo;
    glGenBuffers(1, &vbo);
    GLfloat vertices[] = {
        0.0f, 0.5f,
        0.5f, -0.5f,
        -0.5f, -0.5f
    };
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);

    const char* vertexSource = loadSource("VertexSource.glsl");

    const char* fragmentSource = loadSource("FragmentSource.glsl");

    GLint status = GL_TRUE;

    GLuint vertexShader = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(vertexShader, 1, &vertexSource, NULL);
    glCompileShader(vertexShader);
    glGetShaderiv(vertexShader, GL_COMPILE_STATUS, &status);
    // cout<< "Vertex" << status << endl;

    // Create and compile the fragment shader
    GLuint fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
    glShaderSource(fragmentShader, 1, &fragmentSource, NULL);
    glCompileShader(fragmentShader);
    glGetShaderiv(fragmentShader, GL_COMPILE_STATUS, &status);
    // cout<< "Fragment" << status << endl;

    // Link the vertex and fragment shader into a shader program
    GLuint shaderProgram = glCreateProgram();
    glAttachShader(shaderProgram, vertexShader);
    glAttachShader(shaderProgram, fragmentShader);
    glBindFragDataLocation(shaderProgram, 0, "outColor");
    glLinkProgram(shaderProgram);
    glUseProgram(shaderProgram);

    // Specify the layout of the vertex data
    GLint posAttrib = glGetAttribLocation(shaderProgram, "position");
    glEnableVertexAttribArray(posAttrib);
    glVertexAttribPointer(posAttrib, 2, GL_FLOAT, GL_FALSE, 0, 0);


    /* Loop until the user closes the window */
    while (!glfwWindowShouldClose(window))
    {
        /* Render here */
        // Clear the screen to black
        glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
        glClear(GL_COLOR_BUFFER_BIT);
        // Draw a triangle from the 3 vertices
        glDrawArrays(GL_TRIANGLES, 0, 3);
        // Swap buffers

        /* Swap front and back buffers */
        glfwSwapBuffers(window);

        /* Poll for and process events */
        glfwPollEvents();
    }

    glfwTerminate();
    return 0;
}
Beispiel #4
0
/*************************************************************************
 * uLoad
 *   "Parse" a uCode source program (using flex to scan it).
 *************************************************************************/
int uLoad(uINSTRUCTION code[], int label[])
{
  uINSTRUCTION *currInstr;
  int codeLine = 0;

  int i, lab;

  do {
    currInstr = &(code[codeLine]);
    clearInstruction(currInstr);
    codeFlag = valid;
    loadOpcode(currInstr);
    if (codeFlag == valid) {
      switch (yytok) {
        case tHLT:
          break;

        case tRD:
        case tRDF:
        case tRDS:
          loadTarget(currInstr, 0);
          break;

        case tWRT:
        case tWRTLN:
          loadSource(currInstr, 0);
          break;

        case tWRTS:
        case tWRTLNS:
          break;

        case tMOV  :
        case tNEG  :
        case tNEGF :
        case tCASTI:
        case tCASTF:
          loadSource(currInstr, 0);
          loadTarget(currInstr, 1);
          break;

        case tADD :
        case tSUB :
        case tMUL :
        case tDIV :
        case tMOD :
        case tADDF :
        case tSUBF :
        case tMULF :
        case tDIVF :
          loadSource(currInstr, 0);
          loadSource(currInstr, 1);
          loadTarget(currInstr, 2);
          break;

        case tPUSH:
          loadSource(currInstr, 0);
          break;

        case tPOP :
          loadTarget(currInstr, 0);
          break;

        case tNEGS:
        case tADDS:
        case tSUBS:
        case tMULS:
        case tDIVS:
        case tMODS:
        case tNEGSF:
        case tADDSF:
        case tSUBSF:
        case tMULSF:
        case tDIVSF:
        case tCASTSI:
        case tCASTSF:
          break;

        /* Label instruction must be handled at load time */
        case tLAB :
        {
          sscanf(yytext, "%*[Ll]%d:", &lab);

          currInstr->Operand[0].Mode  = mLabel;
          setOperandValue(&(currInstr->Operand[0]), lab); 
          validateLabel(lab);

          if (codeFlag == valid) {
            if (label[lab] != undefinedLabel) {
              sprintf(loadMessage, "L%d REDEFINED", lab);
              codeFlag = badLabel;
            } else {
              label[lab] = codeLine;
            }
          }

          break;
        }
    
        case tANDS:
        case tORS:
        case tNOTS:
          break;

        case tCMPEQS:
        case tCMPGES:
        case tCMPGTS:
        case tCMPLES:
        case tCMPLTS:
        case tCMPNES:
        case tCMPEQSF:
        case tCMPGESF:
        case tCMPGTSF:
        case tCMPLESF:
        case tCMPLTSF:
        case tCMPNESF:
          break;

        case tBRTS:
        case tBRFS:
          loadLabel(currInstr, 0);
          break;

        case tBR  :
          loadLabel(currInstr, 0);
          break;

        case tBEQ :
        case tBGE :
        case tBGT :
        case tBLE :
        case tBLT :
        case tBNE :
        case tBEQF :
        case tBGEF :
        case tBGTF :
        case tBLEF :
        case tBLTF :
        case tBNEF :
          loadSource(currInstr, 0);
          loadSource(currInstr, 1);
          loadLabel (currInstr, 2);
          break;

        case tCALL:
          loadLabel(currInstr, 0);
          break;

        case tRET :
          break;

        case tEOF :
          break;

        case tPRTS:
        case tPRTR:
          break;

        default:
          perror(">>> INTERNAL ERROR IN LOAD_uCODE <<<");
          exit(1);
      }

    loadEnd();
  }


    /* Print an error message if an error occured */
    if (codeFlag != valid) {
      loadError(loadText, loadLine, loadMessage);
      switch (codeFlag) {
        case invalidOpcode:
          break;

        case invalidForm:
          printCorrectForm(currInstr->Opcode);
          break;

        case invalidRegister:
          fprintf(stderr, "\n   VALID RegisterS RANGE FROM 0 to ");
          fprintf(stderr, "%d", registerCount);
          fprintf(stderr, "\n");
          break;

        case invalidLabel:
          fprintf(stderr, "\n   VALID LABELS RANGE FROM 0 to ");
          fprintf(stderr, "%d", labelCount);
          fprintf(stderr, "\n");
          break;

        case badLabel:
        case invalidString:
          break;

        default:
          perror(">>> INTERNAL ERROR IN uLOAD <<<");
          exit(1);
      }
    }

    /* Process debugging information and update counters */
    fillInformation(currInstr, loadText, loadLine);

    loadText[0] = '\0';
    loadLine++;
    codeLine++;

    if (codeLine >= codeSize) {
      fprintf(stderr, ">>> CODE MEMORY OVERFLOW <<<\n");
      exit(1);
    }
  } while (yytok != tEOF);


  /* Resolve labels if source was loaded successfully */
  if (loadFlag == valid) {
    for (i = 0; i < labelCount; i++) {
      lab = label[i];

      while ((lab < 0) && (lab != undefinedLabel)) {
        label[i] = label[-label[i]];
        lab = label[i];
      }
    }
  }

  return (loadFlag == valid) ?1 :0;
}
int CommandGenerate::execute(const std::vector<std::string>& p_args) {
	if(p_args.size() < 10) {
		help();
		return -1;
	}

	unsigned int platformId = atol(p_args[1].c_str());
	unsigned int deviceId = atol(p_args[2].c_str());
	unsigned int staggerSize = atol(p_args[3].c_str());
	unsigned int threadsNumber = atol(p_args[4].c_str());
	unsigned int hashesNumber = atol(p_args[5].c_str());
	unsigned int nonceSize = PLOT_SIZE * staggerSize;

	std::cerr << "Threads number: " << threadsNumber << std::endl;
	std::cerr << "Hashes number: " << hashesNumber << std::endl;

	unsigned int numjobs = (p_args.size() - 5)/4;
	std::cerr << numjobs << " plot(s) to do." << std::endl;
	unsigned int staggerMbSize = staggerSize / 4;
	std::cerr << "Non-GPU memory usage: " << staggerMbSize*numjobs << "MB" << std::endl;
	
	std::vector<std::string> paths(numjobs);
	std::vector<std::ofstream *> out_files(numjobs);
	std::vector<unsigned long long> addresses(numjobs);
	std::vector<unsigned long long> startNonces(numjobs);
	std::vector<unsigned long long> endNonces(numjobs);
	std::vector<unsigned int> noncesNumbers(numjobs);
	std::vector<unsigned char*> buffersCpu(numjobs);
	std::vector<bool> saving_thread_flags(numjobs);
	std::vector<std::future<void>> save_threads(numjobs);
	unsigned long long maxNonceNumber = 0;
	unsigned long long totalNonces = 0;

	int returnCode = 0;

	try {
		for (unsigned int i = 0; i < numjobs; i++) {
			std::cerr << "----" << std::endl;
			std::cerr << "Job number " << i << std::endl;
			unsigned int argstart = 6 + i*4;
			paths[i] = std::string(p_args[argstart]);
			addresses[i] = strtoull(p_args[argstart+1].c_str(), NULL, 10);
			startNonces[i] = strtoull(p_args[argstart+2].c_str(), NULL, 10);
			noncesNumbers[i] = atol(p_args[argstart+3].c_str());
			maxNonceNumber = std::max(maxNonceNumber, (long long unsigned int)noncesNumbers[i]);
			totalNonces += noncesNumbers[i];

			std::ostringstream outFile;
			outFile << paths[i] << "/" << addresses[i] << "_" << startNonces[i] << "_" << \
				noncesNumbers[i] << "_" << staggerSize;
			std::ios_base::openmode file_mode = std::ios::out | std::ios::binary | std::ios::trunc;
			out_files[i] = new std::ofstream(outFile.str(), file_mode);
			assert(out_files[i]);

			if(noncesNumbers[i] % staggerSize != 0) {
				noncesNumbers[i] -= noncesNumbers[i] % staggerSize;
				noncesNumbers[i] += staggerSize;
			}

			endNonces[i] = startNonces[i] + noncesNumbers[i];
			unsigned int noncesGbSize = noncesNumbers[i] / 4 / 1024;
			std::cerr << "Path: " << outFile.str() << std::endl;
			std::cerr << "Nonces: " << startNonces[i] << " to " << endNonces[i] << " (" << noncesGbSize << " GB)" << std::endl;
			std::cerr << "Creating CPU buffer" << std::endl;
			buffersCpu[i] = new unsigned char[nonceSize];
			if(!buffersCpu[i]) {
				throw std::runtime_error("Unable to create the CPU buffer (probably out of host memory.)");
			}
			saving_thread_flags[i] = false;
			std::cerr << "----" << std::endl;
		}

		cl_platform_id platforms[4];
		cl_uint platformsNumber;
		cl_device_id devices[32];
		cl_uint devicesNumber;
		cl_context context = 0;
		cl_command_queue commandQueue = 0;
		cl_mem bufferGpuGen = 0;
		cl_mem bufferGpuScoops = 0;
		cl_program program = 0;
		cl_kernel kernelStep1 = 0;
		cl_kernel kernelStep2 = 0;
		cl_kernel kernelStep3 = 0;

		int error;

		std::cerr << "Retrieving OpenCL platforms" << std::endl;
		error = clGetPlatformIDs(4, platforms, &platformsNumber);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to retrieve the OpenCL platforms");
		}

		if(platformId >= platformsNumber) {
			throw std::runtime_error("No platform found with the provided id");
		}

		std::cerr << "Retrieving OpenCL GPU devices" << std::endl;
		error = clGetDeviceIDs(platforms[platformId], CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, 32, devices, &devicesNumber);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to retrieve the OpenCL devices");
		}

		if(deviceId >= devicesNumber) {
			throw std::runtime_error("No device found with the provided id");
		}

		std::cerr << "Creating OpenCL context" << std::endl;
		context = clCreateContext(0, 1, &devices[deviceId], NULL, NULL, &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL context");
		}

		std::cerr << "Creating OpenCL command queue" << std::endl;
		commandQueue = clCreateCommandQueue(context, devices[deviceId], 0, &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL command queue");
		}

		std::cerr << "Creating OpenCL GPU generation buffer" << std::endl;
		bufferGpuGen = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uchar) * GEN_SIZE * staggerSize, 0, &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL GPU generation buffer");
		}

		std::cerr << "Creating OpenCL GPU scoops buffer" << std::endl;
		bufferGpuScoops = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_uchar) * nonceSize, 0, &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL GPU scoops buffer");
		}

		std::cerr << "Creating OpenCL program" << std::endl;
		std::string source = loadSource("kernel/nonce.cl");
		const char* sources[] = {source.c_str()};
		size_t sourcesLength[] = {source.length()};
		program = clCreateProgramWithSource(context, 1, sources, sourcesLength, &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL program");
		}

		std::cerr << "Building OpenCL program" << std::endl;
		error = clBuildProgram(program, 1, &devices[deviceId], "-I kernel", 0, 0);
		if(error != CL_SUCCESS) {
			size_t logSize;
			clGetProgramBuildInfo(program, devices[deviceId], CL_PROGRAM_BUILD_LOG, 0, 0, &logSize);

			char* log = new char[logSize];
			clGetProgramBuildInfo(program, devices[deviceId], CL_PROGRAM_BUILD_LOG, logSize, (void*)log, 0);
			std::cerr << log << std::endl;
			delete[] log;

			throw OpenclError(error, "Unable to build the OpenCL program");
		}

		std::cerr << "Creating OpenCL step1 kernel" << std::endl;
		kernelStep1 = clCreateKernel(program, "nonce_step1", &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL kernel");
		}

		std::cerr << "Setting OpenCL step1 kernel static arguments" << std::endl;
		error = clSetKernelArg(kernelStep1, 2, sizeof(cl_mem), (void*)&bufferGpuGen);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to set the OpenCL kernel arguments");
		}

		std::cerr << "Creating OpenCL step2 kernel" << std::endl;
		kernelStep2 = clCreateKernel(program, "nonce_step2", &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL kernel");
		}

		std::cerr << "Setting OpenCL step2 kernel static arguments" << std::endl;
		error = clSetKernelArg(kernelStep2, 1, sizeof(cl_mem), (void*)&bufferGpuGen);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to set the OpenCL kernel arguments");
		}

		std::cerr << "Creating OpenCL step3 kernel" << std::endl;
		kernelStep3 = clCreateKernel(program, "nonce_step3", &error);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to create the OpenCL kernel");
		}

		std::cerr << "Setting OpenCL step3 kernel static arguments" << std::endl;
		error = clSetKernelArg(kernelStep3, 0, sizeof(cl_uint), (void*)&staggerSize);
		error = clSetKernelArg(kernelStep3, 1, sizeof(cl_mem), (void*)&bufferGpuGen);
		error = clSetKernelArg(kernelStep3, 2, sizeof(cl_mem), (void*)&bufferGpuScoops);
		if(error != CL_SUCCESS) {
			throw OpenclError(error, "Unable to set the OpenCL kernel arguments");
		}

		size_t globalWorkSize = staggerSize;
		size_t localWorkSize = (staggerSize < threadsNumber) ? staggerSize : threadsNumber;
		time_t startTime = time(0);
		unsigned int totalNoncesCompleted = 0;
		for (unsigned long long nonce_ordinal = 0; nonce_ordinal < maxNonceNumber; nonce_ordinal += staggerSize) {
			for (unsigned int jobnum = 0; jobnum < paths.size(); jobnum += 1) {
				unsigned long long nonce = startNonces[jobnum] + nonce_ordinal;
				if (nonce > endNonces[jobnum]) {
				  break;
				}

				std::cout << "Running with start nonce " << nonce << std::endl;
				// Is a cl_ulong always an unsigned long long?
				unsigned int error = 0;
				error = clSetKernelArg(kernelStep1, 0, sizeof(cl_ulong), (void*)&addresses[jobnum]);
				if(error != CL_SUCCESS) {
					throw OpenclError(error, "Unable to set the OpenCL step1 kernel arguments");
				}
				error = clSetKernelArg(kernelStep1, 1, sizeof(cl_ulong), (void*)&nonce);
				if(error != CL_SUCCESS) {
					throw OpenclError(error, "Unable to set the OpenCL step1 kernel arguments");
				}

				error = clEnqueueNDRangeKernel(commandQueue, kernelStep1, 1, 0, &globalWorkSize, &localWorkSize, 0, 0, 0);
				if(error != CL_SUCCESS) {
					throw OpenclError(error, "Error in step1 kernel launch");
				}

				unsigned int hashesSize = hashesNumber * HASH_SIZE;
				for(int hashesOffset = PLOT_SIZE ; hashesOffset > 0 ; hashesOffset -= hashesSize) {
					error = clSetKernelArg(kernelStep2, 0, sizeof(cl_ulong), (void*)&nonce);
					error = clSetKernelArg(kernelStep2, 2, sizeof(cl_uint), (void*)&hashesOffset);
					error = clSetKernelArg(kernelStep2, 3, sizeof(cl_uint), (void*)&hashesNumber);
					if(error != CL_SUCCESS) {
						throw OpenclError(error, "Unable to set the OpenCL step2 kernel arguments");
					}

					error = clEnqueueNDRangeKernel(commandQueue, kernelStep2, 1, 0, &globalWorkSize, &localWorkSize, 0, 0, 0);
					if(error != CL_SUCCESS) {
						throw OpenclError(error, "Error in step2 kernel launch");
					}

					error = clFinish(commandQueue);
					if(error != CL_SUCCESS) {
						throw OpenclError(error, "Error in step2 kernel finish");
					}
				}

				totalNoncesCompleted += staggerSize;
				double percent = 100.0 * (double)totalNoncesCompleted / totalNonces;
				time_t currentTime = time(0);
				double speed = (double)totalNoncesCompleted / difftime(currentTime, startTime) * 60.0;
				double estimatedTime = (double)(totalNonces - totalNoncesCompleted) / speed;
				std::cerr << "\r" << percent << "% (" << totalNoncesCompleted << "/" << totalNonces << " nonces)";
				std::cerr << ", " << speed << " nonces/minutes";
				std::cerr << ", ETA: " << ((int)estimatedTime / 60) << "h" << ((int)estimatedTime % 60) << "m" << ((int)(estimatedTime * 60.0) % 60) << "s";
				std::cerr << "...                    ";

				error = clEnqueueNDRangeKernel(commandQueue, kernelStep3, 1, 0, &globalWorkSize, &localWorkSize, 0, 0, 0);
				if(error != CL_SUCCESS) {
					throw OpenclError(error, "Error in step3 kernel launch");
				}

				if (saving_thread_flags[jobnum]) {
					save_threads[jobnum].wait(); // Wait for last job to finish
					saving_thread_flags[jobnum] = false;
				}

				error = clEnqueueReadBuffer(commandQueue, bufferGpuScoops, CL_TRUE, 0, sizeof(cl_uchar) * nonceSize, buffersCpu[jobnum], 0, 0, 0);
				if(error != CL_SUCCESS) {
					throw OpenclError(error, "Error in synchronous read");
				}
				saving_thread_flags[jobnum] = true;
				save_threads[jobnum] = std::async(std::launch::async, save_nonces, nonceSize, out_files[jobnum], buffersCpu[jobnum]);
			}
		}

		//Clean up
		for (unsigned int i = 0; i < paths.size(); i += 1) {
		  if (saving_thread_flags[i]) {
		    std::cerr << "waiting for final save to " << paths[i] << " to finish" << std::endl;
		    save_threads[i].wait();
		    saving_thread_flags[i] = false;
		    std::cerr << "done waiting for final save" << std::endl;
		    if (buffersCpu[i]) {
		      delete[] buffersCpu[i];
		    }
		  }
		}
		
		if(kernelStep3) { clReleaseKernel(kernelStep3); }
		if(kernelStep2) { clReleaseKernel(kernelStep2); }
		if(kernelStep1) { clReleaseKernel(kernelStep1); }
		if(program) { clReleaseProgram(program); }
		if(bufferGpuGen) { clReleaseMemObject(bufferGpuGen); }
		if(bufferGpuScoops) { clReleaseMemObject(bufferGpuScoops); }
		if(commandQueue) { clReleaseCommandQueue(commandQueue); }
		if(context) { clReleaseContext(context); }


		time_t currentTime = time(0);
		double elapsedTime = difftime(currentTime, startTime) / 60.0;
		double speed = (double)totalNonces / elapsedTime;
		std::cerr << "\r100% (" << totalNonces << "/" << totalNonces << " nonces)";
		std::cerr << ", " << speed << " nonces/minutes";
		std::cerr << ", " << ((int)elapsedTime / 60) << "h" << ((int)elapsedTime % 60) << "m" << ((int)(elapsedTime * 60.0) % 60) << "s";
		std::cerr << "                    " << std::endl;
	} catch(const OpenclError& ex) {
		std::cerr << "[ERROR] [" << ex.getCode() << "] " << ex.what() << std::endl;
		returnCode = -1;
	} catch(const std::exception& ex) {
		std::cerr << "[ERROR] " << ex.what() << std::endl;
		returnCode = -1;
	}
	return returnCode;
}