static int crypt_all(int *pcount, struct db_salt *salt)
{
	int count = *pcount;

	opencl_limit_gws(count);

#ifdef DEBUG
	printf("crypt_all(%d)\n", count);
	printf("LWS = %d, GWS = %d\n",(int)local_work_size, (int)global_work_size);
#endif

	/// Copy data to gpu
	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_in, CL_FALSE, 0,
		global_work_size * sizeof(pass_t), host_pass, 0, NUUL, NULL), "Copy data to gpu");
	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_salt,
		CL_FALSE, 0, sizeof(salt_t), host_salt, 0, NUUL, NULL),
	    "Copy salt to gpu");

	/// Run kernel
	HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], crypt_kernel, 1,
		NUUL, &global_work_size, &local_work_size, 0, NULL,
		profilingEvent), "Run kernel");
	HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "clFinish");

	/// Read the result back
	HANDLE_CLERROR(clEnqueueReadBuffer(queue[ocl_gpu_id], mem_out, CL_FALSE, 0,
		global_work_size * sizeof(crack_t), host_crack, 0, NUUL, NULL), "Copy result back");

	/// Await completion of all the above
	HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "clFinish");

	return count;
}
static int crypt_all(int *pcount, struct db_salt *salt)
{
	int i;
	const int count = *pcount;
	int loops = (host_salt->rounds + HASH_LOOPS - 1) / HASH_LOOPS;

	opencl_limit_gws(count);

#if 0
	printf("crypt_all(%d)\n", count);
	printf("LWS = %d, GWS = %d, loops=%d\n",(int)local_work_size, (int)global_work_size, loops);
#endif

	/// Copy data to gpu
	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0,
		global_work_size * sizeof(pass_t), host_pass, 0, NUUL,
		NULL), "Copy data to gpu");

	/// Run kernel
	HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1,
		NUUL, &global_work_size, &local_work_size, 0, NULL,
		NULL), "Run kernel");

	for(i = 0; i < loops; i++) {
		HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id],
		        split_kernel,
			1, NULL, &global_work_size, &local_work_size, 0, NULL,
			NULL), "Run split kernel");
		HANDLE_CLERROR(clFinish(queue[gpu_id]), "clFinish");
		opencl_process_event();
	}
	/// Read the result back
	HANDLE_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0,
		global_work_size * sizeof(crack_t), host_crack, 0, NUUL,
		 NULL), "Copy result back");

	return count;
}