static int crypt_all_benchmark(int *pcount, struct db_salt *salt)
{
	size_t scalar_gws;
	size_t *lws = local_work_size ? &local_work_size : NULL;

	global_work_size = local_work_size ? ((*pcount + (v_width * local_work_size - 1)) / (v_width * local_work_size)) * local_work_size : *pcount / v_width;
	scalar_gws = global_work_size * v_width;

#if 0
	fprintf(stderr, "%s(%d) lws "Zu" gws "Zu" sgws "Zu" kpc %d/%d\n", __FUNCTION__, *pcount, local_work_size, global_work_size, scalar_gws, me->params.min_keys_per_crypt, me->params.max_keys_per_crypt);
#endif

	/// Copy data to gpu
	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, key_buf_size, inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu");

	/// Run kernels
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_init, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel");

	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_loop, 1, NULL, &global_work_size, lws, 0, NULL, NULL), "Run loop kernel");
	BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel");
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_loop, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run loop kernel");
	BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel");

	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_final, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[3]), "Run intermediate kernel");

	/// Read the result back
	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, sizeof(pbkdf2_out) * scalar_gws, output, 0, NULL, multi_profilingEvent[4]), "Copy result back");

	return *pcount;
}
Exemplo n.º 2
0
static int crypt_all_benchmark(int *pcount, struct db_salt *salt)
{
	int count = *pcount;
	size_t *lws = local_work_size ? &local_work_size : NULL;

	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE,
		0, insize, host_pass, 0, NULL, multi_profilingEvent[0]), "Copy memin");

	///Run the init kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], init_kernel, 1,
		NULL, &global_work_size, lws,
		0, NULL, multi_profilingEvent[1]), "Set ND range");

	///Run split kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1,
		NULL, &global_work_size, lws,
		0, NULL, NULL), "Set ND range");
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1,
		NULL, &global_work_size, lws,
		0, NULL, multi_profilingEvent[2]), "Set ND range");

	///Run the finish kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], finish_kernel, 1,
		NULL, &global_work_size, lws,
		0, NULL, multi_profilingEvent[3]), "Set ND range");

	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0,
		outsize, host_hash, 0, NULL, multi_profilingEvent[4]),
	    "Copy data back");

	return count;
}
static int crypt_all_benchmark(int *pcount, struct db_salt *salt)
{
    int count = *pcount;
    size_t scalar_gws;
    size_t *lws = local_work_size ? &local_work_size : NULL;

    global_work_size = local_work_size ? ((count + (v_width * local_work_size - 1)) / (v_width * local_work_size)) * local_work_size : count / v_width;
    scalar_gws = global_work_size * v_width;

#if 0
    fprintf(stderr, "%s(%d) lws "Zu" gws "Zu" sgws "Zu"\n", __FUNCTION__,
            *pcount, local_work_size, global_work_size, scalar_gws);
#endif
    /// Run kernels, no iterations for fast enumeration
    BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, key_buf_size, inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu");

    BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf1_init, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel");

    BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf1_loop, 1, NULL, &global_work_size, lws, 0, NULL, NULL), "Run loop kernel");
    BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf1_loop, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run loop kernel");

    BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf1_final, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[3]), "Run final kernel");

    BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, sizeof(pbkdf1_out) * scalar_gws, host_crack, 0, NULL, multi_profilingEvent[4]), "Copy result back");

    return count;
}
static int crypt_all_benchmark(int *pcount, struct db_salt *salt)
{
	int count = *pcount;
	size_t *lws = local_work_size ? &local_work_size : NULL;

	global_work_size = local_work_size ? (count + local_work_size - 1) / local_work_size * local_work_size : count;

#if 0
	printf("crypt_all_benchmark(%d)\n", count);
	printf("LWS = %d, GWS = %d\n",(int)local_work_size, (int)global_work_size);
#endif

	/// Copy data to gpu
	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0,
		global_work_size * sizeof(pass_t), host_pass, 0, NUUL,
		multi_profilingEvent[0]), "Copy data to gpu");

	/// Run kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1,
		NUUL, &global_work_size, lws, 0, NULL,
		multi_profilingEvent[1]), "Run kernel");
	BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish");


	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], split_kernel,
		1, NULL, &global_work_size, lws, 0, NULL,
		NULL), "Run split kernel");
	BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish");

	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], split_kernel,
		1, NULL, &global_work_size, lws, 0, NULL,
		multi_profilingEvent[2]), "Run split kernel");
	BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish");

	/// Read the result back
	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_FALSE, 0,
		global_work_size * sizeof(crack_t), host_crack, 0, NUUL,
		 multi_profilingEvent[3]), "Copy result back");

	/// Await completion of all the above
	BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish");

	return count;
}
Exemplo n.º 5
0
static int crypt_all_benchmark(int *pcount, struct db_salt *salt)
{
	int count = *pcount;
	int i;

	global_work_size = (count + local_work_size - 1) / local_work_size * local_work_size;

	BENCH_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_in, CL_FALSE,
		0, insize, host_pass, 0, NULL, &multi_profilingEvent[0]), "Copy memin");
	BENCH_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_salt, CL_FALSE,
		0, saltsize, host_salt, 0, NULL, &multi_profilingEvent[1]), "Copy memsalt");

	///Run the init kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], init_kernel, 1,
		NULL, &global_work_size, &local_work_size,
		0, NULL, &multi_profilingEvent[2]), "Set ND range");

	///Run split kernel
	for(i = 0; i < 3; i++)
	{
		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], crypt_kernel, 1,
			NULL, &global_work_size, &local_work_size,
			0, NULL, &multi_profilingEvent[split_events[i]]), "Set ND range");  //3, 4, 5
		BENCH_CLERROR(clFinish(queue[ocl_gpu_id]), "Error running loop kernel");
		opencl_process_event();
	}

	///Run the finish kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], finish_kernel, 1,
		NULL, &global_work_size, &local_work_size,
		0, NULL, &multi_profilingEvent[6]), "Set ND range");

	BENCH_CLERROR(clEnqueueReadBuffer(queue[ocl_gpu_id], mem_out, CL_FALSE, 0,
		outsize, host_hash, 0, NULL, &multi_profilingEvent[7]),
	    "Copy data back");

	///Await completion of all the above
	BENCH_CLERROR(clFinish(queue[ocl_gpu_id]), "clFinish error");

	return count;
}
Exemplo n.º 6
0
static int crypt_all_benchmark(int *pcount, struct db_salt *salt)
{
	int count = *pcount;
	size_t *lws = local_work_size ? &local_work_size : NULL;

	global_work_size = local_work_size ? (count + local_work_size - 1) / local_work_size * local_work_size : count;

	// Copy data to gpu
	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0,
		insize, inbuffer, 0, NULL, multi_profilingEvent[0]),
	        "Copy data to gpu");

	// Run 1st kernels
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], sevenzip_init, 1,
		NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]),
		"Run init kernel");

	// Warm-up run
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id],
		crypt_kernel, 1, NULL, &global_work_size, lws, 0,
	        NULL, NULL),
	        "Run loop kernel");

	// Loop kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id],
		crypt_kernel, 1, NULL, &global_work_size, lws, 0,
	        NULL, multi_profilingEvent[2]),
	        "Run loop kernel");

	// Read the result back
	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0,
		outsize, outbuffer, 0, NULL, multi_profilingEvent[3]),
	        "Copy result back");

	BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel");

	return count;
}