/*
 * One-time set-up of this format on the selected OpenCL device.
 *
 * Creates the init, crypt and finish kernels from the pwsafe kernel source,
 * settles on a local work size (LWS) and a global work size (GWS), and
 * records both in self->params.  crypt_all is pointed at
 * crypt_all_benchmark while the find_best_*() helpers may run and is
 * switched to crypt_all just before returning.
 *
 * self: format descriptor whose params and methods are updated.
 */
static void init(struct fmt_main *self)
{
	cl_ulong maxsize;
	size_t selected_gws;

	opencl_init_opt("$JOHN/kernels/pwsafe_kernel.cl", ocl_gpu_id, NULL);

	/* One kernel object per processing stage; every creation is checked. */
	init_kernel = clCreateKernel(program[ocl_gpu_id], KERNEL_INIT_NAME, &ret_code);
	HANDLE_CLERROR(ret_code, "Error while creating init kernel");

	crypt_kernel = clCreateKernel(program[ocl_gpu_id], KERNEL_RUN_NAME, &ret_code);
	HANDLE_CLERROR(ret_code, "Error while creating crypt kernel");

	finish_kernel = clCreateKernel(program[ocl_gpu_id], KERNEL_FINISH_NAME, &ret_code);
	HANDLE_CLERROR(ret_code, "Error while creating finish kernel");

	/*
	 * Starting values: LWS is 1 when cpu() matches the device and 64
	 * otherwise; GWS starts at 0.
	 * NOTE(review): opencl_get_user_preferences() presumably overrides
	 * these from the user's configuration -- confirm.
	 */
	local_work_size = cpu(device_info[ocl_gpu_id]) ? 1 : 64;
	global_work_size = 0;
	opencl_get_user_preferences(CONFIG_NAME);

	//Initialize openCL tuning (library) for this format.
	opencl_init_auto_setup(STEP, ROUNDS_DEFAULT/8, 8, split_events,
		warn, &multi_profilingEvent[3], self, create_clobj, release_clobj,
		sizeof(pwsafe_pass), 0);

	/* Use the benchmark variant of crypt_all until the sizes are final. */
	self->methods.crypt_all = crypt_all_benchmark;

	/* Remember the GWS now; it is restored after the optional LWS search. */
	selected_gws = global_work_size;
	/* Note: we ask for the kernels' max sizes, not the device's! */
	/* maxsize ends up as the smallest limit among the three kernels. */
	maxsize = get_current_work_group_size(ocl_gpu_id, init_kernel);
	maxsize = MIN(get_current_work_group_size(ocl_gpu_id, crypt_kernel),
	              maxsize);
	maxsize = MIN(get_current_work_group_size(ocl_gpu_id, finish_kernel),
	              maxsize);

	/* Halve the LWS until it no longer exceeds that limit. */
	while (local_work_size > maxsize)
		local_work_size >>= 1;

	/* Current GWS if non-zero, otherwise the compile-time default. */
	self->params.max_keys_per_crypt = (global_work_size ? global_work_size: MAX_KEYS_PER_CRYPT);

	/*
	 * An LWS of 0 triggers a search with find_best_lws(), bracketed by
	 * create_clobj()/release_clobj() sized for max_keys_per_crypt.
	 */
	if (!local_work_size) {
		create_clobj(self->params.max_keys_per_crypt, self);
		find_best_lws(self, ocl_gpu_id);
		release_clobj();
	}
	/* Put back the GWS saved before the LWS handling above. */
	global_work_size = selected_gws;

	if (global_work_size)
		create_clobj(global_work_size, self);
	else
		//user chose to die of boredom
		/*
		 * GWS is still 0, so find_best_gws() is called instead.
		 * NOTE(review): presumably it sets global_work_size and
		 * creates the buffers itself -- confirm.
		 */
		find_best_gws(self, ocl_gpu_id);

	/* Publish the final sizes and switch to the regular crypt_all. */
	self->params.min_keys_per_crypt = local_work_size;
	self->params.max_keys_per_crypt = global_work_size;
	self->methods.crypt_all = crypt_all;

	/* Report the chosen sizes on stderr when verbosity is above 2. */
	if (options.verbosity > 2)
		fprintf(stderr, "Local worksize (LWS) %d, Global worksize (GWS) %d\n", (int)local_work_size, (int)global_work_size);
}
Example #2
0
/*
 * Upper bound for the local work size of crypt_kernel.
 *
 * Returns the smaller of the value reported by get_max_work_group_size()
 * for the selected device and the value reported by
 * get_current_work_group_size() for crypt_kernel.
 */
static size_t get_task_max_work_group_size(void){
    size_t max_available = get_max_work_group_size(ocl_gpu_id);
    /* Ask for the kernel limit once; the value serves both the test and the result. */
    size_t kernel_limit = get_current_work_group_size(ocl_gpu_id, crypt_kernel);

    return (max_available > kernel_limit) ? kernel_limit : max_available;
}
/*
 * Can be used to select a 'good' default lws size.
 *
 * use_local_memory:  non-zero when the limit should be derived from the
 *                    local memory size reported by get_local_memory_size().
 * local_memory_size: divisor applied to that local memory size (bytes
 *                    needed per work-item, presumably -- confirm with
 *                    callers).  Values <= 0 cannot be used as a divisor
 *                    and make the function fall back to the device limit.
 * crypt_kernel:      kernel whose own work-group limit caps the result.
 *
 * Returns the candidate limit, capped at the value reported by
 * get_current_work_group_size() for crypt_kernel.
 */
size_t common_get_task_max_work_group_size(int use_local_memory,
	int local_memory_size, cl_kernel crypt_kernel) {

	size_t max_available;
	size_t kernel_limit;

	/*
	 * Guard the division: a zero divisor is undefined behaviour and a
	 * negative one would be converted to a huge unsigned value.
	 */
	if (use_local_memory && local_memory_size > 0)
		max_available = get_local_memory_size(ocl_gpu_id) /
				(local_memory_size);
	else
		max_available = get_max_work_group_size(ocl_gpu_id);

	/* Query the kernel limit once and reuse it for the test and the result. */
	kernel_limit = get_current_work_group_size(ocl_gpu_id, crypt_kernel);

	if (max_available > kernel_limit)
		return kernel_limit;

	return max_available;
}
/*
 * Upper bound for the local work size of crypt_kernel.
 *
 * The candidate limit depends on source_in_use:
 *   - use_local(): local memory size divided by the combined size of
 *     sha512_password, sha512_ctx and sha512_buffers, minus one;
 *   - gpu():       local memory size divided by sizeof(sha512_password);
 *   - otherwise:   the device's maximum work-group size.
 * The result never exceeds what get_current_work_group_size() reports for
 * crypt_kernel.
 *
 * NOTE(review): in the use_local() case a quotient of 0 would wrap when 1
 * is subtracted -- confirm that cannot happen on supported devices.
 */
static size_t get_task_max_work_group_size(){
    size_t limit;

    if (use_local(source_in_use)) {
        const size_t bytes_per_item = sizeof(sha512_password) +
                                      sizeof(sha512_ctx) +
                                      sizeof(sha512_buffers);

        limit = get_local_memory_size(ocl_gpu_id) / bytes_per_item - 1;
    } else if (gpu(source_in_use)) {
        limit = get_local_memory_size(ocl_gpu_id) / sizeof(sha512_password);
    } else {
        limit = get_max_work_group_size(ocl_gpu_id);
    }

    /* Cap the candidate at the kernel's own limit. */
    return (limit > get_current_work_group_size(ocl_gpu_id, crypt_kernel))
        ? get_current_work_group_size(ocl_gpu_id, crypt_kernel)
        : limit;
}
/*
 * Suggested upper bound for the global work size.
 *
 * When cpu() matches the selected device: compute units times
 * KEYS_PER_CORE_CPU.  Otherwise: compute units times the work-group size
 * reported for crypt_kernel, times two.
 */
static size_t get_task_max_size(){
    size_t compute_units = get_max_compute_units(ocl_gpu_id);

    if (cpu(device_info[ocl_gpu_id])) {
        return compute_units * KEYS_PER_CORE_CPU;
    }

    return compute_units * get_current_work_group_size(ocl_gpu_id, crypt_kernel) * 2;
}
/*
 * Can be used to select a 'good' default gws size.
 *
 * multiplier:        extra scaling factor, applied only on the non-CPU path.
 * keys_per_core_cpu: keys per compute unit when cpu() matches the device.
 * keys_per_core_gpu: keys per compute unit on the non-CPU path.
 * crypt_kernel:      kernel whose work-group size scales the non-CPU result.
 *
 * Returns compute units * keys_per_core_cpu on a CPU device, otherwise
 * compute units * multiplier * keys_per_core_gpu * the work-group size
 * reported for crypt_kernel.
 */
size_t common_get_task_max_size(int multiplier, int keys_per_core_cpu,
	int keys_per_core_gpu, cl_kernel crypt_kernel) {

	size_t compute_units = get_max_compute_units(ocl_gpu_id);

	if (cpu(device_info[ocl_gpu_id])) {
		return compute_units * keys_per_core_cpu;
	}

	return compute_units * multiplier * keys_per_core_gpu *
			get_current_work_group_size(ocl_gpu_id, crypt_kernel);
}
/* --
  This function could be used to calculated the best num
  for the workgroup
  Work-items that make up a work-group (also referred to
  as the size of the work-group)
-- */
static void find_best_lws(struct fmt_main * self, int sequential_id) {

	//Call the default function.
	cl_kernel tKernel = init_kernel;
	size_t largest = 0;
	size_t temp = get_current_work_group_size(ocl_gpu_id, init_kernel);
	largest = temp;
	temp = get_current_work_group_size(ocl_gpu_id, crypt_kernel);
	if(temp > largest)
	{
		largest = temp;
		tKernel = crypt_kernel;
	}
	temp = get_current_work_group_size(ocl_gpu_id, finish_kernel);
	if(temp > largest)
	{
		largest = temp;
		tKernel = finish_kernel;
	}
	common_find_best_lws(
		largest,
		sequential_id, tKernel
	);
}