static void init(struct fmt_main *self) { cl_ulong maxsize; size_t selected_gws; opencl_init_opt("$JOHN/kernels/pwsafe_kernel.cl", ocl_gpu_id, NULL); init_kernel = clCreateKernel(program[ocl_gpu_id], KERNEL_INIT_NAME, &ret_code); HANDLE_CLERROR(ret_code, "Error while creating init kernel"); crypt_kernel = clCreateKernel(program[ocl_gpu_id], KERNEL_RUN_NAME, &ret_code); HANDLE_CLERROR(ret_code, "Error while creating crypt kernel"); finish_kernel = clCreateKernel(program[ocl_gpu_id], KERNEL_FINISH_NAME, &ret_code); HANDLE_CLERROR(ret_code, "Error while creating finish kernel"); local_work_size = cpu(device_info[ocl_gpu_id]) ? 1 : 64; global_work_size = 0; opencl_get_user_preferences(CONFIG_NAME); //Initialize openCL tuning (library) for this format. opencl_init_auto_setup(STEP, ROUNDS_DEFAULT/8, 8, split_events, warn, &multi_profilingEvent[3], self, create_clobj, release_clobj, sizeof(pwsafe_pass), 0); self->methods.crypt_all = crypt_all_benchmark; selected_gws = global_work_size; /* Note: we ask for the kernels' max sizes, not the device's! */ maxsize = get_current_work_group_size(ocl_gpu_id, init_kernel); maxsize = MIN(get_current_work_group_size(ocl_gpu_id, crypt_kernel), maxsize); maxsize = MIN(get_current_work_group_size(ocl_gpu_id, finish_kernel), maxsize); while (local_work_size > maxsize) local_work_size >>= 1; self->params.max_keys_per_crypt = (global_work_size ? 
global_work_size: MAX_KEYS_PER_CRYPT); if (!local_work_size) { create_clobj(self->params.max_keys_per_crypt, self); find_best_lws(self, ocl_gpu_id); release_clobj(); } global_work_size = selected_gws; if (global_work_size) create_clobj(global_work_size, self); else //user chose to die of boredom find_best_gws(self, ocl_gpu_id); self->params.min_keys_per_crypt = local_work_size; self->params.max_keys_per_crypt = global_work_size; self->methods.crypt_all = crypt_all; if (options.verbosity > 2) fprintf(stderr, "Local worksize (LWS) %d, Global worksize (GWS) %d\n", (int)local_work_size, (int)global_work_size); }
/*
 * Upper bound for the work-group size of crypt_kernel on the selected
 * device (ocl_gpu_id).
 *
 * Returns the smaller of get_max_work_group_size() for the device and
 * get_current_work_group_size() for crypt_kernel.
 */
static size_t get_task_max_work_group_size(void)
{
	size_t max_available = get_max_work_group_size(ocl_gpu_id);
	/* Queried once; the value is used both for the comparison and as the
	 * clamped result. */
	size_t kernel_limit = get_current_work_group_size(ocl_gpu_id, crypt_kernel);

	if (max_available > kernel_limit)
		return kernel_limit;

	return max_available;
}
/*
 * Can be used to select a 'good' default lws size.
 *
 * use_local_memory:  when non-zero, the bound is derived from
 *                    get_local_memory_size() divided by local_memory_size;
 *                    otherwise from get_max_work_group_size().
 * local_memory_size: divisor applied to the local memory size (presumably
 *                    bytes of local memory needed per work-item -- confirm
 *                    with callers).  A value <= 0 cannot be used as a
 *                    divisor, so the device limit is used instead.
 * crypt_kernel:      kernel whose own work-group limit caps the result.
 *
 * Returns the smaller of the bound described above and
 * get_current_work_group_size() for crypt_kernel.
 */
size_t common_get_task_max_work_group_size(int use_local_memory,
	int local_memory_size, cl_kernel crypt_kernel)
{
	size_t max_available;
	size_t kernel_limit;

	/* Guard the division: a zero size would divide by zero and a negative
	 * one would be converted to a huge unsigned divisor. */
	if (use_local_memory && local_memory_size > 0)
		max_available = get_local_memory_size(ocl_gpu_id) /
			(local_memory_size);
	else
		max_available = get_max_work_group_size(ocl_gpu_id);

	/* Ask the kernel for its limit once and reuse the answer. */
	kernel_limit = get_current_work_group_size(ocl_gpu_id, crypt_kernel);

	if (max_available > kernel_limit)
		return kernel_limit;

	return max_available;
}
static size_t get_task_max_work_group_size(){ size_t max_available; if (use_local(source_in_use)) max_available = get_local_memory_size(ocl_gpu_id) / (sizeof(sha512_password) + sizeof(sha512_ctx) + sizeof(sha512_buffers)) - 1; else if (gpu(source_in_use)) max_available = get_local_memory_size(ocl_gpu_id) / sizeof(sha512_password); else max_available = get_max_work_group_size(ocl_gpu_id); if (max_available > get_current_work_group_size(ocl_gpu_id, crypt_kernel)) return get_current_work_group_size(ocl_gpu_id, crypt_kernel); return max_available; }
/*
 * Default upper bound for the number of keys per crypt call.
 *
 * On CPU devices: compute units * KEYS_PER_CORE_CPU.
 * On other devices: compute units * crypt_kernel's work-group limit * 2.
 */
static size_t get_task_max_size()
{
	size_t compute_units = get_max_compute_units(ocl_gpu_id);

	if (!cpu(device_info[ocl_gpu_id])) {
		return compute_units *
			get_current_work_group_size(ocl_gpu_id, crypt_kernel) * 2;
	}

	return compute_units * KEYS_PER_CORE_CPU;
}
/*
 * Can be used to select a 'good' default gws size.
 *
 * multiplier:        extra scale factor, applied on non-CPU devices only.
 * keys_per_core_cpu: keys per compute unit when the device is a CPU.
 * keys_per_core_gpu: keys-per-unit factor on non-CPU devices.
 * crypt_kernel:      kernel whose work-group limit scales the non-CPU
 *                    result.
 *
 * Returns compute units * keys_per_core_cpu on a CPU device, otherwise
 * compute units * multiplier * keys_per_core_gpu * the kernel's
 * work-group limit.
 */
size_t common_get_task_max_size(int multiplier, int keys_per_core_cpu,
	int keys_per_core_gpu, cl_kernel crypt_kernel)
{
	size_t compute_units = get_max_compute_units(ocl_gpu_id);

	if (!cpu(device_info[ocl_gpu_id])) {
		return compute_units * multiplier * keys_per_core_gpu *
			get_current_work_group_size(ocl_gpu_id, crypt_kernel);
	}

	return compute_units * keys_per_core_cpu;
}
/* -- This function could be used to calculated the best num for the workgroup Work-items that make up a work-group (also referred to as the size of the work-group) -- */ static void find_best_lws(struct fmt_main * self, int sequential_id) { //Call the default function. cl_kernel tKernel = init_kernel; size_t largest = 0; size_t temp = get_current_work_group_size(ocl_gpu_id, init_kernel); largest = temp; temp = get_current_work_group_size(ocl_gpu_id, crypt_kernel); if(temp > largest) { largest = temp; tKernel = crypt_kernel; } temp = get_current_work_group_size(ocl_gpu_id, finish_kernel); if(temp > largest) { largest = temp; tKernel = finish_kernel; } common_find_best_lws( largest, sequential_id, tKernel ); }