/*
 * Can be used to select a 'good' default LWS (local work size).
 *
 * use_local_memory:  when non-zero, the starting limit is the device's local
 *                    memory size divided by local_memory_size; otherwise it
 *                    is the device's maximum LWS.
 * local_memory_size: divisor applied to the device's local memory size
 *                    (presumably the local memory needed per work-item --
 *                    TODO confirm against callers).
 * crypt_kernel:      kernel whose own maximum LWS caps the result.
 *
 * Returns the smaller of the starting limit and the maximum LWS reported
 * for crypt_kernel on the device selected by gpu_id.
 */
size_t autotune_get_task_max_work_group_size(int use_local_memory,
	int local_memory_size, cl_kernel crypt_kernel)
{
	size_t max_available;
	size_t kernel_max;

	/*
	 * A zero local_memory_size would be a division by zero. Treat it as
	 * "local memory imposes no limit" and use the device limit instead.
	 */
	if (use_local_memory && local_memory_size != 0)
		max_available = get_local_memory_size(gpu_id) /
			(local_memory_size);
	else
		max_available = get_device_max_lws(gpu_id);

	/* Query the kernel limit once; it is used for both test and result */
	kernel_max = get_kernel_max_lws(gpu_id, crypt_kernel);

	if (max_available > kernel_max)
		return kernel_max;

	return max_available;
}
/*
 * Can be used to select a 'good' default GWS (global work size).
 *
 * The result is based on the number of compute units of the device
 * selected by gpu_id:
 *   - CPU device:       compute units * keys_per_core_cpu
 *   - Intel GPU device: 0 (presumably so the caller picks a size by other
 *                       means -- TODO confirm)
 *   - any other device: compute units * multiplier * keys_per_core_gpu *
 *                       the maximum LWS reported for crypt_kernel
 */
size_t autotune_get_task_max_size(int multiplier, int keys_per_core_cpu,
	int keys_per_core_gpu, cl_kernel crypt_kernel)
{
	size_t compute_units;
	size_t task_size;

	compute_units = get_max_compute_units(gpu_id);

	if (cpu(device_info[gpu_id]))
		return compute_units * keys_per_core_cpu;

	if (gpu_intel(device_info[gpu_id]))
		return 0;

	/* Keep a size_t on the left so every product is computed in size_t */
	task_size = compute_units * multiplier;
	task_size = task_size * keys_per_core_gpu;
	task_size = task_size * get_kernel_max_lws(gpu_id, crypt_kernel);

	return task_size;
}
static void init(struct fmt_main *self) { size_t maxsize; /* Read LWS/GWS prefs from config or environment */ opencl_get_user_preferences(OCL_CONFIG); if (!local_work_size) local_work_size = cpu(device_info[gpu_id]) ? 1 : 64; if (!global_work_size) global_work_size = MAX_KEYS_PER_CRYPT; opencl_init("$JOHN/kernels/sha512_kernel.cl", gpu_id, NULL); gkey = mem_calloc(global_work_size * sizeof(sha512_key)); ghash = mem_calloc(global_work_size * sizeof(sha512_hash)); ///Allocate memory on the GPU mem_in = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, insize, NULL, &ret_code); HANDLE_CLERROR(ret_code,"Error while allocating memory for passwords"); mem_out = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY, outsize, NULL, &ret_code); HANDLE_CLERROR(ret_code,"Error while allocating memory for hashes"); mem_binary = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, sizeof(uint64_t), NULL, &ret_code); HANDLE_CLERROR(ret_code,"Error while allocating memory for binary"); mem_cmp = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY, sizeof(uint32_t), NULL, &ret_code); HANDLE_CLERROR(ret_code,"Error while allocating memory for cmp_all result"); ///Assign crypt kernel parameters crypt_kernel = clCreateKernel(program[gpu_id], KERNEL_NAME, &ret_code); HANDLE_CLERROR(ret_code,"Error while creating crypt_kernel"); clSetKernelArg(crypt_kernel, 0, sizeof(mem_in), &mem_in); clSetKernelArg(crypt_kernel, 1, sizeof(mem_out), &mem_out); ///Assign cmp kernel parameters cmp_kernel = clCreateKernel(program[gpu_id], CMP_KERNEL_NAME, &ret_code); HANDLE_CLERROR(ret_code,"Error while creating cmp_kernel"); clSetKernelArg(cmp_kernel, 0, sizeof(mem_binary), &mem_binary); clSetKernelArg(cmp_kernel, 1, sizeof(mem_out), &mem_out); clSetKernelArg(cmp_kernel, 2, sizeof(mem_cmp), &mem_cmp); /* Note: we ask for the kernel's max size, not the device's! 
*/ maxsize = get_kernel_max_lws(gpu_id, crypt_kernel); if (local_work_size > maxsize) { local_work_size = maxsize; global_work_size = (global_work_size + local_work_size - 1) / local_work_size * local_work_size; } self->params.max_keys_per_crypt = global_work_size; if (!local_work_size) opencl_find_best_workgroup(self); self->params.min_keys_per_crypt = local_work_size; if (options.verbosity > 2) fprintf(stderr, "Local worksize (LWS) %d, Global worksize (GWS) %d\n",(int)local_work_size, (int)global_work_size); }