static void init(struct fmt_main *self) { cl_ulong maxsize; size_t selected_gws; opencl_init_opt("$JOHN/kernels/pwsafe_kernel.cl", ocl_gpu_id, NULL); init_kernel = clCreateKernel(program[ocl_gpu_id], KERNEL_INIT_NAME, &ret_code); HANDLE_CLERROR(ret_code, "Error while creating init kernel"); crypt_kernel = clCreateKernel(program[ocl_gpu_id], KERNEL_RUN_NAME, &ret_code); HANDLE_CLERROR(ret_code, "Error while creating crypt kernel"); finish_kernel = clCreateKernel(program[ocl_gpu_id], KERNEL_FINISH_NAME, &ret_code); HANDLE_CLERROR(ret_code, "Error while creating finish kernel"); local_work_size = cpu(device_info[ocl_gpu_id]) ? 1 : 64; global_work_size = 0; opencl_get_user_preferences(CONFIG_NAME); //Initialize openCL tuning (library) for this format. opencl_init_auto_setup(STEP, ROUNDS_DEFAULT/8, 8, split_events, warn, &multi_profilingEvent[3], self, create_clobj, release_clobj, sizeof(pwsafe_pass), 0); self->methods.crypt_all = crypt_all_benchmark; selected_gws = global_work_size; /* Note: we ask for the kernels' max sizes, not the device's! */ maxsize = get_current_work_group_size(ocl_gpu_id, init_kernel); maxsize = MIN(get_current_work_group_size(ocl_gpu_id, crypt_kernel), maxsize); maxsize = MIN(get_current_work_group_size(ocl_gpu_id, finish_kernel), maxsize); while (local_work_size > maxsize) local_work_size >>= 1; self->params.max_keys_per_crypt = (global_work_size ? global_work_size: MAX_KEYS_PER_CRYPT); if (!local_work_size) { create_clobj(self->params.max_keys_per_crypt, self); find_best_lws(self, ocl_gpu_id); release_clobj(); } global_work_size = selected_gws; if (global_work_size) create_clobj(global_work_size, self); else //user chose to die of boredom find_best_gws(self, ocl_gpu_id); self->params.min_keys_per_crypt = local_work_size; self->params.max_keys_per_crypt = global_work_size; self->methods.crypt_all = crypt_all; if (options.verbosity > 2) fprintf(stderr, "Local worksize (LWS) %d, Global worksize (GWS) %d\n", (int)local_work_size, (int)global_work_size); }
static void fmt_ssha_init(struct fmt_main *self) { char *temp; cl_ulong maxsize; global_work_size = 0; opencl_init("$JOHN/ssha_kernel.cl", ocl_gpu_id, platform_id); // create kernel to execute crypt_kernel = clCreateKernel(program[ocl_gpu_id], "sha1_crypt_kernel", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); HANDLE_CLERROR(clGetKernelWorkGroupInfo(crypt_kernel, devices[ocl_gpu_id], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize), &maxsize, NULL), "Query max work group size"); if ((temp = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, LWS_CONFIG))) local_work_size = atoi(temp); if ((temp = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, GWS_CONFIG))) global_work_size = atoi(temp); if ((temp = getenv("LWS"))) local_work_size = atoi(temp); if ((temp = getenv("GWS"))) global_work_size = atoi(temp); if (!local_work_size) { int temp = global_work_size; local_work_size = maxsize; global_work_size = global_work_size ? global_work_size : 4 * maxsize; create_clobj(global_work_size, self); opencl_find_best_workgroup_limit(self, maxsize); release_clobj(); global_work_size = temp; } if (local_work_size > maxsize) { fprintf(stderr, "LWS %d is too large for this GPU. Max allowed is %d, using that.\n", (int)local_work_size, (int)maxsize); local_work_size = maxsize; } if (!global_work_size) find_best_gws(getenv("GWS") == NULL ? 0 : 1, self); if (global_work_size < local_work_size) global_work_size = local_work_size; fprintf(stderr, "Local worksize (LWS) %d, Global worksize (GWS) %d\n", (int)local_work_size, (int)global_work_size); create_clobj(global_work_size, self); atexit(release_clobj); }
/* ------- Initialization ------- */ static void init(struct fmt_main * self) { char * tmp_value; char * task = "$JOHN/sha256_kernel.cl"; opencl_init_dev(ocl_gpu_id, platform_id); source_in_use = device_info[ocl_gpu_id]; if ((tmp_value = getenv("_TYPE"))) source_in_use = atoi(tmp_value); opencl_build_kernel(task, ocl_gpu_id); // create kernel(s) to execute crypt_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_crypt", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); cmp_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_cmp", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel_cmp. Double-check kernel name?"); global_work_size = get_task_max_size(); local_work_size = 0; if (source_in_use != device_info[ocl_gpu_id]) { device_info[ocl_gpu_id] = source_in_use; fprintf(stderr, "Selected runtime id %d, source (%s)\n", source_in_use, task); } if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, LWS_CONFIG))) local_work_size = atoi(tmp_value); if ((tmp_value = getenv("LWS"))) local_work_size = atoi(tmp_value); //Check if local_work_size is a valid number. if (local_work_size > get_task_max_work_group_size()){ fprintf(stderr, "Error: invalid local work size (LWS). Max value allowed is: %zd\n" , get_task_max_work_group_size()); local_work_size = 0; //Force find a valid number. } self->params.max_keys_per_crypt = global_work_size; if (!local_work_size) { local_work_size = get_task_max_work_group_size(); create_clobj(global_work_size, self); find_best_workgroup(self); release_clobj(); } if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, GWS_CONFIG))) global_work_size = atoi(tmp_value); if ((tmp_value = getenv("GWS"))) global_work_size = atoi(tmp_value); //Check if a valid multiple is used. global_work_size = get_multiple(global_work_size, local_work_size); if (global_work_size) create_clobj(global_work_size, self); else { //user chose to die of boredom global_work_size = get_task_max_size(); find_best_gws(self); } fprintf(stderr, "Local work size (LWS) %d, global work size (GWS) %zd\n", (int) local_work_size, global_work_size); self->params.max_keys_per_crypt = global_work_size; }
static void init(struct fmt_main *self) { #ifdef CL_VERSION_1_0 char *temp; cl_ulong maxsize; global_work_size = 0; opencl_init("$JOHN/rar_kernel.cl", ocl_gpu_id, platform_id); // create kernel to execute crypt_kernel = clCreateKernel(program[ocl_gpu_id], "SetCryptKeys", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); /* We mimic the lengths of cRARk for comparisons */ if (get_device_type(ocl_gpu_id) == CL_DEVICE_TYPE_GPU) { #ifndef DEBUG self->params.benchmark_comment = " (6 characters)"; #endif self->params.tests = gpu_tests; #if defined(DEBUG) && !defined(ALWAYS_OPENCL) fprintf(stderr, "Note: will use CPU for some self-tests, and Single mode.\n"); #endif } if ((temp = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, LWS_CONFIG))) local_work_size = atoi(temp); if ((temp = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, GWS_CONFIG))) global_work_size = atoi(temp); if ((temp = getenv("LWS"))) local_work_size = atoi(temp); if ((temp = getenv("GWS"))) global_work_size = atoi(temp); /* Note: we ask for this kernel's max size, not the device's! */ HANDLE_CLERROR(clGetKernelWorkGroupInfo(crypt_kernel, devices[ocl_gpu_id], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize), &maxsize, NULL), "Query max work group size"); #ifdef DEBUG fprintf(stderr, "Max allowed local work size %d\n", (int)maxsize); #endif if (!local_work_size) { if (get_device_type(ocl_gpu_id) == CL_DEVICE_TYPE_CPU) { if (get_platform_vendor_id(platform_id) == INTEL) local_work_size = 8; else local_work_size = 1; } else { local_work_size = 64; } } if (local_work_size > maxsize) { fprintf(stderr, "LWS %d is too large for this GPU. Max allowed is %d, using that.\n", (int)local_work_size, (int)maxsize); local_work_size = maxsize; } if (!global_work_size) find_best_gws(temp == NULL ? 0 : 1); if (global_work_size < local_work_size) global_work_size = local_work_size; fprintf(stderr, "Local worksize (LWS) %d, Global worksize (GWS) %d\n", (int)local_work_size, (int)global_work_size); create_clobj(global_work_size); #ifdef DEBUG { cl_ulong loc_mem_size; HANDLE_CLERROR(clGetKernelWorkGroupInfo(crypt_kernel, devices[ocl_gpu_id], CL_KERNEL_LOCAL_MEM_SIZE, sizeof(loc_mem_size), &loc_mem_size, NULL), "Query local memory usage"); fprintf(stderr, "Kernel using %lu bytes of local memory out of %lu available\n", loc_mem_size, get_local_memory_size(ocl_gpu_id)); } #endif atexit(release_clobj); *mkpc = VF * global_work_size; #endif /* OpenCL */ #if defined (_OPENMP) omp_t = omp_get_max_threads(); self->params.min_keys_per_crypt *= omp_t; #ifndef CL_VERSION_1_0 /* OpenCL gets to decide */ *mkpc = omp_t * OMP_SCALE * MAX_KEYS_PER_CRYPT; #endif init_locks(); #endif /* _OPENMP */ if (options.utf8) self->params.plaintext_length = PLAINTEXT_LENGTH * 3; unpack_data = mem_calloc_tiny(sizeof(unpack_data_t) * omp_t, MEM_ALIGN_WORD); cracked = mem_calloc_tiny(sizeof(*cracked) * *mkpc, MEM_ALIGN_WORD); #ifndef CL_VERSION_1_0 saved_key = mem_calloc_tiny(UNICODE_LENGTH * *mkpc, MEM_ALIGN_NONE); saved_len = mem_calloc_tiny(sizeof(*saved_len) * *mkpc, MEM_ALIGN_WORD); saved_salt = mem_calloc_tiny(8, MEM_ALIGN_NONE); aes_key = mem_calloc_tiny(16 * *mkpc, MEM_ALIGN_NONE); aes_iv = mem_calloc_tiny(16 * *mkpc, MEM_ALIGN_NONE); #endif /* OpenSSL init */ init_aesni(); SSL_load_error_strings(); SSL_library_init(); OpenSSL_add_all_algorithms(); #ifndef __APPLE__ atexit(openssl_cleanup); #endif /* CRC-32 table init, do it before we start multithreading */ { CRC32_t crc; CRC32_Init(&crc); } }
size_t select_device(int jtrUniqDevNo, struct fmt_main *fmt) { cl_int err; const char *errMsg; opencl_init_opt("$JOHN/kernels/pbkdf2_kernel.cl", jtrUniqDevNo, NULL); globalObj[jtrUniqDevNo].krnl[0] = clCreateKernel(program[jtrUniqDevNo], "pbkdf2_preprocess", &err); if (err) { fprintf(stderr, "Create Kernel pbkdf2_preprocess FAILED\n"); return 0; } globalObj[jtrUniqDevNo].krnl[1] = clCreateKernel(program[jtrUniqDevNo], "pbkdf2_iter", &err); if (err) { fprintf(stderr, "Create Kernel pbkdf2_iter FAILED\n"); return 0; } globalObj[jtrUniqDevNo].krnl[2] = clCreateKernel(program[jtrUniqDevNo], "pbkdf2_postprocess", &err); if (err) { fprintf(stderr, "Create Kernel pbkdf2_postprocess FAILED\n"); return 0; } errMsg = "Create Buffer FAILED"; globalObj[jtrUniqDevNo].gpu_buffer.pass_gpu = clCreateBuffer(context[jtrUniqDevNo], CL_MEM_READ_ONLY, 4 * MAX_KEYS_PER_CRYPT * sizeof(cl_uint), NULL, &err); if (globalObj[jtrUniqDevNo].gpu_buffer.pass_gpu == (cl_mem)0) HANDLE_CLERROR(err,errMsg ); globalObj[jtrUniqDevNo].gpu_buffer.salt_gpu = clCreateBuffer(context[jtrUniqDevNo], CL_MEM_READ_ONLY, (MAX_SALT_LENGTH / 2 + 1) * sizeof(cl_uint), NULL, &err); if (globalObj[jtrUniqDevNo].gpu_buffer.salt_gpu == (cl_mem)0) HANDLE_CLERROR(err, errMsg); globalObj[jtrUniqDevNo].gpu_buffer.hash_out_gpu = clCreateBuffer(context[jtrUniqDevNo], CL_MEM_WRITE_ONLY, 4 * MAX_KEYS_PER_CRYPT * sizeof(cl_uint), NULL, &err); if (globalObj[jtrUniqDevNo].gpu_buffer.hash_out_gpu == (cl_mem)0) HANDLE_CLERROR(err, errMsg); globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu = clCreateBuffer(context[jtrUniqDevNo], CL_MEM_READ_WRITE, MAX_KEYS_PER_CRYPT * sizeof(temp_buf), NULL, &err); if (globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu == (cl_mem)0) HANDLE_CLERROR(err, errMsg); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[0], 0, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.pass_gpu), "Set Kernel 0 Arg 0 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[0], 1, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.salt_gpu), "Set Kernel 0 Arg 1 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[0], 4, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu), "Set Kernel 0 Arg 4 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[1], 0, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu), "Set Kernel 1 Arg 0 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[2], 0, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu), "Set Kernel 2 Arg 0 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[2], 1, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.hash_out_gpu), "Set Kernel 2 Arg 1 :FAILED"); if (((!global_work_size) || ((!local_work_size) && global_work_size)) || (active_dev_ctr != 0)) find_best_workgroup(jtrUniqDevNo); else { size_t maxsize, maxsize2; maxsize = get_kernel_preferred_work_group_size(jtrUniqDevNo, globalObj[jtrUniqDevNo].krnl[0]); maxsize2 = get_kernel_preferred_work_group_size(jtrUniqDevNo, globalObj[jtrUniqDevNo].krnl[1]); if (maxsize2 > maxsize) maxsize = maxsize2; maxsize2 = get_kernel_preferred_work_group_size(jtrUniqDevNo, globalObj[jtrUniqDevNo].krnl[2]); if (maxsize2 > maxsize) maxsize = maxsize2; while (local_work_size > maxsize) local_work_size /= 2; if (options.verbosity > 3) fprintf(stderr, "Local worksize (LWS) forced to %zu\n", local_work_size); globalObj[jtrUniqDevNo].lws = local_work_size; } if ((!global_work_size) || (active_dev_ctr != 0)) find_best_gws(jtrUniqDevNo, fmt); else { if (options.verbosity > 3) fprintf(stderr, "Global worksize (GWS) forced to %zu\n", global_work_size); fmt -> params.max_keys_per_crypt = global_work_size; fmt -> params.min_keys_per_crypt = max_lws(); } active_dev_ctr++; return globalObj[jtrUniqDevNo].lws; }
static void init(struct fmt_main *self) { char *temp; cl_ulong maxsize, maxsize2; char build_opts[64]; global_work_size = 0; snprintf(build_opts, sizeof(build_opts), "-DHASH_LOOPS=%u -DUNICODE_LENGTH=%u %s", HASH_LOOPS, UNICODE_LENGTH, (options.flags & FLG_VECTORIZE) ? "-DVECTORIZE" : (options.flags & FLG_SCALAR) ? "-DSCALAR" : ""); opencl_init_opt("$JOHN/office2007_kernel.cl", ocl_gpu_id, platform_id, build_opts); // create kernel to execute GenerateSHA1pwhash = clCreateKernel(program[ocl_gpu_id], "GenerateSHA1pwhash", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); crypt_kernel = clCreateKernel(program[ocl_gpu_id], "HashLoop", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); Generate2007key = clCreateKernel(program[ocl_gpu_id], "Generate2007key", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); if (options.flags & FLG_VECTORIZE) { /* Run vectorized code */ VF = 4; self->params.algorithm_name = "OpenCL 4x"; } if ((temp = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, LWS_CONFIG))) local_work_size = atoi(temp); if ((temp = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, GWS_CONFIG))) global_work_size = atoi(temp); if ((temp = getenv("LWS"))) local_work_size = atoi(temp); if ((temp = getenv("GWS"))) global_work_size = atoi(temp); /* Note: we ask for the kernels' max sizes, not the device's! */ HANDLE_CLERROR(clGetKernelWorkGroupInfo(GenerateSHA1pwhash, devices[ocl_gpu_id], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize), &maxsize, NULL), "Query max work group size"); HANDLE_CLERROR(clGetKernelWorkGroupInfo(crypt_kernel, devices[ocl_gpu_id], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize2), &maxsize2, NULL), "Query max work group size"); if (maxsize2 < maxsize) maxsize = maxsize2; HANDLE_CLERROR(clGetKernelWorkGroupInfo(Generate2007key, devices[ocl_gpu_id], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize2), &maxsize2, NULL), "Query max work group size"); if (maxsize2 < maxsize) maxsize = maxsize2; #if 0 /* Our use of local memory sets a limit for LWS */ maxsize2 = get_local_memory_size(ocl_gpu_id) / (24 * VF); while (maxsize > maxsize2) maxsize >>= 1; #endif /* maxsize is the lowest figure from the three different kernels */ if (!local_work_size) { if (getenv("LWS")) { /* LWS was explicitly set to 0 */ int temp = global_work_size; local_work_size = maxsize; global_work_size = global_work_size ? global_work_size : 4 * maxsize; create_clobj(global_work_size, self); opencl_find_best_workgroup_limit(self, maxsize); release_clobj(); global_work_size = temp; } else { if (cpu(device_info[ocl_gpu_id])) { if (get_platform_vendor_id(platform_id) == DEV_INTEL) local_work_size = MIN(maxsize, 8); else local_work_size = 1; } else local_work_size = MIN(maxsize, 64); } } if (local_work_size > maxsize) { fprintf(stderr, "LWS %d is too large for this GPU. Max allowed is %d, using that.\n", (int)local_work_size, (int)maxsize); local_work_size = maxsize; } if (!global_work_size) find_best_gws(getenv("GWS") == NULL ? 0 : 1, self); if (global_work_size < local_work_size) global_work_size = local_work_size; fprintf(stderr, "Local worksize (LWS) %d, Global worksize (GWS) %d\n", (int)local_work_size, (int)global_work_size); create_clobj(global_work_size, self); atexit(release_clobj); if (options.utf8) self->params.plaintext_length = MIN(125, 3 * PLAINTEXT_LENGTH); }
/* ------- Initialization ------- */ static void init(struct fmt_main * self) { char * tmp_value; char * task = "$JOHN/cryptsha512_kernel_DEFAULT.cl"; uint64_t startTime, runtime; opencl_init_dev(ocl_gpu_id, platform_id); startTime = (unsigned long) time(NULL); source_in_use = device_info[ocl_gpu_id]; if ((tmp_value = getenv("_TYPE"))) source_in_use = atoi(tmp_value); if ((tmp_value = getenv("_FAST"))) fast_mode = TRUE; if (use_local(source_in_use)) task = "$JOHN/cryptsha512_kernel_LOCAL.cl"; else if (gpu(source_in_use)) { fprintf(stderr, "Building the kernel, this could take a while\n"); task = "$JOHN/cryptsha512_kernel_GPU.cl"; } fflush(stdout); opencl_build_kernel(task, ocl_gpu_id); if ((runtime = (unsigned long) (time(NULL) - startTime)) > 2UL) fprintf(stderr, "Elapsed time: %lu seconds\n", runtime); fflush(stdout); // create kernel(s) to execute crypt_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_crypt", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); if (gpu(source_in_use) || use_local(source_in_use)) { prepare_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_prepare", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel_prepare. Double-check kernel name?"); final_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_final", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel_final. Double-check kernel name?"); } global_work_size = get_task_max_size(); local_work_size = get_default_workgroup(); if (source_in_use != device_info[ocl_gpu_id]) fprintf(stderr, "Selected runtime id %d, source (%s)\n", source_in_use, task); if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, LWS_CONFIG))) local_work_size = atoi(tmp_value); if ((tmp_value = getenv("LWS"))) local_work_size = atoi(tmp_value); //Check if local_work_size is a valid number. if (local_work_size > get_task_max_work_group_size()){ local_work_size = 0; //Force find a valid number. } self->params.max_keys_per_crypt = global_work_size; if (!local_work_size) { local_work_size = get_task_max_work_group_size(); create_clobj(global_work_size, self); find_best_workgroup(self); release_clobj(); } if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, GWS_CONFIG))) global_work_size = atoi(tmp_value); if ((tmp_value = getenv("GWS"))) global_work_size = atoi(tmp_value); //Check if a valid multiple is used. global_work_size = get_multiple(global_work_size, local_work_size); if (global_work_size) create_clobj(global_work_size, self); else { //user chose to die of boredom global_work_size = get_task_max_size(); find_best_gws(self); } fprintf(stderr, "Local work size (LWS) %d, global work size (GWS) %zd\n", (int) local_work_size, global_work_size); self->params.max_keys_per_crypt = global_work_size; }
size_t select_device(int jtrUniqDevNo, struct fmt_main *fmt) { cl_int err; const char *errMsg; size_t memAllocSz; active_dev_ctr++; opencl_init("$JOHN/kernels/pbkdf2_kernel.cl", jtrUniqDevNo, NULL); globalObj[jtrUniqDevNo].krnl[0] = clCreateKernel(program[jtrUniqDevNo], "pbkdf2_preprocess_short", &err); if (err) { fprintf(stderr, "Create Kernel pbkdf2_preprocess_short FAILED\n"); return 0; } globalObj[jtrUniqDevNo].krnl[1] = clCreateKernel(program[jtrUniqDevNo], "pbkdf2_preprocess_long", &err); if (err) { fprintf(stderr, "Create Kernel pbkdf2_preprocess_long FAILED\n"); return 0; } globalObj[jtrUniqDevNo].krnl[2] = clCreateKernel(program[jtrUniqDevNo], "pbkdf2_iter", &err); if (err) { fprintf(stderr, "Create Kernel pbkdf2_iter FAILED\n"); return 0; } globalObj[jtrUniqDevNo].krnl[3] = clCreateKernel(program[jtrUniqDevNo], "pbkdf2_postprocess", &err); if (err) { fprintf(stderr, "Create Kernel pbkdf2_postprocess FAILED\n"); return 0; } errMsg = "Create Buffer FAILED"; memAllocSz = 4 * MAX_KEYS_PER_CRYPT * sizeof(cl_uint); memAllocSz = memAllocSz < get_max_mem_alloc_size(jtrUniqDevNo) ? memAllocSz : get_max_mem_alloc_size(jtrUniqDevNo) / 4 * 4; globalObj[jtrUniqDevNo].gpu_buffer.pass_gpu = clCreateBuffer(context[jtrUniqDevNo], CL_MEM_READ_ONLY, memAllocSz, NULL, &err); if (globalObj[jtrUniqDevNo].gpu_buffer.pass_gpu == (cl_mem)0) HANDLE_CLERROR(err,errMsg ); globalObj[jtrUniqDevNo].gpu_buffer.salt_gpu = clCreateBuffer(context[jtrUniqDevNo], CL_MEM_READ_ONLY, (MAX_SALT_LENGTH / 2 + 1) * sizeof(cl_uint), NULL, &err); if (globalObj[jtrUniqDevNo].gpu_buffer.salt_gpu == (cl_mem)0) HANDLE_CLERROR(err, errMsg); globalObj[jtrUniqDevNo].gpu_buffer.hash_out_gpu = clCreateBuffer(context[jtrUniqDevNo], CL_MEM_WRITE_ONLY, memAllocSz, NULL, &err); if (globalObj[jtrUniqDevNo].gpu_buffer.hash_out_gpu == (cl_mem)0) HANDLE_CLERROR(err, errMsg); memAllocSz = MAX_KEYS_PER_CRYPT * sizeof(temp_buf); memAllocSz = memAllocSz < get_max_mem_alloc_size(jtrUniqDevNo) ? memAllocSz : get_max_mem_alloc_size(jtrUniqDevNo) / 4 * 4; globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu = clCreateBuffer(context[jtrUniqDevNo], CL_MEM_READ_WRITE, memAllocSz, NULL, &err); if (globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu == (cl_mem)0) HANDLE_CLERROR(err, errMsg); memAllocSz = 5 * MAX_KEYS_PER_CRYPT * sizeof(cl_uint); memAllocSz = memAllocSz < get_max_mem_alloc_size(jtrUniqDevNo) ? memAllocSz : get_max_mem_alloc_size(jtrUniqDevNo) / 4 * 4; globalObj[jtrUniqDevNo].gpu_buffer.hmac_sha1_gpu = clCreateBuffer(context[jtrUniqDevNo], CL_MEM_READ_WRITE, memAllocSz, NULL, &err); if (globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu == (cl_mem)0) HANDLE_CLERROR(err, errMsg); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[0], 0, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.pass_gpu), "Set Kernel 0 Arg 0 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[0], 1, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.salt_gpu), "Set Kernel 0 Arg 1 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[0], 3, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu), "Set Kernel 0 Arg 3 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[1], 0, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.pass_gpu), "Set Kernel 1 Arg 0 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[1], 1, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu), "Set Kernel 1 Arg 1 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[1], 2, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.hmac_sha1_gpu), "Set Kernel 1 Arg 2 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[2], 0, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu), "Set Kernel 2 Arg 0 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[3], 0, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.temp_buf_gpu), "Set Kernel 3 Arg 0 :FAILED"); HANDLE_CLERROR(clSetKernelArg(globalObj[jtrUniqDevNo].krnl[3], 1, sizeof(cl_mem), &globalObj[jtrUniqDevNo].gpu_buffer.hash_out_gpu), "Set Kernel 3 Arg 1 :FAILED"); if (!local_work_size) find_best_workgroup(jtrUniqDevNo, quick_bechmark(jtrUniqDevNo)); else { size_t maxsize, maxsize2; globalObj[jtrUniqDevNo].lws = local_work_size; // Obey limits HANDLE_CLERROR(clGetKernelWorkGroupInfo(globalObj[jtrUniqDevNo].krnl[0], devices[jtrUniqDevNo], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize), &maxsize, NULL), "Error querying max LWS"); HANDLE_CLERROR(clGetKernelWorkGroupInfo(globalObj[jtrUniqDevNo].krnl[1], devices[jtrUniqDevNo], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize2), &maxsize2, NULL), "Error querying max LWS"); if (maxsize2 > maxsize) maxsize = maxsize2; HANDLE_CLERROR(clGetKernelWorkGroupInfo(globalObj[jtrUniqDevNo].krnl[2], devices[jtrUniqDevNo], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize2), &maxsize2, NULL), "Error querying max LWS"); if (maxsize2 > maxsize) maxsize = maxsize2; HANDLE_CLERROR(clGetKernelWorkGroupInfo(globalObj[jtrUniqDevNo].krnl[3], devices[jtrUniqDevNo], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize2), &maxsize2, NULL), "Error querying max LWS"); if (maxsize2 > maxsize) maxsize = maxsize2; while (globalObj[jtrUniqDevNo].lws > maxsize) globalObj[jtrUniqDevNo].lws /= 2; if (options.verbosity > 3) fprintf(stderr, "Local worksize (LWS) forced to "Zu"\n", globalObj[jtrUniqDevNo].lws); globalObj[jtrUniqDevNo].exec_time_inv = 1; } if (!global_work_size) find_best_gws(jtrUniqDevNo, fmt); else { if (options.verbosity > 3) fprintf(stderr, "Global worksize (GWS) forced to "Zu"\n", global_work_size); fmt -> params.max_keys_per_crypt = global_work_size; fmt -> params.min_keys_per_crypt = max_lws(); } return globalObj[jtrUniqDevNo].lws; }