//Allow me to have a configurable step size. static int get_step(size_t num, int step, int startup){ if (startup) { if (step == 0) return get_multiple(STEP, local_work_size); else return get_multiple(step, local_work_size); } if (step < 1) return num * 2; return num + step; }
/* ------- Salt functions ------- */ static void * get_salt(char *ciphertext) { static sha512_salt out; int len; out.rounds = ROUNDS_DEFAULT; ciphertext += 3; if (!strncmp(ciphertext, ROUNDS_PREFIX, sizeof(ROUNDS_PREFIX) - 1)) { const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; char *endp; unsigned long int srounds = strtoul(num, &endp, 10); if (*endp == '$') { ciphertext = endp + 1; out.rounds = srounds < ROUNDS_MIN ? ROUNDS_MIN : srounds; out.rounds = srounds > ROUNDS_MAX ? ROUNDS_MAX : srounds; } } for (len = 0; ciphertext[len] != '$'; len++); //Assure buffer has no "trash data". memset(out.salt, '\0', SALT_LENGTH); len = (len > SALT_LENGTH ? SALT_LENGTH : len); //Put the tranfered salt on salt buffer. memcpy(out.salt, ciphertext, len); out.length = len; out.initial = get_multiple(out.rounds, HASH_LOOPS); return &out; }
static size_t get_default_workgroup(){ size_t max_available; max_available = get_task_max_work_group_size(); if (gpu_nvidia(device_info[ocl_gpu_id]) || (!cpu(device_info[ocl_gpu_id]) && fast_mode)) { global_work_size = get_multiple(global_work_size, max_available); return max_available; } else return get_safe_workgroup(); }
static char* getListValue(Ihandle *n) { static char *value = NULL; static int maxchar = 31; /* qtos caracteres podem ser armazenados */ int *pos, no; int multiple = iupStrEqualNoCase(get_multiple(n),IUP_YES); if ( !XmListGetSelectedPos( (Widget)handle(n), &pos, &no )) no = 0; if (value == NULL) value = (char *)malloc(maxchar+1); if (multiple) { int i, size = 0; XtVaGetValues((Widget)handle(n), XmNitemCount, &size, NULL); if (!size) return NULL; if (maxchar < size) { value = (char*)realloc( value, size+1 ); } maxchar = size; for (i=0; i<size; i++) value[i]='-'; value[size] = 0; for (i=0; i<no; i++) { value[pos[i]-1] = '+'; } } else { if (no==0) return NULL; sprintf(value, "%d", pos[0] ); } if (no) XtFree((XtPointer)pos); return value; }
/* ------- Initialization ------- */ static void init(struct fmt_main * self) { char * tmp_value; char * task = "$JOHN/sha256_kernel.cl"; opencl_init_dev(ocl_gpu_id, platform_id); source_in_use = device_info[ocl_gpu_id]; if ((tmp_value = getenv("_TYPE"))) source_in_use = atoi(tmp_value); opencl_build_kernel(task, ocl_gpu_id); // create kernel(s) to execute crypt_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_crypt", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); cmp_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_cmp", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel_cmp. Double-check kernel name?"); global_work_size = get_task_max_size(); local_work_size = 0; if (source_in_use != device_info[ocl_gpu_id]) { device_info[ocl_gpu_id] = source_in_use; fprintf(stderr, "Selected runtime id %d, source (%s)\n", source_in_use, task); } if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, LWS_CONFIG))) local_work_size = atoi(tmp_value); if ((tmp_value = getenv("LWS"))) local_work_size = atoi(tmp_value); //Check if local_work_size is a valid number. if (local_work_size > get_task_max_work_group_size()){ fprintf(stderr, "Error: invalid local work size (LWS). Max value allowed is: %zd\n" , get_task_max_work_group_size()); local_work_size = 0; //Force find a valid number. } self->params.max_keys_per_crypt = global_work_size; if (!local_work_size) { local_work_size = get_task_max_work_group_size(); create_clobj(global_work_size, self); find_best_workgroup(self); release_clobj(); } if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, GWS_CONFIG))) global_work_size = atoi(tmp_value); if ((tmp_value = getenv("GWS"))) global_work_size = atoi(tmp_value); //Check if a valid multiple is used. global_work_size = get_multiple(global_work_size, local_work_size); if (global_work_size) create_clobj(global_work_size, self); else { //user chose to die of boredom global_work_size = get_task_max_size(); find_best_gws(self); } fprintf(stderr, "Local work size (LWS) %d, global work size (GWS) %zd\n", (int) local_work_size, global_work_size); self->params.max_keys_per_crypt = global_work_size; }
/* -- This function could be used to calculated the best num of keys per crypt for the given format -- */ static void find_best_gws(struct fmt_main * self) { size_t num = 0; cl_ulong run_time, min_time = CL_ULONG_MAX; int optimal_gws = local_work_size, step = STEP; int do_benchmark = 0; unsigned int SHAspeed, bestSHAspeed = 0; unsigned long long int max_run_time = 1000000000ULL; char *tmp_value; if ((tmp_value = getenv("STEP"))){ step = atoi(tmp_value); do_benchmark = 1; } step = get_multiple(step, local_work_size); if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, DUR_CONFIG))) max_run_time = atoi(tmp_value) * 1000000000UL; fprintf(stderr, "Calculating best global work size (GWS) for LWS=%zd and max. %llu s duration.\n\n", local_work_size, max_run_time / 1000000000ULL); if (do_benchmark) fprintf(stderr, "Raw speed figures including buffer transfers:\n"); for (num = get_step(num, step, 1); num; num = get_step(num, step, 0)) { //Check if hardware can handle the size we are going to try now. if (sizeof(sha256_password) * num * 1.2 > get_max_mem_alloc_size(ocl_gpu_id)) break; if (! (run_time = gws_test(num, self))) continue; if (!do_benchmark) advance_cursor(); SHAspeed = num / (run_time / 1000000000.); if (run_time < min_time) min_time = run_time; if (do_benchmark) { fprintf(stderr, "gws: %8zu\t%12lu c/s %8.3f ms per crypt_all()", num, (long) (num / (run_time / 1000000000.)), (float) run_time / 1000000.); if (run_time > max_run_time) { fprintf(stderr, " - too slow\n"); break; } } else { if (run_time > min_time * 20 || run_time > max_run_time) break; } if (((long) SHAspeed - bestSHAspeed) > 10000) { if (do_benchmark) fprintf(stderr, "+"); bestSHAspeed = SHAspeed; optimal_gws = num; } if (do_benchmark) fprintf(stderr, "\n"); } fprintf(stderr, "Optimal global work size %d\n", optimal_gws); fprintf(stderr, "(to avoid this test on next run, put \"" GWS_CONFIG " = %d\" in john.conf, section [" SECTION_OPTIONS SUBSECTION_OPENCL "])\n", optimal_gws); global_work_size = optimal_gws; create_clobj(optimal_gws, self); }
void increase_triangle_show(increase_triangle_t *increase_triangle) { increase_triangle_t *p_increase_triangle=increase_triangle; if(NULL==p_increase_triangle){ return; } int i=0; int layer_value_sum=0; int layers_value_sum=0; int all_value_sum=0; printf("\n"); printf("layers=[%d],base_value=[%d],multiple=[%d];\n",get_layers(p_increase_triangle),get_base_value(p_increase_triangle),get_multiple(p_increase_triangle)); show_layer_index(p_increase_triangle->layer_index); printf("\n"); for(i=0; i<p_increase_triangle->layers; i++){ layer_value_sum=get_layer_value_sum(p_increase_triangle,i+1); layers_value_sum=get_increase_triangle_layers_value_sum(p_increase_triangle,i+1); printf("layer=%-2d,layer_sum=[%-5d] 2*layer_sum=[%-5d];layers_sum=[%-5d],2*layer_sum-layers_num=[%-5d]\n",i+1,layer_value_sum,2*layer_value_sum,layers_value_sum,2*layer_value_sum-layers_value_sum); } printf("\n"); all_value_sum=get_increase_triangle_all_value_sum(p_increase_triangle); printf("all layers sum=[%-5d]\n", all_value_sum); printf("\n"); return; }
/* ------- Initialization ------- */ static void init(struct fmt_main * self) { char * tmp_value; char * task = "$JOHN/cryptsha512_kernel_DEFAULT.cl"; uint64_t startTime, runtime; opencl_init_dev(ocl_gpu_id, platform_id); startTime = (unsigned long) time(NULL); source_in_use = device_info[ocl_gpu_id]; if ((tmp_value = getenv("_TYPE"))) source_in_use = atoi(tmp_value); if ((tmp_value = getenv("_FAST"))) fast_mode = TRUE; if (use_local(source_in_use)) task = "$JOHN/cryptsha512_kernel_LOCAL.cl"; else if (gpu(source_in_use)) { fprintf(stderr, "Building the kernel, this could take a while\n"); task = "$JOHN/cryptsha512_kernel_GPU.cl"; } fflush(stdout); opencl_build_kernel(task, ocl_gpu_id); if ((runtime = (unsigned long) (time(NULL) - startTime)) > 2UL) fprintf(stderr, "Elapsed time: %lu seconds\n", runtime); fflush(stdout); // create kernel(s) to execute crypt_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_crypt", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?"); if (gpu(source_in_use) || use_local(source_in_use)) { prepare_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_prepare", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel_prepare. Double-check kernel name?"); final_kernel = clCreateKernel(program[ocl_gpu_id], "kernel_final", &ret_code); HANDLE_CLERROR(ret_code, "Error creating kernel_final. Double-check kernel name?"); } global_work_size = get_task_max_size(); local_work_size = get_default_workgroup(); if (source_in_use != device_info[ocl_gpu_id]) fprintf(stderr, "Selected runtime id %d, source (%s)\n", source_in_use, task); if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, LWS_CONFIG))) local_work_size = atoi(tmp_value); if ((tmp_value = getenv("LWS"))) local_work_size = atoi(tmp_value); //Check if local_work_size is a valid number. if (local_work_size > get_task_max_work_group_size()){ local_work_size = 0; //Force find a valid number. } self->params.max_keys_per_crypt = global_work_size; if (!local_work_size) { local_work_size = get_task_max_work_group_size(); create_clobj(global_work_size, self); find_best_workgroup(self); release_clobj(); } if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, GWS_CONFIG))) global_work_size = atoi(tmp_value); if ((tmp_value = getenv("GWS"))) global_work_size = atoi(tmp_value); //Check if a valid multiple is used. global_work_size = get_multiple(global_work_size, local_work_size); if (global_work_size) create_clobj(global_work_size, self); else { //user chose to die of boredom global_work_size = get_task_max_size(); find_best_gws(self); } fprintf(stderr, "Local work size (LWS) %d, global work size (GWS) %zd\n", (int) local_work_size, global_work_size); self->params.max_keys_per_crypt = global_work_size; }
//Do the proper test using different sizes. static cl_ulong gws_test(size_t num, struct fmt_main * self) { cl_event myEvent; cl_int ret_code; cl_uint *tmpbuffer; cl_ulong startTime, endTime, runtime; int i, loops; //Prepare buffers. create_clobj(num, self); tmpbuffer = mem_alloc(sizeof(sha512_hash) * num); if (tmpbuffer == NULL) { fprintf(stderr, "Malloc failure in find_best_gws\n"); exit(EXIT_FAILURE); } queue_prof = clCreateCommandQueue(context[ocl_gpu_id], devices[ocl_gpu_id], CL_QUEUE_PROFILING_ENABLE, &ret_code); HANDLE_CLERROR(ret_code, "Failed in clCreateCommandQueue"); // Set salt. set_salt(get_salt("$6$saltstring$")); salt->initial = salt->rounds - get_multiple(salt->rounds, HASH_LOOPS); // Set keys for (i = 0; i < num; i++) { set_key("aaabaabaaa", i); } //** Get execution time **// HANDLE_CLERROR(clEnqueueWriteBuffer(queue_prof, salt_buffer, CL_FALSE, 0, sizeof(sha512_salt), salt, 0, NULL, &myEvent), "Failed in clEnqueueWriteBuffer"); HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish"); HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL), "Failed in clGetEventProfilingInfo I"); HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL), "Failed in clGetEventProfilingInfo II"); HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent"); runtime = endTime - startTime; //** Get execution time **// HANDLE_CLERROR(clEnqueueWriteBuffer(queue_prof, pass_buffer, CL_FALSE, 0, sizeof(sha512_password) * num, plaintext, 0, NULL, &myEvent), "Failed in clEnqueueWriteBuffer"); HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish"); HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL), "Failed in clGetEventProfilingInfo I"); HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL), "Failed in clGetEventProfilingInfo II"); HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent"); runtime += endTime - startTime; //** Get execution time **// if (gpu(source_in_use) || use_local(source_in_use)) { ret_code = clEnqueueNDRangeKernel(queue_prof, prepare_kernel, 1, NULL, &num, &local_work_size, 0, NULL, &myEvent); HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish"); HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL), "Failed in clGetEventProfilingInfo I"); HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL), "Failed in clGetEventProfilingInfo II"); HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent"); runtime += endTime - startTime; } loops = gpu(source_in_use) || use_local(source_in_use) ? (salt->rounds / HASH_LOOPS) : 1; //** Get execution time **// for (i = 0; i < loops; i++) { ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel, 1, NULL, &num, &local_work_size, 0, NULL, &myEvent); HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish"); HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL), "Failed in clGetEventProfilingInfo I"); HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL), "Failed in clGetEventProfilingInfo II"); HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent"); runtime += endTime - startTime; } //** Get execution time **// HANDLE_CLERROR(clEnqueueReadBuffer(queue_prof, hash_buffer, CL_FALSE, 0, sizeof(sha512_hash) * num, tmpbuffer, 0, NULL, &myEvent), "Failed in clEnqueueReadBuffer"); HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish"); HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL), "Failed in clGetEventProfilingInfo I"); HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL), "Failed in clGetEventProfilingInfo II"); HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent"); runtime += endTime - startTime; MEM_FREE(tmpbuffer); HANDLE_CLERROR(clReleaseCommandQueue(queue_prof), "Failed in clReleaseCommandQueue"); release_clobj(); if (ret_code != CL_SUCCESS) { if (ret_code != CL_INVALID_WORK_GROUP_SIZE) fprintf(stderr, "Error %d\n", ret_code); return 0; } return runtime; }