static void find_best_gws(int do_benchmark, struct fmt_main *self) { int num; cl_ulong run_time, min_time = CL_ULONG_MAX; unsigned int SHAspeed, bestSHAspeed = 0, max_gws; int optimal_gws = local_work_size; const int sha1perkey = 50004; unsigned long long int MaxRunTime = 5000000000ULL; max_gws = get_max_mem_alloc_size(ocl_gpu_id) / (UNICODE_LENGTH * VF); if (do_benchmark) { fprintf(stderr, "Calculating best keys per crypt (GWS) for LWS=%zd and max. %llu s duration.\n\n", local_work_size, MaxRunTime / 1000000000UL); fprintf(stderr, "Raw GPU speed figures including buffer transfers:\n"); } for (num = local_work_size; max_gws; num *= 2) { if (!do_benchmark) advance_cursor(); if (!(run_time = gws_test(num, do_benchmark, self))) break; SHAspeed = sha1perkey * (1000000000UL * VF * num / run_time); if (run_time < min_time) min_time = run_time; if (do_benchmark) fprintf(stderr, "gws %6d%8llu c/s%14u sha1/s%8.3f sec per crypt_all()", num, (1000000000ULL * VF * num / run_time), SHAspeed, (float)run_time / 1000000000.); if (((float)run_time / (float)min_time) < ((float)SHAspeed / (float)bestSHAspeed)) { if (do_benchmark) fprintf(stderr, "!\n"); bestSHAspeed = SHAspeed; optimal_gws = num; } else { if (run_time < MaxRunTime && SHAspeed > (bestSHAspeed * 1.01)) { if (do_benchmark) fprintf(stderr, "+\n"); bestSHAspeed = SHAspeed; optimal_gws = num; continue; } if (do_benchmark) fprintf(stderr, "\n"); if (run_time >= MaxRunTime) break; } } global_work_size = optimal_gws; }
/* -- This function could be used to calculated the best num of keys per crypt for the given format -- */ static void find_best_gws(struct fmt_main * self) { size_t num = 0; cl_ulong run_time, min_time = CL_ULONG_MAX; int optimal_gws = local_work_size, step = STEP; int do_benchmark = 0; unsigned int SHAspeed, bestSHAspeed = 0; unsigned long long int max_run_time = 1000000000ULL; char *tmp_value; if ((tmp_value = getenv("STEP"))){ step = atoi(tmp_value); do_benchmark = 1; } step = get_multiple(step, local_work_size); if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, DUR_CONFIG))) max_run_time = atoi(tmp_value) * 1000000000UL; fprintf(stderr, "Calculating best global work size (GWS) for LWS=%zd and max. %llu s duration.\n\n", local_work_size, max_run_time / 1000000000ULL); if (do_benchmark) fprintf(stderr, "Raw speed figures including buffer transfers:\n"); for (num = get_step(num, step, 1); num; num = get_step(num, step, 0)) { //Check if hardware can handle the size we are going to try now. if (sizeof(sha256_password) * num * 1.2 > get_max_mem_alloc_size(ocl_gpu_id)) break; if (! (run_time = gws_test(num, self))) continue; if (!do_benchmark) advance_cursor(); SHAspeed = num / (run_time / 1000000000.); if (run_time < min_time) min_time = run_time; if (do_benchmark) { fprintf(stderr, "gws: %8zu\t%12lu c/s %8.3f ms per crypt_all()", num, (long) (num / (run_time / 1000000000.)), (float) run_time / 1000000.); if (run_time > max_run_time) { fprintf(stderr, " - too slow\n"); break; } } else { if (run_time > min_time * 20 || run_time > max_run_time) break; } if (((long) SHAspeed - bestSHAspeed) > 10000) { if (do_benchmark) fprintf(stderr, "+"); bestSHAspeed = SHAspeed; optimal_gws = num; } if (do_benchmark) fprintf(stderr, "\n"); } fprintf(stderr, "Optimal global work size %d\n", optimal_gws); fprintf(stderr, "(to avoid this test on next run, put \"" GWS_CONFIG " = %d\" in john.conf, section [" SECTION_OPTIONS SUBSECTION_OPENCL "])\n", optimal_gws); global_work_size = optimal_gws; create_clobj(optimal_gws, self); }
static void find_best_gws(int do_benchmark) { int num; cl_ulong run_time, min_time = CL_ULONG_MAX; unsigned int SHAspeed, bestSHAspeed = 0; int optimal_gws = local_work_size; const int sha1perkey = (strlen(rar_fmt.params.tests[0].plaintext) * 2 + 8 + 3) * 0x40000 / 64 + 16; char *conf; unsigned long long int MaxRunTime = 5000000000ULL; if ((conf = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, "rar_MaxDuration"))) MaxRunTime = atoi(conf) * 1000000000UL; #ifndef DEBUG if (do_benchmark) #endif { fprintf(stderr, "Calculating best keys per crypt (GWS) for LWS=%zd and max. %llu s duration.\n\n", local_work_size, MaxRunTime / 1000000000UL); fprintf(stderr, "Raw GPU speed figures including buffer transfers:\n"); } for (num = local_work_size; num; num *= 2) { if (!(run_time = gws_test(num))) break; SHAspeed = sha1perkey * (1000000000UL * num * VF / run_time); if (run_time < min_time) min_time = run_time; #ifndef DEBUG if (do_benchmark) #endif fprintf(stderr, "gws %6d\t%4llu c/s%14u sha1/s%8.3f sec per crypt_all()", num, (1000000000ULL * num * VF / run_time), SHAspeed, (float)run_time / 1000000000.); if (((float)run_time / (float)min_time) < ((float)SHAspeed / (float)bestSHAspeed)) { #ifndef DEBUG if (do_benchmark) #endif fprintf(stderr, "!\n"); bestSHAspeed = SHAspeed; optimal_gws = num; } else { if (run_time > MaxRunTime) { #ifndef DEBUG if (do_benchmark) #endif fprintf(stderr, "\n"); break; } if (SHAspeed > bestSHAspeed) { #ifndef DEBUG if (do_benchmark) #endif fprintf(stderr, "+"); bestSHAspeed = SHAspeed; optimal_gws = num; } #ifndef DEBUG if (do_benchmark) #endif fprintf(stderr, "\n"); } } if (get_device_type(ocl_gpu_id) != CL_DEVICE_TYPE_CPU) { fprintf(stderr, "Optimal keys per crypt %d\n",(int)optimal_gws); fprintf(stderr, "(to avoid this test on next run, put \"" GWS_CONFIG " = %d\" in john.conf, section [" SECTION_OPTIONS SUBSECTION_OPENCL "])\n", (int)optimal_gws); } *mkpc = VF * (global_work_size = optimal_gws); }