static int crypt_all_benchmark(int *pcount, struct db_salt *salt) { size_t scalar_gws; size_t *lws = local_work_size ? &local_work_size : NULL; global_work_size = local_work_size ? ((*pcount + (v_width * local_work_size - 1)) / (v_width * local_work_size)) * local_work_size : *pcount / v_width; scalar_gws = global_work_size * v_width; #if 0 fprintf(stderr, "%s(%d) lws "Zu" gws "Zu" sgws "Zu" kpc %d/%d\n", __FUNCTION__, *pcount, local_work_size, global_work_size, scalar_gws, me->params.min_keys_per_crypt, me->params.max_keys_per_crypt); #endif /// Copy data to gpu BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, key_buf_size, inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu"); /// Run kernels BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_init, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel"); BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_loop, 1, NULL, &global_work_size, lws, 0, NULL, NULL), "Run loop kernel"); BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel"); BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_loop, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run loop kernel"); BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel"); BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_final, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[3]), "Run intermediate kernel"); /// Read the result back BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, sizeof(pbkdf2_out) * scalar_gws, output, 0, NULL, multi_profilingEvent[4]), "Copy result back"); return *pcount; }
static int crypt_all_benchmark(int *pcount, struct db_salt *salt) { int count = *pcount; size_t *lws = local_work_size ? &local_work_size : NULL; BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, insize, host_pass, 0, NULL, multi_profilingEvent[0]), "Copy memin"); ///Run the init kernel BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], init_kernel, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Set ND range"); ///Run split kernel BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, &global_work_size, lws, 0, NULL, NULL), "Set ND range"); BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Set ND range"); ///Run the finish kernel BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], finish_kernel, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[3]), "Set ND range"); BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, outsize, host_hash, 0, NULL, multi_profilingEvent[4]), "Copy data back"); return count; }
static int crypt_all_benchmark(int *pcount, struct db_salt *salt) { int count = *pcount; size_t scalar_gws; size_t *lws = local_work_size ? &local_work_size : NULL; global_work_size = local_work_size ? ((count + (v_width * local_work_size - 1)) / (v_width * local_work_size)) * local_work_size : count / v_width; scalar_gws = global_work_size * v_width; #if 0 fprintf(stderr, "%s(%d) lws "Zu" gws "Zu" sgws "Zu"\n", __FUNCTION__, *pcount, local_work_size, global_work_size, scalar_gws); #endif /// Run kernels, no iterations for fast enumeration BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, key_buf_size, inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu"); BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf1_init, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel"); BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf1_loop, 1, NULL, &global_work_size, lws, 0, NULL, NULL), "Run loop kernel"); BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf1_loop, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run loop kernel"); BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf1_final, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[3]), "Run final kernel"); BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, sizeof(pbkdf1_out) * scalar_gws, host_crack, 0, NULL, multi_profilingEvent[4]), "Copy result back"); return count; }
static int crypt_all_benchmark(int *pcount, struct db_salt *salt) { int count = *pcount; size_t *lws = local_work_size ? &local_work_size : NULL; global_work_size = local_work_size ? (count + local_work_size - 1) / local_work_size * local_work_size : count; #if 0 printf("crypt_all_benchmark(%d)\n", count); printf("LWS = %d, GWS = %d\n",(int)local_work_size, (int)global_work_size); #endif /// Copy data to gpu BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, global_work_size * sizeof(pass_t), host_pass, 0, NUUL, multi_profilingEvent[0]), "Copy data to gpu"); /// Run kernel BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NUUL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run kernel"); BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish"); BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], split_kernel, 1, NULL, &global_work_size, lws, 0, NULL, NULL), "Run split kernel"); BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish"); BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], split_kernel, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run split kernel"); BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish"); /// Read the result back BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_FALSE, 0, global_work_size * sizeof(crack_t), host_crack, 0, NUUL, multi_profilingEvent[3]), "Copy result back"); /// Await completion of all the above BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish"); return count; }
static int crypt_all_benchmark(int *pcount, struct db_salt *salt) { int count = *pcount; int i; global_work_size = (count + local_work_size - 1) / local_work_size * local_work_size; BENCH_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_in, CL_FALSE, 0, insize, host_pass, 0, NULL, &multi_profilingEvent[0]), "Copy memin"); BENCH_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_salt, CL_FALSE, 0, saltsize, host_salt, 0, NULL, &multi_profilingEvent[1]), "Copy memsalt"); ///Run the init kernel BENCH_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], init_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, &multi_profilingEvent[2]), "Set ND range"); ///Run split kernel for(i = 0; i < 3; i++) { BENCH_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], crypt_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, &multi_profilingEvent[split_events[i]]), "Set ND range"); //3, 4, 5 BENCH_CLERROR(clFinish(queue[ocl_gpu_id]), "Error running loop kernel"); opencl_process_event(); } ///Run the finish kernel BENCH_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], finish_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, &multi_profilingEvent[6]), "Set ND range"); BENCH_CLERROR(clEnqueueReadBuffer(queue[ocl_gpu_id], mem_out, CL_FALSE, 0, outsize, host_hash, 0, NULL, &multi_profilingEvent[7]), "Copy data back"); ///Await completion of all the above BENCH_CLERROR(clFinish(queue[ocl_gpu_id]), "clFinish error"); return count; }
static int crypt_all_benchmark(int *pcount, struct db_salt *salt) { int count = *pcount; size_t *lws = local_work_size ? &local_work_size : NULL; global_work_size = local_work_size ? (count + local_work_size - 1) / local_work_size * local_work_size : count; // Copy data to gpu BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, insize, inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu"); // Run 1st kernels BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], sevenzip_init, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run init kernel"); // Warm-up run BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, &global_work_size, lws, 0, NULL, NULL), "Run loop kernel"); // Loop kernel BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run loop kernel"); // Read the result back BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, outsize, outbuffer, 0, NULL, multi_profilingEvent[3]), "Copy result back"); BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel"); return count; }