/* Our window with cursor has got some input key_cnt == -1 means no input has been taking place at this cursor position key_cnt >= 0 means the same key has been pressed one or more times here */ void win_cursor_input(int new_key){ char c; if (new_key == CURSOR_LEFT){ dec_cursor(); key_cnt = -1; last_key = -1; timer_stop(&char_tmr); return; }; if (new_key == CURSOR_RIGHT){ advance_cursor(); key_cnt = -1; last_key = -1; timer_stop(&char_tmr); return; }; /* A bit tricky. It makes sense to implement this as a kind of delete, i.e. delete the character under the cursor. This only fails if the cursor is at end of text. Then delete the last character. */ if (new_key == CURSOR_BACKSPACE){ key_cnt = -1; last_key = -1; timer_stop(&char_tmr); del_char(pcursor_win, cursor_pos); cursor_pos = min(cursor_pos, pcursor_win->text_len); return; }; timer_set(&char_tmr, WAIT_KEY_TIME, 0); // start auto advance timer /* This is the first time a key is pressed here */ if (key_cnt < 0){ key_cnt = 0; c = key2char(new_key, 0); store_char(pcursor_win, cursor_pos, c); } else { /* We already entered text at this cursor position */ if (new_key == last_key){ // User pressed the same key twice before a time out occured key_cnt++; c = key2char(new_key, key_cnt); store_char(pcursor_win, cursor_pos, c); } else { // User pressed a different key, advance to the next cursor position, store new char key_cnt = 0; c = key2char(new_key, 0); advance_cursor(); store_char(pcursor_win, cursor_pos, c); }; }; last_key = new_key; };
// around 700us - 800us per character uint8_t LCD_print_char(uint8_t value) { if(current_lcd_state[printing_lcd_x][printing_lcd_y] != value) { if(current_lcd_x != printing_lcd_x || current_lcd_y != printing_lcd_y) { LCD_moveTo(printing_lcd_x, printing_lcd_y); } send_byte(value, LCD_SEND_Rs); current_lcd_state[current_lcd_x][current_lcd_y] = value; advance_cursor(¤t_lcd_x, ¤t_lcd_y); } advance_cursor(&printing_lcd_x, &printing_lcd_y); return 1; }
void psystem_weight_srcs(double weights[]) { intptr_t i; for(i = 0; i < psys.nsrcs; i++) { advance_cursor(i); /* treat two types the same essentially because they are both Ngamma_sec*/ if(psys.srcs[i].type == PSYS_SRC_POINT) { weights[i] = psys_Ngamma_dot(i) * (psys.tick - psys.srcs[i].lastemit) * psys.tick_time; } else if(psys.srcs[i].type == PSYS_SRC_PLANE) { weights[i] = psys_Ngamma_dot(i) * (psys.tick - psys.srcs[i].lastemit) * psys.tick_time; } } }
static void find_best_gws(int do_benchmark, struct fmt_main *self) { int num; cl_ulong run_time, min_time = CL_ULONG_MAX; unsigned int SHAspeed, bestSHAspeed = 0, max_gws; int optimal_gws = local_work_size; const int sha1perkey = 50004; unsigned long long int MaxRunTime = 5000000000ULL; max_gws = get_max_mem_alloc_size(ocl_gpu_id) / (UNICODE_LENGTH * VF); if (do_benchmark) { fprintf(stderr, "Calculating best keys per crypt (GWS) for LWS=%zd and max. %llu s duration.\n\n", local_work_size, MaxRunTime / 1000000000UL); fprintf(stderr, "Raw GPU speed figures including buffer transfers:\n"); } for (num = local_work_size; max_gws; num *= 2) { if (!do_benchmark) advance_cursor(); if (!(run_time = gws_test(num, do_benchmark, self))) break; SHAspeed = sha1perkey * (1000000000UL * VF * num / run_time); if (run_time < min_time) min_time = run_time; if (do_benchmark) fprintf(stderr, "gws %6d%8llu c/s%14u sha1/s%8.3f sec per crypt_all()", num, (1000000000ULL * VF * num / run_time), SHAspeed, (float)run_time / 1000000000.); if (((float)run_time / (float)min_time) < ((float)SHAspeed / (float)bestSHAspeed)) { if (do_benchmark) fprintf(stderr, "!\n"); bestSHAspeed = SHAspeed; optimal_gws = num; } else { if (run_time < MaxRunTime && SHAspeed > (bestSHAspeed * 1.01)) { if (do_benchmark) fprintf(stderr, "+\n"); bestSHAspeed = SHAspeed; optimal_gws = num; continue; } if (do_benchmark) fprintf(stderr, "\n"); if (run_time >= MaxRunTime) break; } } global_work_size = optimal_gws; }
static void find_best_kpc(void){ int num; cl_event myEvent; cl_ulong startTime, endTime, tmpTime; int kernelExecTimeNs = 6969; cl_int ret_code; int optimal_kpc=2048; int i = 0; cl_uint *tmpbuffer; printf("Calculating best keys per crypt, this will take a while "); for( num=SSHA_NUM_KEYS; num > 4096 ; num -= 16384){ release_clobj(); create_clobj(num); advance_cursor(); queue_prof = clCreateCommandQueue( context[gpu_id], devices[gpu_id], CL_QUEUE_PROFILING_ENABLE, &ret_code); for (i=0; i < num; i++){ memcpy(&(saved_plain[i*PLAINTEXT_LENGTH]),"abacaeaf",PLAINTEXT_LENGTH); } clEnqueueWriteBuffer(queue_prof, mysalt, CL_TRUE, 0, SALT_SIZE, saved_salt, 0, NULL, NULL); clEnqueueWriteBuffer(queue_prof, buffer_keys, CL_TRUE, 0, (PLAINTEXT_LENGTH) * num, saved_plain, 0, NULL, NULL); ret_code = clEnqueueNDRangeKernel( queue_prof, crypt_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, &myEvent); if(ret_code != CL_SUCCESS){ printf("Error %d\n",ret_code); continue; } clFinish(queue_prof); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &endTime , NULL); tmpTime = endTime-startTime; tmpbuffer = malloc(sizeof(cl_uint) * num); clEnqueueReadBuffer(queue_prof, buffer_out, CL_TRUE, 0, sizeof(cl_uint) * num, tmpbuffer, 0, NULL, &myEvent); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &endTime , NULL); tmpTime = tmpTime + (endTime-startTime); if( ((int)( ((float) (tmpTime) / num) * 10 )) <= kernelExecTimeNs) { kernelExecTimeNs = ((int) (((float) (tmpTime) / num) * 10) ) ; optimal_kpc = num; } free(tmpbuffer); clReleaseCommandQueue(queue_prof); } printf("Optimal keys per crypt %d\n(to avoid this test on next run, put \"" KPC_CONFIG " = %d\" in john.conf, section [" SECTION_OPTIONS SUBSECTION_OPENCL "])\n", optimal_kpc, optimal_kpc); max_keys_per_crypt = optimal_kpc; release_clobj(); create_clobj(optimal_kpc); }
static void find_best_kpc(void){ int num; cl_event myEvent; cl_ulong startTime, endTime, tmpTime; int kernelExecTimeNs = 6969; cl_int ret_code; int optimal_kpc=2048; int i = 0; cl_uint *tmpbuffer; fprintf(stderr, "Calculating best keys per crypt, this will take a while "); for( num=MD4_NUM_KEYS; num > 4096 ; num -= 4096){ release_clobj(); create_clobj(num); advance_cursor(); queue_prof = clCreateCommandQueue( context[ocl_gpu_id], devices[ocl_gpu_id], CL_QUEUE_PROFILING_ENABLE, &ret_code); for (i=0; i < num; i++){ memcpy(&(saved_plain[i * (PLAINTEXT_LENGTH + 1)]), "abcaaeaf", PLAINTEXT_LENGTH + 1); saved_plain[i * (PLAINTEXT_LENGTH + 1) + 8] = 0x80; } clEnqueueWriteBuffer(queue_prof, data_info, CL_TRUE, 0, sizeof(unsigned int)*2, datai, 0, NULL, NULL); clEnqueueWriteBuffer(queue_prof, buffer_keys, CL_TRUE, 0, (PLAINTEXT_LENGTH + 1) * num, saved_plain, 0, NULL, NULL); ret_code = clEnqueueNDRangeKernel( queue_prof, crypt_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, &myEvent); if(ret_code != CL_SUCCESS) { HANDLE_CLERROR(ret_code, "Error running kernel in find_best_KPC()"); continue; } clFinish(queue_prof); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &endTime , NULL); tmpTime = endTime-startTime; tmpbuffer = malloc(sizeof(cl_uint) * num); clEnqueueReadBuffer(queue_prof, buffer_out, CL_TRUE, 0, sizeof(cl_uint) * num, tmpbuffer, 0, NULL, &myEvent); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &endTime , NULL); tmpTime = tmpTime + (endTime-startTime); if( ((int)( ((float) (tmpTime) / num) * 10 )) <= kernelExecTimeNs) { kernelExecTimeNs = ((int) (((float) (tmpTime) / num) * 10) ) ; optimal_kpc = num; } free(tmpbuffer); clReleaseCommandQueue(queue_prof); } fprintf(stderr, "Optimal keys per crypt %d\n(to avoid this test on next run do \"export GWS=%d\")\n",optimal_kpc,optimal_kpc); max_keys_per_crypt = optimal_kpc; release_clobj(); create_clobj(optimal_kpc); }
static void find_best_kpc(void){ int num; cl_event myEvent; cl_ulong startTime, endTime, tmpTime; int kernelExecTimeNs = INT_MAX; cl_int ret_code; int optimal_kpc=2048; int i = 0; cl_uint *tmpbuffer; fprintf(stderr, "Calculating best keys per crypt, this will take a while "); for( num=MAX_KEYS_PER_CRYPT; num >= 4096 ; num -= 4096){ release_clobj(); create_clobj(num); advance_cursor(); queue_prof = clCreateCommandQueue( context[ocl_gpu_id], devices[ocl_gpu_id], CL_QUEUE_PROFILING_ENABLE, &ret_code); for (i=0; i < num; i++){ strcpy(&(saved_plain[i * keybuf_size]), tests[0].plaintext); } clEnqueueWriteBuffer(queue_prof, buffer_keys, CL_TRUE, 0, keybuf_size * num, saved_plain, 0, NULL, NULL); ret_code = clEnqueueNDRangeKernel( queue_prof, crypt_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, &myEvent); if(ret_code != CL_SUCCESS){ fprintf(stderr, "Error %d\n",ret_code); continue; } clFinish(queue_prof); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &endTime , NULL); tmpTime = endTime-startTime; tmpbuffer = mem_alloc(sizeof(cl_uint) * num); clEnqueueReadBuffer(queue_prof, buffer_out, CL_TRUE, 0, sizeof(cl_uint) * num, tmpbuffer, 0, NULL, &myEvent); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &endTime , NULL); tmpTime = tmpTime + (endTime-startTime); if( ((int)( ((float) (tmpTime) / num) * 10 )) <= kernelExecTimeNs) { kernelExecTimeNs = ((int) (((float) (tmpTime) / num) * 10) ) ; optimal_kpc = num; } MEM_FREE(tmpbuffer); clReleaseCommandQueue(queue_prof); } fprintf(stderr, "Optimal keys per crypt %d\n(to avoid this test on next run do export GWS=%d)\n",optimal_kpc,optimal_kpc); global_work_size = optimal_kpc; release_clobj(); create_clobj(optimal_kpc); }
static void find_best_gws(int do_benchmark, struct fmt_main *self) { cl_event myEvent; cl_ulong startTime, endTime, tmpTime; int kernelExecTimeNs = INT_MAX; cl_int ret_code; int optimal_kpc=2048; int gws, i = 0; cl_uint *tmpbuffer; for(gws = local_work_size << 2; gws <= 8*1024*1024; gws <<= 1) { create_clobj(gws, self); advance_cursor(); queue_prof = clCreateCommandQueue( context[ocl_gpu_id], devices[ocl_gpu_id], CL_QUEUE_PROFILING_ENABLE, &ret_code); for (i=0; i < gws; i++) { memcpy(&(saved_plain[i*PLAINTEXT_LENGTH]),"abacaeaf",PLAINTEXT_LENGTH); } clEnqueueWriteBuffer(queue_prof, mysalt, CL_TRUE, 0, SALT_SIZE, saved_salt, 0, NULL, NULL); clEnqueueWriteBuffer(queue_prof, buffer_keys, CL_TRUE, 0, (PLAINTEXT_LENGTH) * gws, saved_plain, 0, NULL, NULL); ret_code = clEnqueueNDRangeKernel( queue_prof, crypt_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, &myEvent); if(ret_code != CL_SUCCESS) { // We hit some resource limit so we end here. release_clobj(); break; } clFinish(queue_prof); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &endTime , NULL); tmpTime = endTime-startTime; tmpbuffer = malloc(sizeof(cl_uint) * gws); clEnqueueReadBuffer(queue_prof, buffer_out, CL_TRUE, 0, sizeof(cl_uint) * gws, tmpbuffer, 0, NULL, &myEvent); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &startTime, NULL); clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &endTime , NULL); tmpTime = tmpTime + (endTime-startTime); if (do_benchmark) fprintf(stderr, "%10d %10llu c/s %-.2f us\n", gws, gws * 1000000000ULL / tmpTime, tmpTime / 1000.0); if( ((int)( ((float) (tmpTime) / gws) * 10 )) <= kernelExecTimeNs) { kernelExecTimeNs = ((int) (((float) (tmpTime) / gws) * 10) ) ; optimal_kpc = gws; } MEM_FREE(tmpbuffer); clReleaseCommandQueue(queue_prof); release_clobj(); } global_work_size = optimal_kpc; }
/* -- This function could be used to calculated the best num of keys per crypt for the given format -- */ static void find_best_gws(struct fmt_main * self) { size_t num = 0; cl_ulong run_time, min_time = CL_ULONG_MAX; int optimal_gws = local_work_size, step = STEP; int do_benchmark = 0; unsigned int SHAspeed, bestSHAspeed = 0; unsigned long long int max_run_time = 1000000000ULL; char *tmp_value; if ((tmp_value = getenv("STEP"))){ step = atoi(tmp_value); do_benchmark = 1; } step = get_multiple(step, local_work_size); if ((tmp_value = cfg_get_param(SECTION_OPTIONS, SUBSECTION_OPENCL, DUR_CONFIG))) max_run_time = atoi(tmp_value) * 1000000000UL; fprintf(stderr, "Calculating best global work size (GWS) for LWS=%zd and max. %llu s duration.\n\n", local_work_size, max_run_time / 1000000000ULL); if (do_benchmark) fprintf(stderr, "Raw speed figures including buffer transfers:\n"); for (num = get_step(num, step, 1); num; num = get_step(num, step, 0)) { //Check if hardware can handle the size we are going to try now. if (sizeof(sha256_password) * num * 1.2 > get_max_mem_alloc_size(ocl_gpu_id)) break; if (! (run_time = gws_test(num, self))) continue; if (!do_benchmark) advance_cursor(); SHAspeed = num / (run_time / 1000000000.); if (run_time < min_time) min_time = run_time; if (do_benchmark) { fprintf(stderr, "gws: %8zu\t%12lu c/s %8.3f ms per crypt_all()", num, (long) (num / (run_time / 1000000000.)), (float) run_time / 1000000.); if (run_time > max_run_time) { fprintf(stderr, " - too slow\n"); break; } } else { if (run_time > min_time * 20 || run_time > max_run_time) break; } if (((long) SHAspeed - bestSHAspeed) > 10000) { if (do_benchmark) fprintf(stderr, "+"); bestSHAspeed = SHAspeed; optimal_gws = num; } if (do_benchmark) fprintf(stderr, "\n"); } fprintf(stderr, "Optimal global work size %d\n", optimal_gws); fprintf(stderr, "(to avoid this test on next run, put \"" GWS_CONFIG " = %d\" in john.conf, section [" SECTION_OPTIONS SUBSECTION_OPENCL "])\n", optimal_gws); global_work_size = optimal_gws; create_clobj(optimal_gws, self); }