// system interrupt handler extern void sys_handler(void) { unsigned st_status = AT91C_BASE_ST->ST_SR & AT91C_BASE_ST->ST_IMR; unsigned rtc_status = AT91C_BASE_RTC->RTC_SR & AT91C_BASE_RTC->RTC_IMR; unsigned dbgu_status = AT91C_BASE_DBGU->DBGU_CSR & AT91C_BASE_DBGU->DBGU_IMR; static unsigned char counter = '0'; if (dbgu_status & AT91C_US_RXRDY) { // disable RXRDY interrupt in DBGU AT91C_BASE_DBGU->DBGU_IDR |= AT91C_US_RXRDY; // disable rtt interrupt flag AT91C_BASE_ST->ST_IDR = AT91C_ST_RTTINC; if (wait_status == 1) { wait_status = 0; } else { transfer_size = dbgu_xmod_recv((void *)LINUX_BASE_ADDRESS); run_kernel(); } } // handler of rtt - rttinc if (st_status & AT91C_ST_RTTINC) { AT91C_BASE_PIOB->PIO_ODSR ^= AT91C_PIO_PB27; if (wait_status == 1) { put_char(counter); counter++; if (counter == '6') run_kernel(); put_char(' '); } else put_char('C'); } }
// Enqueues two kernels over a freshly created device buffer (fill_index,
// then twice), sums the buffer contents on the host and asserts that the sum
// equals arith(2, item_count). The result must become available within five
// seconds of entering this function. Always returns true; failures surface
// through assert().
bool check(misc::runner const & i_runner, host::generic_program i_program)
{
    using const_iter = host::buffer<pfm::int_>::const_iterator;

    // The timeout is anchored at function entry, before any work is queued.
    chrono::steady_clock::time_point const started = chrono::steady_clock::now();

    host::buffer<pfm::int_> bufWrite(i_runner.m_context, item_count);

    i_runner.m_queue(run_kernel(i_program, fill_index(bufWrite), item_count));
    i_runner.m_queue(run_kernel(i_program, twice(bufWrite), item_count));

    // Reduce the buffer to a single int once the queued kernels have run.
    auto total = i_runner.m_queue(
        bufWrite.with_range(
            [](const_iter first, const_iter last)
            {
                return std::accumulate(first, last, 0);
            }));

    std::future_status const state = total.wait_until(started + chrono::seconds(5));
    assert(state == std::future_status::ready);
    assert(total.get() == arith(2, item_count));

    return true;
}
END_TEST

// Runs builtins_source through run_kernel (NormalKind) and translates the
// numeric result into a failure message. Codes 1..5 have dedicated messages;
// every other value is delegated to default_error(). The test fails whenever
// the resulting message pointer is non-null.
START_TEST (test_builtins)
{
    // Indexed by (result code - 1).
    static const char *const messages[] = {
        "float2 cos(float2) doesn't behave correctly",
        "float cos(float) doesn't behave correctly",
        "float copysign(float) doesn't behave correctly",
        "float2 copysign(float2) doesn't behave correctly",
        "exp2() doesn't behave correctly"
    };

    uint32_t rs = run_kernel(builtins_source, NormalKind);
    const char *errstr = 0;

    if (rs >= 1 && rs <= 5)
        errstr = messages[rs - 1];
    else
        errstr = default_error(rs);

    fail_if( errstr != 0, errstr );
}
END_TEST

// Runs image_source through run_kernel (ImageKind) and translates the
// numeric result into a failure message. Codes 1..5 have dedicated messages;
// every other value is delegated to default_error(). The test fails whenever
// the resulting message pointer is non-null.
START_TEST (test_image)
{
    // Indexed by (result code - 1).
    // NOTE(review): codes 1 and 2 carry the same text; code 2 possibly refers
    // to a different property (e.g. height) - confirm against image_source.
    static const char *const messages[] = {
        "Image1 must have width of 4",
        "Image1 must have width of 4",
        "Image2 must have type SIGNED_FLOAT16",
        "Image2 must have channel order RGBA",
        "The value read from the image is not good"
    };

    uint32_t rs = run_kernel(image_source, ImageKind);
    const char *errstr = 0;

    if (rs >= 1 && rs <= 5)
        errstr = messages[rs - 1];
    else
        errstr = default_error(rs);

    fail_if( errstr != 0, errstr );
}
void process_cfm_by_gpu(unsigned char *pDataDst, int nDstWidth, int nDstHeight, short *pSrcData, int nSrcWidth, int nSrcHeight) { int i,j; int m,n; #ifdef OPENCL_MU1 LOGD("MU1 ---------------------- input start"); set_input_i_to_kernel(); set_input_o_to_kernel(); LOGD("MU1 ---------------------- run kernel"); run_kernel(); LOGD("MU1 ---------------------- get output"); get_output_from_kernel(); LOGD("MU1 ---------------------- end"); #else LOGD("MU1 ---------------------- start C"); for(i=0; i<512; i++) { for(j=0; j<1024; j++) { table_o[i][j] = (table_i[i][j]+3)*(table_q[i][j]+3)*(table_i[i][j]+2)*(table_q[i][j]+2)*(table_i[i][j]+1)*(table_q[i][j]+1)*(table_i[i][j])*(table_q[i][j])*(table_i[i][j]+3)*(table_q[i][j]+3)*(table_i[i][j]+2)*(table_q[i][j]+2)*(table_i[i][j]+1)*(table_q[i][j]+1)*(table_i[i][j])*(table_q[i][j]); table_o[i][j] +=sin((100.0)/(table_i[i][j]+table_q[i][j]))*1000; table_o[i][j] += sqrt(table_i[i][j]) + sqrt(table_q[i][j]) + sqrt(table_i[i][j]+table_q[i][j]); } } LOGD("MU1 ---------------------- end"); #endif }
END_TEST

// Runs barrier_source through run_kernel (BarrierKind) and fails the test
// unless the returned value is exactly 0x40; the failure text comes from
// default_error(rs).
// NOTE(review): 0x40 is presumably the value the kernel produces once every
// work-item has passed the barrier - confirm against barrier_source.
START_TEST (test_barrier)
{
    uint32_t rs = run_kernel(barrier_source, BarrierKind);

    fail_if( rs != 0x40, default_error(rs) );
}
// Two-part check:
//   1. fill_index is enqueued as a kernel over a fresh buffer, the buffer is
//      summed on the host, and the sum must equal arith(2, item_count) within
//      five seconds of entering this function.
//   2. Enqueuing twice(a) directly from the host must throw cl::Error with
//      CL_INVALID_KERNEL_NAME.
// Always returns true; failures surface through assert().
bool check(misc::runner const & i_runner, host::generic_program i_program)
{
    chrono::steady_clock::time_point tp = chrono::steady_clock::now();

    host::buffer<pfm::int_> bufWrite(i_runner.m_context, item_count);
    typedef host::buffer<pfm::int_>::const_iterator iterator;

    // Verify that it can be used inside a kernel.
    i_runner.m_queue(
        run_kernel(
            i_program,
            fill_index(bufWrite),
            item_count));

    auto future = i_runner.m_queue(
        bufWrite.with_range(
            [](iterator i_begin, iterator i_end){
                return std::accumulate(i_begin, i_end, 0);
            }));

    std::future_status result = future.wait_until(tp + chrono::seconds(5));
    assert(result == std::future_status::ready);
    assert(future.get() == arith(2, item_count));

    // Verify that it cannot be called from the host.
    try
    {
        int const a = 1; // gcc-4.7.2: writing twice(1) triggers an internal compiler error
        i_runner.m_queue(
            host::run_kernel(
                i_program,
                twice(a),
                1)
        );
        // Reaching this point means the expected exception was not thrown.
        assert(false);
    }
    catch (cl::Error const & err)
    {
        // Caught by const reference: no copy of the exception object is made.
        assert(err.err() == CL_INVALID_KERNEL_NAME);
    }

    return true;
}
/**
 * Runs a one-candidate self-test on a device: uploads the known self-test
 * password (hashconfig->st_pass), runs the kernel sequence against the
 * self-test digest/salt buffers and checks whether the device reports a crack.
 *
 * Returns 0 when no self-test hash is configured (st_hash == NULL) or when the
 * device reports at least one crack; returns -1 on any OpenCL/kernel error or
 * when num_cracked is 0 (in which case an error and warnings are logged).
 *
 * Regardless of the crack result, kernel parameters 15/17/18 are pointed back
 * at the regular digest/salt/esalt buffers and the work buffers are zeroed
 * before the result is evaluated.
 */
static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
{
  hashconfig_t         *hashconfig         = hashcat_ctx->hashconfig;
  hashes_t             *hashes             = hashcat_ctx->hashes;
  status_ctx_t         *status_ctx         = hashcat_ctx->status_ctx;
  user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;

  cl_int CL_err;

  int CL_rc;

  // nothing to test for this hash mode
  if (hashconfig->st_hash == NULL) return 0;

  // init : replace hashes with selftest hash

  device_param->kernel_params[15] = &device_param->d_st_digests_buf;
  device_param->kernel_params[17] = &device_param->d_st_salts_buf;
  device_param->kernel_params[18] = &device_param->d_st_esalts_buf;

  // NOTE(review): slots 31/32 are presumably digest count / digest offset - confirm
  device_param->kernel_params_buf32[31] = 1;
  device_param->kernel_params_buf32[32] = 0;

  // password : move the known password into a fake buffer

  // only assigned in the non-bitslice brute-force branch below; stays 0 otherwise
  u32 highest_pw_len = 0;

  if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
  {
    if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
    {
      // NOTE(review): slot 30 is presumably the inner-loop candidate count - confirm
      device_param->kernel_params_buf32[30] = 1;

      pw_t pw; memset (&pw, 0, sizeof (pw));

      char *pw_ptr = (char *) &pw.i;

      const size_t pw_len = strlen (hashconfig->st_pass);

      // whole password goes into the single pw_t entry
      memcpy (pw_ptr, hashconfig->st_pass, pw_len);

      pw.pw_len = (u32) pw_len;

      if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
      {
        uppercase ((u8 *) pw_ptr, pw.pw_len);
      }

      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);

      if (CL_err != CL_SUCCESS) return -1;
    }
    else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
    {
      device_param->kernel_params_buf32[30] = 1;
      device_param->kernel_params_buf32[33] = COMBINATOR_MODE_BASE_LEFT;

      pw_t pw; memset (&pw, 0, sizeof (pw));

      char *pw_ptr = (char *) &pw.i;

      const size_t pw_len = strlen (hashconfig->st_pass);

      // base word: everything except the last character
      // NOTE(review): pw_len - 1 wraps if st_pass is empty - presumably never the case, confirm
      memcpy (pw_ptr, hashconfig->st_pass, pw_len - 1);

      pw.pw_len = (u32) pw_len - 1;

      if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
      {
        uppercase ((u8 *) pw_ptr, pw.pw_len);
      }

      // combinator word: the last character only
      pw_t comb; memset (&comb, 0, sizeof (comb));

      char *comb_ptr = (char *) &comb.i;

      memcpy (comb_ptr, hashconfig->st_pass + pw_len - 1, 1);

      comb.pw_len = 1;

      if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
      {
        uppercase ((u8 *) comb_ptr, comb.pw_len);
      }

      // optional terminator byte placed right after the combinator word
      if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
      {
        comb_ptr[comb.pw_len] = 0x01;
      }

      if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
      {
        comb_ptr[comb.pw_len] = 0x80;
      }

      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL);

      if (CL_err != CL_SUCCESS) return -1;

      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);

      if (CL_err != CL_SUCCESS) return -1;
    }
    else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
    {
      device_param->kernel_params_buf32[30] = 1;

      if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
      {
        // bitslice kernels: the full password is uploaded as one pw_t
        pw_t pw; memset (&pw, 0, sizeof (pw));

        char *pw_ptr = (char *) &pw.i;

        const size_t pw_len = strlen (hashconfig->st_pass);

        memcpy (pw_ptr, hashconfig->st_pass, pw_len);

        if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
        {
          uppercase ((u8 *) pw_ptr, pw_len);
        }

        pw.pw_len = (u32) pw_len;

        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);

        if (CL_err != CL_SUCCESS) return -1;
      }
      else
      {
        // non-bitslice: the first character goes into the bf_t entry,
        // the remainder (from offset 1) goes into the pw_t entry

        bf_t bf; memset (&bf, 0, sizeof (bf));

        char *bf_ptr = (char *) &bf.i;

        memcpy (bf_ptr, hashconfig->st_pass, 1);

        // UTF-16 variants: re-encode that single character as two bytes
        if (hashconfig->opts_type & OPTS_TYPE_PT_UTF16LE)
        {
          memset (bf_ptr, 0, 4);

          for (int i = 0, j = 0; i < 1; i += 1, j += 2)
          {
            bf_ptr[j + 0] = hashconfig->st_pass[i];
            bf_ptr[j + 1] = 0;
          }
        }
        else if (hashconfig->opts_type & OPTS_TYPE_PT_UTF16BE)
        {
          memset (bf_ptr, 0, 4);

          for (int i = 0, j = 0; i < 1; i += 1, j += 2)
          {
            bf_ptr[j + 0] = 0;
            bf_ptr[j + 1] = hashconfig->st_pass[i];
          }
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
        {
          uppercase ((u8 *) bf_ptr, 4);
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_GENERATE_BE)
        {
          bf.i = byte_swap_32 (bf.i);
        }

        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL);

        if (CL_err != CL_SUCCESS) return -1;

        pw_t pw; memset (&pw, 0, sizeof (pw));

        char *pw_ptr = (char *) &pw.i;

        const size_t pw_len = strlen (hashconfig->st_pass);

        // position 0 is left zero here; only characters 1.. are copied
        memcpy (pw_ptr + 1, hashconfig->st_pass + 1, pw_len - 1);

        size_t new_pass_len = pw_len;

        // UTF-16 variants: rewrite characters 1.. as two bytes each, doubling the length
        if (hashconfig->opts_type & OPTS_TYPE_PT_UTF16LE)
        {
          memset (pw_ptr, 0, pw_len);

          for (size_t i = 1, j = 2; i < new_pass_len; i += 1, j += 2)
          {
            pw_ptr[j + 0] = hashconfig->st_pass[i];
            pw_ptr[j + 1] = 0;
          }

          new_pass_len *= 2;
        }
        else if (hashconfig->opts_type & OPTS_TYPE_PT_UTF16BE)
        {
          memset (pw_ptr, 0, pw_len);

          for (size_t i = 1, j = 2; i < new_pass_len; i += 1, j += 2)
          {
            pw_ptr[j + 0] = 0;
            pw_ptr[j + 1] = hashconfig->st_pass[i];
          }

          new_pass_len *= 2;
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
        {
          uppercase ((u8 *) pw_ptr, new_pass_len);
        }

        // single-hash + appended-salt optimization: the salt is appended to the password buffer
        if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH)
        {
          if (hashconfig->opti_type & OPTI_TYPE_APPENDED_SALT)
          {
            // NOTE(review): copies up to byte 64 of the buffer, then advances by salt_len;
            // 64 - new_pass_len wraps if new_pass_len > 64 - presumably excluded by the
            // self-test data, confirm
            memcpy (pw_ptr + new_pass_len, (char *) hashes->st_salts_buf[0].salt_buf, 64 - new_pass_len);

            new_pass_len += hashes->st_salts_buf[0].salt_len;
          }
        }

        pw.pw_len = (u32) new_pass_len;

        // optional terminator byte directly after the (possibly salted) password
        if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
        {
          pw_ptr[new_pass_len] = 0x01;
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
        {
          pw_ptr[new_pass_len] = 0x80;
        }

        // optional bit-length word stored in slot 14 or slot 15
        if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14)
        {
          pw.i[14] = (u32) new_pass_len * 8;
          pw.i[15] = 0;
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15)
        {
          pw.i[14] = 0;
          pw.i[15] = (u32) new_pass_len * 8;
        }

        // byte-swap only the first 14 words; slots 14/15 are left untouched
        if (hashconfig->opts_type & OPTS_TYPE_PT_GENERATE_BE)
        {
          for (int i = 0; i < 14; i++) pw.i[i] = byte_swap_32 (pw.i[i]);
        }

        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);

        if (CL_err != CL_SUCCESS) return -1;

        highest_pw_len = pw.pw_len;
      }
    }
  }
  else
  {
    // attack executed outside the kernel: upload the password unmodified
    pw_t pw; memset (&pw, 0, sizeof (pw));

    char *pw_ptr = (char *) &pw.i;

    const size_t pw_len = strlen (hashconfig->st_pass);

    memcpy (pw_ptr, hashconfig->st_pass, pw_len);

    pw.pw_len = (u32) pw_len;

    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);

    if (CL_err != CL_SUCCESS) return -1;
  }

  // main : run the kernel

  if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
  {
    if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
    {
      // optimized kernels: pick the kernel variant by password length bucket
      if (highest_pw_len < 16)
      {
        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 1, false, 0);

        if (CL_rc == -1) return -1;
      }
      else if (highest_pw_len < 32)
      {
        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_2, 1, false, 0);

        if (CL_rc == -1) return -1;
      }
      else
      {
        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_3, 1, false, 0);

        if (CL_rc == -1) return -1;
      }
    }
    else
    {
      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_4, 1, false, 0);

      if (CL_rc == -1) return -1;
    }
  }
  else
  {
    // missing handling hooks

    // staged execution: KERN_RUN_1, optional hook12, iterated KERN_RUN_2,
    // optional hook23 / init2 / loop2, then the final kernel

    CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 1, false, 0);

    if (CL_rc == -1) return -1;

    if (hashconfig->opts_type & OPTS_TYPE_HOOK12)
    {
      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_12, 1, false, 0);

      if (CL_rc == -1) return -1;

      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_rc == -1) return -1;

      // do something with data
      // (no host-side processing is performed here; the buffer is written back unchanged)

      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_rc == -1) return -1;
    }

    const u32 salt_pos = 0;

    salt_t *salt_buf = &hashes->st_salts_buf[salt_pos];

    const u32 kernel_loops_fixed = hashconfig_get_kernel_loops (hashcat_ctx);

    // fall back to a step of 1 when no fixed loop count is reported
    const u32 loop_step = (kernel_loops_fixed) ? kernel_loops_fixed : 1;

    const u32 iter = salt_buf->salt_iter;

    // run the loop kernel in chunks of at most loop_step iterations
    for (u32 loop_pos = 0; loop_pos < iter; loop_pos += loop_step)
    {
      u32 loop_left = iter - loop_pos;

      loop_left = MIN (loop_left, loop_step);

      device_param->kernel_params_buf32[28] = loop_pos;
      device_param->kernel_params_buf32[29] = loop_left;

      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_2, 1, false, 0);

      if (CL_rc == -1) return -1;
    }

    if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
    {
      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_23, 1, false, 0);

      if (CL_rc == -1) return -1;

      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_rc == -1) return -1;

      /*
       * The following section depends on the hash mode
       */

      switch (hashconfig->hash_mode)
      {
        // for 7z we only need device_param->hooks_buf, but other hooks could use any info from device_param. All of them should/must update hooks_buf
        case 11600: seven_zip_hook_func (device_param, hashes->st_hook_salts_buf, 0, 1); break;
      }

      /*
       * END of hash mode specific hook operations
       */

      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_rc == -1) return -1;
    }

    if (hashconfig->opts_type & OPTS_TYPE_INIT2)
    {
      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_INIT2, 1, false, 0);

      if (CL_rc == -1) return -1;
    }

    if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
    {
      const u32 iter2 = salt_buf->salt_iter2;

      // second loop stage, chunked the same way as the first
      for (u32 loop_pos = 0; loop_pos < iter2; loop_pos += loop_step)
      {
        u32 loop_left = iter2 - loop_pos;

        loop_left = MIN (loop_left, loop_step);

        device_param->kernel_params_buf32[28] = loop_pos;
        device_param->kernel_params_buf32[29] = loop_left;

        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_LOOP2, 1, false, 0);

        if (CL_rc == -1) return -1;
      }
    }

    // hash modes 2500/2501 finish with the AUX1 kernel instead of KERN_RUN_3
    if ((hashconfig->hash_mode == 2500) || (hashconfig->hash_mode == 2501))
    {
      device_param->kernel_params_buf32[28] = 0;
      device_param->kernel_params_buf32[29] = 1;

      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_AUX1, 1, false, 0);

      if (CL_rc == -1) return -1;
    }
    else
    {
      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_3, 1, false, 0);

      if (CL_rc == -1) return -1;
    }
  }

  // check : check if cracked

  // first u32 of d_result is read back; evaluated only after cleanup below
  u32 num_cracked;

  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);

  if (CL_err != CL_SUCCESS) return -1;

  // finish : cleanup and restore

  device_param->kernel_params_buf32[27] = 0;
  device_param->kernel_params_buf32[28] = 0;
  device_param->kernel_params_buf32[29] = 0;
  device_param->kernel_params_buf32[30] = 0;
  device_param->kernel_params_buf32[31] = 0;
  device_param->kernel_params_buf32[32] = 0;
  device_param->kernel_params_buf32[33] = 0;
  device_param->kernel_params_buf64[34] = 0;

  // point the kernel back at the regular digest/salt/esalt buffers
  device_param->kernel_params[15] = &device_param->d_digests_buf;
  device_param->kernel_params[17] = &device_param->d_salt_bufs;
  device_param->kernel_params[18] = &device_param->d_esalt_bufs;

  // zero every device buffer the self-test may have touched

  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_buf, device_param->size_pws);

  if (CL_rc == -1) return -1;

  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_tmps, device_param->size_tmps);

  if (CL_rc == -1) return -1;

  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_hooks, device_param->size_hooks);

  if (CL_rc == -1) return -1;

  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_plain_bufs, device_param->size_plains);

  if (CL_rc == -1) return -1;

  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_digests_shown, device_param->size_shown);

  if (CL_rc == -1) return -1;

  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_result, device_param->size_results);

  if (CL_rc == -1) return -1;

  // attack-mode specific buffer
  if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
  {
    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_rules_c, device_param->size_rules_c);

    if (CL_rc == -1) return -1;
  }
  else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
  {
    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_combs_c, device_param->size_combs);

    if (CL_rc == -1) return -1;
  }
  else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
  {
    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_bfs_c, device_param->size_bfs);

    if (CL_rc == -1) return -1;
  }

  // check return

  if (num_cracked == 0)
  {
    // the display mutex is held while the messages are emitted
    hc_thread_mutex_lock (status_ctx->mux_display);

    event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! OpenCL kernel self-test failed.", device_param->device_id + 1);

    event_log_warning (hashcat_ctx, "Your device driver installation is probably broken.");
    event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
    event_log_warning (hashcat_ctx, NULL);

    hc_thread_mutex_unlock (status_ctx->mux_display);

    return -1;
  }

  return 0;
}
int clPeak::runComputeDP(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo) { float timed, gflops; cl_uint workPerWI; cl::NDRange globalSize, localSize; cl_double A = 1.3f; int iters = devInfo.computeIters; if(!isComputeDP) return 0; if(!devInfo.doubleSupported) { cout << NEWLINE TAB TAB "No double precision support! Skipped" << endl; return 0; } try { cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>(); uint globalWIs = (devInfo.numCUs) * (devInfo.computeWgsPerCU) * (devInfo.maxWGSize); uint t = MIN((globalWIs * sizeof(cl_double)), devInfo.maxAllocSize); t = roundToPowOf2(t); globalWIs = t / sizeof(cl_double); cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (globalWIs * sizeof(cl_double))); globalSize = globalWIs; localSize = devInfo.maxWGSize; cl::Kernel kernel_v1(prog, "compute_dp_v1"); kernel_v1.setArg(0, outputBuf), kernel_v1.setArg(1, A); cl::Kernel kernel_v2(prog, "compute_dp_v2"); kernel_v2.setArg(0, outputBuf), kernel_v2.setArg(1, A); cl::Kernel kernel_v4(prog, "compute_dp_v4"); kernel_v4.setArg(0, outputBuf), kernel_v4.setArg(1, A); cl::Kernel kernel_v8(prog, "compute_dp_v8"); kernel_v8.setArg(0, outputBuf), kernel_v8.setArg(1, A); cl::Kernel kernel_v16(prog, "compute_dp_v16"); kernel_v16.setArg(0, outputBuf), kernel_v16.setArg(1, A); cout << NEWLINE TAB TAB "Double-precision compute (GFLOPS)" << endl; cout << setprecision(2) << fixed; /////////////////////////////////////////////////////////////////////////// // Vector width 1 cout << TAB TAB TAB "double : "; cout.flush(); workPerWI = 4096; // Indicates flops executed per work-item timed = run_kernel(queue, kernel_v1, globalSize, localSize, iters); gflops = ((float)globalWIs * workPerWI) / timed / 1e3f; cout << gflops << endl; /////////////////////////////////////////////////////////////////////////// // Vector width 2 cout << TAB TAB TAB "double2 : "; cout.flush(); workPerWI = 4096; timed = run_kernel(queue, kernel_v2, globalSize, localSize, iters); gflops = ((float)globalWIs 
* workPerWI) / timed / 1e3f; cout << gflops << endl; /////////////////////////////////////////////////////////////////////////// // Vector width 4 cout << TAB TAB TAB "double4 : "; cout.flush(); workPerWI = 4096; timed = run_kernel(queue, kernel_v4, globalSize, localSize, iters); gflops = ((float)globalWIs * workPerWI) / timed / 1e3f; cout << gflops << endl; /////////////////////////////////////////////////////////////////////////// // Vector width 8 cout << TAB TAB TAB "double8 : "; cout.flush(); workPerWI = 4096; timed = run_kernel(queue, kernel_v8, globalSize, localSize, iters); gflops = ((float)globalWIs * workPerWI) / timed / 1e3f; cout << gflops << endl; /////////////////////////////////////////////////////////////////////////// // Vector width 16 cout << TAB TAB TAB "double16 : "; cout.flush(); workPerWI = 4096; timed = run_kernel(queue, kernel_v16, globalSize, localSize, iters); gflops = ((float)globalWIs * workPerWI) / timed / 1e3f; cout << gflops << endl; /////////////////////////////////////////////////////////////////////////// } catch(cl::Error error) { cerr << error.what() << "(" << error.err() << ")" << endl; cerr << TAB TAB TAB "Tests skipped" << endl; return -1; } return 0; }
// main function extern void main(void) { unsigned char flag, numb_pages; unsigned i, write_size, read_size, temp_six_vector, six_vector, errors, write_offset; unsigned boot_args[AT45_PAGE_SIZE / 4]; // calculate temp six vector numb_pages = 0; i = AT45_PAGE_NUMB; while(i >>= 1) numb_pages++; temp_six_vector = (numb_pages << 13) + (AT45_PAGE_SIZE << 17); // setup SYS interrupt aic_configure_irq(AT91C_ID_SYS, AT91C_AIC_PRIOR_LOWEST, AT91C_AIC_SRCTYPE_INT_LEVEL_SENSITIVE, aic_asm_sys_handler); // enable SYS interrupt aic_enable_irq(AT91C_ID_SYS); // setup rtt - 1Hz clock AT91C_BASE_ST->ST_RTMR = 0x4000; upoint_r = pt_mem_area, upoint_w = (pt_mem_area + AT45DB642D_SIZE); put_string("Init AT45DB642D and get device information\n"); if (!at45_init()) put_string("Device inited and ready\n"); else put_string("Error!\n"); put_string("Press any key to load boot menu\n: "); // setup rtt interrupt flag AT91C_BASE_ST->ST_IER = AT91C_ST_RTTINC; // enable RXRDY interrupt in DBGU AT91C_BASE_DBGU->DBGU_IER |= AT91C_US_RXRDY; wait_status = 1; get_char(); while (flag != 'q') { put_string("\nload (l), write(w), run kernel(r), quit(q), erase(e): "); flag = get_char(); switch(flag) { // loading data to sdram case 'l': put_string("Please trasfer the boot file:\n"); transfer_size = 0; // setup rtt interrupt flag AT91C_BASE_ST->ST_IER = AT91C_ST_RTTINC; // enable RXRDY interrupt in DBGU AT91C_BASE_DBGU->DBGU_IER |= AT91C_US_RXRDY; while(!transfer_size); delay(100000); if (transfer_size > 0) { put_string("Transfer complete\n"); util_printf("Byte's sended: %x\n", transfer_size); } break; // writing bytes from data flash case 'w': if (transfer_size == 0) { put_string("Please transfer begin, write end\n"); break; } else if (transfer_size > (AT45DB642D_SIZE)) { put_string("Trasfer is larger than flash size\n"); break; } else { if ((unsigned)transfer_size % AT45_PAGE_SIZE) write_size = ((unsigned)transfer_size / AT45_PAGE_SIZE + 1) * AT45_PAGE_SIZE; else write_size = transfer_size; 
put_string("Write boot(b) or linux kernel(n): "); flag = get_char(); util_printf("%c\n", flag); if (flag == 'b') { write_offset = BOOT_OFFSET; put_string("\nModification of Arm Interrupt Vector #6\n"); six_vector = (write_size / 512) + 1 + temp_six_vector; util_printf("Six vector is 0x%x\n", six_vector); upoint_w[5] = six_vector; } else { write_offset = LINUX_OFFSET; put_string("Writing args\n"); boot_args[0] = write_size; if (!at45_write(BOOT_2_ARGS_OFFSET, boot_args, AT45_PAGE_SIZE)) put_string("Write success\n"); else put_string("Error!\n"); } util_printf("Write 0x%x bytes\n", write_size); if (!at45_write(write_offset, upoint_w, write_size)) put_string("Write success\n"); else put_string("Error!\n"); if (!at45_read(write_offset, upoint_r, write_size)) { put_string("Read success\nStart verification\n"); six_vector = upoint_r[5]; if (write_offset == BOOT_OFFSET) { if ((six_vector & 0xfffff000) - temp_six_vector) { util_printf("Six vector is damage, current 0x%x, original 0x%x\n", six_vector, temp_six_vector); break; } else { put_string("Six vector is correct\nStart code verification\n"); } } errors = 0; for (i = 0; i < write_size / 4; i++) { if (upoint_r[i] != upoint_w[i]) { errors++; util_printf("Addr - %x, write - %x, read - %x\n", i, upoint_w[i], upoint_r[i]); } } put_string("Stop code verification\n"); if (errors) put_string("Verification failed!\n"); else put_string("Verification success!\n"); } } break; // erase first page case 'e': if (!at45_read(0x0, upoint_r, 0x20)) { six_vector = upoint_r[5]; read_size = (six_vector & 0xff) * 512; for (i = 0; i <= read_size; i += AT45_PAGE_SIZE) { if (!at45_page_erase(i)) put_string("Erase success\n"); else put_string("Error!\n"); } } break; // run 2boot code case 'r': run_kernel(); break; // exit of loop case 'q': put_string("\nQuit & Reset\n"); AT91C_BASE_ST->ST_WDMR = 256 | AT91C_ST_RSTEN; AT91C_BASE_ST->ST_CR = AT91C_ST_WDRST; break; // undef default: put_string("\nUndefined command\n"); break; } } // Infinity loop 
while (1) { // Disable pck for idle cpu mode AT91C_BASE_PMC->PMC_SCDR = AT91C_PMC_PCK; } }
/* Invokes run_kernel() 2000 times in a row. */
static void run_kernel_loop()
{
    int remaining = 2000;

    while (remaining > 0)
    {
        run_kernel();
        --remaining;
    }
}
int clPeak::runComputeInteger(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo) { float timed, gflops; cl_uint workPerWI; cl::NDRange globalSize, localSize; cl_int A = 4; uint iters = devInfo.computeIters; if(!isComputeInt) return 0; try { log->print(NEWLINE TAB TAB "Integer compute (GIOPS)" NEWLINE); log->xmlOpenTag("integer_compute"); log->xmlAppendAttribs("unit", "gflops"); cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>(); uint64_t globalWIs = (devInfo.numCUs) * (devInfo.computeWgsPerCU) * (devInfo.maxWGSize); uint64_t t = MIN((globalWIs * sizeof(cl_int)), devInfo.maxAllocSize) / sizeof(cl_int); globalWIs = roundToMultipleOf(t, devInfo.maxWGSize); cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (globalWIs * sizeof(cl_int))); globalSize = globalWIs; localSize = devInfo.maxWGSize; cl::Kernel kernel_v1(prog, "compute_integer_v1"); kernel_v1.setArg(0, outputBuf), kernel_v1.setArg(1, A); cl::Kernel kernel_v2(prog, "compute_integer_v2"); kernel_v2.setArg(0, outputBuf), kernel_v2.setArg(1, A); cl::Kernel kernel_v4(prog, "compute_integer_v4"); kernel_v4.setArg(0, outputBuf), kernel_v4.setArg(1, A); cl::Kernel kernel_v8(prog, "compute_integer_v8"); kernel_v8.setArg(0, outputBuf), kernel_v8.setArg(1, A); cl::Kernel kernel_v16(prog, "compute_integer_v16"); kernel_v16.setArg(0, outputBuf), kernel_v16.setArg(1, A); /////////////////////////////////////////////////////////////////////////// // Vector width 1 log->print(TAB TAB TAB "int : "); workPerWI = 2048; // Indicates integer operations executed per work-item timed = run_kernel(queue, kernel_v1, globalSize, localSize, iters); gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f; log->print(gflops); log->print(NEWLINE); log->xmlRecord("int", gflops); /////////////////////////////////////////////////////////////////////////// // Vector width 2 log->print(TAB TAB TAB "int2 : "); workPerWI = 2048; timed = run_kernel(queue, kernel_v2, globalSize, localSize, 
iters); gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f; log->print(gflops); log->print(NEWLINE); log->xmlRecord("int2", gflops); /////////////////////////////////////////////////////////////////////////// // Vector width 4 log->print(TAB TAB TAB "int4 : "); workPerWI = 2048; timed = run_kernel(queue, kernel_v4, globalSize, localSize, iters); gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f; log->print(gflops); log->print(NEWLINE); log->xmlRecord("int4", gflops); /////////////////////////////////////////////////////////////////////////// // Vector width 8 log->print(TAB TAB TAB "int8 : "); workPerWI = 2048; timed = run_kernel(queue, kernel_v8, globalSize, localSize, iters); gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f; log->print(gflops); log->print(NEWLINE); log->xmlRecord("int8", gflops); /////////////////////////////////////////////////////////////////////////// // Vector width 16 log->print(TAB TAB TAB "int16 : "); workPerWI = 2048; timed = run_kernel(queue, kernel_v16, globalSize, localSize, iters); gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f; log->print(gflops); log->print(NEWLINE); log->xmlRecord("int16", gflops); /////////////////////////////////////////////////////////////////////////// log->xmlCloseTag(); // integer_compute } catch(cl::Error &error) { stringstream ss; ss << error.what() << " (" << error.err() << ")" NEWLINE << TAB TAB TAB "Tests skipped" NEWLINE; log->print(ss.str()); return -1; } return 0; }