/*
 * Performance evaluation function for SW-C2.
 *
 * Calls WrapperCancelAlarm2() and WrapperSuspendAllInterruptsWithoutOS(),
 * then fills histogram 2 twice, LOOP_COUNT samples each time:
 *   1. an empty begin_measure()/end_measure() pair (measurement overhead);
 *   2. a single Rte_Read_SWC2_RPort_time() call.
 * Each histogram is printed right after it is collected.  Finally
 * WrapperResumeAllInterruptsWithoutOS() and WrapperShutdownOS() are called.
 */
void
measure_swc2(void)
{
	IDT_TimeCount	cnt;
	volatile uint32	loop;	/* volatile loop counter, as in the original */

	WrapperCancelAlarm2();
	WrapperSuspendAllInterruptsWithoutOS();

	/* Pass 1: nothing between begin and end. */
	syslog(LOG_NOTICE, "== measure overhead ==");
	init_hist(2U, MAX_TIME, histarea);
	loop = 0U;
	while (loop < LOOP_COUNT) {
		begin_measure(2U);
		end_measure(2U);
		loop++;
	}
	print_hist(2U);

	/* Pass 2: the RTE read; the value stored in cnt is not used afterwards. */
	syslog(LOG_NOTICE, "== Rte_Read_SWC2_RPort_time ==");
	init_hist(2U, MAX_TIME, histarea);
	loop = 0U;
	while (loop < LOOP_COUNT) {
		begin_measure(2U);
		Rte_Read_SWC2_RPort_time(&cnt);
		end_measure(2U);
		loop++;
	}
	print_hist(2U);

	WrapperResumeAllInterruptsWithoutOS();
	WrapperShutdownOS();
}
/*
 * Measurement task 1 (high priority).
 *
 * Alternates between sleeping and stamping measurements: every time it
 * returns from slp_tsk() it closes measurement 1, and (except after the
 * final wake-up) it opens measurement 2 just before going back to sleep.
 * The begin_measure(1)/end_measure(2) counterparts are not in this function.
 *
 * exinf: extended information passed to the task; not used.
 */
void
task1(intptr_t exinf)
{
	uint_t	round;

	/* First wake-up: only the closing stamp of measurement 1. */
	slp_tsk();
	end_measure(1);

	round = 1;
	while (round < NO_MEASURE) {
		begin_measure(2);
		slp_tsk();
		end_measure(1);
		round++;
	}

	/* Last hand-over: open measurement 2 and sleep once more. */
	begin_measure(2);
	slp_tsk();
}
/*
 * Performance evaluation routine for snd_pdq.
 *
 * Re-initializes PDQ1, pre-loads it with n entries sent at priority 1, and
 * then records NO_MEASURE samples of snd_pdq() at priority 2 in histogram 1.
 * One entry is received after every measured send.  The histogram is
 * printed and the system log flushed at the end.
 *
 * n: number of entries queued before the measured sends start.
 */
void
perf_eval(uint_t n)
{
	uint_t		idx;
	intptr_t	data;
	PRI			pri;

	ini_pdq(PDQ1);
	init_hist(1, MAX_TIME, histarea1);

	/* Pre-load the queue. */
	idx = 0;
	while (idx < n) {
		data = idx;
		snd_pdq(PDQ1, data, 1);
		idx++;
	}

	/* Only the snd_pdq() call sits between begin and end. */
	idx = 0;
	while (idx < NO_MEASURE) {
		data = idx;
		begin_measure(1);
		snd_pdq(PDQ1, data, 2);
		end_measure(1);
		rcv_pdq(PDQ1, &data, &pri);
		idx++;
	}

	syslog_1(LOG_NOTICE, "Execution times of snd_pdq"
								" when %d data are queued.", n);
	print_hist(1);
	syslog_flush();
}
/*
 * Measurement routine for set_flg.
 *
 * Records NO_MEASURE samples of set_flg(FLG1, 0x01U) in histogram 1.  Before
 * each sample the first n tasks of task_list are activated.  LOGTASK is
 * suspended for the duration of the measurement loop and resumed before the
 * results are printed.
 *
 * n: number of tasks from task_list activated before each sample.
 */
void
perf_eval(uint_t n)
{
	uint_t	round;
	uint_t	k;

	init_hist(1, MAX_TIME, histarea1);
	sus_tsk(LOGTASK);		/* stop the system log task */

	round = 0;
	while (round < NO_MEASURE) {
		ini_flg(FLG1);
		for (k = 0; k < n; k++) {
			act_tsk(task_list[k]);
		}

		/* Lower our own priority and wait for the tasks to enter the waiting state. */
		chg_pri(TSK_SELF, MAIN_PRIORITY_LOW);
		chg_pri(TSK_SELF, TPRI_INI);

		begin_measure(1);
		set_flg(FLG1, 0x01U);
		end_measure(1);

		/* Lower our own priority again and wait for the tasks to terminate. */
		chg_pri(TSK_SELF, MAIN_PRIORITY_LOW);
		chg_pri(TSK_SELF, TPRI_INI);

		round++;
	}

	rsm_tsk(LOGTASK);		/* resume the system log task */

	syslog_1(LOG_NOTICE, "Execution times of set_flg"
					" when %d tasks are released from waiting.", n);
	print_hist(1);
	logtask_flush(0U);
}
/*
 * Measurement task 2 (high priority).
 *
 * Closes measurement 3 as its very first action, logs that it did so,
 * increments task2_count and terminates itself with ext_tsk().
 *
 * exinf: extended information passed to the task; not used.
 */
void
task2(intptr_t exinf)
{
	/* Stamp first so that the logging below is not part of the sample. */
	end_measure(3);

	syslog_0(LOG_NOTICE, "end_measure(3)");
	++task2_count;

	ext_tsk();
}
/*
 * Performance evaluation routine for set_flg.
 *
 * Records NO_MEASURE samples of set_flg(FLG1, 0x01U) in histogram 1.  Before
 * each sample the first n tasks of task_list are activated.  The histogram
 * is printed and the system log flushed at the end.
 *
 * n: number of tasks from task_list activated before each sample.
 */
void
perf_eval(uint_t n)
{
	uint_t	round;
	uint_t	k;

	init_hist(1, MAX_TIME, histarea1);

	round = 0;
	while (round < NO_MEASURE) {
		ini_flg(FLG1);
		for (k = 0; k < n; k++) {
			act_tsk(task_list[k]);
		}

		/* Lower our own priority so the tasks join the event flag's waiting queue. */
		chg_pri(TSK_SELF, MAIN_PRIORITY_LOW);
		chg_pri(TSK_SELF, TPRI_INI);

		begin_measure(1);
		set_flg(FLG1, 0x01U);
		end_measure(1);

		/* Lower our own priority again and wait for the tasks to exit. */
		chg_pri(TSK_SELF, MAIN_PRIORITY_LOW);
		chg_pri(TSK_SELF, TPRI_INI);

		round++;
	}

	syslog_1(LOG_NOTICE, "Execution times of set_flg"
					" when %d tasks are released from waiting.", n);
	print_hist(1);
	syslog_flush();
}
int main(int argc, char **argv) { read_args(argc, argv); counters timer; start_measure(timer); // declarations Complex ioB(1.0, 1.0); ioBuffer = cl::sycl::buffer<Complex,2>(cl::sycl::range<2> {M, N}); ioABuffer = cl::sycl::buffer<Complex,2>(cl::sycl::range<2> {M, N}); ioBBuffer = cl::sycl::buffer<Complex,1>(&ioB, cl::sycl::range<1> {1}); // initialization for (size_t i = 0; i < M; ++i){ for (size_t j = 0; j < N; ++j){ float tmp = (float) (i*(j+2) + 10) / N; Complex value(tmp, tmp); cl::sycl::id<2> id = {i, j}; ioBuffer.get_access<cl::sycl::access::mode::write>()[id] = value; ioABuffer.get_access<cl::sycl::access::mode::write>()[id] = value; } } // our work coef_var2D<0, 0> c1; coef_var2D<1, 0> c2; coef_var2D<0, 1> c3; coef_var2D<-1, 0> c4; coef_var2D<0, -1> c5; auto st = c1+c2+c3+c4+c5; input_var2D<Complex, &ioABuffer, &ioBBuffer, &fdl_in, &fac> work_in; output_2D<Complex, &ioBuffer, &fdl_out> work_out; auto op_work = work_out << st << work_in; auto st_id = c1.toStencil(); input_var2D<Complex, &ioBuffer, &ioBBuffer, &fdl_in, &fac_id> copy_in; output_2D<Complex, &ioABuffer, &fdl_out> copy_out; auto op_copy = copy_out << st_id << copy_in; end_init(timer); auto begin_op = counters::clock_type::now(); // compute result with "gpu" { cl::sycl::queue myQueue; for (unsigned int i = 0; i < NB_ITER; ++i){ //op_work.doComputation(myQueue); op_work.doLocalComputation(myQueue); op_copy.doComputation(myQueue); } } auto end_op = counters::clock_type::now(); timer.stencil_time = std::chrono::duration_cast<counters::duration_type>(end_op - begin_op); // loading time is not watched end_measure(timer); return 0; }
/*
 * Measurement task 2 (medium priority).
 *
 * Wakes TASK1 NO_MEASURE times; measurement 1 is opened just before each
 * wup_tsk() and measurement 2 is closed just after it returns.  The
 * matching end_measure(1)/begin_measure(2) are not in this function.
 * One extra, unmeasured wup_tsk(TASK1) is issued after the loop.
 *
 * exinf: extended information passed to the task; not used.
 */
void
task2(intptr_t exinf)
{
	uint_t	round;

	round = 0;
	while (round < NO_MEASURE) {
		begin_measure(1);
		wup_tsk(TASK1);
		end_measure(2);
		round++;
	}

	/* Final wake-up, outside any measurement opened here. */
	wup_tsk(TASK1);
}
/*
 * Main task (medium priority).
 *
 * Runs three measurements into histograms 1-3 while LOGTASK is suspended,
 * then resumes LOGTASK, prints the three histograms and exits the kernel
 * with ext_ker().
 *
 * exinf: extended information passed to the task; not used.
 */
void main_task(intptr_t exinf)
{
	uint_t i;

	syslog_0(LOG_NOTICE, "Performance evaluation program (4)");
	init_hist(1, MAX_TIME, histarea1);
	init_hist(2, MAX_TIME, histarea2);
	init_hist(3, MAX_TIME, histarea3);
	logtask_flush(0U);
	sus_tsk(LOGTASK);		/* stop the system log task */

	/*
	 * Measure the execution time of act_tsk when it does not cause a
	 * task switch.  Both stamps are taken here; the slp_tsk() after each
	 * sample is outside the measured interval.
	 */
	for (i = 0; i < NO_MEASURE; i++) {
		begin_measure(1);
		act_tsk(TASK3);
		end_measure(1);
		slp_tsk();
	}

	/*
	 * Measure the execution time of act_tsk when it causes a task switch.
	 * Only begin_measure(2) is issued here; presumably TASK1 issues the
	 * matching end_measure(2) -- TODO confirm.
	 */
	for (i = 0; i < NO_MEASURE; i++) {
		begin_measure(2);
		act_tsk(TASK1);
	}

	/*
	 * Measure the execution time of iact_tsk when it causes a task switch
	 * (one tenth as many samples).  This task only starts CYC1 and
	 * busy-waits until task2_count reaches NO_MEASURE / 10; the samples
	 * for histogram 3 are taken elsewhere.
	 */
	task2_count = 0;
	sta_cyc(CYC1);
	while (task2_count < NO_MEASURE / 10) ;
	stp_cyc(CYC1);

	rsm_tsk(LOGTASK);		/* resume the system log task */
	syslog_0(LOG_NOTICE, "Execution times of act_tsk without task switch");
	print_hist(1);
	syslog_0(LOG_NOTICE, "Execution times of act_tsk with task switch");
	print_hist(2);
	syslog_0(LOG_NOTICE, "Execution times of iact_tsk with task switch");
	print_hist(3);
	ext_ker();
}
/*
 * Main task: measures the overhead of the measurement primitives.
 *
 * Records NO_MEASURE samples of an empty begin_measure()/end_measure()
 * pair in histogram 1, prints the histogram and calls test_finish().
 *
 * exinf: extended information passed to the task; not used.
 */
void
main_task(intptr_t exinf)
{
	uint_t	sample;

	syslog_0(LOG_NOTICE, "Performance evaluation program (0)");
	init_hist(1, MAX_TIME, histarea1);
	syslog_flush();

	/* Nothing is executed between the two stamps. */
	sample = 0;
	while (sample < NO_MEASURE) {
		begin_measure(1);
		end_measure(1);
		sample++;
	}

	syslog_0(LOG_NOTICE, "Measurement overhead");
	print_hist(1);
	test_finish();
}
/*
 * Measurement task 1 (high priority).
 *
 * Closes measurement 2 as soon as it starts running and then terminates
 * itself with ext_tsk().  The matching begin_measure(2) is not in this
 * function.
 *
 * exinf: extended information passed to the task; not used.
 */
void
task1(intptr_t exinf)
{
	(void) exinf;	/* unused */

	end_measure(2);
	ext_tsk();
}
/*
 * Main task.
 *
 * Repeats NO_MEASURE / 10 times: start ALM0, start three groups of 30
 * alarm handlers (histograms 1-3), then stop the same three groups
 * (histograms 6, 5, 4).  Each sample covers a whole loop of 30 calls.
 * Afterwards the six histograms are printed and check_finish(0) is called.
 *
 * NOTE(review): the report labels below do not agree with the comments in
 * the measurement loop.  Histogram 1 is filled by the group commented as
 * "long" but printed as "short"; histogram 3 is commented "short" but
 * printed as "long"; the same swap applies to histograms 4 and 6.  The
 * ALM_RELTIMn values are not visible here, so it is unclear whether the
 * comments or the labels are wrong -- confirm and fix whichever is.
 *
 * exinf: extended information passed to the task; not used.
 */
void main_task(intptr_t exinf)
{
	uint_t i, j;

	syslog_0(LOG_NOTICE, "Performance evaluation program (5)");
	init_hist(1);
	init_hist(2);
	init_hist(3);
	init_hist(4);
	init_hist(5);
	init_hist(6);

	/*
	 * Repeated measurement
	 */
	for (j = 0; j < NO_MEASURE / 10; j++) {

		/*
		 * Start alarm handler 0 with a short time.
		 *
		 * This is to avoid the high-resolution timer being reprogrammed
		 * during the performance evaluation.
		 */
		sta_alm(ALM0, ALM_RELTIM0);

		/*
		 * Start 30 alarm handlers with a long time
		 */
		begin_measure(1);
		for (i = 0; i < 30; i++) {
			sta_alm(alarm1_list[i], ALM_RELTIM1);
		}
		end_measure(1);

		/*
		 * Start 30 alarm handlers with a medium time
		 */
		begin_measure(2);
		for (i = 0; i < 30; i++) {
			sta_alm(alarm2_list[i], ALM_RELTIM2);
		}
		end_measure(2);

		/*
		 * Start 30 alarm handlers with a short time
		 */
		begin_measure(3);
		for (i = 0; i < 30; i++) {
			sta_alm(alarm3_list[i], ALM_RELTIM3);
		}
		end_measure(3);

		/*
		 * Stop the 30 alarm handlers that were started with a short time
		 */
		begin_measure(6);
		for (i = 0; i < 30; i++) {
			stp_alm(alarm3_list[i]);
		}
		end_measure(6);

		/*
		 * Stop the 30 alarm handlers that were started with a medium time
		 */
		begin_measure(5);
		for (i = 0; i < 30; i++) {
			stp_alm(alarm2_list[29 - i]);		/* stop in reverse order */
		}
		end_measure(5);

		/*
		 * Stop the 30 alarm handlers that were started with a long time
		 */
		begin_measure(4);
		for (i = 0; i < 30; i++) {
			stp_alm(alarm1_list[29 - i]);		/* stop in reverse order */
		}
		end_measure(4);
	}

	/*
	 * Output of the measurement results
	 */
	syslog_0(LOG_NOTICE, "Execution times of 30 short sta_alm");
	print_hist(1);
	syslog_0(LOG_NOTICE, "Execution times of 30 medium sta_alm");
	print_hist(2);
	syslog_0(LOG_NOTICE, "Execution times of 30 long sta_alm");
	print_hist(3);
	syslog_0(LOG_NOTICE, "Execution times of 30 short stp_alm");
	print_hist(4);
	syslog_0(LOG_NOTICE, "Execution times of 30 medium stp_alm");
	print_hist(5);
	syslog_0(LOG_NOTICE, "Execution times of 30 long stp_alm");
	print_hist(6);
	check_finish(0);
}
int main(int argc, char **argv) { read_args(argc, argv); struct counters timer; start_measure(timer); // declarations float tab_var = 1.0; float *ioB = &tab_var; ioBuffer = cl::sycl::buffer<float,2>(cl::sycl::range<2> {M, N}); ioABuffer = cl::sycl::buffer<float,2>(cl::sycl::range<2> {M, N}); ioBBuffer = cl::sycl::buffer<float,1>(ioB, cl::sycl::range<1> {1}); #if DEBUG_STENCIL float *a_test = (float *) malloc(sizeof(float)*M*N); float *b_test = (float *) malloc(sizeof(float)*M*N); #endif // initialization for (size_t i = 0; i < M; ++i){ for (size_t j = 0; j < N; ++j){ float value = ((float) i*(j+2) + 10) / N; cl::sycl::id<2> id = {i, j}; ioBuffer.get_access<cl::sycl::access::mode::write, cl::sycl::access::target::host_buffer>()[id] = value; ioABuffer.get_access<cl::sycl::access::mode::write, cl::sycl::access::target::host_buffer>()[id] = value; #if DEBUG_STENCIL a_test[i*N+j] = value; b_test[i*N+j] = value; #endif } } // our work coef_var2D<0, 0> c1; coef_var2D<1, 0> c2; coef_var2D<0, 1> c3; coef_var2D<-1, 0> c4; coef_var2D<0, -1> c5; auto st = c1+c2+c3+c4+c5; input_var2D<float, &ioABuffer, &ioBBuffer, &fdl_in, &fac> work_in; output_2D<float, &ioBuffer, &fdl_out> work_out; auto op_work = work_out << st << work_in; auto st_id = c1.toStencil(); input_var2D<float, &ioBuffer, &ioBBuffer, &fdl_in, &fac_id> copy_in; output_2D<float, &ioABuffer, &fdl_out> copy_out; auto op_copy = copy_out << st_id << copy_in; end_init(timer); struct op_time time_op; begin_op(time_op); // compute result with "gpu" { cl::sycl::queue myQueue; for (unsigned int i = 0; i < NB_ITER; ++i){ //op_work.doComputation(myQueue); op_work.doLocalComputation(myQueue); op_copy.doComputation(myQueue); } } end_op(time_op, timer.stencil_time); // loading time is not watched end_measure(timer); #if DEBUG_STENCIL // get the gpu result auto C = (ioABuffer).get_access<cl::sycl::access::mode::read, cl::sycl::access::target::host_buffer>(); ute_and_are(a_test,b_test,C); free(a_test); free(b_test); #endif return 0; }