void a9_compute(int tid, struct buffer* buffers, int start_indx, int end_indx) { int i ; for(i=0 ; i<NUM_ITER ; i++) { if(tid == 0) { call_barrier(0) ; pthread_barrier_wait(&barrier0) ; doitgen(buffers[0].bo[0]->map, buffers[1].bo[0]->map, C41, start_indx, end_indx) ; } else { pthread_barrier_wait(&barrier0) ; doitgen(buffers[0].bo[0]->map, buffers[1].bo[0]->map, C42, start_indx, end_indx) ; } } }
/*
 * Task entry point shared by all workers.
 *
 * arg0 is interpreted as a taskArgs pointer and arg1 as the integer task id.
 * The task runs NUM_ITER rounds; in each round task 0 additionally calls
 * callBarrier(0, 4) before every task calls callLocalBarrier() and then
 * doitgen() on the buffers and index range stored in the taskArgs.
 *
 * When the loop is done the task posts to edgeDetectEvent: Event_Id_00 for
 * task 0, Event_Id_01 for any other task.
 */
void common_wrapper(UArg arg0, UArg arg1)
{
    taskArgs* task = (taskArgs*)arg0;
    int tid = (int)arg1;
    int round;

    for (round = 0; round < NUM_ITER; ++round) {
        /* Task 0 alone takes part in the extra barrier. */
        if (tid == 0) {
            callBarrier(0, /*lock_id=*/4);
        }

        callLocalBarrier();
        doitgen(task->buffer1, task->buffer2, task->buffer3,
                task->start_indx, task->end_indx);
    }

    /* Report completion on the event id that belongs to this task. */
    if (tid != 0) {
        Event_post(edgeDetectEvent, Event_Id_01);
    } else {
        Event_post(edgeDetectEvent, Event_Id_00);
    }
}
int main(void) { double t_start, t_end; int i; DATA_TYPE* A, *A_2; DATA_TYPE* C4, *C4_2; DATA_TYPE* sum, *sum_2; A = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE)); C4 = (DATA_TYPE*)malloc(NP * NP * sizeof(DATA_TYPE)); sum = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE)); A_2 = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE)); C4_2 = (DATA_TYPE*)malloc(NP * NP * sizeof(DATA_TYPE)); sum_2 = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE)); init_array(A, C4); init_array(A_2, C4_2); read_cl_file(); cl_initialization(); cl_mem_init(A, C4, sum); cl_load_prog(); t_start = rtclock(); int r; for (r = 0; r < NR; r++) { cl_launch_kernel1(r); cl_launch_kernel2(r); } t_end = rtclock(); errcode = clEnqueueReadBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, NR * NQ * NP * sizeof(DATA_TYPE), sum, 0, NULL, NULL); if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n"); fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); t_start = rtclock(); doitgen(sum_2, A_2, C4_2); t_end = rtclock(); fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); compareResults(sum, sum_2); cl_clean_up(); return 0; }
/*
 * Remote-call entry point: runs doitgen() NUM_ITER times, calling
 * callBarrier(0, 4) before each run.
 *
 * size  - not used by this function.
 * data  - payload; a FxnArgs record is read from sizeof(map_info_type)
 *         bytes past its start. Its a, b and e fields are used as int
 *         buffers, and start_indx/end_indx give the range passed to
 *         doitgen().
 *
 * Always returns 1.
 */
Int32 fxnTest1(UInt32 size, UInt32 *data)
{
    /* Step over the leading map_info_type with byte-pointer arithmetic.
     * Casting the pointer to UInt32 instead would truncate the address
     * wherever pointers are wider than 32 bits. */
    FxnArgs *args = (FxnArgs *)((char *)data + sizeof(map_info_type));

    int *buffer1 = (int *)args->a;
    int *buffer2 = (int *)args->b;
    int *buffer3 = (int *)args->e;
    UInt32 start_indx = args->start_indx;
    UInt32 end_indx = args->end_indx;
    int i;

    (void)size; /* kept for the call signature only */

    for (i = 0; i < NUM_ITER; i++) {
        callBarrier(0, /*lock_id=*/4);
        doitgen(buffer1, buffer2, buffer3, start_indx, end_indx);
    }

    return 1;
}