Esempio n. 1
0
/*
 * Worker loop: performs NUM_ITER rounds of doitgen.
 *
 * In every round the thread with tid == 0 first calls call_barrier(0);
 * every thread then waits on barrier0 and calls doitgen on the maps of
 * buffers[0].bo[0] and buffers[1].bo[0], forwarding start_indx and
 * end_indx unchanged. Thread 0 passes C41 as the third argument, all
 * other threads pass C42.
 */
void a9_compute(int tid, struct buffer* buffers, int start_indx, int end_indx)
{
    const int is_thread0 = (tid == 0) ;
    int iter = 0 ;

    while(iter < NUM_ITER)
    {
        /* Only thread 0 makes the extra call_barrier(0) call. */
        if(is_thread0) {
            call_barrier(0) ;
        }

        /* Every thread waits on the shared pthread barrier. */
        pthread_barrier_wait(&barrier0) ;

        if(is_thread0) {
            doitgen(buffers[0].bo[0]->map, buffers[1].bo[0]->map, C41,
                    start_indx, end_indx) ;
        } else {
            doitgen(buffers[0].bo[0]->map, buffers[1].bo[0]->map, C42,
                    start_indx, end_indx) ;
        }

        iter++ ;
    }
}
/*
 * Task entry point.
 *
 * arg0 is interpreted as a taskArgs pointer and arg1 as the task id.
 * For NUM_ITER rounds, task 0 calls callBarrier(0, 4) first; every task
 * then calls callLocalBarrier() followed by doitgen on the buffers and
 * index range stored in the taskArgs. After the loop the task posts
 * Event_Id_00 (task 0) or Event_Id_01 (any other task) on
 * edgeDetectEvent.
 */
void common_wrapper(UArg arg0, UArg arg1)
{
	const int tid = (int)arg1 ;
	taskArgs* t = (taskArgs*)arg0 ;
	int iter = 0 ;

	while(iter < NUM_ITER) {
		/* Only task 0 makes the extra callBarrier call. */
		if(tid == 0) {
			callBarrier(0, /*lock_id=*/4) ;
		}

		/* Common to all tasks: local barrier, then the computation. */
		callLocalBarrier() ;
		doitgen(t->buffer1, t->buffer2, t->buffer3, t->start_indx, t->end_indx) ;

		iter++ ;
	}

	/* Post the event id that corresponds to this task. */
	if(tid != 0) {
		Event_post(edgeDetectEvent, Event_Id_01) ;
	} else {
		Event_post(edgeDetectEvent, Event_Id_00) ;
	}
}
Esempio n. 3
0
int main(void) {
	double t_start, t_end;
	int i;

	DATA_TYPE* A, *A_2;
	DATA_TYPE* C4, *C4_2;
	DATA_TYPE* sum, *sum_2;

	A = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE));
	C4 = (DATA_TYPE*)malloc(NP * NP * sizeof(DATA_TYPE));
	sum = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE));

	A_2 = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE));
	C4_2 = (DATA_TYPE*)malloc(NP * NP * sizeof(DATA_TYPE));
	sum_2 = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE));

	init_array(A, C4);
	init_array(A_2, C4_2);

	read_cl_file();
	cl_initialization();
	cl_mem_init(A, C4, sum);
	cl_load_prog();
	t_start = rtclock();

	int r;	

	for (r = 0; r < NR; r++)
	{
		cl_launch_kernel1(r);
		cl_launch_kernel2(r);
	}

	t_end = rtclock();
	errcode = clEnqueueReadBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, NR * NQ * NP * sizeof(DATA_TYPE), sum, 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");
	
	fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start);
	t_start = rtclock();
	doitgen(sum_2, A_2, C4_2);
	t_end = rtclock(); 
	fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);   
	compareResults(sum, sum_2);
	cl_clean_up();
    	return 0;
}
/*
 * Remote-call entry point: runs NUM_ITER rounds of callBarrier + doitgen.
 *
 * size: not used by this function.
 * data: start of the message; a FxnArgs record is read from the address
 *       sizeof(map_info_type) bytes past it.
 *
 * The a, b and e fields of the FxnArgs record are used as int buffers and
 * its start_indx / end_indx fields are forwarded to doitgen.
 *
 * Always returns 1.
 */
Int32 fxnTest1(UInt32 size, UInt32 *data)
{
    /* Byte-pointer arithmetic instead of a round-trip through UInt32, so
     * the address is not truncated where pointers are wider than 32 bits. */
    FxnArgs *args = (FxnArgs *)((char *)data + sizeof(map_info_type));
    int *buffer1 = (int *)args->a ;
    int *buffer2 = (int *)args->b ;
    int *buffer3 = (int *)args->e ;
    UInt32 start_indx = args->start_indx ;
    UInt32 end_indx = args->end_indx ;
    int i ;

    (void)size ;   /* part of the entry-point signature, unused here */

    for(i = 0 ; i < NUM_ITER ; i++) {
        callBarrier(0, /*lock_id=*/4) ;
        doitgen(buffer1, buffer2, buffer3, start_indx, end_indx) ;
    }

    return 1 ;
}