//return bool: if is multiple of maxNumThread //if yes, info[0]: number of blocks, info[1] = maxNumThread //if no, info[0]: number of blocks except of the last block, info[1]: number of thread in the last block void testReduceImpl( int rLen, int OPERATOR, int numThreadPB , int numMaxBlock) { int _CPU_GPU=0; int result=0; int memSize = sizeof(Record)*rLen; void * h_Rin; HOST_MALLOC(h_Rin, memSize ); generateRand((Record *)h_Rin, TEST_MAX - 11111, rLen, 0 ); void* h_Rout; unsigned int numResult = 0; cl_mem d_Rin=NULL; cl_mem d_Rout; CL_MALLOC( &d_Rin, memSize ); cl_writebuffer( d_Rin, h_Rin, memSize,0); numResult= CL_AggMaxOnly( d_Rin, rLen, &d_Rout, numThreadPB, numMaxBlock,_CPU_GPU); HOST_MALLOC(h_Rout, sizeof(Record)*numResult ); cl_readbuffer( h_Rout, d_Rout, sizeof(Record)*numResult,_CPU_GPU); //validateReduce((Record *)h_Rin, rLen,((Record *)h_Rout)[0].y, OPERATOR ); HOST_FREE( h_Rin ); HOST_FREE( h_Rout ); CL_FREE( d_Rin ); CL_FREE( d_Rout ); printf("testReduceFinish\n"); }
void testAggAfterGroupByImpl( int rLen, int OPERATOR, int numThread, int numBlock) { int _CPU_GPU=0; int memSize = sizeof(Record)*rLen; void* h_Rin; void* h_Rout; void* h_Sin; int* h_startPos; HOST_MALLOC( h_Rin, memSize ); HOST_MALLOC( h_Rout, memSize ); HOST_MALLOC( h_Sin, memSize ); generateRand((Record *) h_Rin, 50, rLen, 0 ); generateRand((Record *) h_Sin, TEST_MAX, rLen, 0 ); int numGroup = 0; numGroup = CL_GroupBy((Record *)h_Rin, rLen,(Record *) h_Rout, &h_startPos, numThread, numBlock,_CPU_GPU); validateGroupBy((Record*) h_Rin, rLen, (Record*)h_Rout,h_startPos, numGroup ); void* h_aggResults; HOST_MALLOC(h_aggResults, sizeof(int)*numGroup ); switch(OPERATOR){ case REDUCE_MAX: { CL_agg_max_afterGroupBy((Record *)h_Rout,rLen,h_startPos,numGroup,(Record *)h_Sin,(int *)h_aggResults,numThread,_CPU_GPU); break; } case REDUCE_MIN: { CL_agg_min_afterGroupBy((Record *)h_Rout,rLen,h_startPos,numGroup,(Record *)h_Sin,(int *)h_aggResults,numThread,_CPU_GPU); break; } case REDUCE_SUM: { CL_agg_sum_afterGroupBy((Record *)h_Rout,rLen,h_startPos,numGroup,(Record *)h_Sin,(int *)h_aggResults,numThread,_CPU_GPU); break; } case REDUCE_AVERAGE: { CL_agg_avg_afterGroupBy((Record *)h_Rout,rLen,h_startPos,numGroup,(Record *)h_Sin,(int *)h_aggResults,numThread,_CPU_GPU); break; } } validateAggAfterGroupBy((Record*) h_Rin, rLen, (int*)h_startPos, numGroup,(Record*) h_Sin, (int *)h_aggResults, OPERATOR); }
/*
 * Exercise CL_GroupBy on rLen random records (key range 64 so duplicates
 * occur), validate the grouping against the original input, then release
 * every host allocation, including the start-position array produced by
 * CL_GroupBy itself.
 */
void testGroupByImpl(int rLen, int numThread, int numBlock)
{
	int cpuGpu = 0;
	int bytes = sizeof(Record) * rLen;

	void *hostIn;
	void *hostOut;
	HOST_MALLOC(hostIn, bytes);
	HOST_MALLOC(hostOut, bytes);

	generateRand((Record *)hostIn, 64, rLen, 0);

	/* CL_GroupBy allocates groupStart and reports the group count */
	int *groupStart;
	int groupCount = CL_GroupBy((Record *)hostIn, rLen, (Record *)hostOut,
				    &groupStart, numThread, numBlock, cpuGpu);

	validateGroupBy((Record *)hostIn, rLen, (Record *)hostOut,
			groupStart, groupCount);

	HOST_FREE(groupStart);
	HOST_FREE(hostIn);
	HOST_FREE(hostOut);
}
void testScanImpl(int rLen) { int _CPU_GPU=0; cl_event eventList[2]; int index=0; cl_kernel Kernel; int CPU_GPU; double burden; int result=0; int memSize=sizeof(int)*rLen; int outSize=sizeof(int)*rLen; void *Rin; HOST_MALLOC(Rin, memSize); generateRandInt((int*)Rin, rLen,rLen,0); void *Rout; HOST_MALLOC(Rout, outSize); cl_mem d_Rin; CL_MALLOC(&d_Rin, memSize); cl_mem d_Rout; CL_MALLOC(&d_Rout, outSize); cl_writebuffer(d_Rin, Rin, memSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU); ScanPara *SP; SP=(ScanPara*)malloc(sizeof(ScanPara)); initScan(rLen,SP); scanImpl(d_Rin,rLen,d_Rout,&index,eventList,&Kernel,&CPU_GPU,&burden,SP,_CPU_GPU); cl_readbuffer(Rout, d_Rout, outSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU); clWaitForEvents(1,&eventList[(index-1)%2]); closeScan(SP); deschedule(CPU_GPU,burden); //validateScan( (int*)Rin, rLen, (int*)Rout ); HOST_FREE(Rin); HOST_FREE(Rout); CL_FREE(d_Rin); CL_FREE(d_Rout); clReleaseKernel(Kernel); clReleaseEvent(eventList[0]); clReleaseEvent(eventList[1]); }
/**
 * ia_css_isys_device_open() - configure ISYS device
 *
 * @context: out-parameter; on success receives a handle to the newly
 *           allocated ia_css_isys_context
 * @config:  device configuration; mipi/pixel block tables are range-checked
 *           (asserts only), and driver_sys.firmware_address is forwarded to
 *           the firmware-control layer
 *
 * Returns 0 on success, ENOMEM (positive) if the context allocation fails,
 * or the result of ia_css_fwctrl_device_open() otherwise.
 * NOTE(review): the positive ENOMEM mixes conventions with the
 * fwctrl-propagated error path — confirm callers treat any non-zero value
 * as failure.
 */
int ia_css_isys_device_open(
	HANDLE *context,
	struct ia_css_isys_device_cfg_data *config
)
{
	unsigned int stream_handle;
	struct ia_css_isys_context *ctx;
	struct ia_css_fwctrl_devconfig device_config;
	int retval = 0;

	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,
		"ia_css_isys_device_open() enter: void\n");

	assert(config != NULL);

	/*Make sure that the size of the cmd struct is as expected */
	COMPILATION_ERROR_IF( SIZE_OF_ISYSPOC_CMD_MSG_STRUCT != sizeof(ia_css_isyspoc_cmd_msg_t));

	/* debug-build sanity checks: every configured mipi/pixel block must
	 * fit in the stream table and have a non-zero size */
	assert(config->mipi.nof_blocks <= STREAM_ID_MAX);
	for (stream_handle = 0; stream_handle < config->mipi.nof_blocks;
			stream_handle++) {
		assert(config->mipi.block_size[stream_handle] > 0);
	}

	assert(config->pixel.nof_blocks <= STREAM_ID_MAX);
	for (stream_handle = 0; stream_handle < config->pixel.nof_blocks;
			stream_handle++) {
		assert(config->pixel.block_size[stream_handle] > 0);
	}

	ctx = (struct ia_css_isys_context *)HOST_MALLOC(sizeof(struct ia_css_isys_context));
	/* assert in debug builds, explicit NULL check for release builds */
	assert(ctx != NULL);
	if(ctx == NULL) {
		ia_css_debug_dtrace(IA_CSS_DEBUG_ERROR,
			"ia_css_isys_device_open(): Failed to allocate ctx memory\n");
		return ENOMEM;
	}
	memset(ctx, 0, sizeof(struct ia_css_isys_context));

	/* hand the context back to the caller; caller owns it from here.
	 * NOTE(review): *context is set before ia_css_fwctrl_device_open()
	 * below can fail — confirm callers discard the handle on non-zero
	 * return. */
	*context = (HANDLE)ctx;

	/* all streams start idle with no output pins configured */
	for (stream_handle = 0; stream_handle < STREAM_ID_MAX; stream_handle++) {
		ctx->stream_state_array[stream_handle] = IA_CSS_ISYS_STREAM_STATE_IDLE;
		ctx->stream_nof_output_pins[stream_handle] = 0;
	}

	device_config.firmware_address = config->driver_sys.firmware_address;
	retval = ia_css_fwctrl_device_open(&device_config);

	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,
		"ia_css_isys_device_open() return: return_err=%d\n", retval);

	return retval;
}