/*
 * return bool: whether the length is a multiple of maxNumThread.
 * if yes: info[0] = number of blocks, info[1] = maxNumThread
 * if no:  info[0] = number of blocks excluding the last block,
 *         info[1] = number of threads in the last block
 */
void testReduceImpl(int rLen, int OPERATOR, int numThreadPB, int numMaxBlock)
{
    int _CPU_GPU = 0;
    int memSize = sizeof(Record) * rLen;

    /* allocate and fill the host-side input relation */
    void *h_Rin;
    HOST_MALLOC(h_Rin, memSize);
    generateRand((Record *)h_Rin, TEST_MAX - 11111, rLen, 0);

    void *h_Rout;
    unsigned int numResult = 0;

    /* copy the input relation to the device */
    cl_mem d_Rin = NULL;
    cl_mem d_Rout = NULL;
    CL_MALLOC(&d_Rin, memSize);
    cl_writebuffer(d_Rin, h_Rin, memSize, 0);

    /* run the max-only aggregation on the device */
    numResult = CL_AggMaxOnly(d_Rin, rLen, &d_Rout, numThreadPB, numMaxBlock, _CPU_GPU);

    /* copy the result back to the host */
    HOST_MALLOC(h_Rout, sizeof(Record) * numResult);
    cl_readbuffer(h_Rout, d_Rout, sizeof(Record) * numResult, _CPU_GPU);
    //validateReduce((Record *)h_Rin, rLen, ((Record *)h_Rout)[0].y, OPERATOR);

    HOST_FREE(h_Rin);
    HOST_FREE(h_Rout);
    CL_FREE(d_Rin);
    CL_FREE(d_Rout);
    printf("testReduceFinish\n");
}
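/*
 * Hedged sketch, not part of the original source: one plausible reading of the
 * block/thread partitioning rule described in the comment above. The helper
 * name computeThreadPartition and the info[] layout are assumptions made only
 * for illustration; the real configuration helper may differ.
 */
static int computeThreadPartition(int rLen, int maxNumThread, int info[2])
{
    if (rLen % maxNumThread == 0) {
        /* exact multiple: every block runs maxNumThread threads */
        info[0] = rLen / maxNumThread;   /* number of blocks */
        info[1] = maxNumThread;          /* threads per block */
        return 1;
    } else {
        /* remainder: the last block runs fewer threads */
        info[0] = rLen / maxNumThread;   /* full blocks, excluding the last */
        info[1] = rLen % maxNumThread;   /* threads in the last block */
        return 0;
    }
}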
void pyr_free_area(void *ptr)
{
#ifdef SC_WIN32
    /* on Win32 the allocation was offset by kAlign bytes; free the real base pointer */
    free((void *)((char *)ptr - kAlign));
#else
    HOST_FREE(ptr);
#endif
}
void testGroupByImpl(int rLen, int numThread, int numBlock)
{
    int _CPU_GPU = 0;
    int memSize = sizeof(Record) * rLen;

    /* allocate host input/output relations and fill the input with random keys */
    void *h_Rin;
    HOST_MALLOC(h_Rin, memSize);
    void *h_Rout;
    HOST_MALLOC(h_Rout, memSize);
    generateRand((Record *)h_Rin, 64, rLen, 0);

    int *h_startPos;
    int numGroup = 0;

    /* group by */
    numGroup = CL_GroupBy((Record *)h_Rin, rLen, (Record *)h_Rout, &h_startPos,
                          numThread, numBlock, _CPU_GPU);

    /* validate the grouped output against the original input */
    validateGroupBy((Record *)h_Rin, rLen, (Record *)h_Rout, h_startPos, numGroup);

    HOST_FREE(h_startPos);
    HOST_FREE(h_Rin);
    HOST_FREE(h_Rout);
}
void testScanImpl(int rLen)
{
    int _CPU_GPU = 0;
    cl_event eventList[2];
    int index = 0;
    cl_kernel Kernel;
    int CPU_GPU;
    double burden;
    int memSize = sizeof(int) * rLen;
    int outSize = sizeof(int) * rLen;

    /* allocate and fill the host-side input array */
    void *Rin;
    HOST_MALLOC(Rin, memSize);
    generateRandInt((int *)Rin, rLen, rLen, 0);
    void *Rout;
    HOST_MALLOC(Rout, outSize);

    /* allocate device buffers and copy the input over */
    cl_mem d_Rin;
    CL_MALLOC(&d_Rin, memSize);
    cl_mem d_Rout;
    CL_MALLOC(&d_Rout, outSize);
    cl_writebuffer(d_Rin, Rin, memSize, &index, eventList, &CPU_GPU, &burden, _CPU_GPU);

    /* set up and run the scan, then read the result back */
    ScanPara *SP = (ScanPara *)malloc(sizeof(ScanPara));
    initScan(rLen, SP);
    scanImpl(d_Rin, rLen, d_Rout, &index, eventList, &Kernel, &CPU_GPU, &burden, SP, _CPU_GPU);
    cl_readbuffer(Rout, d_Rout, outSize, &index, eventList, &CPU_GPU, &burden, _CPU_GPU);
    clWaitForEvents(1, &eventList[(index - 1) % 2]);
    closeScan(SP);
    deschedule(CPU_GPU, burden);
    //validateScan((int *)Rin, rLen, (int *)Rout);

    HOST_FREE(Rin);
    HOST_FREE(Rout);
    CL_FREE(d_Rin);
    CL_FREE(d_Rout);
    clReleaseKernel(Kernel);
    clReleaseEvent(eventList[0]);
    clReleaseEvent(eventList[1]);
}
int ia_css_isys_device_close(
    HANDLE context,
    unsigned int nof_streams
)
{
    struct ia_css_isys_context *ctx = (struct ia_css_isys_context *)context;
    unsigned int i;
    int ret = 0;

    ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,
        "ia_css_isys_device_close() enter: void\n");

    /* refuse to close the device while any stream is still active */
    for (i = 0; i < nof_streams; i++) {
        if (ctx->stream_state_array[i] != IA_CSS_ISYS_STREAM_STATE_IDLE) {
            return EPERM;
        }
    }

    ret = ia_css_fwctrl_device_close();
    HOST_FREE(context);

    ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,
        "ia_css_isys_device_close() return: return_err=%d\n", ret);
    return ret;
}