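// Returns bool: whether the length is a multiple of maxNumThread.
// If yes, info[0] is the number of blocks and info[1] is maxNumThread.
// If no, info[0] is the number of blocks excluding the last block and
// info[1] is the number of threads in the last block.
// NOTE(review): this describes a bool-returning helper with an info[] output;
// the function that follows returns void and has no such parameter, so this
// comment appears to belong to a different function -- confirm and relocate.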
/*
 * Exercises CL_AggMaxOnly end to end: generates rLen random records on the
 * host, copies them into a device buffer, runs the aggregation, reads the
 * results back into a host buffer and then releases all four buffers.
 *
 *   rLen        - number of Record elements generated and aggregated
 *   OPERATOR    - not used while the validateReduce call below is disabled
 *   numThreadPB - forwarded unchanged to CL_AggMaxOnly
 *   numMaxBlock - forwarded unchanged to CL_AggMaxOnly
 *
 * Prints "testReduceFinish" once everything has been released.
 */
void testReduceImpl( int rLen, int OPERATOR, int numThreadPB , int numMaxBlock)
{
	int cpuGpuFlag = 0;
	int inputBytes = sizeof(Record)*rLen;
	unsigned int resultCount = 0;
	void *hostIn;
	void *hostOut;
	cl_mem devIn = NULL;
	cl_mem devOut;

	/* Host-side input filled with random records. */
	HOST_MALLOC(hostIn, inputBytes );
	generateRand((Record *)hostIn, TEST_MAX - 11111, rLen, 0 );

	/* Upload the input and run the aggregation; devOut is produced by the call. */
	CL_MALLOC( &devIn, inputBytes );
	cl_writebuffer( devIn, hostIn, inputBytes,0);
	resultCount = CL_AggMaxOnly( devIn, rLen, &devOut, numThreadPB, numMaxBlock, cpuGpuFlag);

	/* Fetch however many records the aggregation reported. */
	HOST_MALLOC(hostOut, sizeof(Record)*resultCount );
	cl_readbuffer( hostOut, devOut, sizeof(Record)*resultCount, cpuGpuFlag);

	/* Validation is currently disabled:
	 * validateReduce((Record *)hostIn, rLen, ((Record *)hostOut)[0].y, OPERATOR );
	 */

	HOST_FREE( hostIn );
	HOST_FREE( hostOut );
	CL_FREE( devIn );
	CL_FREE( devOut );
	printf("testReduceFinish\n");
}
/* Esempio n. 2 (example no. 2) -- snippet separator left over from extraction; trailing "0" preserved here. */
/*
 * Release a memory area.
 *
 * On SC_WIN32 builds the pointer is moved back by kAlign bytes before being
 * handed to free(); on every other build it is passed to HOST_FREE as-is.
 *
 *   ptr - area to release. On SC_WIN32 a NULL pointer is ignored, because
 *         subtracting kAlign from NULL is undefined behaviour and would hand
 *         free() a bogus address.
 */
void pyr_free_area(void *ptr)
{
#ifdef SC_WIN32
    if (!ptr) {
        return;
    }
    free((void*)((char*)ptr - kAlign));
#else
    HOST_FREE(ptr);
#endif
}
/*
 * Exercises CL_GroupBy: generates rLen random records on the host, groups
 * them into a second host buffer, checks the outcome with validateGroupBy
 * and releases every buffer.
 *
 *   rLen      - number of Record elements generated and grouped
 *   numThread - forwarded unchanged to CL_GroupBy
 *   numBlock  - forwarded unchanged to CL_GroupBy
 */
void testGroupByImpl( int rLen, int numThread, int numBlock)
{
	int cpuGpuFlag = 0;
	int bufBytes = sizeof(Record)*rLen;
	int groupCount = 0;
	void *hostIn;
	void *hostOut;
	int *groupStarts;

	/* Input and output buffers have the same size. */
	HOST_MALLOC(hostIn, bufBytes );
	HOST_MALLOC(hostOut, bufBytes );
	generateRand((Record *)hostIn, 64, rLen, 0 );

	/* groupStarts is handed over by address and is only valid after this call. */
	groupCount = CL_GroupBy((Record *) hostIn, rLen, (Record *) hostOut, &groupStarts, numThread, numBlock, cpuGpuFlag);

	validateGroupBy( (Record *)hostIn, rLen, (Record *)hostOut, groupStarts, groupCount );

	/* presumably CL_GroupBy allocated groupStarts with a matching host allocator -- TODO confirm */
	HOST_FREE(groupStarts);
	HOST_FREE( hostIn );
	HOST_FREE( hostOut );
}
/*
 * Exercises scanImpl: generates rLen random ints on the host, uploads them to
 * a device buffer, runs the scan, reads the output back, waits for the last
 * recorded event and then releases buffers, kernel and events.
 *
 *   rLen - number of int elements in both the input and the output buffer
 *
 * If the ScanPara allocation fails, the function returns immediately, before
 * any buffer has been allocated or any transfer has been enqueued.
 */
void testScanImpl(int rLen)
{
	int _CPU_GPU=0;
	cl_event eventList[2];
	int index=0;
	cl_kernel Kernel; 
	int CPU_GPU;
	double burden;	
	int memSize=sizeof(int)*rLen;
	int outSize=sizeof(int)*rLen;
	ScanPara *SP;

	/* Allocate the scan parameters first: bailing out here needs no cleanup,
	 * whereas failing after cl_writebuffer would leave a transfer in flight. */
	SP=(ScanPara*)malloc(sizeof(ScanPara));
	if (!SP) {
		return;
	}

	void *Rin;
	HOST_MALLOC(Rin, memSize);
	generateRandInt((int*)Rin, rLen,rLen,0);
	void *Rout;
	HOST_MALLOC(Rout, outSize);
	cl_mem d_Rin;
	CL_MALLOC(&d_Rin, memSize);
	cl_mem d_Rout;
	CL_MALLOC(&d_Rout, outSize);

	/* Upload, scan, download; index/eventList/CPU_GPU/burden are passed by
	 * address to each step. */
	cl_writebuffer(d_Rin, Rin, memSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
	initScan(rLen,SP);
	scanImpl(d_Rin,rLen,d_Rout,&index,eventList,&Kernel,&CPU_GPU,&burden,SP,_CPU_GPU);	
	cl_readbuffer(Rout, d_Rout, outSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);

	/* NOTE(review): assumes index > 0 here; with index == 0 the expression
	 * (index-1)%2 is -1 and would read before the array -- confirm. */
	clWaitForEvents(1,&eventList[(index-1)%2]);
	closeScan(SP);
	/* NOTE(review): SP itself is not freed in this function; confirm whether
	 * closeScan releases it, otherwise it leaks. */
	deschedule(CPU_GPU,burden);
	//validateScan( (int*)Rin, rLen, (int*)Rout );
	HOST_FREE(Rin);
	HOST_FREE(Rout);
	CL_FREE(d_Rin);
	CL_FREE(d_Rout);
	clReleaseKernel(Kernel);  
	/* NOTE(review): assumes both event slots were populated by the calls above -- confirm. */
	clReleaseEvent(eventList[0]);
	clReleaseEvent(eventList[1]);
}
/*
 * Close the device behind `context` and release the context object.
 *
 *   context     - handle that is cast to struct ia_css_isys_context *
 *   nof_streams - number of entries of ctx->stream_state_array to inspect
 *
 * Returns:
 *   EFAULT if context is NULL (nothing is closed or freed);
 *   EPERM  if any of the first nof_streams streams is not in
 *          IA_CSS_ISYS_STREAM_STATE_IDLE (nothing is closed or freed);
 *   otherwise the value returned by ia_css_fwctrl_device_close(). In that
 *   case the context is freed regardless of that value.
 */
int ia_css_isys_device_close(
	HANDLE context,
	unsigned int nof_streams
) {
	struct ia_css_isys_context *ctx = (struct ia_css_isys_context *)context;
	unsigned int i;
	int ret = 0;

	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_isys_device_close() enter: void\n");

	/* Reject a missing context instead of dereferencing it in the loop below. */
	if (!ctx) {
		return EFAULT;
	}

	/* Refuse to close while any stream is still active. */
	for (i=0; i < nof_streams; i++) {
		if (ctx->stream_state_array[i] != IA_CSS_ISYS_STREAM_STATE_IDLE) {
			return EPERM;
		}
	}

	ret = ia_css_fwctrl_device_close();
	HOST_FREE(context);

	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_isys_device_close() return: return_err=%d\n", ret);
	return ret;
}