コード例 #1
0
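//Returns bool: whether the size is a multiple of maxNumThread.
//If yes: info[0] = number of blocks, info[1] = maxNumThread.
//If no:  info[0] = number of blocks excluding the last block, info[1] = number of threads in the last block.
//NOTE(review): this description does not match testReduceImpl below (void return, no info parameter); it appears to belong to a different helper - confirm and relocate.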
/*
 * Test driver for CL_AggMaxOnly.
 *
 * Fills a host buffer with rLen random records, copies it into a device
 * buffer with cl_writebuffer, runs CL_AggMaxOnly on it, reads the numResult
 * output records back to the host, and releases every buffer.
 *
 * rLen        - number of Record elements generated and passed to the kernel
 * OPERATOR    - not used: CL_AggMaxOnly is always called; the only consumer
 *               was the validateReduce call, which is commented out
 * numThreadPB - forwarded unchanged to CL_AggMaxOnly
 * numMaxBlock - forwarded unchanged to CL_AggMaxOnly
 */
void testReduceImpl( int rLen, int OPERATOR, int numThreadPB , int numMaxBlock)
{
	int _CPU_GPU=0;
	int memSize = sizeof(Record)*rLen;

	(void)OPERATOR;	/* kept for interface compatibility, see header comment */

	void * h_Rin;
	HOST_MALLOC(h_Rin, memSize );
	generateRand((Record *)h_Rin, TEST_MAX - 11111, rLen, 0 );

	void* h_Rout;

	unsigned int numResult = 0;
	cl_mem d_Rin=NULL;
	/* d_Rout is filled in by CL_AggMaxOnly through the pointer passed below;
	 * start from NULL so it never holds an indeterminate handle. */
	cl_mem d_Rout=NULL;
	CL_MALLOC( &d_Rin, memSize );
	cl_writebuffer( d_Rin, h_Rin, memSize,_CPU_GPU);
	numResult= CL_AggMaxOnly( d_Rin, rLen, &d_Rout, numThreadPB, numMaxBlock,_CPU_GPU);

	/* The host result buffer can only be sized once numResult is known. */
	HOST_MALLOC(h_Rout, sizeof(Record)*numResult );
	cl_readbuffer( h_Rout, d_Rout, sizeof(Record)*numResult,_CPU_GPU);
	//validateReduce((Record *)h_Rin, rLen,((Record *)h_Rout)[0].y, OPERATOR );	
	HOST_FREE( h_Rin );
	HOST_FREE( h_Rout );
	CL_FREE( d_Rin );
	CL_FREE( d_Rout );
	printf("testReduceFinish\n");
}
コード例 #2
0
/*
 * Test driver for the "aggregate after group-by" operators.
 *
 * Generates two random record arrays (h_Rin and h_Sin), groups h_Rin with
 * CL_GroupBy into h_Rout / h_startPos, validates the grouping, then runs the
 * aggregate selected by OPERATOR and validates its per-group results.
 *
 * rLen      - number of Record elements in each generated array
 * OPERATOR  - one of REDUCE_MAX, REDUCE_MIN, REDUCE_SUM, REDUCE_AVERAGE;
 *             any other value runs no aggregate
 * numThread - forwarded to CL_GroupBy and to the aggregate call
 * numBlock  - forwarded to CL_GroupBy
 */
void testAggAfterGroupByImpl( int rLen, int OPERATOR, int numThread, int numBlock)
{
	int _CPU_GPU=0;
	int memSize = sizeof(Record)*rLen;
	void* h_Rin;
	void* h_Rout;
	void* h_Sin;
	int* h_startPos;
	HOST_MALLOC( h_Rin, memSize );
	HOST_MALLOC( h_Rout, memSize );
	HOST_MALLOC( h_Sin, memSize );
	generateRand((Record *) h_Rin, 50, rLen, 0 );
	generateRand((Record *) h_Sin, TEST_MAX, rLen, 0 );  
	int numGroup = 0;
	/* CL_GroupBy receives &h_startPos and hands the start-position array back
	 * through it; the caller releases it (as testGroupByImpl does). */
	numGroup = CL_GroupBy((Record *)h_Rin, rLen,(Record *) h_Rout, &h_startPos, numThread, numBlock,_CPU_GPU);
	validateGroupBy((Record*) h_Rin, rLen, (Record*)h_Rout,h_startPos, numGroup );

	/* One int result slot per group. */
	void* h_aggResults;
	HOST_MALLOC(h_aggResults, sizeof(int)*numGroup );
	switch(OPERATOR){
	case REDUCE_MAX:
		{
			CL_agg_max_afterGroupBy((Record *)h_Rout,rLen,h_startPos,numGroup,(Record *)h_Sin,(int *)h_aggResults,numThread,_CPU_GPU);
			break;
		}
	case REDUCE_MIN:
		{
			CL_agg_min_afterGroupBy((Record *)h_Rout,rLen,h_startPos,numGroup,(Record *)h_Sin,(int *)h_aggResults,numThread,_CPU_GPU);
			break;
		}
	case REDUCE_SUM:
		{
			CL_agg_sum_afterGroupBy((Record *)h_Rout,rLen,h_startPos,numGroup,(Record *)h_Sin,(int *)h_aggResults,numThread,_CPU_GPU);
			break;
		}
	case REDUCE_AVERAGE:
		{
			CL_agg_avg_afterGroupBy((Record *)h_Rout,rLen,h_startPos,numGroup,(Record *)h_Sin,(int *)h_aggResults,numThread,_CPU_GPU);
			break;
		}
	default:
		{
			/* Unrecognised operator: no aggregate is run (same as before). */
			break;
		}
	}
	/* NOTE(review): the validator is given h_Rin (ungrouped input) together
	 * with h_startPos, whereas the aggregates ran on h_Rout - confirm this is
	 * what validateAggAfterGroupBy expects. */
	validateAggAfterGroupBy((Record*) h_Rin, rLen, (int*)h_startPos, numGroup,(Record*) h_Sin, (int *)h_aggResults, OPERATOR);

	/* Release everything acquired above; previously all five buffers leaked
	 * on every call. */
	HOST_FREE( h_aggResults );
	HOST_FREE( h_startPos );
	HOST_FREE( h_Sin );
	HOST_FREE( h_Rout );
	HOST_FREE( h_Rin );
}
コード例 #3
0
/*
 * Test driver for CL_GroupBy.
 *
 * Generates rLen random records, groups them with CL_GroupBy, passes the
 * input, the grouped output and the returned start positions to
 * validateGroupBy, and then frees all three host buffers.
 *
 * rLen      - number of Record elements to generate and group
 * numThread - forwarded unchanged to CL_GroupBy
 * numBlock  - forwarded unchanged to CL_GroupBy
 */
void testGroupByImpl( int rLen, int numThread, int numBlock)
{
	int cpuGpuMode = 0;
	int bytes = sizeof(Record) * rLen;
	void *inRecords;
	void *groupedRecords;
	int *groupStart;	/* set by CL_GroupBy through &groupStart */
	int groupCount = 0;

	/* Input and output arrays have the same size. */
	HOST_MALLOC(inRecords, bytes);
	HOST_MALLOC(groupedRecords, bytes);
	generateRand((Record *)inRecords, 64, rLen, 0);

	groupCount = CL_GroupBy((Record *)inRecords, rLen, (Record *)groupedRecords,
				&groupStart, numThread, numBlock, cpuGpuMode);

	validateGroupBy((Record *)inRecords, rLen, (Record *)groupedRecords,
			groupStart, groupCount);

	HOST_FREE(groupStart);
	HOST_FREE(inRecords);
	HOST_FREE(groupedRecords);
}
コード例 #4
0
/*
 * Test driver for scanImpl.
 *
 * Generates rLen random ints on the host, copies them into a device buffer,
 * runs scanImpl from d_Rin into d_Rout, reads the output back, waits on the
 * most recently recorded event, and releases the host buffers, device
 * buffers, kernel and events.
 *
 * rLen - number of int elements to generate and scan
 *
 * If the ScanPara allocation fails the function returns immediately without
 * running the test.
 */
void testScanImpl(int rLen)
{
	int _CPU_GPU=0;
	/* Both slots are released unconditionally at the end, so make sure a slot
	 * that never received an event holds a null handle rather than garbage. */
	cl_event eventList[2] = { 0 };
	int index=0;
	cl_kernel Kernel; 
	int CPU_GPU;
	double burden;	
	int memSize=sizeof(int)*rLen;
	int outSize=sizeof(int)*rLen;

	/* Allocate the scan parameters first: nothing else has been acquired yet,
	 * so a failed allocation can simply bail out with no cleanup. */
	ScanPara *SP;
	SP=(ScanPara*)malloc(sizeof(ScanPara));
	if (SP == NULL) {
		return;
	}

	void *Rin;
	HOST_MALLOC(Rin, memSize);
	generateRandInt((int*)Rin, rLen,rLen,0);
	void *Rout;
	HOST_MALLOC(Rout, outSize);
	cl_mem d_Rin;
	CL_MALLOC(&d_Rin, memSize);
	cl_mem d_Rout;
	CL_MALLOC(&d_Rout, outSize);
	cl_writebuffer(d_Rin, Rin, memSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
	initScan(rLen,SP);
	scanImpl(d_Rin,rLen,d_Rout,&index,eventList,&Kernel,&CPU_GPU,&burden,SP,_CPU_GPU);	
	cl_readbuffer(Rout, d_Rout, outSize,&index,eventList,&CPU_GPU,&burden,_CPU_GPU);
	/* NOTE(review): assumes the calls above advanced index to at least 1;
	 * with index == 0 the subscript (index-1)%2 would be negative. */
	clWaitForEvents(1,&eventList[(index-1)%2]);
	/* NOTE(review): SP itself is not freed in this function; confirm whether
	 * closeScan releases the ScanPara object or only what it refers to. */
	closeScan(SP);
	deschedule(CPU_GPU,burden);
	//validateScan( (int*)Rin, rLen, (int*)Rout );
	HOST_FREE(Rin);
	HOST_FREE(Rout);
	CL_FREE(d_Rin);
	CL_FREE(d_Rout);
	clReleaseKernel(Kernel);  
	clReleaseEvent(eventList[0]);
	clReleaseEvent(eventList[1]);
}
コード例 #5
0
/**
 * ia_css_isys_device_open() - configure ISYS device
 *
 * Checks the supplied configuration with assertions, allocates and
 * zero-initialises an ISYS context, marks every stream slot idle with no
 * output pins, publishes the context through @context and then opens the
 * firmware-control device with the firmware address taken from @config.
 *
 * @context: output; receives the newly allocated context as a HANDLE.
 *           Must not be NULL.
 * @config:  device configuration. Must not be NULL; the mipi and pixel
 *           block counts must not exceed STREAM_ID_MAX and every listed
 *           block size must be greater than zero (all enforced by assert).
 *
 * Return: ENOMEM if the context cannot be allocated, otherwise the value
 * returned by ia_css_fwctrl_device_open().
 *
 * Note: *context is assigned before the firmware-control device is opened,
 * so it still refers to the allocated context when a non-zero value from
 * ia_css_fwctrl_device_open() is returned.
 */
int ia_css_isys_device_open(
	HANDLE *context,
	struct ia_css_isys_device_cfg_data *config
)
{
	unsigned int stream_handle;
	struct ia_css_isys_context *ctx;
	struct ia_css_fwctrl_devconfig device_config;
	int retval = 0;

	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_isys_device_open() enter: void\n");
	/* context is written through below, so it needs the same guard as config. */
	assert(context != NULL);
	assert(config != NULL);

	/*Make sure that the size of the cmd struct is as expected */
	COMPILATION_ERROR_IF( SIZE_OF_ISYSPOC_CMD_MSG_STRUCT != sizeof(ia_css_isyspoc_cmd_msg_t));

	assert(config->mipi.nof_blocks <= STREAM_ID_MAX);
	for (stream_handle = 0; stream_handle < config->mipi.nof_blocks; stream_handle++) {
		assert(config->mipi.block_size[stream_handle] > 0);
	}

	assert(config->pixel.nof_blocks <= STREAM_ID_MAX);
	for (stream_handle = 0; stream_handle < config->pixel.nof_blocks; stream_handle++) {
		assert(config->pixel.block_size[stream_handle] > 0);
	}

	ctx = (struct ia_css_isys_context *)HOST_MALLOC(sizeof(struct ia_css_isys_context));
	/* The assert catches the failure in debug builds; the explicit check
	 * keeps release builds (assert compiled out) from dereferencing NULL. */
	assert(ctx != NULL);
	if(ctx == NULL) {
		ia_css_debug_dtrace(IA_CSS_DEBUG_ERROR,
			"ia_css_isys_device_open(): Failed to allocate ctx memory\n");
		return ENOMEM;
	}
	memset(ctx, 0, sizeof(struct ia_css_isys_context));
	*context = (HANDLE)ctx;

	for (stream_handle = 0; stream_handle < STREAM_ID_MAX; stream_handle++) {
		ctx->stream_state_array[stream_handle] = IA_CSS_ISYS_STREAM_STATE_IDLE;
		ctx->stream_nof_output_pins[stream_handle] = 0;
	}

	/* Only firmware_address is set explicitly; zero the structure first so
	 * any other member is passed on with a defined value instead of stack
	 * garbage. */
	memset(&device_config, 0, sizeof(device_config));
	device_config.firmware_address = config->driver_sys.firmware_address;
	retval = ia_css_fwctrl_device_open(&device_config);

	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_isys_device_open() return: return_err=%d\n", retval);
	return retval;
}