示例#1
0
文件: boltScan.c 项目: klois/SAMPO
/*
 * Set up the file-level state used by the segmented scan: remembers the
 * device and work-group size, allocates the three per-work-group scratch
 * buffers, builds the three kernels from "kernel/boltScan.cl" and creates
 * one event per kernel launch.
 *
 * _wx            work-group size; must be non-zero because it is used as a
 *                divisor when sizing the scratch buffers
 * maxN           largest element count that will ever be scanned; buffers are
 *                sized once for this upper bound
 * _dev           device on which buffers and kernels are created
 * build_options  forwarded unchanged to icl_create_kernel
 * flag           forwarded unchanged to icl_create_kernel
 */
void segmented_scan_init(size_t _wx, UINT maxN, icl_device *_dev, const char* build_options, icl_create_kernel_flag flag) {
	dev = _dev;
	wx = _wx;

	// Over-approximation for allocation, using the maximum allowed size for n:
	// one slot per work group, rounded up to a whole multiple of wx.
	UINT numWorkGroups = ((maxN + wx - 1) / wx);
	UINT sizeScanBuff = ((numWorkGroups + wx - 1) / wx) * wx;

	// Keep the byte count in size_t so the multiplication by sizeof(UINT) is
	// not narrowed back to UINT before it reaches the allocator.
	const size_t buf_1 = sizeof(UINT) * (size_t)sizeScanBuff;

	preSumArray = icl_create_buffer(dev, CL_MEM_READ_WRITE, buf_1);
	postSumArray = icl_create_buffer(dev, CL_MEM_READ_WRITE, buf_1);
	keySumArray = icl_create_buffer(dev, CL_MEM_READ_WRITE, buf_1);

	perBlockScanByKey = icl_create_kernel(dev, "kernel/boltScan.cl", "perBlockScanByKey", build_options, flag);
	intraBlockInclusiveScanByKey = icl_create_kernel(dev, "kernel/boltScan.cl", "intraBlockInclusiveScanByKey", build_options, flag);
	perBlockAdditionByKey = icl_create_kernel(dev, "kernel/boltScan.cl", "perBlockAdditionByKey", build_options, flag);

	perBlockScanEvent = icl_create_event();
	intraBlockEvent = icl_create_event();
	perBlockAdditionEvent = icl_create_event();
#if TIMING
	// Reset the accumulated per-kernel timings and create the timer.
	perBlockScanTime = 0;
	intraBlockTime = 0;
	perBlockAdditionTime = 0;
	timer = icl_init_timer(ICL_MILLI);
#endif
}
示例#2
0
文件: vec_mul.c 项目: 8l/insieme
/*
 * Vector-multiply example driver.
 *
 * Fills two host arrays, runs the "vec_mul" kernel from "vec_mul.cl" on
 * device 0 (if any device is available), reads the result back and compares
 * every element against i * size. Timing information is printed unless the
 * program is built with INSIEME defined.
 *
 * Returns 1 if the host arrays cannot be allocated, 0 otherwise (the outcome
 * of the result check is reported on stdout only).
 */
int main(int argc, char* argv[]) {
	(void)argc;
	(void)argv;

	int size = 1000;
	const size_t bytes = sizeof(int) * (size_t)size;

	int* input1 = malloc(bytes);
	int* input2 = malloc(bytes);
	// Zero-initialized so the result check below never reads indeterminate
	// memory when no device is available and the kernel is never run.
	int* output = calloc((size_t)size, sizeof *output);
	if (input1 == NULL || input2 == NULL || output == NULL) {
		fprintf(stderr, "vec_mul: failed to allocate host buffers\n");
		free(input1);
		free(input2);
		free(output);
		return 1;
	}

	for(int i = 0; i < size; ++i) {
		input1[i] = i;
		input2[i] = 1;
	}

#ifndef INSIEME
	icl_timer* time1 = icl_init_timer(ICL_SEC);
	icl_start_timer(time1);
#endif
	icl_init_devices(ICL_CPU);
#ifndef INSIEME
	printf("TIME for initialization: %f\n", icl_stop_timer(time1));
#endif

	if (icl_get_num_devices() != 0) {
		icl_device* dev = icl_get_device(0);

		icl_print_device_short_info(dev);
		icl_kernel* kernel = icl_create_kernel(dev, "vec_mul.cl", "vec_mul", "", ICL_SOURCE);

		icl_buffer* buf_input1 = icl_create_buffer(dev, CL_MEM_READ_ONLY, bytes);
		icl_buffer* buf_input2 = icl_create_buffer(dev, CL_MEM_READ_ONLY, bytes);
		icl_buffer* buf_output = icl_create_buffer(dev, CL_MEM_WRITE_ONLY, bytes);

		icl_event* wb1 = icl_create_event();
		icl_event* wb2 = icl_create_event();
		icl_event* rb = icl_create_event();

		// Non-blocking uploads; the kernel launch waits on both via wb_all.
		icl_write_buffer(buf_input1, CL_FALSE, bytes, &input1[0], NULL, wb1);
		icl_write_buffer(buf_input2, CL_FALSE, bytes, &input2[0], NULL, wb2);

		// Global size is the element count rounded up to a whole number of
		// work groups (integer ceil division instead of float arithmetic).
		size_t szLocalWorkSize = 256;
		size_t szGlobalWorkSize = (((size_t)size + szLocalWorkSize - 1) / szLocalWorkSize) * szLocalWorkSize;

		icl_event* rk = icl_create_event();
		icl_event* wb_all = icl_create_event_list(2, wb1, wb2);
		icl_run_kernel(kernel, 1, &szGlobalWorkSize, &szLocalWorkSize, wb_all, rk, 4,
											(size_t)0, (void *)buf_input1,
											(size_t)0, (void *)buf_input2,
											(size_t)0, (void *)buf_output,
											sizeof(cl_int), (void *)&size);

		// Blocking read, ordered after the kernel through rk.
		icl_read_buffer(buf_output, CL_TRUE, bytes, &output[0], rk, rb);

		printf("Time wb1 %f\n", icl_profile_event(wb1, ICL_STARTED, ICL_FINISHED, ICL_SEC));
		printf("Time wb2 %f\n", icl_profile_event(wb2, ICL_STARTED, ICL_FINISHED, ICL_SEC));
		printf("Time rk %f\n", icl_profile_event(rk, ICL_STARTED, ICL_FINISHED, ICL_SEC));
		printf("Time rb %f\n", icl_profile_event(rb, ICL_STARTED, ICL_FINISHED, ICL_SEC));

		icl_release_events(5, wb1, wb2, wb_all, rk, rb);
		icl_release_buffers(3, buf_input1, buf_input2, buf_output);
		icl_release_kernel(kernel);
	}
#ifndef INSIEME
	icl_restart_timer(time1);
#endif
	icl_release_devices();
#ifndef INSIEME
	printf("TIME for releasing the devices: %f\n", icl_stop_timer(time1));
	icl_release_timer(time1);
#endif

	// CHECK for output
	printf("======================\n= Vector Mul Done\n");
	int check = 1;
	for(int i = 0; i < size; ++i) {
		// Same reference value the comparison uses is also what gets reported.
		const int expected = i * size;
		if(output[i] != expected) {
			check = 0;
			printf("= fail at %d, expected %d / actual %d\n", i, expected, output[i]);
			break;
		}
	}
	printf("= result check: %s\n======================\n", check ? "OK" : "FAIL");
	free(input1);
	free(input2);
	free(output);
	return 0;
}