Example #1
0
/*
 * Releases the resources used by the segmented scan: three kernels, three
 * buffers and three events. None of these objects is declared in this
 * function; they come from an enclosing scope (presumably file-scope state
 * set up by a matching init routine -- TODO confirm).
 *
 * When TIMING is non-zero at compile time, the per-phase times and the
 * timer's current_time are printed before the timer itself is released.
 */
void segmented_scan_release() {
	// Kernels for the three scan phases.
	icl_release_kernel(perBlockScanByKey);
	icl_release_kernel(intraBlockInclusiveScanByKey);
	icl_release_kernel(perBlockAdditionByKey);

	// Auxiliary buffers (the leading 3 is the number of buffers that follow).
	icl_release_buffers(3, preSumArray, postSumArray, keySumArray);

	// One event per phase (the leading 3 is the number of events that follow).
	icl_release_events(3, perBlockScanEvent, intraBlockEvent, perBlockAdditionEvent);

#if TIMING
	// Report the per-phase times, then the value held in timer->current_time.
	printf(" perBlockScanTime \t%f\n intraBlockTime \t%f\n perBlockAdditionTime \t%f\n", perBlockScanTime, intraBlockTime, perBlockAdditionTime);
	printf(" scan time %f\n", timer->current_time);
	// The timer is read above, so it is released last.
	icl_release_timer(timer);
#endif
}
Example #2
0
File: vec_mul.c Project: 8l/insieme
/*
 * Vector-multiplication example driver.
 *
 * Fills two host arrays (input1[i] = i, input2[i] = 1), runs the "vec_mul"
 * kernel from "vec_mul.cl" on the first available ICL_CPU device, reads the
 * result back and verifies that output[i] == i * size for every element.
 * Event profiling times are printed for both writes, the kernel and the read.
 *
 * Returns 1 if the host arrays cannot be allocated; otherwise falls off the
 * end of main (i.e. exit status 0), with the check result printed as
 * "OK" or "FAIL".
 */
int main(int argc, char* argv[]) {
	int size = 1000;

	int* input1 = malloc(sizeof *input1 * size);
	int* input2 = malloc(sizeof *input2 * size);
	// Zero-initialised so the result check reads defined values (and reports
	// FAIL) even when no device is available and the kernel never runs.
	int* output = calloc(size, sizeof *output);

	if (input1 == NULL || input2 == NULL || output == NULL) {
		fprintf(stderr, "vec_mul: out of memory\n");
		free(input1);
		free(input2);
		free(output);
		return 1;
	}

	for (int i = 0; i < size; ++i) {
		input1[i] = i;
		input2[i] = 1;
	}

#ifndef INSIEME
	icl_timer* time1 = icl_init_timer(ICL_SEC);
	icl_start_timer(time1);
#endif
	icl_init_devices(ICL_CPU);
#ifndef INSIEME
	printf("TIME for initialization: %f\n", icl_stop_timer(time1));
#endif

	if (icl_get_num_devices() != 0) {
		icl_device* dev = icl_get_device(0);

		icl_print_device_short_info(dev);
		icl_kernel* kernel = icl_create_kernel(dev, "vec_mul.cl", "vec_mul", "", ICL_SOURCE);

		icl_buffer* buf_input1 = icl_create_buffer(dev, CL_MEM_READ_ONLY, sizeof(int) * size);
		icl_buffer* buf_input2 = icl_create_buffer(dev, CL_MEM_READ_ONLY, sizeof(int) * size);
		icl_buffer* buf_output = icl_create_buffer(dev, CL_MEM_WRITE_ONLY, sizeof(int) * size);

		icl_event* wb1 = icl_create_event();
		icl_event* wb2 = icl_create_event();
		icl_event* rb = icl_create_event();

		// CL_FALSE presumably requests non-blocking writes; completion is
		// tracked through wb1 / wb2 -- confirm against the icl API.
		icl_write_buffer(buf_input1, CL_FALSE, sizeof(int) * size, &input1[0], NULL, wb1);
		icl_write_buffer(buf_input2, CL_FALSE, sizeof(int) * size, &input2[0], NULL, wb2);

		// Round the global size up to the next multiple of the local size
		// using integer arithmetic (no float precision loss for large sizes).
		size_t szLocalWorkSize = 256;
		size_t szGlobalWorkSize =
			(((size_t)size + szLocalWorkSize - 1) / szLocalWorkSize) * szLocalWorkSize;

		icl_event* rk = icl_create_event();
		// The kernel is given wb_all (built from both write events) as its wait event.
		icl_event* wb_all = icl_create_event_list(2, wb1, wb2);
		// Four (size, pointer) argument pairs follow; size 0 presumably marks
		// an icl_buffer argument -- confirm against the icl API.
		icl_run_kernel(kernel, 1, &szGlobalWorkSize, &szLocalWorkSize, wb_all, rk, 4,
											(size_t)0, (void *)buf_input1,
											(size_t)0, (void *)buf_input2,
											(size_t)0, (void *)buf_output,
											sizeof(cl_int), (void *)&size);

		// The read is passed the kernel event rk as its wait event.
		icl_read_buffer(buf_output, CL_TRUE, sizeof(int) * size, &output[0], rk, rb);

		printf("Time wb1 %f\n", icl_profile_event(wb1, ICL_STARTED, ICL_FINISHED, ICL_SEC));
		printf("Time wb2 %f\n", icl_profile_event(wb2, ICL_STARTED, ICL_FINISHED, ICL_SEC));
		printf("Time rk %f\n", icl_profile_event(rk, ICL_STARTED, ICL_FINISHED, ICL_SEC));
		printf("Time rb %f\n", icl_profile_event(rb, ICL_STARTED, ICL_FINISHED, ICL_SEC));

		icl_release_events(5, wb1, wb2, wb_all, rk, rb);
		icl_release_buffers(3, buf_input1, buf_input2, buf_output);
		icl_release_kernel(kernel);
	}
#ifndef INSIEME
	icl_restart_timer(time1);
#endif
	icl_release_devices();
#ifndef INSIEME
	printf("TIME for releasing the devices: %f\n", icl_stop_timer(time1));
	icl_release_timer(time1);
#endif

	// CHECK for output
	printf("======================\n= Vector Mul Done\n");
	int check = 1;
	for (int i = 0; i < size; ++i) {
		int expected = i * size;
		if (output[i] != expected) {
			check = 0;
			// Report the same expected value the comparison uses, and end
			// the line so the summary below starts on its own line.
			printf("= fail at %d, expected %d / actual %d\n", i, expected, output[i]);
			break;
		}
	}
	printf("= result check: %s\n======================\n", check ? "OK" : "FAIL");
	free(input1);
	free(input2);
	free(output);
}