Esempio n. 1
0
/*
 * dump-opencl-binary: build the OpenCL program stored in the file named by
 * argv[1] and write the binary obtained for every device to its own file,
 * named "<input>.<device index>".
 *
 * Returns 0 after a successful dump (and after printing the usage text),
 * 1 if the platform cannot be created, memory runs out, or an output file
 * cannot be named, opened or written.
 */
int
main (int argc, const char **argv)
{
    OclPlatform *ocl;
    cl_int errcode;
    cl_program program;
    const char *inname;
    int num_devices;
    size_t *sizes = NULL;
    char **binaries = NULL;
    int status = 1;

    if (argc < 2) {
        printf ("Usage: dump-opencl-binary input.cl\n");
        return 0;
    }

    inname = argv[1];

    ocl = ocl_new (0, CL_DEVICE_TYPE_GPU);

    if (ocl == NULL)
        return 1;

    /* Query the device count once instead of on every loop iteration. */
    num_devices = ocl_get_num_devices (ocl);

    if (num_devices <= 0) {
        fprintf (stderr, "No devices to dump binaries for\n");
        ocl_free (ocl);
        return 1;
    }

    /* Zero-initialised so that the cleanup path can free every slot, even
     * the ones that were never filled in. */
    sizes = calloc ((size_t) num_devices, sizeof *sizes);
    binaries = calloc ((size_t) num_devices, sizeof *binaries);

    if (sizes == NULL || binaries == NULL) {
        fprintf (stderr, "Out of memory\n");
        goto cleanup_host;
    }

    program = ocl_create_program_from_file (ocl, inname, NULL, &errcode);
    OCL_CHECK_ERROR (errcode);

    OCL_CHECK_ERROR (clGetProgramInfo (program, CL_PROGRAM_BINARY_SIZES,
                                       (size_t) num_devices * sizeof *sizes,
                                       sizes, NULL));

    for (int i = 0; i < num_devices; i++) {
        binaries[i] = malloc (sizes[i]);

        /* malloc (0) may legitimately return NULL, so only a failed
         * non-empty request counts as an error. */
        if (binaries[i] == NULL && sizes[i] > 0) {
            fprintf (stderr, "Out of memory\n");
            goto cleanup_program;
        }
    }

    /* param_value_size must cover the whole pointer array; passing 0 makes
     * the query invalid. */
    OCL_CHECK_ERROR (clGetProgramInfo (program, CL_PROGRAM_BINARIES,
                                       (size_t) num_devices * sizeof *binaries,
                                       binaries, NULL));

    for (int i = 0; i < num_devices; i++) {
        char fname[256];
        FILE *fp;
        int len;

        len = snprintf (fname, sizeof fname, "%s.%i", inname, i);

        /* Refuse to write to a silently truncated file name. */
        if (len < 0 || (size_t) len >= sizeof fname) {
            fprintf (stderr, "Output file name for device %i is too long\n", i);
            goto cleanup_program;
        }

        fp = fopen (fname, "wb");

        if (fp == NULL) {
            perror (fname);
            goto cleanup_program;
        }

        /* Each device gets its own binary (binaries[i], not binaries[0]). */
        if (sizes[i] > 0 && fwrite (binaries[i], sizes[i], 1, fp) != 1) {
            perror (fname);
            fclose (fp);
            goto cleanup_program;
        }

        /* fclose flushes buffered data, so its result matters too. */
        if (fclose (fp) != 0) {
            perror (fname);
            goto cleanup_program;
        }
    }

    status = 0;

cleanup_program:
    OCL_CHECK_ERROR (clReleaseProgram (program));

cleanup_host:
    if (binaries != NULL) {
        for (int i = 0; i < num_devices; i++)
            free (binaries[i]);
    }

    free (binaries);
    free (sizes);
    ocl_free (ocl);

    return status;
}
Esempio n. 2
0
File: main.c Progetto: tfarago/mgpu
/*
 * Entry point of the multi-GPU benchmark.
 *
 * Parses the command line into a Settings record (number of images, width,
 * height, profiling switch), creates the OpenCL state with ocl_new(), builds
 * a benchmark with setup_benchmark() and passes it to measure_benchmark()
 * three times, once for each of execute_single_gpu,
 * execute_multi_gpu_single_thread and execute_multi_gpu_multi_thread.
 *
 * Returns 0 on success and 1 if the command line cannot be parsed.
 */
int main(int argc, char *argv[])
{
    static Settings settings = {
        .num_images = -1,
        .width = 1024,
        .height = 1024,
        .do_profile = FALSE
    };

    /* Every entry needs its own short name: --enable-profiling used to share
     * 'n' with --num-images, so it now uses 'p'. */
    static GOptionEntry entries[] = {
        { "num-images", 'n', 0, G_OPTION_ARG_INT, &settings.num_images, "Number of images", "N" },
        { "width", 'w', 0, G_OPTION_ARG_INT, &settings.width, "Width of images", "W" },
        { "height", 'h', 0, G_OPTION_ARG_INT, &settings.height, "Height of images", "H" },
        { "enable-profiling", 'p', 0, G_OPTION_ARG_NONE, &settings.do_profile, "Enable profiling", NULL },
        { NULL }
    };

    GOptionContext *context;
    opencl_desc *ocl;
    Benchmark *benchmark;
    GError *error = NULL;

    /* Set up threading before the first other GLib call; GLib releases that
     * still need g_thread_init() expect it to come first.
     * NOTE(review): confirm against the GLib version this project targets. */
    g_thread_init (NULL);

    context = g_option_context_new (" - test multi GPU performance");
    g_option_context_add_main_entries (context, entries, NULL);

    if (!g_option_context_parse (context, &argc, &argv, &error)) {
        g_print ("Option parsing failed: %s\n", error->message);
        return 1;
    }

    g_print("## %s@%s\n", g_get_user_name(), g_get_host_name());

    ocl = ocl_new (settings.do_profile);
    benchmark = setup_benchmark (ocl, &settings);

    measure_benchmark ("Single GPU", execute_single_gpu, benchmark);
    measure_benchmark ("Single Threaded, Multi GPU", execute_multi_gpu_single_thread, benchmark);
    measure_benchmark ("Multi Threaded, Multi GPU", execute_multi_gpu_multi_thread, benchmark);

    teardown_benchmark(benchmark);

    ocl_free(ocl);
    return 0;
}
Esempio n. 3
0
/*
 * Queue benchmark driver.
 *
 * Creates a platform covering all device types, prepares the shared data
 * set with setup_data() and hands it to run_benchmark() once per queue
 * setup function, each with its own report format string.
 *
 * Returns 1 when no platform could be created, 0 otherwise.
 */
int
main (void)
{
    /* Size argument handed to setup_data (). */
    const int num_elements = 4096 * 2048;

    OclPlatform *platform = ocl_new (0, CL_DEVICE_TYPE_ALL);

    if (!platform)
        return 1;

    Data *bench_data = setup_data (platform, num_elements);

    run_benchmark (setup_single_blocking_queue,
                   "Single blocking queue: %fs\n",
                   bench_data);
    run_benchmark (setup_ooo_queue,
                   "Single out-of-order queue: %fs\n",
                   bench_data);
    run_benchmark (setup_two_queues,
                   "Two queues: %fs\n",
                   bench_data);
    run_benchmark (setup_three_queues,
                   "Three queues: %fs\n",
                   bench_data);

    /* Tear down in reverse order of creation. */
    free_data (bench_data);
    ocl_free (platform);

    return 0;
}
Esempio n. 4
0
/*
 * Create a platform with ocl_new() and one command queue per device.
 *
 * platform          - forwarded unchanged to ocl_new()
 * type              - forwarded unchanged to ocl_new()
 * queue_properties  - properties passed to clCreateCommandQueue() for every
 *                     queue
 *
 * The queue array is stored in ocl->cmd_queues and ocl->own_queues is set
 * to 1. Errors from clCreateCommandQueue() are reported through
 * OCL_CHECK_ERROR.
 *
 * Returns the new platform, or NULL if ocl_new() fails or the queue array
 * cannot be allocated.
 */
OclPlatform *
ocl_new_with_queues (unsigned platform,
                     cl_device_type type,
                     cl_command_queue_properties queue_properties)
{
    OclPlatform *ocl;
    cl_command_queue *queues;
    cl_int errcode;

    ocl = ocl_new (platform, type);

    if (ocl == NULL)
        return NULL;

    /* Cast through the public cl_command_queue typedef rather than the
     * implementation's internal struct tag. */
    queues = (cl_command_queue *) malloc (ocl->num_devices * sizeof *queues);

    if (queues == NULL && ocl->num_devices > 0) {
        /* ocl has not been modified yet, so it is released exactly as
         * ocl_new () returned it. */
        ocl_free (ocl);
        return NULL;
    }

    for (cl_uint i = 0; i < ocl->num_devices; i++) {
        queues[i] = clCreateCommandQueue (ocl->context, ocl->devices[i],
                                          queue_properties, &errcode);
        OCL_CHECK_ERROR (errcode);
    }

    /* Publish the queues only once the array exists. */
    ocl->cmd_queues = queues;
    ocl->own_queues = 1;

    return ocl;
}
Esempio n. 5
0
int main(int argc, char* argv[])
{



		const size_t SIZE_execution_bit = (input_length - 3*filter_length +1);
		const size_t SIZE_input_bit = sizeof(gint32)*(input_length+1);
		const size_t SIZE_settings_bit = sizeof(gint32)*4;

		size_t output_bit_on_counts;
		size_t* SIZE_execution_pointer = &SIZE_execution_bit;

		gint32* filtersettings = (gint32*) malloc(SIZE_settings_bit);
		gint32* input_vector = (gint32*) malloc(SIZE_input_bit);
		gint32* positions = (gint32*) malloc(SIZE_input_bit);

		filtersettings[0] = filter_length;
		filtersettings[1] = threshhold;
		filtersettings[2] = input_length;
		filtersettings[3] = 0;



		//GPU-Init
		ocl = ocl_new(CL_DEVICE_TYPE_GPU,1);
		context = ocl_get_context(ocl);
		queue = ocl_get_cmd_queues (ocl)[0];
		clFinish(queue);

		program = ocl_create_program_from_file(ocl, "edel_kernel_secondder.cl", NULL, &errcode);
		OCL_CHECK_ERROR(errcode);

		filter1 = clCreateKernel(program, "second_filter", &errcode);
		OCL_CHECK_ERROR(errcode);

		//GPU-Buffer which can be done before the Computation
		settings = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, SIZE_settings_bit, filtersettings, &errcode);
		OCL_CHECK_ERROR(errcode);

		input = clCreateBuffer(context, CL_MEM_READ_ONLY, SIZE_input_bit, NULL, &errcode);
		OCL_CHECK_ERROR(errcode);


		if(debugmode != 0)
		{
			srand((unsigned) time( NULL ));
			counter = rand_rects(expected,1,input_length,3*filter_length,3*filter_length,3*filter_length,peak_length,base+peak, input_vector, noise, base, 0,positions);
			if(harddebug != 0)
			{
				for(i = 0; i < input_length;i++)
				{
					if(input_length < 10000)
					{
						printf("input_vector[%i] = %d\n",i,input_vector[i]);
					}
					else
					{
						printf("input_vector[%i] = %d\t",i,input_vector[i]);
					}
				}
			}

			printf("\n counts = %d\n", counter);
			printf("%lu Bits needed for Output-Vector \n", output_bit_on_counts);

		}

		output_bit_on_counts = sizeof(gint32) * safetyfactor * 2*((counter + 2));

		clEnqueueWriteBuffer(queue, input, CL_TRUE, 0, SIZE_input_bit, input_vector, 0, NULL, NULL);

		gint32* energy_time = (gint32*)malloc(output_bit_on_counts);


		for(i = 0; i < safetyfactor * (2*counter+2); i++)
		{
			energy_time[i] = -9999;
		}


		output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, output_bit_on_counts, NULL , &errcode);
		OCL_CHECK_ERROR(errcode);


		OCL_CHECK_ERROR(clSetKernelArg(filter1, 0, sizeof(cl_mem), &input));
		OCL_CHECK_ERROR(clSetKernelArg(filter1, 1, sizeof(cl_mem), &output));
		OCL_CHECK_ERROR(clSetKernelArg(filter1, 2, sizeof(cl_mem), &settings));

		size_t local_item_size;
		size_t global_item_size = (size_t) (input_length - 3*filter_length +1);

		local_item_size = ocl_get_local_size(global_item_size, 2,1);

		             
                if(debugmode != 0)
                {
                        printf("local item size = %lu \n %lu", &local_item_size, local_item_size);
                        if(local_item_size != 0)
                        {
                              printf("This works because you divide %lu / %lu \n and this is %lu", global_item_size,local_item_size, global_item_size/local_item_size);
                        }
                        else
                        {
                              	FILE* attention;
				attention = fopen("filterlengthbad", "a+");
				if(attention == NULL)
				{
					printf("error in opening debug file \n");
					exit(1);
				}
				fprintf(attention, "The filterlength %d is not good for this filter, choose another filterlength ! \n", filter_length);
				fclose(attention);
				printf("There is no way to fit it evenly divided to workgroups, just let OpenCL do it \n");
                        }
                        if(harddebug != 0)
                        {
                                getchar();
                        }

                }


		if(local_item_size == 0)
		{
			OCL_CHECK_ERROR(clEnqueueNDRangeKernel(queue, filter1, 1, NULL, &global_item_size, NULL, 0, NULL, NULL));	
		}
		else
		{
			OCL_CHECK_ERROR(clEnqueueNDRangeKernel(queue, filter1, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL));
		}


		
		
		//local_item_size = NULL;	
		clEnqueueReadBuffer(queue, output, CL_TRUE, 0, output_bit_on_counts, energy_time, 0, NULL, NULL);
		clEnqueueReadBuffer(queue, settings, CL_TRUE, 0, SIZE_settings_bit, filtersettings, 0, NULL, NULL);


		//Writing back the data
		for(i = 0; i < filtersettings[3]; i++)
		{
			writing_back(filemode, filename, filename_e,filename_t, energy_time,i);
		}

		if(debugmode != 0)
		{
			printf("The Positions are:\n");
			for(i=0; i < counter; i++)
			{
				printf("%d\t", positions[i]);
				printf("note that this postion is the middle of the rect \n");
			}
		}
		//Safetychanges
		if(filtersettings[3] > counter)
		{
			safetyfactor = safetyfactor + 5*(filtersettings[3] - counter);
			if(safetyfactor <= 0)
			{
				safetyfactor = 10;
			}

			notexpect = filtersettings[3] - expected;
			if(safemode != 0 && notexpect >= notexpect_max)
			{
				printf("The Filter found to many peaks it. It expected %d. It found %d times more than expected.\n", expected, notexpect);
				printf("Safemode is on. Exit program \n");
				OCL_CHECK_ERROR(clReleaseMemObject(input));
				OCL_CHECK_ERROR(clReleaseMemObject(output));
				OCL_CHECK_ERROR(clReleaseMemObject(settings));
				OCL_CHECK_ERROR(clReleaseKernel(filter1));
				OCL_CHECK_ERROR(clReleaseProgram(program));

				ocl_free(ocl);

				free(input_vector);
				free(energy_time);
				free(positions);
				free(filtersettings);

			}
			else
			{
				printf("The Filter found to many peaks it. It expected %d. It found %d times more than expected \n", expected, notexpect);
			}
		}






		OCL_CHECK_ERROR(clReleaseMemObject(input));
		OCL_CHECK_ERROR(clReleaseMemObject(output));
		OCL_CHECK_ERROR(clReleaseMemObject(settings));
		OCL_CHECK_ERROR(clReleaseKernel(filter1));
		OCL_CHECK_ERROR(clReleaseProgram(program));

		ocl_free(ocl);
		free(input_vector);
		free(energy_time);
		free(positions);
		free(filtersettings);




}