/*
 * setExeEnvPiCalGMSP - set up the OpenCL program for the Pi-calculation kernel.
 *
 * In the lines visible here the routine:
 *   1. allocates *devices and fills it with the GPU device IDs of *platforms,
 *   2. prints the name of the first device and the device count,
 *   3. reads the kernel source from pieCalKernelPath, creates *program from it
 *      on *context and builds it for the first device.
 *
 * Parameters:
 *   context      - *context is read when the program is created.
 *   numDevices   - *numDevices is read as the number of device IDs to fetch.
 *   devices      - *devices receives a malloc'ed array of cl_device_id.
 *   program      - *program receives the created (and then built) program.
 *   numPlatforms - not referenced in the visible lines.
 *   platforms    - *platforms is the platform whose GPU devices are queried.
 *   err          - *err receives the status of each OpenCL call.
 *
 * NOTE(review): this listing appears to be missing lines (the function's
 * braces, the call that should precede the first status check, and the call
 * that creates the context) - confirm against the original file before
 * relying on the control flow shown here.
 */
void setExeEnvPiCalGMSP(cl_context *context, cl_uint *numDevices, cl_device_id **devices, cl_program *program,cl_uint *numPlatforms,cl_platform_id *platforms,cl_int *err)
	/* NOTE(review): pbuff and count are not used in the visible lines. */
	char            pbuff[100];   //holds platform information (platform name)
        char            dbuff[100];   //holds device information (device name)
	int count;

	printf("\t---------------------------Device Deatils---------------------------\n\n");
        /*  Get the number of OpenCL Platforms Available */
        /* NOTE(review): *err is checked here, but no OpenCL call precedes this
         * line in the visible code. */
        OPENCL_CHECK_STATUS("error while getting device info",*err);

        /* Allocate room for *numDevices device IDs.
         * NOTE(review): the malloc lives inside assert(), so building with
         * NDEBUG would remove the allocation altogether. */
        assert(((*devices)= (cl_device_id *) malloc( sizeof(cl_device_id ) *(*numDevices))) != NULL);
        /* Fetch the GPU device IDs of *platforms.
         * NOTE(review): this status is overwritten by the next call without
         * being checked. */
        *err = clGetDeviceIDs( *platforms, CL_DEVICE_TYPE_GPU, (*numDevices), *devices, 0);

        /* Get device Name */
        /* *devices[0] parses as *(devices[0]), i.e. the first ID in the array. */
        *err = clGetDeviceInfo(*devices[0], CL_DEVICE_NAME, sizeof(dbuff), &dbuff, NULL);
        OPENCL_CHECK_STATUS("error while getting device info",*err);
	printf("\tDevice used                              :  %s\n",dbuff);
	/*create context*/
	/* NOTE(review): no context-creation call is visible below this comment;
	 * *context is only tested. */
	printf("\tNumber of GPU  devices used              :  %d\n\n",*numDevices);

        if ( *err != CL_SUCCESS || *context == 0)
                printf("\n\t No GPU detected ");
                /* NOTE(review): `context` and `err` are pointers here but are
                 * printed with %d - probably *err (and a pointer format for the
                 * context) was intended; confirm. */
                printf("\n\t Context : %d , Err : %d",context, err);

	/*create program with source*/
	/* NOTE(review): the result of readKernelSource is passed to strlen without
	 * a NULL check, and it is not freed in the visible lines. */
	char* programSource = readKernelSource(pieCalKernelPath);
        size_t sourceSize =  strlen(programSource) ;
        *program = clCreateProgramWithSource(*context, 1,(const char **) &programSource, &sourceSize, err);

        OPENCL_CHECK_STATUS("error while creating program",*err);
	/*build program*/
        /* devices[0] is *devices, the start of the ID array; with a device count
         * of 1 only the first device is targeted. */
        *err = clBuildProgram(*program,1,devices[0],NULL,NULL,NULL);
        OPENCL_CHECK_STATUS("error while building  program",*err);
/*
 * main - load a graph, optionally compute shortest paths on the CPU, then run
 * the "betweeness" OpenCL kernel on a device and print its per-node output.
 *
 * Visible steps: parse the graph from GRAPH_PATH; under COMPUTE_C build a
 * totalNodes x totalNodes distance table with shortestPath(); build the `all`
 * pair array; pick a device with availClDevices(); read the kernel text from
 * KERNEL_FILE; create context, command queue, program and kernel; create and
 * fill the device buffers; set four kernel arguments; enqueue the kernel and
 * read back totalNodes results.
 *
 * Returns EXIT_FAILURE on the error paths shown.
 *
 * NOTE(review): this listing appears to be missing lines - the function's
 * braces, the braces/else of most conditionals, the #endif matching
 * `#ifdef COMPUTE_C`, and the final return. Several `return EXIT_FAILURE;`
 * statements therefore look unconditional as shown. Confirm the real control
 * flow against the original file.
 */
int main()
	unsigned long int i, j; /* jokers */
	unsigned long long int k; /* another joker */
	int err;
	extern unsigned long int totalNodes; /* declared in loadNGCE.c */
	node *graph = parseNGCEoutput(GRAPH_PATH, &err); /* create graph */
	if  (graph == NULL)
		return EXIT_FAILURE;
	fprintf(stdout, "Took %4.2f seconds to load graph from file\n", timexGet());

#ifdef COMPUTE_C
	/* computing betweenness centrality on cpu */
	fprintf(stdout, "Computing shortest paths and betweeness centrality on cpu, coded in c\n");
	/* computedSP[i][j] holds the path length between nodes i and j;
	 * -1 marks a pair that has not been computed yet.
	 * NOTE(review): neither malloc result is checked. */
	signed long long int **computedSP = malloc(totalNodes * sizeof(signed long long int *));
	signed long long int pathLength;
	for (i = 0; i < totalNodes; i++)
		computedSP[i] = malloc(totalNodes * sizeof(signed long long int)); /* initialization */
		for (j = 0; j < totalNodes; j++)
			/* NOTE(review): presumably an `else` sat between the two
			 * assignments below; as shown the second always overwrites. */
			if (i == j)
				computedSP[i][j] = 0; /* 0 distance to self */
				computedSP[i][j] = -1; /* unexplored path */
	for (i = 0; i < totalNodes; i++)
		for (j = 0; j < totalNodes; j++)
			if (computedSP[i][j] == -1) /* not known */
				/* Nodes without peers are skipped. NOTE(review): presumably
				 * the shortestPath() call is the else branch; if so,
				 * pathLength is stored below without being set on the
				 * no-peer path - confirm. */
				if ((graph[i].peerCount == 0) || (graph[j].peerCount ==0))
				{} /* oops. no connection */
					pathLength = shortestPath(&graph[i], &graph[j], graph); /* calculate */
				/* the table is symmetric: store both directions */
				computedSP[i][j] = pathLength; /* set this */
				computedSP[j][i] = pathLength; /* and reverse to result */
	fprintf(stdout, "Took %2f seconds to compute shortest paths and betweeness centrality\n", timexGet());
		for (i = 0; i < totalNodes; i++)
			fprintf(stdout, "Node %4lu has a betweeness centrality of %4lu\n", graph[i].name, graph[i].spCount);
	/* creating all shortest paths-pairs */
	/* NOTE(review): `all` has room for totalNodes * 3 pairs, but the loops as
	 * shown advance k over totalNodes * totalNodes entries - this would write
	 * past the allocation once totalNodes > 3. Confirm the intended bounds.
	 * The malloc result is also unchecked. */
	k = 0;
	pair *all = malloc(totalNodes * 3 * sizeof(pair));
	for (i = 0; i < totalNodes; i++)
		for (j = 0; j < totalNodes; j++, k++)
			all[k].a = i;
			all[k].b = j;
	/* choosing compute device */
	/* NOTE(review): the malloc'ed pointer is overwritten by the return value
	 * of availClDevices() on the next line, so that allocation is leaked. */
	cl_device_id *devID = malloc(sizeof(cl_device_id));
	devID = availClDevices(&err);
		/* print a summary of the chosen device; the status returned by
		 * getDeviceInfo() is stored in err but not tested in the visible lines */
		clDeviceSpecs info;
		err = getDeviceInfo(&info, devID);
		fprintf(stdout, "%s:%d: Working on: %s %s. Memory: %lluM. Work items: %zu. Compute units: %d. Local memory (cache): %lluM. Maximum malloc: %lluM\n",
			__FILE__, __LINE__, info.vendor, info.name, info.memory/1024/1024, info.workGroupSize, info.compUnits, info.localMemory/1024/1024, info.maxMalloc/1024/1024);
	/* demo loading kernel source from text file */
	char *fnSource = readKernelSource(KERNEL_FILE, &err);
	/* one-element array so the text can be passed as `const char **` */
	char **kernelSource = malloc(sizeof(char *)); /* opencl bs */
	kernelSource[0] = fnSource;
	/* NOTE(review): err is only tested after fnSource has been stored. */
	if (err != READSOURCE_OK)
		return EXIT_FAILURE;
		fprintf(stdout, "%s:%d: Kernel source code following:\n%s\n", __FILE__, __LINE__, fnSource);
	/* the context */
	cl_context context;
	context = clCreateContext(0, 1, devID, NULL, NULL, &err);
	if (err != CL_SUCCESS)
		fprintf(stderr, "%s:%d: Error creating context: %s\n", __FILE__, __LINE__, strerrorCL(&err));
		return EXIT_FAILURE;

	/* the command queue */
	cl_command_queue commands;
	commands = clCreateCommandQueue(context, *devID, 0, &err);
    if (!commands)
        fprintf(stderr, "%s:%d: Error creating command queue: %s\n", __FILE__, __LINE__, strerrorCL(&err));
        return EXIT_FAILURE;

	/* the actual program, checking */
	/* The lengths argument is NULL, so the source is handed over as a single
	 * string without an explicit length. */
	cl_program program;
	program = clCreateProgramWithSource(context, 1, (const char **) kernelSource, NULL, &err);
    if (!program)
        fprintf(stderr, "%s:%d: Error creating program: %s\n", __FILE__, __LINE__, strerrorCL(&err));
        return EXIT_FAILURE;
	/* the actual program, building */
	err = clBuildProgram(program, 1, devID, NULL, NULL, NULL); /* clBuildProgam(program, 0, NULL, NULL, NULL, NULL); */
    if (err != CL_SUCCESS)
        /* on failure, fetch and print the build log (at most sizeof(buffer)
         * bytes); len is filled in but not used in the visible lines */
        size_t len;
        char buffer[2048];
        fprintf(stderr, "%s:%d: Error building program executable: %s\n", __FILE__, __LINE__, strerrorCL(&err));
        clGetProgramBuildInfo(program, *devID, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        fprintf(stderr, "%s:%d: Build log following: %s\n", __FILE__, __LINE__, buffer);
        return EXIT_FAILURE;
	/* the kernel */
	cl_kernel kernel;
	kernel = clCreateKernel(program, "betweeness", &err);
	/* cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) */	 
    if (!kernel || err != CL_SUCCESS)
        fprintf(stderr, "%s:%d: Error creating compute kernel: %s\n", __FILE__, __LINE__, strerrorCL(&err));
        return EXIT_FAILURE;

	/* Device buffers: the graph and the pair list are inputs, the per-node
	 * result array is the output. */
	cl_mem inputGraph;
	cl_mem inputPairs;
    cl_mem outputLengths;
	size_t inputGraphSize = sizeof(node) * totalNodes;
	size_t inputPairsSize = sizeof(pair) * totalNodes * 3;
	size_t outputLengthsSize = sizeof(unsigned long int) * totalNodes;
	/* NOTE(review): errcode_ret is NULL in all three calls, so err is not
	 * updated; the message below would report a stale err value. */
	inputGraph = clCreateBuffer(context,  CL_MEM_READ_ONLY,  inputGraphSize, NULL, NULL);
	inputPairs = clCreateBuffer(context, CL_MEM_READ_ONLY, inputPairsSize, NULL, NULL);
    outputLengths = clCreateBuffer(context, CL_MEM_WRITE_ONLY, outputLengthsSize, NULL, NULL);
    if (!inputGraph || !inputPairs || !outputLengths)
        fprintf(stderr, "%s:%d: Error allocating device memory: %s\n", __FILE__, __LINE__, strerrorCL(&err));
        return EXIT_FAILURE;
    /* blocking (CL_TRUE) copies of the host arrays into the input buffers */
    err = clEnqueueWriteBuffer(commands, inputGraph, CL_TRUE, 0, inputGraphSize, graph, 0, NULL, NULL);
    if (err != CL_SUCCESS)
        fprintf(stderr, "%s:%d: Error writing to source array: %s\n", __FILE__, __LINE__, strerrorCL(&err));
        return EXIT_FAILURE;
	err = clEnqueueWriteBuffer(commands, inputPairs, CL_TRUE, 0, inputPairsSize, all, 0, NULL, NULL);
    if (err != CL_SUCCESS)
        fprintf(stderr, "%s:%d: Error writing to source array: %s\n", __FILE__, __LINE__, strerrorCL(&err));
        return EXIT_FAILURE;
    /* Kernel arguments: 0 = graph, 1 = pairs, 2 = output, 3 = node count.
     * NOTE(review): the plain `=` on the argument-1 line discards the status
     * of argument 0; `|=` was probably intended, as on the following lines. */
    err = 0;
    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputGraph);
	err  = clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputPairs);
    err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &outputLengths);
    err |= clSetKernelArg(kernel, 3, sizeof(unsigned long int), &totalNodes);
    if (err != CL_SUCCESS)
        fprintf(stderr, "%s:%d: Error setting kernela arguments: %s\n", __FILE__, __LINE__, strerrorCL(&err));
        return EXIT_FAILURE;
	/* `local` is queried here but the enqueue below passes NULL for the local
	 * work size, so the queried value is not used in the visible lines. */
	size_t local;
    err = clGetKernelWorkGroupInfo(kernel, *devID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
    if (err != CL_SUCCESS)
        fprintf(stderr, "%s:%d: Error retrieving kernel work group info: %s\n", __FILE__, __LINE__, strerrorCL(&err));
        return EXIT_FAILURE;
    /* one work item per entry of the pair buffer (totalNodes * 3).
     * NOTE(review): `global` is unsigned long long but its address is passed
     * where the API takes a `const size_t *` - confirm the types agree on the
     * target platform. */
    unsigned long long int global = totalNodes * 3;
    err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
    if (err)
		fprintf(stderr, "%s:%d: Error executing kernel: %s\n", __FILE__, __LINE__, strerrorCL(&err));
        return EXIT_FAILURE;
	/* blocking read of the per-node results back to the host.
	 * NOTE(review): the malloc is unchecked, and a failed read only prints a
	 * message - the loop below would still print betw[]. */
	unsigned long int *betw = malloc(totalNodes * sizeof(unsigned long int));
    err = clEnqueueReadBuffer( commands, outputLengths, CL_TRUE, 0, sizeof(unsigned long int) * totalNodes, betw, 0, NULL, NULL );  
    if (err != CL_SUCCESS)
        printf("Error: Failed to read output array! %d\n", err);
	for (i = 0; i < totalNodes; i++)
		printf("computed a betweeness centrality of: %lu\n", betw[i]);
	/* NOTE(review): no host memory or OpenCL object is released in the
	 * visible lines; the only cleanup is the commented-out free below. */
//	free(computedSP);
/*
 * setExeEnvMatTransposeGMSP - set up the OpenCL program for the global-memory
 * matrix-transpose kernel.
 *
 * In the lines visible here the routine:
 *   1. queries the number of platforms and their IDs,
 *   2. for each platform asks for its GPU device count, then allocates
 *      *devices and fetches the device IDs,
 *   3. prints the first device's name and the device count,
 *   4. reads the kernel source from transposeMatGlobalMemKernelPath, creates
 *      *program from it on *context and builds it for the first device.
 *
 * Parameters:
 *   context          - *context is read when the program is created.
 *   numDevices       - *numDevices receives the GPU device count.
 *   devices          - *devices receives a malloc'ed array of cl_device_id.
 *   program          - *program receives the created (and then built) program.
 *   numPlatforms     - *numPlatforms receives the platform count.
 *   selectedPlatform - not referenced in the visible lines.
 *   err              - *err receives the status of each OpenCL call.
 *
 * NOTE(review): this listing appears to be missing lines (the function's
 * braces, else branches, the platform-info query, the platform selection and
 * the context creation) - confirm against the original file before relying
 * on the control flow shown here.
 */
void setExeEnvMatTransposeGMSP(cl_context *context, cl_uint *numDevices, cl_device_id **devices, cl_program *program,cl_uint *numPlatforms,cl_platform_id *selectedPlatform,cl_int *err)
        /* NOTE(review): pbuff is printed further down but never written in
         * the visible lines. */
        char            pbuff[100];   //holds platform information (platform name)
        char            dbuff[100];   //holds device information (device name)
        cl_platform_id *platforms;
        int count;
	printf("\t---------------------------Device Deatils------------------------------\n\n");
         /*  Get the number of OpenCL Platforms Available */
        *err = clGetPlatformIDs ( 0, 0, numPlatforms);
        if( *err != CL_SUCCESS || *numPlatforms == 0) {
                printf(" \n\t\t No Platform Found \n");
                /* NOTE(review): presumably the allocation and enumeration below
                 * belong to an else branch that is not visible here. */
                if( *numPlatforms == 0)
                        printf(" \n\t\t No Platform Found \n");
                        /* Allocate the space for available platform*/
                        /* NOTE(review): the malloc lives inside assert(), so
                         * building with NDEBUG would remove the allocation. */
                        assert( (platforms = (cl_platform_id *) malloc( sizeof(cl_platform_id) * (*numPlatforms))) != NULL);
                        /*  Get available OpenCL Platforms IDs*/
                        *err = clGetPlatformIDs( *numPlatforms, platforms, NULL);
                        OPENCL_CHECK_STATUS(" Failed to get Platform IDs",*err);
                        /* NOTE(review): count is int while *numPlatforms is
                         * cl_uint, so this comparison mixes signedness. */
                        for ( count = 0 ; count < *numPlatforms ; count++)
                                /* get platform info*/
                                /* NOTE(review): no clGetPlatformInfo call is
                                 * visible before this status check. */
                                OPENCL_CHECK_STATUS ("clGetPlatformInfo Failed ",*err);
                                /* get device id and info*/
                                /* first call: only ask how many GPU devices exist */
                                *err = clGetDeviceIDs( platforms[count], CL_DEVICE_TYPE_GPU,0,0,numDevices);
                                /* NOTE(review): as shown, the failure/zero-device
                                 * test leads straight into the allocation;
                                 * presumably the failure branch and an else were
                                 * lost. The malloc is again inside assert(). */
                                if( *err != CL_SUCCESS  || *numDevices == 0)
                                       assert(((*devices)= (cl_device_id *) malloc( sizeof(cl_device_id ) *(*numDevices))) != NULL);
                                        /* second call: fetch the device IDs themselves */
                                        *err = clGetDeviceIDs( platforms[count], CL_DEVICE_TYPE_GPU, (*numDevices), *devices, 0);
                                        /* get selected platform*/
                                        printf("\tPlatform used                            :  %s\n",pbuff);

        /* Get device Name */
        /* *devices[0] parses as *(devices[0]), i.e. the first ID in the array. */
        *err = clGetDeviceInfo(*devices[0], CL_DEVICE_NAME, sizeof(dbuff), &dbuff, NULL);
        OPENCL_CHECK_STATUS("error while getting device info",*err);
        printf("\tDevice used                              :  %s\n",dbuff);

        /*create context*/
        /* NOTE(review): no context-creation call is visible below this comment;
         * *context is only tested. */
        printf("\tNumber of GPU  devices used              :  %d\n",*numDevices);
        if ( *err != CL_SUCCESS || *context == 0)
                printf("\n\t No GPU detected ");
                /* NOTE(review): `context` is a pointer here but is printed with
                 * %d - confirm the intended argument and format. */
                printf("\n\t Context : %d , Err : %d",context, *err);

        /*create program with source*/
        /* NOTE(review): the result of readKernelSource is passed to strlen
         * without a NULL check, and it is not freed in the visible lines. */
	 char* programSource = readKernelSource(transposeMatGlobalMemKernelPath);
        size_t sourceSize =  strlen(programSource) ;
	 *program = clCreateProgramWithSource(*context, 1,(const char **) &programSource, &sourceSize, err);
       // *program = clCreateProgramWithSource(*context,1,(const char **) &ProgramSourceMatMatAdd,NULL,err );
        OPENCL_CHECK_STATUS("error while creating program",*err);

        /*build program*/
        /* devices[0] is *devices, the start of the ID array; with a device count
         * of 1 only the first device is targeted. */
        *err = clBuildProgram(*program,1,devices[0],NULL,NULL,NULL);
        OPENCL_CHECK_STATUS("error while building  program",*err);
