Exemple #1
0
/*
 * Entry point: imports two float vectors, adds them element-wise inside an
 * OpenACC parallel loop and hands the result to wbSolution.
 *
 * Returns 0 after the result has been submitted, 1 when the inputs could not
 * be imported, differ in length, or the output vector could not be allocated.
 */
int main(int argc, char **argv) {
  wbArg_t args;
  int inputLength = 0;
  int secondLength = 0; /* length of the second vector, kept apart so a mismatch is detectable */
  int inputsOk;
  int nbGangs;
  int status = 1;
  float *hostInput1 = NULL;
  float *hostInput2 = NULL;
  float *hostOutput = NULL;

  args = wbArg_read(argc, argv);

  wbTime_start(Generic, "Importing data and creating memory on host");
  hostInput1 = (float *)wbImport(wbArg_getInputFile(args, 0), &inputLength);
  hostInput2 = (float *)wbImport(wbArg_getInputFile(args, 1), &secondLength);
  inputsOk = hostInput1 != NULL && hostInput2 != NULL &&
             inputLength == secondLength && inputLength >= 0;
  if (inputsOk) {
    hostOutput = (float *)malloc((size_t)inputLength * sizeof(float));
  }
  wbTime_stop(Generic, "Importing data and creating memory on host");

  /* Errors are logged at TRACE because that is the only log level this code
     is known to have available. */
  if (!inputsOk) {
    wbLog(TRACE, "Invalid input: import failed or the two vectors differ in length");
    goto cleanup;
  }
  /* malloc(0) may legitimately return NULL, so only a non-empty request counts
     as an allocation failure. */
  if (hostOutput == NULL && inputLength > 0) {
    wbLog(TRACE, "Unable to allocate the output vector");
    goto cleanup;
  }

  wbLog(TRACE, "The input length is ", inputLength);

  /* Ceiling division: enough gangs of BLOCK_SIZE workers to cover every element. */
  nbGangs = (inputLength - 1) / BLOCK_SIZE + 1;

  wbLog(TRACE, "Run ", nbGangs, " gangs of ", BLOCK_SIZE, " workers");

  #pragma acc parallel loop copyin(hostInput1[0:inputLength]) copyin(hostInput2[0:inputLength]) copyout(hostOutput[0:inputLength])  num_gangs(nbGangs) num_workers(BLOCK_SIZE)
  for (int cpt = 0; cpt < inputLength; cpt++) {
    hostOutput[cpt] = hostInput1[cpt] + hostInput2[cpt];
  }

  wbSolution(args, hostOutput, inputLength);
  status = 0;

cleanup:
  /* free(NULL) is a no-op, so this is safe on every path. */
  free(hostInput1);
  free(hostInput2);
  free(hostOutput);

  return status;
}
/*
 * Entry point: imports two float vectors, delegates the element-wise sum to
 * vecadd and hands the result to wbSolution.
 *
 * Returns 0 after the result has been submitted, 1 when the inputs could not
 * be imported, differ in length, or the output vector could not be allocated.
 */
int main(int argc, char **argv) {
  wbArg_t args;
  int inputLength = 0;
  int secondLength = 0; /* length of the second vector, kept apart so a mismatch is detectable */
  int inputsOk;
  int status = 1;
  float *hostInput1 = NULL;
  float *hostInput2 = NULL;
  float *hostOutput = NULL;

  args = wbArg_read(argc, argv);

  wbTime_start(Generic, "Importing data and creating memory on host");
  hostInput1 = (float *)wbImport(wbArg_getInputFile(args, 0), &inputLength);
  hostInput2 = (float *)wbImport(wbArg_getInputFile(args, 1), &secondLength);
  inputsOk = hostInput1 != NULL && hostInput2 != NULL &&
             inputLength == secondLength && inputLength >= 0;
  if (inputsOk) {
    hostOutput = (float *)malloc((size_t)inputLength * sizeof(float));
  }
  wbTime_stop(Generic, "Importing data and creating memory on host");

  /* Errors are logged at TRACE because that is the only log level this code
     is known to have available. */
  if (!inputsOk) {
    wbLog(TRACE, "Invalid input: import failed or the two vectors differ in length");
    goto cleanup;
  }
  /* malloc(0) may legitimately return NULL, so only a non-empty request counts
     as an allocation failure. */
  if (hostOutput == NULL && inputLength > 0) {
    wbLog(TRACE, "Unable to allocate the output vector");
    goto cleanup;
  }

  wbLog(TRACE, "The input length is ", inputLength);
  vecadd(hostInput1, hostInput2, hostOutput, inputLength);
  wbSolution(args, hostOutput, inputLength);
  status = 0;

cleanup:
  /* free(NULL) is a no-op, so this is safe on every path. */
  free(hostInput1);
  free(hostInput2);
  free(hostOutput);

  return status;
}
Exemple #3
0
/*
 * Checks the computed data against the expected output named in `arg` and
 * records the verdict in the global _solution_correctQ JSON object.
 *
 * arg      parsed arguments; must provide a type, an expected-output file and
 *          an output file (each is asserted to be non-NULL)
 * data     computed result, forwarded unchanged to the comparison overload
 * rows     forwarded unchanged to the comparison overload
 * columns  forwarded unchanged to the comparison overload
 *
 * Returns the result of the comparison overload.
 *
 * After the call _solution_correctQ is an object with the keys "CorrectQ"
 * (true/false) and "Message".
 */
wbBool wbSolution(wbArg_t arg, void * data, int rows, int columns) {
    char * type;
    wbBool res;
    json_t * msg;
    char * expectedOutputFile;
    char * outputFile;

    expectedOutputFile = wbArg_getExpectedOutputFile(arg);
    outputFile = wbArg_getOutputFile(arg);
    type = wbArg_getType(arg);

    wbAssert(type != NULL);
    wbAssert(expectedOutputFile != NULL);
    wbAssert(outputFile != NULL);

    res = wbSolution(expectedOutputFile, outputFile, type, data, rows, columns);

    if (res) {
        msg = json_string("Solution is correct.");
    } else {
        /* Reuse whatever _solution_correctQ held before as the message.
           NOTE(review): presumably the comparison overload stores its failure
           description there - confirm. */
        msg = _solution_correctQ;
    }

    _solution_correctQ = json_object();

    /* The *_new variants take over the caller's reference. The plain
       json_object_set() used previously added a second reference that was
       never dropped, so the message value leaked. */
    json_object_set_new(_solution_correctQ, "CorrectQ", res ? json_true() : json_false());
    json_object_set_new(_solution_correctQ, "Message", msg);

    /* An array of input file names used to be built here, but it was never
       attached to the result nor released, so it only leaked. Rebuild it when
       the "InputFiles" / "ExpectedOutput" / "OutputFile" fields are
       actually emitted. */

    return res;
}
Exemple #4
0
/*
 * Releases, via wbDelete, everything `arg` refers to: each input file entry,
 * the input file list, the output file, the expected output file and the
 * type. Every released field is reset afterwards (count to 0, pointers to
 * NULL).
 */
void wbArg_delete(wbArg_t arg) {
  /* Input files are only touched when there is a non-empty, non-NULL list. */
  if (wbArg_getInputCount(arg) > 0) {
    if (wbArg_getInputFiles(arg) != NULL) {
      int idx = 0;
      while (idx < wbArg_getInputCount(arg)) {
        wbDelete(wbArg_getInputFile(arg, idx));
        idx++;
      }
      wbDelete(wbArg_getInputFiles(arg));
      wbArg_setInputCount(arg, 0);
      wbArg_setInputFiles(arg, NULL);
    }
  }

  if (wbArg_getOutputFile(arg) != NULL) {
    wbDelete(wbArg_getOutputFile(arg));
    wbArg_setOutputFile(arg, NULL);
  }

  if (wbArg_getExpectedOutputFile(arg) != NULL) {
    wbDelete(wbArg_getExpectedOutputFile(arg));
    wbArg_setExpectedOutputFile(arg, NULL);
  }

  if (wbArg_getType(arg) != NULL) {
    wbDelete(wbArg_getType(arg));
    wbArg_setType(arg, NULL);
  }
}
int main(int argc, char **argv) {
    wbArg_t args;
    int inputLength;
    float *hostInput1;
    float *hostInput2;
    float *hostOutput;
    float *deviceInput1;
    float *deviceInput2;
    float *deviceOutput;

    cl_context clctx;
    cl_context_properties properties[3];
    cl_program program;

    cl_device_id device_id;
    cl_uint num_of_platforms;
    cl_uint num_of_devices;
    cl_command_queue command_queue;
    cl_kernel kernel;
    cl_int clerr = CL_SUCCESS;

    args = wbArg_read(argc, argv);

    wbTime_start(Generic, "Importing data and creating memory on host");
    hostInput1 = (float *)wbImport(wbArg_getInputFile(args, 0), &inputLength);
    hostInput2 = (float *)wbImport(wbArg_getInputFile(args, 1), &inputLength);
    hostOutput = (float *)malloc(inputLength * sizeof(float));
    wbTime_stop(Generic, "Importing data and creating memory on host");

    wbLog(TRACE, "The input length is ", inputLength);
    for (int i = 0; i < inputLength; i++)
    {
        printf("position:%d [%f, %f]\n", i, hostInput1[i], hostInput2[i]);
    }

    /*printf("CL_INVALID_PROGRAM: %d\n", CL_INVALID_PROGRAM);
    printf("CL_INVALID_VALUE: %d\n", CL_INVALID_VALUE);
    printf("CL_INVALID_CONTEXT: %d \n", CL_INVALID_CONTEXT);
    */
    wbTime_start(GPU, "Allocating GPU memory.");
    //@@ Allocate GPU memory here
    //get number of platforms
    if(clGetPlatformIDs(0, NULL, &num_of_platforms) != CL_SUCCESS)
    {
        printf("unable to get number of platforms\n");
        return 1;
    }

    //now get all the platforms
    cl_platform_id platform[num_of_platforms];
    if (clGetPlatformIDs(num_of_platforms, platform, NULL)!= CL_SUCCESS)
    {
        printf("Unable to get platform_id\n");
        return 1;
    }

    // try to get a supported GPU device
    /*if (clGetDeviceIDs(&platform, CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices) != CL_SUCCESS)
    {
    	printf("Unable to get device_id\n");
    	return 1;
    }*/

    properties[0]= (cl_context_properties) CL_CONTEXT_PLATFORM;
    properties[1]= (cl_context_properties) platform[0];
    properties[2]= (cl_context_properties) 0;

    clctx = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &clerr);
    if(clerr != CL_SUCCESS)
    {
        printf("error creating context.\n");
        return 1;
    }

    size_t stuff;
    clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, 0, NULL, &stuff);
    if(clerr != CL_SUCCESS)
    {
        printf("error getting context info.\n");
        return 1;
    }

    cl_device_id* cldevs = (cl_device_id *) malloc(stuff);
    clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, stuff, cldevs, NULL);
    if(clerr != CL_SUCCESS)
    {
        printf("could not get context infor.\n");
        return 1;
    }

    command_queue = clCreateCommandQueue(clctx, cldevs[0], 0, &clerr);
    if (clerr != CL_SUCCESS)
    {
        printf("could not create command queue\n");
        return 1;
    }

    program = clCreateProgramWithSource(clctx, 1, &vaddsrc, NULL, &clerr);
    printf("create program function: %d\n", clerr);
    //char clcompileflags[4096];
    //sprintf(clcompileflags, "-cl-mad-enable");

    clerr = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if( clerr != CL_SUCCESS)
    {

        printf("unable to build program.%d \n", clerr);
        return 1;
    }

    kernel = clCreateKernel(program, "vadd", &clerr);


    cl_mem d_A, d_B, d_C;
    int mem_size = inputLength*sizeof(float);
    d_A = clCreateBuffer(clctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, hostInput1, &clerr);
    d_B = clCreateBuffer(clctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, hostInput2, &clerr);
    d_C = clCreateBuffer(clctx, CL_MEM_WRITE_ONLY, mem_size, NULL, &clerr);
    //printf("%f", d_A[0]);

    wbTime_stop(GPU, "Allocating GPU memory.");

    wbTime_start(GPU, "Copying input memory to the GPU.");

    //@@ Copy memory to the GPU here

    //pritnf("%f", &d_A[0]);
    wbTime_stop(GPU, "Copying input memory to the GPU.");

    //@@ Initialize the grid and block dimensions here
    //size_t size = inputLength;
    const size_t grid = (inputLength - 1)/512+1;
    const size_t block = 512;


    wbTime_start(Compute, "Performing CUDA computation");
    //@@ Launch the GPU Kernel here

    kernel = clCreateKernel(program, "vadd", NULL);
    clerr = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_A);
    clerr = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_B);
    clerr = clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_C);
    clerr = clSetKernelArg(kernel, 3, sizeof(size_t), &inputLength);

    cl_event event = NULL;
    clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &grid, &block, 0, NULL, NULL);
    //clFinish(command_queue);
    clerr = clWaitForEvents(1, &event);
    clerr = clReleaseEvent(event);
    cudaThreadSynchronize();
    wbTime_stop(Compute, "Performing CUDA computation");

    wbTime_start(Copy, "Copying output memory to the CPU");
    //@@ Copy the GPU memory back to the CPU here
    clEnqueueReadBuffer(command_queue, d_C, CL_TRUE, 0, mem_size, hostOutput, 0, NULL, NULL);

    for(int i = 0; i < inputLength; i++)
    {
        //hostOutput[i] = hostInput2[i] + hostInput1[i];
        printf("at pos:%d, [%f]\n", i, hostOutput[i]);
    }


    wbSolution(args, hostOutput, inputLength);
    wbTime_start(GPU, "Freeing GPU Memory");
    //@@ Free the GPU memory here
    clReleaseMemObject(d_A);
    clReleaseMemObject(d_B);
    clReleaseMemObject(d_C);

    wbTime_stop(GPU, "Freeing CPU Memory");



    free(hostInput1);
    free(hostInput2);
    free(hostOutput);

    return 0;
}