int main(int argc, char **argv) { wbArg_t args; int inputLength; float *hostInput1; float *hostInput2; float *hostOutput; args = wbArg_read(argc, argv); wbTime_start(Generic, "Importing data and creating memory on host"); hostInput1 = (float *)wbImport(wbArg_getInputFile(args, 0), &inputLength); hostInput2 = (float *)wbImport(wbArg_getInputFile(args, 1), &inputLength); hostOutput = (float *)malloc(inputLength * sizeof(float)); wbTime_stop(Generic, "Importing data and creating memory on host"); wbLog(TRACE, "The input length is ", inputLength); int nbGangs = (inputLength-1)/BLOCK_SIZE + 1; wbLog(TRACE, "Run ", nbGangs, " gangs of ", BLOCK_SIZE, " workers"); #pragma acc parallel loop copyin(hostInput1[0:inputLength]) copyin(hostInput2[0:inputLength]) copyout(hostOutput[0:inputLength]) num_gangs(nbGangs) num_workers(BLOCK_SIZE) for( int cpt = 0; cpt < inputLength; cpt++ ) hostOutput[cpt] = hostInput1[cpt] + hostInput2[cpt]; wbSolution(args, hostOutput, inputLength); free(hostInput1); free(hostInput2); free(hostOutput); return 0; }
int main(int argc, char **argv) { wbArg_t args; int inputLength; float *hostInput1; float *hostInput2; float *hostOutput; //float *deviceInput1; //float *deviceInput2; //float *deviceOutput; args = wbArg_read(argc, argv); wbTime_start(Generic, "Importing data and creating memory on host"); hostInput1 = (float *)wbImport(wbArg_getInputFile(args, 0), &inputLength); hostInput2 = (float *)wbImport(wbArg_getInputFile(args, 1), &inputLength); hostOutput = (float *)malloc(inputLength * sizeof(float)); wbTime_stop(Generic, "Importing data and creating memory on host"); wbLog(TRACE, "The input length is ", inputLength); vecadd(hostInput1, hostInput2, hostOutput, inputLength); wbSolution(args, hostOutput, inputLength); free(hostInput1); free(hostInput2); free(hostOutput); return 0; }
// Compares a solution against the expected-output file.
//
// Dispatches on the solution type string: "image" solutions are compared as
// images after their width, height and channel count are checked; "histogram",
// "integral_vector", "vector" and "matrix" solutions are handed to
// wbSolution_listCorrectQ with the matching element type. On failure a
// description is stored in _solution_correctQ.
//
// Returns wbTrue when the solution matches, wbFalse otherwise. An unknown
// solution type trips wbAssert and yields wbFalse.
static wbBool wbSolution_correctQ(char *expectedOutputFile,
                                  wbSolution_t sol) {
  // Guard clauses: nothing can be compared without a readable expected file.
  if (expectedOutputFile == NULL) {
    _solution_correctQ = "Failed to determined the expected output file.";
    return wbFalse;
  }
  if (!wbFile_existsQ(expectedOutputFile)) {
    _solution_correctQ =
        wbString("The file ", expectedOutputFile, " does not exist.");
    return wbFalse;
  }

  if (wbString_sameQ(wbSolution_getType(sol), "image")) {
    wbImage_t expectedImage = wbImport(expectedOutputFile);
    if (expectedImage == NULL) {
      _solution_correctQ = "Failed to open expected output file.";
      return wbFalse;
    }

    wbBool matches = wbFalse;
    if (wbImage_getWidth(expectedImage) != wbSolution_getWidth(sol)) {
      _solution_correctQ = "The image width of the expected image does not "
                           "match that of the solution.";
    } else if (wbImage_getHeight(expectedImage) !=
               wbSolution_getHeight(sol)) {
      _solution_correctQ = "The image height of the expected image does not "
                           "match that of the solution.";
    } else if (wbImage_getChannels(expectedImage) !=
               wbSolution_getChannels(sol)) {
      _solution_correctQ = "The image channels of the expected image does not "
                           "match that of the solution.";
    } else {
      // Dimensions agree, so the pixel data can be compared.
      wbImage_t solutionImage = (wbImage_t)wbSolution_getData(sol);
      wbAssert(solutionImage != NULL);
      matches = wbImage_sameQ(solutionImage, expectedImage,
                              _onUnsameImageFunction);
    }

    wbImage_delete(expectedImage);
    return matches;
  }

  if (wbString_sameQ(wbSolution_getType(sol), "histogram")) {
    return wbSolution_listCorrectQ<unsigned char>(expectedOutputFile, sol,
                                                  "Integer");
  }

  if (wbString_sameQ(wbSolution_getType(sol), "integral_vector")) {
    return wbSolution_listCorrectQ<int>(expectedOutputFile, sol, "Integer");
  }

  if (wbString_sameQ(wbSolution_getType(sol), "vector") ||
      wbString_sameQ(wbSolution_getType(sol), "matrix")) {
    return wbSolution_listCorrectQ<wbReal_t>(expectedOutputFile, sol, "Real");
  }

  // Unknown solution type.
  wbAssert(wbFalse);
  return wbFalse;
}
static wbBool wbSolution_correctQ(char * expectedOutputFile, wbSolution_t sol) { wbBool res; if (expectedOutputFile == NULL) { _solution_correctQ = json_string("Failed to determined the expected output file."); return wbFalse; } else if (!wbFile_existsQ(expectedOutputFile)) { string str = wbString("The file ", expectedOutputFile, " does not exist."); _solution_correctQ = json_string(str.c_str()); return wbFalse; } else if (wbString_sameQ(wbSolution_getType(sol), "image")) { wbImage_t solutionImage = NULL; wbImage_t expectedImage = wbImport(expectedOutputFile); if (expectedImage == NULL) { _solution_correctQ = json_string("Failed to open expected output file."); res = wbFalse; } else if (wbImage_getWidth(expectedImage) != wbSolution_getWidth(sol)) { _solution_correctQ = json_string("The image width of the expected image does not match that of the solution."); res = wbFalse; } else if (wbImage_getHeight(expectedImage) != wbSolution_getHeight(sol)) { _solution_correctQ = json_string("The image height of the expected image does not match that of the solution."); res = wbFalse; } else { solutionImage = (wbImage_t) wbSolution_getData(sol); wbAssert(solutionImage != NULL); res = wbImage_sameQ(solutionImage, expectedImage, _onUnsameImageFunction); } if (expectedImage != NULL) { wbImage_delete(expectedImage); } return res; } else if (wbString_sameQ(wbSolution_getType(sol), "vector") || wbString_sameQ(wbSolution_getType(sol), "matrix")) { wbReal_t * expectedData; int expectedRows, expectedColumns; expectedData = (wbReal_t *) wbImport(expectedOutputFile, &expectedRows, &expectedColumns); if (expectedData == NULL) { _solution_correctQ = json_string("Failed to open expected output file."); res = wbFalse; } else if (expectedRows != wbSolution_getRows(sol)) { wbLog(TRACE, "Number of rows in the solution is ", wbSolution_getRows(sol), ". 
Expected number of rows is ", expectedRows, "."); _solution_correctQ = json_string("The number of rows in the solution did not match that of the expected results."); res = wbFalse; } else if (expectedColumns != wbSolution_getColumns(sol)) { wbLog(TRACE, "Number of columns in the solution is ", wbSolution_getColumns(sol), ". Expected number of columns is ", expectedColumns, "."); _solution_correctQ = json_string("The number of columns in the solution did not match that of the expected results."); res = wbFalse; } else { int ii, jj, idx; wbReal_t * solutionData; solutionData = (wbReal_t *) wbSolution_getData(sol); for (ii = 0; ii < expectedRows; ii++) { for (jj = 0; jj < expectedColumns; jj++) { idx = ii * expectedColumns + jj; if (wbUnequalQ(expectedData[idx], solutionData[idx])) { string str; if (expectedColumns == 1) { str = wbString("The solution did not match the expected results at row ", ii, ". Expecting ", expectedData[idx], " but got ", solutionData[idx], "."); } else { str = wbString("The solution did not match the expected results at column ", jj, " and row ", ii, ". Expecting ", expectedData[idx], " but got ", solutionData[idx], "."); } _solution_correctQ = json_string(str.c_str()); res = wbFalse; goto matrixCleanup; } } } res = wbTrue; } matrixCleanup: if (expectedData != NULL) { wbFree(expectedData); } return res; } else { wbAssert(wbFalse); return wbFalse; } }
int main(int argc, char **argv) { wbArg_t args; int inputLength; float *hostInput1; float *hostInput2; float *hostOutput; float *deviceInput1; float *deviceInput2; float *deviceOutput; cl_context clctx; cl_context_properties properties[3]; cl_program program; cl_device_id device_id; cl_uint num_of_platforms; cl_uint num_of_devices; cl_command_queue command_queue; cl_kernel kernel; cl_int clerr = CL_SUCCESS; args = wbArg_read(argc, argv); wbTime_start(Generic, "Importing data and creating memory on host"); hostInput1 = (float *)wbImport(wbArg_getInputFile(args, 0), &inputLength); hostInput2 = (float *)wbImport(wbArg_getInputFile(args, 1), &inputLength); hostOutput = (float *)malloc(inputLength * sizeof(float)); wbTime_stop(Generic, "Importing data and creating memory on host"); wbLog(TRACE, "The input length is ", inputLength); for (int i = 0; i < inputLength; i++) { printf("position:%d [%f, %f]\n", i, hostInput1[i], hostInput2[i]); } /*printf("CL_INVALID_PROGRAM: %d\n", CL_INVALID_PROGRAM); printf("CL_INVALID_VALUE: %d\n", CL_INVALID_VALUE); printf("CL_INVALID_CONTEXT: %d \n", CL_INVALID_CONTEXT); */ wbTime_start(GPU, "Allocating GPU memory."); //@@ Allocate GPU memory here //get number of platforms if(clGetPlatformIDs(0, NULL, &num_of_platforms) != CL_SUCCESS) { printf("unable to get number of platforms\n"); return 1; } //now get all the platforms cl_platform_id platform[num_of_platforms]; if (clGetPlatformIDs(num_of_platforms, platform, NULL)!= CL_SUCCESS) { printf("Unable to get platform_id\n"); return 1; } // try to get a supported GPU device /*if (clGetDeviceIDs(&platform, CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices) != CL_SUCCESS) { printf("Unable to get device_id\n"); return 1; }*/ properties[0]= (cl_context_properties) CL_CONTEXT_PLATFORM; properties[1]= (cl_context_properties) platform[0]; properties[2]= (cl_context_properties) 0; clctx = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &clerr); if(clerr != CL_SUCCESS) { 
printf("error creating context.\n"); return 1; } size_t stuff; clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, 0, NULL, &stuff); if(clerr != CL_SUCCESS) { printf("error getting context info.\n"); return 1; } cl_device_id* cldevs = (cl_device_id *) malloc(stuff); clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, stuff, cldevs, NULL); if(clerr != CL_SUCCESS) { printf("could not get context infor.\n"); return 1; } command_queue = clCreateCommandQueue(clctx, cldevs[0], 0, &clerr); if (clerr != CL_SUCCESS) { printf("could not create command queue\n"); return 1; } program = clCreateProgramWithSource(clctx, 1, &vaddsrc, NULL, &clerr); printf("create program function: %d\n", clerr); //char clcompileflags[4096]; //sprintf(clcompileflags, "-cl-mad-enable"); clerr = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if( clerr != CL_SUCCESS) { printf("unable to build program.%d \n", clerr); return 1; } kernel = clCreateKernel(program, "vadd", &clerr); cl_mem d_A, d_B, d_C; int mem_size = inputLength*sizeof(float); d_A = clCreateBuffer(clctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, hostInput1, &clerr); d_B = clCreateBuffer(clctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, hostInput2, &clerr); d_C = clCreateBuffer(clctx, CL_MEM_WRITE_ONLY, mem_size, NULL, &clerr); //printf("%f", d_A[0]); wbTime_stop(GPU, "Allocating GPU memory."); wbTime_start(GPU, "Copying input memory to the GPU."); //@@ Copy memory to the GPU here //pritnf("%f", &d_A[0]); wbTime_stop(GPU, "Copying input memory to the GPU."); //@@ Initialize the grid and block dimensions here //size_t size = inputLength; const size_t grid = (inputLength - 1)/512+1; const size_t block = 512; wbTime_start(Compute, "Performing CUDA computation"); //@@ Launch the GPU Kernel here kernel = clCreateKernel(program, "vadd", NULL); clerr = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_A); clerr = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_B); clerr = clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_C); clerr = 
clSetKernelArg(kernel, 3, sizeof(size_t), &inputLength); cl_event event = NULL; clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &grid, &block, 0, NULL, NULL); //clFinish(command_queue); clerr = clWaitForEvents(1, &event); clerr = clReleaseEvent(event); cudaThreadSynchronize(); wbTime_stop(Compute, "Performing CUDA computation"); wbTime_start(Copy, "Copying output memory to the CPU"); //@@ Copy the GPU memory back to the CPU here clEnqueueReadBuffer(command_queue, d_C, CL_TRUE, 0, mem_size, hostOutput, 0, NULL, NULL); for(int i = 0; i < inputLength; i++) { //hostOutput[i] = hostInput2[i] + hostInput1[i]; printf("at pos:%d, [%f]\n", i, hostOutput[i]); } wbSolution(args, hostOutput, inputLength); wbTime_start(GPU, "Freeing GPU Memory"); //@@ Free the GPU memory here clReleaseMemObject(d_A); clReleaseMemObject(d_B); clReleaseMemObject(d_C); wbTime_stop(GPU, "Freeing CPU Memory"); free(hostInput1); free(hostInput2); free(hostOutput); return 0; }
// Two-argument convenience overload: forwards to the three-argument wbImport
// with a null column pointer and returns whatever that overload returns.
void * wbImport(const char * file, int * rows) {
    int * const noColumns = NULL;
    void * imported = wbImport(file, rows, noColumns);
    return imported;
}
// Three-argument convenience overload: forwards to the four-argument wbImport
// with the element type fixed to "Real" and returns its result.
void * wbImport(const char * file, int * rows, int * columns) {
    const char * elementType = "Real";
    void * imported = wbImport(file, rows, columns, elementType);
    return imported;
}
// Compares a list-like solution (vector or matrix) against the expected file.
//
// The expected data is imported with the given type string, its row and column
// counts are checked against the solution, and the elements are then compared
// one by one with wbUnequalQ. On failure a description is stored in
// _solution_correctQ.
//
// NOTE(review): T is used as the element type but is not declared in this
// block; presumably it comes from a template header just above - confirm.
//
// Returns wbTrue when every element matches, wbFalse otherwise.
static wbBool wbSolution_listCorrectQ(const char *expectedOutputFile,
                                      wbSolution_t sol, const char *type) {
  wbBool res;
  T *expectedData;
  int expectedRows, expectedColumns;

  expectedData = (T *)wbImport(expectedOutputFile, &expectedRows,
                               &expectedColumns, type);

  if (expectedData == NULL) {
    _solution_correctQ = "Failed to open expected output file.";
    res = wbFalse;
  } else if (expectedRows != wbSolution_getRows(sol)) {
    wbLog(TRACE, "Number of rows in the solution is ",
          wbSolution_getRows(sol), ". Expected number of rows is ",
          expectedRows, ".");
    _solution_correctQ = "The number of rows in the solution did not match "
                         "that of the expected results.";
    res = wbFalse;
  } else if (expectedColumns != wbSolution_getColumns(sol)) {
    wbLog(TRACE, "Number of columns in the solution is ",
          wbSolution_getColumns(sol), ". Expected number of columns is ",
          expectedColumns, ".");
    _solution_correctQ = "The number of columns in the solution did not "
                         "match that of the expected results.";
    res = wbFalse;
  } else {
    int ii, jj, idx;
    T *solutionData;

    solutionData = (T *)wbSolution_getData(sol);

    // Assume a match until the first differing element is found; the outer
    // loop stops as soon as res has been cleared.
    res = wbTrue;
    for (ii = 0; res == wbTrue && ii < expectedRows; ii++) {
      for (jj = 0; jj < expectedColumns; jj++) {
        idx = ii * expectedColumns + jj; // row-major index
        if (wbUnequalQ(expectedData[idx], solutionData[idx])) {
          string str;
          if (expectedColumns == 1) {
            // Single-column data is reported as a plain vector.
            str = wbString(
                "The solution did not match the expected results at row ",
                ii, ". Expecting ", expectedData[idx], " but got ",
                solutionData[idx], ".");
          } else {
            str = wbString("The solution did not match the expected "
                           "results at column ",
                           jj, " and row ", ii, ". Expecting ",
                           expectedData[idx], " but got ",
                           solutionData[idx], ".");
          }
          _solution_correctQ = str;
          res = wbFalse;
          break;
        }
      }
    }
  }

  // Release the imported data on every path, including the row/column
  // mismatch branches, which would otherwise leak it.
  if (expectedData != NULL) {
    wbFree(expectedData);
  }

  return res;
}