void PA::close() { if (is_open()) { Pa_StopStream(stream_); Pa_CloseStream(stream_); if (stream_type_ == PA_STREAM_TYPE_INPUT) { printf("PA::close() : close input device...in:%d:%s\n", input_device_idx_, input_device_info().name().c_str()); } else if (stream_type_ == PA_STREAM_TYPE_OUTPUT) { printf("PA::close() : close output device...out:%d:%s\n", output_device_idx_, output_device_info().name().c_str()); } else if (stream_type_ == PA_STREAM_TYPE_WIRE) { printf("PA::close() : close wire device...in:%d:%s, out:%d:%s\n", input_device_idx_, input_device_info().name().c_str(), output_device_idx_, output_device_info().name().c_str()); } stream_ = NULL; stream_type_ = PA_STREAM_TYPE_NONE; input_device_idx_ = -1; output_device_idx_ = -1; channels_ = 0; } }
bool PA::open_output(const int &dev_idx, const int &channels, const int &sampling_rate, const int &buf_size) { PaError err; PaStreamParameters output_param; if (is_open()) return false; if (dev_idx < 0 || get_device_count() <= dev_idx) { printf("error : PA::open_output() : out of index...dev_idx=%d", dev_idx); return false; } output_param.device = dev_idx; output_param.channelCount = channels; output_param.sampleFormat = paFloat32; output_param.suggestedLatency = Pa_GetDeviceInfo(output_param.device)->defaultLowOutputLatency; output_param.hostApiSpecificStreamInfo = NULL; err = Pa_OpenStream( &stream_, NULL, &output_param, sampling_rate, buf_size, // frames_per_buffer paClipOff, play_callback_, this); if (err != paNoError) { printf("error : PA::open_output() : Pa_OpenStream() failed...dev_idx=%d", dev_idx); close(); return false; } err = Pa_StartStream(stream_); if (err != paNoError) { printf("error : PA::open_output() : Pa_StartStream() failed...dev_idx=%d", dev_idx); close(); return false; } stream_type_ = PA_STREAM_TYPE_OUTPUT; output_device_idx_ = dev_idx; channels_ = channels; PADeviceInfo info = output_device_info(); printf("PA::open_output() : open device...dev_idx=%d, name=%s\n", output_device_idx_, info.name().c_str()); return true; }
int main(int argc, char** argv) { // Error code returned from openCL calls int err; // A, B and C arrays float *hA = (float *)calloc(LENGTH, sizeof(float)); float *hB = (float *)calloc(LENGTH, sizeof(float)); float *hC = (float *)calloc(LENGTH, sizeof(float)); // Define the scene const unsigned int kScreenWidth = 800; const unsigned int kScreenHeight = 600; float zoomFactor = -4.f; float aliasFactor = 3.f; size_t globalWorkSize = kScreenWidth * kScreenHeight; size_t localWorkSize = 256; // Colours Vec whiteCol; vinit(whiteCol, 8.f, 8.f, 8.f); Vec lowerWhite; vinit(lowerWhite, 0.5f, 0.5f, 0.5f); Vec redCol; vinit(redCol, 0.8f, 1.f, 0.7f); Vec greenCol; vinit(greenCol, 0.4f, 0.5f, 0.7f); Vec col1; vinit(col1, 0.01f, 0.8f, 0.01f); // Setup materials struct Material ballMaterial1; // White Vec bm1Gloss; vassign(bm1Gloss, redCol); Vec bm1Matte; vassign(bm1Matte, greenCol); setMatOpacity(&ballMaterial1, 0.8f); setMatteGlossBalance(&ballMaterial1, 0.2f, &bm1Matte, &bm1Gloss); setMatRefractivityIndex(&ballMaterial1, 1.5500f); struct Material ballMaterial2; // Red Vec bm2Gloss; vassign(bm2Gloss, redCol); Vec bm2Matte; vassign(bm2Matte, greenCol); setMatOpacity(&ballMaterial2, 0.3f); setMatteGlossBalance(&ballMaterial2, 0.95f, &bm2Matte, &bm2Gloss); setMatRefractivityIndex(&ballMaterial2, 1.5500f); struct Material ballMaterial3; // Red Vec bm3Gloss; vassign(bm3Gloss, col1); Vec bm3Matte; vassign(bm3Matte, col1); setMatOpacity(&ballMaterial3, 0.6f); setMatteGlossBalance(&ballMaterial3, 0.0, &bm3Matte, &bm3Gloss); setMatRefractivityIndex(&ballMaterial3, 1.5500f); // Setup spheres unsigned int sphNum = 3; struct Sphere *hSpheres = (struct Sphere *)calloc(sphNum, sizeof(struct Sphere)); hSpheres[0].material = ballMaterial1; vinit(hSpheres[0].pos, -9.f, 0.f, -13.f); hSpheres[0].radius = 5.f; hSpheres[1].material = ballMaterial2; vinit(hSpheres[1].pos, -4.f, 1.5f, -5.f); hSpheres[1].radius = 2.f; hSpheres[2].material = ballMaterial3; vinit(hSpheres[2].pos, 1.f, -1.f, -7.f); hSpheres[2].radius = 3.f; // Setup light sources unsigned int lgtNum = 2; struct Light *hLights = (struct Light *)calloc(lgtNum, sizeof(struct Light)); vinit(hLights[0].pos, -45.f, 10.f, 85.f); vassign(hLights[0].col, lowerWhite); vinit(hLights[1].pos, 20.f, 60.f, -5.f); vassign(hLights[1].col, lowerWhite); // Fill vectors a and b with random float values int count = LENGTH; for (int i = 0; i < count; i++){ hA[i] = rand() / (float)RAND_MAX; hB[i] = rand() / (float)RAND_MAX; } cl_uint numPlatforms; // Find number of platforms err = clGetPlatformIDs(0, NULL, &numPlatforms); checkError(err, "Finding platforms"); if (numPlatforms == 0) { printf("Found 0 platforms!\n"); return EXIT_FAILURE; } // Get all platforms cl_platform_id *platform = (cl_platform_id *)malloc(sizeof(cl_platform_id)* numPlatforms); err = clGetPlatformIDs(numPlatforms, platform, NULL); checkError(err, "Getting platforms"); // Define an ID for the device cl_device_id deviceId = 0; // Secure a GPU for (int i = 0; i < numPlatforms; i++) { err = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_GPU, 1, &deviceId, NULL); if (err == CL_SUCCESS) { break; } } // Once a device has been obtained, print out its info err = output_device_info(deviceId); checkError(err, "Printing device output"); // Create a context for the GPU cl_context gpuContext; gpuContext = clCreateContext(NULL, 1, &deviceId, NULL, NULL, &err); checkError(err, "Creating context"); // Create a command queue cl_command_queue commandsGPU; commandsGPU = clCreateCommandQueue(gpuContext, deviceId, NULL, &err); checkError(err, "Creating command queue"); // Load the kernel code std::ifstream sourceFstream("raytrace_kernel.cl"); std::string source((std::istreambuf_iterator<char>(sourceFstream)), std::istreambuf_iterator<char>()); // Create a program from the source const char* str = source.c_str(); cl_program program; program = clCreateProgramWithSource(gpuContext, 1, &str, NULL, &err); checkError(err, "Creating program"); // Compile the program err = clBuildProgram(program, 0, NULL, "-I C:\Drive\Alberto\Projects\Code\C++\raytracer_gamma\raytracer_gamma", NULL, NULL); // If there were compilation errors if (err != CL_SUCCESS) { // Print out compilation log size_t len; char buffer[2048]; printf("Error: Failed to build program executable!\n%s\n", err_code(err)); clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); printf("%s\n", buffer); // Exit return EXIT_FAILURE; } // Create the kernel cl_kernel koRTG; koRTG = clCreateKernel(program, "raytrace", &err); checkError(err, "Creating kernel"); // Create the list of spheres and lights in device memory cl_mem dSpheres = clCreateBuffer(gpuContext, CL_MEM_READ_WRITE, sizeof(struct Sphere) * sphNum, NULL, &err); checkError(err, "Creating buffer for spheres"); cl_mem dLights = clCreateBuffer(gpuContext, CL_MEM_READ_WRITE, sizeof(struct Light) * lgtNum, NULL, &err); checkError(err, "Creating buffer for lights"); cl_mem dPixelBuffer = clCreateBuffer(gpuContext, CL_MEM_WRITE_ONLY, kScreenWidth * kScreenHeight * sizeof(Vec), NULL, &err); checkError(err, "Creating buffer for pixels"); // Write data from host into device memory (fill the buffers with // the host arrays) err = clEnqueueWriteBuffer(commandsGPU, dSpheres, CL_TRUE, 0, sizeof(struct Sphere) * sphNum, hSpheres, 0, NULL, NULL); checkError(err, "Copying hSperes in dSpheres"); err = clEnqueueWriteBuffer(commandsGPU, dLights, CL_TRUE, 0, sizeof(struct Light) * lgtNum, hLights, 0, NULL, NULL); checkError(err, "Copying hLights into dLights"); cl_int status; cl_uint maxDims; cl_event events[2]; size_t maxWorkGroupSize; /** * Query device capabilities. Maximum * work item dimensions and the maximmum * work item sizes */ status = clGetDeviceInfo( deviceId, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void*)&maxWorkGroupSize, NULL); if (status != CL_SUCCESS) { fprintf(stderr, "Error: Getting Device Info. (clGetDeviceInfo)\n"); return 1; } status = clGetDeviceInfo( deviceId, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), (void*)&maxDims, NULL); if (status != CL_SUCCESS) { fprintf(stderr, "Error: Getting Device Info. (clGetDeviceInfo)\n"); return 1; } localWorkSize = maxWorkGroupSize; if (globalWorkSize % localWorkSize != 0) { globalWorkSize = (globalWorkSize / localWorkSize + 1) * localWorkSize; } // Set kernel arguments err = clSetKernelArg(koRTG, 0, sizeof(cl_mem), &dSpheres); err |= clSetKernelArg(koRTG, 1, sizeof(unsigned int), &sphNum); err |= clSetKernelArg(koRTG, 2, sizeof(cl_mem), &dLights); err |= clSetKernelArg(koRTG, 3, sizeof(unsigned int), &lgtNum); err |= clSetKernelArg(koRTG, 4, sizeof(unsigned int), &kScreenWidth); err |= clSetKernelArg(koRTG, 5, sizeof(unsigned int), &kScreenHeight); err |= clSetKernelArg(koRTG, 6, sizeof(float), &zoomFactor); err |= clSetKernelArg(koRTG, 7, sizeof(float), &aliasFactor); err |= clSetKernelArg(koRTG, 8, sizeof(cl_mem), &dPixelBuffer); err |= clSetKernelArg(koRTG, 9, sizeof(struct Sphere) * sphNum, NULL); err |= clSetKernelArg(koRTG, 10, sizeof(struct Light) * lgtNum, NULL); checkError(err, "Setting kernel arguments"); // Start counting the time between kernel enqueuing and completion std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now(); // Execute the kernel over the entire range of our 1d input data set // letting the OpenCL runtime choose the work-group size err = clEnqueueNDRangeKernel(commandsGPU, koRTG, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL); checkError(err, "Enqueueing kernel"); // Wait for the commands in the queue to be executed err = clFinish(commandsGPU); checkError(err, "Waiting for commands to finish"); // Read the time after the kernel has executed std::chrono::steady_clock::time_point endTime = std::chrono::steady_clock::now(); // Compute the duration double kernelExecTime = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count(); // Print the duration printf("Exec time: %.5f ms", kernelExecTime); // Image //float *imagePtr = (float *)malloc(globalWorkSize * sizeof(Vec)); //// Screen in world coordinates //const float kImageWorldWidth = 16.f; //const float kImageWorldHeight = 12.f; //// Amount to increase each step for the ray direction //const float kRayXStep = kImageWorldWidth / ((float)kScreenWidth); //const float kRayYStep = kImageWorldHeight / ((float)kScreenHeight); //const float aspectRatio = kImageWorldWidth / kImageWorldHeight; //// Variables holding the current step in world coordinates //float rayX = 0.f, rayY = 0.f; //int pixelsCounter = 0; //// Calculate size of an alias step in world coordinates //const float kAliasFactorStepInv = kRayXStep / aliasFactor; //// Calculate total size of samples to be taken //const float kSamplesTot = aliasFactor * aliasFactor; //// Also its inverse //const float kSamplesTotinv = 1.f / kSamplesTot; //for (int y = 0; y < kScreenWidth * kScreenHeight; ++y, pixelsCounter += 3) { // // Retrieve the global ID of the kernel // const unsigned gid = y; // // // Calculate world position of pixel being currently worked on // const float kPxWorldX = ((((float)(gid % kScreenWidth) - // (kScreenWidth * 0.5f))) * kRayXStep); // const float kPxWorldY = ((kScreenHeight *0.5f) - ((float)(gid / kScreenWidth))) * kRayYStep; // // The ray to be shot. The vantage point (camera) is at the origin, // // and its intensity is maximum // struct Ray ray; vinit(ray.origin, 0.f, 0.f, 0.f); vinit(ray.intensity, 1.f, 1.f, 1.f); // // The colour of the pixel to be computed // Vec pixelCol = { 0.f, 0.f, 0.f }; // // Mock background material // struct Material bgMaterial; // Vec black; vinit(black, 0.f, 0.f, 0.f); // setMatteGlossBalance(&bgMaterial, 0.f, &black, &black); // setMatRefractivityIndex(&bgMaterial, 1.00f); // // For each sample to be taken // for (int i = 0; i < aliasFactor; ++i) { // for (int j = 0; j < aliasFactor; ++j) { // // Calculate the direction of the ray // float x = (kPxWorldX + (float)(((float)j) * kAliasFactorStepInv)) * aspectRatio; // float y = (kPxWorldY + (float)(((float)i) * kAliasFactorStepInv)); // // Set the ray's dir and normalise it // vinit(ray.dir, x, y, zoomFactor); vnorm(ray.dir); // // Raytrace for the current sample // Vec currentSampleCol = rayTrace(hSpheres, sphNum, hLights, lgtNum, // ray, bgMaterial, 0); // vsmul(currentSampleCol, kSamplesTotinv, currentSampleCol); // // Compute the average // vadd(pixelCol, pixelCol, currentSampleCol); // } // } // // Write result in destination buffer // *(imagePtr + pixelsCounter) = pixelCol.x; // *(imagePtr + pixelsCounter + 1) = pixelCol.y; // *(imagePtr + pixelsCounter + 2) = pixelCol.z; //} // Create a buffer to hold the result of the computation on the device Vec *pixelsIntermediate = (Vec *)calloc(kScreenHeight * kScreenWidth, sizeof(Vec)); //Vec *pixelsIntermediate = (Vec *)(imagePtr); // Read the results back from the device into the host err = clEnqueueReadBuffer(commandsGPU, dPixelBuffer, CL_TRUE, 0, kScreenWidth * kScreenHeight * sizeof(Vec), pixelsIntermediate, 0, NULL, NULL); // If the reading operation didn't complete successfully if (err != CL_SUCCESS) { printf("Error: Failed to read output buffer!\n%s\n", err_code(err)); // Exit exit(1); } // Calculate the maximum colour value across the whole picture float maxColourValue = maxColourValuePixelBuffer(pixelsIntermediate, kScreenWidth * kScreenHeight); // Cast the buffer to the type accepted by the savePPM function RGB *pixels = (RGB *)(pixelsIntermediate); // Print execution time /*rtime = wtime() - rtime; printf("\nThe kernel ran in %lf seconds\n", rtime);*/ // Cleanup clReleaseMemObject(dPixelBuffer); clReleaseMemObject(dLights); clReleaseMemObject(dSpheres); clReleaseProgram(program); clReleaseKernel(koRTG); clReleaseCommandQueue(commandsGPU); clReleaseContext(gpuContext); // ... Also on host free(hLights); free(hSpheres); free(hA); free(hB); free(hC); free(platform); // Try to save a PPM picture savePPM(pixels, "testPPM.ppm", kScreenWidth, kScreenHeight, maxColourValue); free(pixelsIntermediate); //free(imagePtr); getchar(); return 0; }
int main(int argc, char** argv) { cl_int err; // error code returned from OpenCL calls size_t dataSize = sizeof(float) * LENGTH; float* h_a = (float *)malloc(dataSize); // a vector float* h_b = (float *)malloc(dataSize); // b vector float* h_c = (float *)malloc(dataSize); // c vector (result) float* h_d = (float *)malloc(dataSize); // d vector (result) float* h_e = (float *)malloc(dataSize); // e vector float* h_f = (float *)malloc(dataSize); // f vector (result) float* h_g = (float *)malloc(dataSize); // g vector unsigned int correct; // number of correct results size_t global; // global domain size cl_device_id device_id; // compute device id cl_context context; // compute context cl_command_queue commands; // compute command queue cl_program program; // compute program cl_kernel ko_vadd; // compute kernel cl_mem d_a; // device memory used for the input a vector cl_mem d_b; // device memory used for the input b vector cl_mem d_c; // device memory used for the output c vector cl_mem d_d; // device memory used for the output d vector cl_mem d_e; // device memory used for the input e vector cl_mem d_f; // device memory used for the output f vector cl_mem d_g; // device memory used for the input g vector // Fill vectors a and b with random float values int i = 0; for(i = 0; i < LENGTH; i++){ h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX; h_e[i] = rand() / (float)RAND_MAX; h_g[i] = rand() / (float)RAND_MAX; } // Set up platform and GPU device cl_uint numPlatforms; // Find number of platforms err = clGetPlatformIDs(0, NULL, &numPlatforms); checkError(err, "Finding platforms"); if (numPlatforms == 0) { printf("Found 0 platforms!\n"); return EXIT_FAILURE; } // Get all platforms cl_platform_id Platform[numPlatforms]; err = clGetPlatformIDs(numPlatforms, Platform, NULL); checkError(err, "Getting platforms"); // Secure a GPU for (i = 0; i < numPlatforms; i++) { err = clGetDeviceIDs(Platform[i], DEVICE, 1, &device_id, NULL); if (err == CL_SUCCESS) { break; } } if (device_id == NULL) checkError(err, "Getting device"); err = output_device_info(device_id); checkError(err, "Outputting device info"); // Create a compute context context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); checkError(err, "Creating context"); // Create a command queue commands = clCreateCommandQueue(context, device_id, 0, &err); checkError(err, "Creating command queue"); // Create the compute program from the source buffer program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err); checkError(err, "Creating program"); // Build the program err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; printf("Error: Failed to build program executable!\n%s\n", err_code(err)); clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); printf("%s\n", buffer); return EXIT_FAILURE; } // Create the compute kernel from the program ko_vadd = clCreateKernel(program, "vadd", &err); checkError(err, "Creating kernel"); // Create the input (a, b, e, g) arrays in device memory // NB: we copy the host pointers here too d_a = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, dataSize, h_a, &err); checkError(err, "Creating buffer d_a"); d_b = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, dataSize, h_b, &err); checkError(err, "Creating buffer d_b"); d_e = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, dataSize, h_e, &err); checkError(err, "Creating buffer d_e"); d_g = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, dataSize, h_g, &err); checkError(err, "Creating buffer d_g"); // Create the output arrays in device memory d_c = clCreateBuffer(context, CL_MEM_READ_WRITE, dataSize, NULL, &err); checkError(err, "Creating buffer d_c"); d_d = clCreateBuffer(context, CL_MEM_READ_WRITE, dataSize, NULL, &err); checkError(err, "Creating buffer d_d"); d_f = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &err); checkError(err, "Creating buffer d_f"); const int count = LENGTH; // Enqueue kernel - first time // Set the arguments to our compute kernel err = clSetKernelArg(ko_vadd, 0, sizeof(cl_mem), &d_a); err |= clSetKernelArg(ko_vadd, 1, sizeof(cl_mem), &d_b); err |= clSetKernelArg(ko_vadd, 2, sizeof(cl_mem), &d_c); err |= clSetKernelArg(ko_vadd, 3, sizeof(unsigned int), &count); checkError(err, "Setting kernel arguments"); // Execute the kernel over the entire range of our 1d input data set // letting the OpenCL runtime choose the work-group size global = count; err = clEnqueueNDRangeKernel(commands, ko_vadd, 1, NULL, &global, NULL, 0, NULL, NULL); checkError(err, "Enqueueing kernel 1st time"); // Enqueue kernel - second time // Set different arguments to our compute kernel err = clSetKernelArg(ko_vadd, 0, sizeof(cl_mem), &d_e); err |= clSetKernelArg(ko_vadd, 1, sizeof(cl_mem), &d_c); err |= clSetKernelArg(ko_vadd, 2, sizeof(cl_mem), &d_d); checkError(err, "Setting kernel arguments"); // Enqueue the kernel again err = clEnqueueNDRangeKernel(commands, ko_vadd, 1, NULL, &global, NULL, 0, NULL, NULL); checkError(err, "Enqueueing kernel 2nd time"); // Enqueue kernel - third time // Set different (again) arguments to our compute kernel err = clSetKernelArg(ko_vadd, 0, sizeof(cl_mem), &d_g); err |= clSetKernelArg(ko_vadd, 1, sizeof(cl_mem), &d_d); err |= clSetKernelArg(ko_vadd, 2, sizeof(cl_mem), &d_f); checkError(err, "Setting kernel arguments"); // Enqueue the kernel again err = clEnqueueNDRangeKernel(commands, ko_vadd, 1, NULL, &global, NULL, 0, NULL, NULL); checkError(err, "Enqueueing kernel 3rd time"); // Read back the result from the compute device err = clEnqueueReadBuffer( commands, d_f, CL_TRUE, 0, sizeof(float) * count, h_f, 0, NULL, NULL ); checkError(err, "Reading back d_f"); // Test the results correct = 0; float tmp; for(i = 0; i < count; i++) { tmp = h_a[i] + h_b[i] + h_e[i] + h_g[i]; // assign element i of a+b+e+g to tmp tmp -= h_f[i]; // compute deviation of expected and output result if(tmp*tmp < TOL*TOL) // correct if square deviation is less than tolerance squared correct++; else { printf(" tmp %f h_a %f h_b %f h_e %f h_g %f h_f %f\n",tmp, h_a[i], h_b[i], h_e[i], h_g[i], h_f[i]); } } // summarize results printf("C = A+B+E+G: %d out of %d results were correct.\n", correct, count); // cleanup then shutdown clReleaseMemObject(d_a); clReleaseMemObject(d_b); clReleaseMemObject(d_c); clReleaseMemObject(d_d); clReleaseMemObject(d_e); clReleaseMemObject(d_f); clReleaseMemObject(d_g); clReleaseProgram(program); clReleaseKernel(ko_vadd); clReleaseCommandQueue(commands); clReleaseContext(context); free(h_a); free(h_b); free(h_c); free(h_d); free(h_e); free(h_f); free(h_g); return 0; }
/* main program */ int main(int argc, char* argv[]) { cl_float a_data[LENGTH]; cl_float b_data[LENGTH]; cl_float c_res [LENGTH]; cl_int rval; size_t domain_size[1]; /* global domain size */ size_t workgroup_size[1]; cl_device_id device_id; cl_context context; cl_command_queue commands; cl_program program_obj; cl_kernel kernel; cl_mem a_in; /* device memory for input vector a */ cl_mem b_in; /* device memory for input vector b */ cl_mem c_out; /* device memory for output vector c */ cl_uint count = LENGTH; double start_time, end_time; /* fill input vectors with random values */ for(int i = 0; i < count; ++i){ a_data[i] = rand() / (cl_float)RAND_MAX; b_data[i] = rand() / (cl_float)RAND_MAX; } start_time = get_time(); cl_platform_id platform; rval = clGetPlatformIDs(1, &platform, NULL); if (rval != CL_SUCCESS) error_exit("Could not get platform", rval); rval = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL); if (rval != CL_SUCCESS) error_exit("Could not get device ID", rval); printf("Compute device:\n"); output_device_info(stdout, device_id, CL_FALSE); /* create compute context */ context = clCreateContext(0, 1, &device_id, NULL, NULL, &rval); if ((rval != CL_SUCCESS) || (context == NULL)) error_exit("Could not create compute context", rval); /* create command queue */ commands = clCreateCommandQueue(context, device_id, 0, &rval); if ((rval != CL_SUCCESS) || (commands == NULL)) error_exit("Could not create command queue", rval); /* create compute program object from source buffer */ program_obj = clCreateProgramWithSource(context, 1, (const char **) &kernel_source, NULL, &rval); if ((rval != CL_SUCCESS) || (program_obj == NULL)) error_exit("Could not create program object from source", rval); /* build program object */ rval = clBuildProgram(program_obj, 0, NULL, NULL, NULL, NULL); if (rval != CL_SUCCESS) { size_t len; char buffer[2048]; describe_error("Could not build executable", rval, stderr); clGetProgramBuildInfo(program_obj, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); fprintf(stderr, "%s\n", buffer); return EXIT_FAILURE; } /* create compute kernel from program object */ kernel = clCreateKernel(program_obj, "vadd", &rval); if ((rval != CL_SUCCESS) || (kernel == NULL)) error_exit("Could not create compute kernel", rval); end_time = get_time(); printf("\nInitialization time %f seconds\n", end_time - start_time); start_time = get_time(); /* create input (a, b) and output (c) arrays in device memory */ a_in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_float) * count, NULL, NULL); b_in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_float) * count, NULL, NULL); c_out= clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float) * count, NULL, NULL); if ((a_in == NULL) || (b_in == NULL) || (c_out== NULL)) error_exit("Could not allocate device memory", rval); /* write a and b vectors into compute device memory */ rval = clEnqueueWriteBuffer(commands, a_in, CL_TRUE, 0, sizeof(cl_float) * count, a_data, 0, NULL, NULL); if (rval != CL_SUCCESS) error_exit("Could not copy a_data to device memory", rval); rval = clEnqueueWriteBuffer(commands, b_in, CL_TRUE, 0, sizeof(cl_float) * count, b_data, 0, NULL, NULL); if (rval != CL_SUCCESS) error_exit("Could not copy b_data to device memory", rval); end_time = get_time(); printf("Time to copy data %f seconds\n", end_time - start_time); /* set arguments to compute kernel */ rval = 0; rval = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_in); rval |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_in); rval |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &c_out); rval |= clSetKernelArg(kernel, 3, sizeof(count), &count); if (rval != CL_SUCCESS) error_exit("Could not set kernel arguments", rval); /* get maximum work group size for executing kernel on device */ rval = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(workgroup_size[0]), workgroup_size, NULL); if (rval != CL_SUCCESS) error_exit("Could not retrieve kernel work group info", rval); start_time = get_time(); /* execute kernel over entire range of 1d input data set using maximum number of work group items for device */ domain_size[0] = count; rval = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, domain_size, workgroup_size, 0, NULL, NULL); if (rval != CL_SUCCESS) error_exit("Failed to execute kernel", rval); /* wait for commands to complete before reading back results */ clFinish(commands); end_time = get_time(); printf("TIme for kernel computation %f seconds\n", end_time - start_time); /* read back results from compute device */ rval = clEnqueueReadBuffer(commands, c_out, CL_TRUE, 0, sizeof(cl_float) * count, c_res, 0, NULL, NULL); if (rval != CL_SUCCESS) error_exit("Could not read output array", rval); /* check results */ int correct = 0; cl_float diff; for(cl_uint i = 0; i < count; ++i) { diff = a_data[i] + b_data[i]; diff -= c_res[i]; if(diff*diff < TOL*TOL) ++correct; else { printf("difference %f in element %d: a %f b %f c %f\n", diff, i, a_data[i], b_data[i], c_res[i]); } } /* summarize results */ printf("\nC = A+B: %d out of %d results were correct\n", correct, count); /* clean up */ clReleaseMemObject(a_in); clReleaseMemObject(b_in); clReleaseMemObject(c_out); clReleaseProgram(program_obj); clReleaseKernel(kernel); clReleaseCommandQueue(commands); clReleaseContext(context); return EXIT_SUCCESS; }
int main(void) { float *h_psum; // vector to hold partial sum int in_nsteps = INSTEPS; // default number of steps (updated later to device preferable) int niters = ITERS; // number of iterations int nsteps; float step_size; size_t nwork_groups; size_t max_size, work_group_size = 8; float pi_res; cl_mem d_partial_sums; char *kernelsource = getKernelSource("../pi_ocl.cl"); // Kernel source cl_int err; cl_device_id device_id; // compute device id cl_context context; // compute context cl_command_queue commands; // compute command queue cl_program program; // compute program cl_kernel kernel_pi; // compute kernel // Set up OpenCL context. queue, kernel, etc. cl_uint numPlatforms; // Find number of platforms err = clGetPlatformIDs(0, NULL, &numPlatforms); if (err != CL_SUCCESS || numPlatforms <= 0) { printf("Error: Failed to find a platform!\n%s\n",err_code(err)); return EXIT_FAILURE; } // Get all platforms cl_platform_id Platform[numPlatforms]; err = clGetPlatformIDs(numPlatforms, Platform, NULL); if (err != CL_SUCCESS || numPlatforms <= 0) { printf("Error: Failed to get the platform!\n%s\n",err_code(err)); return EXIT_FAILURE; } // Secure a device for (int i = 0; i < numPlatforms; i++) { err = clGetDeviceIDs(Platform[i], DEVICE, 1, &device_id, NULL); if (err == CL_SUCCESS) break; } if (device_id == NULL) { printf("Error: Failed to create a device group!\n%s\n",err_code(err)); return EXIT_FAILURE; } // Output information err = output_device_info(device_id); // Create a compute context context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (!context) { printf("Error: Failed to create a compute context!\n%s\n", err_code(err)); return EXIT_FAILURE; } // Create a command queue commands = clCreateCommandQueue(context, device_id, 0, &err); if (!commands) { printf("Error: Failed to create a command commands!\n%s\n", err_code(err)); return EXIT_FAILURE; } // Create the compute program from the source buffer program = clCreateProgramWithSource(context, 1, (const char **) & kernelsource, NULL, &err); if (!program) { printf("Error: Failed to create compute program!\n%s\n", err_code(err)); return EXIT_FAILURE; } // Build the program err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; printf("Error: Failed to build program executable!\n%s\n", err_code(err)); clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); printf("%s\n", buffer); return EXIT_FAILURE; } // Create the compute kernel from the program kernel_pi = clCreateKernel(program, "pi", &err); if (!kernel_pi || err != CL_SUCCESS) { printf("Error: Failed to create compute kernel!\n%s\n", err_code(err)); return EXIT_FAILURE; } // Find kernel work-group size err = clGetKernelWorkGroupInfo (kernel_pi, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &work_group_size, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to get kernel work-group info\n%s\n", err_code(err)); return EXIT_FAILURE; } // Now that we know the size of the work-groups, we can set the number of // work-groups, the actual number of steps, and the step size nwork_groups = in_nsteps/(work_group_size*niters); if (nwork_groups < 1) { err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), &nwork_groups, NULL); work_group_size = in_nsteps / (nwork_groups * niters); } nsteps = work_group_size * niters * nwork_groups; step_size = 1.0f/(float)nsteps; h_psum = calloc(sizeof(float), nwork_groups); if (!h_psum) { printf("Error: could not allocate host memory for h_psum\n"); return EXIT_FAILURE; } printf(" %ld work-groups of size %ld. %d Integration steps\n", nwork_groups, work_group_size, nsteps); d_partial_sums = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * nwork_groups, NULL, &err); if (err != CL_SUCCESS) { printf("Error: Failed to create buffer\n%s\n", err_code(err)); return EXIT_FAILURE; } // Set kernel arguments err = clSetKernelArg(kernel_pi, 0, sizeof(int), &niters); err |= clSetKernelArg(kernel_pi, 1, sizeof(float), &step_size); err |= clSetKernelArg(kernel_pi, 2, sizeof(float) * work_group_size, NULL); err |= clSetKernelArg(kernel_pi, 3, sizeof(cl_mem), &d_partial_sums); if (err != CL_SUCCESS) { printf("Error: Failed to set kernel arguments!\n"); return EXIT_FAILURE; } // Execute the kernel over the entire range of our 1D input data set // using the maximum number of work items for this device size_t global = nwork_groups * work_group_size; size_t local = work_group_size; double rtime = wtime(); err = clEnqueueNDRangeKernel( commands, kernel_pi, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to execute kernel\n%s\n", err_code(err)); return EXIT_FAILURE; } err = clEnqueueReadBuffer( commands, d_partial_sums, CL_TRUE, 0, sizeof(float) * nwork_groups, h_psum, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to read buffer\n%s\n", err_code(err)); return EXIT_FAILURE; } // complete the sum and compute the final integral value on the host pi_res = 0.0f; for (unsigned int i = 0; i < nwork_groups; i++) { pi_res += h_psum[i]; } pi_res *= step_size; rtime = wtime() - rtime; printf("\nThe calculation ran in %lf seconds\n", rtime); printf(" pi = %f for %d steps\n", pi_res, nsteps); // clean up clReleaseMemObject(d_partial_sums); clReleaseProgram(program); clReleaseKernel(kernel_pi); clReleaseCommandQueue(commands); clReleaseContext(context); free(kernelsource); free(h_psum); }
int main(int argc, char** argv) { int rank, size; // MPI rank & size int err; // error code returned from OpenCL calls float h_a[LENGTH]; // a vector float h_b[LENGTH]; // b vector float h_c[LENGTH]; // c vector (a+b) returned from the compute device (local per task) float _h_c[LENGTH]; // c vector (a+b) returned from the compute device (global for master) unsigned int correct; // number of correct results size_t global; // global domain size size_t local; // local domain size cl_device_id device_id; // compute device id cl_context context; // compute context cl_command_queue commands; // compute command queue cl_program program; // compute program cl_kernel ko_vadd; // compute kernel cl_mem d_a; // device memory used for the input a vector cl_mem d_b; // device memory used for the input b vector cl_mem d_c; // device memory used for the output c vector int mycount, i; err = MPI_Init (&argc, &argv); if (err != MPI_SUCCESS) { printf ("MPI_Init failed!\n"); exit (-1); } err = MPI_Comm_rank (MPI_COMM_WORLD, &rank); if (err != MPI_SUCCESS) { printf ("MPI_Comm_rank failed!\n"); exit (-1); } err = MPI_Comm_size (MPI_COMM_WORLD, &size); if (err != MPI_SUCCESS) { printf ("MPI_Comm_size failed\n"); exit (-1); } if (LENGTH % size != 0) { printf ("Number of MPI processes must divide LENGTH (%d)\n", LENGTH); exit (-1); } mycount = LENGTH / size; if (rank == 0) { for (i = 0; i < LENGTH; i++) { h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX; h_a[i] = i; h_b[i] = i*2; } err = MPI_Bcast (h_a, LENGTH, MPI_FLOAT, 0, MPI_COMM_WORLD); if (err != MPI_SUCCESS) { printf ("MPI_Bcast failed transferring h_a\n"); exit (-1); } err = MPI_Bcast (h_b, LENGTH, MPI_FLOAT, 0, MPI_COMM_WORLD); if (err != MPI_SUCCESS) { printf ("MPI_Bcast failed transferring h_b\n"); exit (-1); } } else { err = MPI_Bcast (h_a, LENGTH, MPI_FLOAT, 0, MPI_COMM_WORLD); if (err != MPI_SUCCESS) { printf ("MPI_Bcast failed receiving h_a\n"); exit (-1); } err = MPI_Bcast (h_b, LENGTH, MPI_FLOAT, 0, MPI_COMM_WORLD); if (err != MPI_SUCCESS) { printf ("MPI_Bcast failed receiving h_b\n"); exit (-1); } } // Set up platform cl_uint numPlatforms; // Find number of platforms err = clGetPlatformIDs(0, NULL, &numPlatforms); if (err != CL_SUCCESS || numPlatforms <= 0) { printf("Error: Failed to find a platform!\n"); return EXIT_FAILURE; } // Get all platforms cl_platform_id Platform[numPlatforms]; err = clGetPlatformIDs(numPlatforms, Platform, NULL); if (err != CL_SUCCESS || numPlatforms <= 0) { printf("Error: Failed to get the platform!\n"); return EXIT_FAILURE; } // Secure a GPU for (i = 0; i < numPlatforms; i++) { err = clGetDeviceIDs(Platform[i], DEVICE, 1, &device_id, NULL); if (err == CL_SUCCESS) break; } if (device_id == NULL) { printf("Error: Failed to create a device group!\n"); return EXIT_FAILURE; } else { if (output_device_info (rank, device_id) != CL_SUCCESS) return EXIT_FAILURE; } // Create a compute context context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (!context) { printf("Error: Failed to create a compute context!\n"); return EXIT_FAILURE; } // Create a command queue commands = clCreateCommandQueue(context, device_id, 0, &err); if (!commands) { printf("Error: Failed to create a command commands!\n"); return EXIT_FAILURE; } // Create the compute program from the source buffer program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err); if (!program) { printf("Error: Failed to create compute program!\n"); return EXIT_FAILURE; } // Build the program err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; printf("Error: Failed to build program executable!\n"); clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); printf("%s\n", buffer); exit(1); } // Create the compute kernel from the program ko_vadd = clCreateKernel(program, "vadd", &err); if (!ko_vadd || err != CL_SUCCESS) { printf("Error: Failed to create compute kernel!\n"); exit(1); } // Create the input (a, b) and output (c) arrays in device memory d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * mycount, NULL, NULL); d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * mycount, NULL, NULL); d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * mycount, NULL, NULL); if (!d_a || !d_b || !d_c) { printf("Error: Failed to allocate device memory!\n"); exit(1); } // Write a and b vectors into compute device memory err = clEnqueueWriteBuffer(commands, d_a, CL_TRUE, 0, sizeof(float) * mycount, &h_a[rank*mycount], 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to write h_a to source array!\n"); exit(1); } err = clEnqueueWriteBuffer(commands, d_b, CL_TRUE, 0, sizeof(float) * mycount, &h_b[rank*mycount], 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to write h_b to source array!\n"); exit(1); } // Set the arguments to our compute kernel err = clSetKernelArg(ko_vadd, 0, sizeof(cl_mem), &d_a); err |= clSetKernelArg(ko_vadd, 1, sizeof(cl_mem), &d_b); err |= clSetKernelArg(ko_vadd, 2, sizeof(cl_mem), &d_c); err |= clSetKernelArg(ko_vadd, 3, sizeof(unsigned int), &mycount); if (err != CL_SUCCESS) { printf("Error: Failed to set kernel arguments! %d\n", err); exit(1); } // Get the maximum work group size for executing the kernel on the device err = clGetKernelWorkGroupInfo(ko_vadd, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to retrieve kernel work group info! %d\n", err); exit(1); } // Execute the kernel over the entire range of our 1d input data set // using the maximum number of work group items for this device global = LENGTH; err = clEnqueueNDRangeKernel(commands, ko_vadd, 1, NULL, &global, &local, 0, NULL, NULL); if (err) { printf("Error: Failed to execute kernel!\n"); return EXIT_FAILURE; } // Wait for the commands to complete before reading back results clFinish(commands); // Read back the results from the compute device err = clEnqueueReadBuffer( commands, d_c, CL_TRUE, 0, sizeof(float) * mycount, &h_c, 0, NULL, NULL ); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! %d\n", err); exit(1); } err = MPI_Gather (h_c, mycount, MPI_FLOAT, _h_c, mycount, MPI_FLOAT, 0, MPI_COMM_WORLD); if (err != MPI_SUCCESS) { printf ("MPI_Gather failed receiving h_c\n"); exit (-1); } if (rank == 0) { // Test the results correct = 0; float tmp; for(i = 0; i < LENGTH; i++) { tmp = h_a[i] + h_b[i]; // assign element i of a+b to tmp tmp -= _h_c[i]; // compute deviation of expected and output result if(tmp*tmp < TOL*TOL) // correct if square deviation is less than tolerance squared correct++; else printf(" tmp %f h_a %f h_b %f h_c %f \n",tmp, h_a[i], h_b[i], _h_c[i]); } // summarize results printf("C = A+B: %d out of %d results were correct.\n", correct, LENGTH); } // cleanup then shutdown clReleaseMemObject(d_a); clReleaseMemObject(d_b); clReleaseMemObject(d_c); clReleaseProgram(program); clReleaseKernel(ko_vadd); clReleaseCommandQueue(commands); clReleaseContext(context); err = MPI_Finalize (); if (err != MPI_SUCCESS) { printf ("MPI_Finalize failed!\n"); exit (-1); } return 0; }
int main(void) { //############################################### // // Declare variables for OpenCL // //############################################### int err; // error code returned from OpenCL calls size_t global; // global domain size cl_device_id device_id; // compute device id cl_context context; // compute context cl_command_queue commands; // compute command queue cl_program program; // compute program cl_kernel ko_calculate_imagerowdots_iterations; // compute kernel cl_kernel ko_calculate_colorrow; // compute kernel cl_mem d_a; // device memory used for the input a vector cl_mem d_b; // device memory int i; //############################################### // // Set values for mandelbrot // //############################################### //plane section values float x_ebene_min = -1; float y_ebene_min = -1; float x_ebene_max = 2; float y_ebene_max = 1; //monitor resolution values const long x_mon = 640; const long y_mon = 480; //Iterations long itr = 100; //abort condition float abort_value = 2; //Number of images per second long fps = 24; //video duration in seconds long video_duration = 3; //zoom speed in percentage float reduction = 5; //zoom dot my_complex_t zoom_dot; //############################################### // // Set up platform and GPU device // //############################################### cl_uint numPlatforms; // Find number of platforms err = clGetPlatformIDs(0, NULL, &numPlatforms); checkError(err, "Finding platforms"); if (numPlatforms == 0) { printf("Found 0 platforms!\n"); return EXIT_FAILURE; } // Get all platforms cl_platform_id Platform[numPlatforms]; err = clGetPlatformIDs(numPlatforms, Platform, NULL); checkError(err, "Getting platforms"); // Secure a GPU for (i = 0; i < numPlatforms; i++) { err = clGetDeviceIDs(Platform[i], DEVICE, 1, &device_id, NULL); if (err == CL_SUCCESS) { break; } } if (device_id == NULL) checkError(err, "Finding a device"); err = output_device_info(device_id); checkError(err, "Printing device output"); //############################################### // // Create context, command queue and kernel // //############################################### // Create a compute context context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); checkError(err, "Creating context"); // Create a command queue commands = clCreateCommandQueue(context, device_id, 0, &err); checkError(err, "Creating command queue"); //Read Kernel source FILE *fp; char *source_str; size_t source_size, program_size; fp = fopen("./kernel/calculate_iterations.cl", "r"); if (!fp) { printf("Failed to load kernel\n"); return 1; } fseek(fp, 0, SEEK_END); program_size = ftell(fp); rewind(fp); source_str = (char*) malloc(program_size + 1); source_str[program_size] = '\0'; fread(source_str, sizeof(char), program_size, fp); fclose(fp); // Create the compute program from the source buffer program = clCreateProgramWithSource(context, 1, (const char **) &source_str, NULL, &err); checkError(err, "Creating program"); // Build the program err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; printf("Error: Failed to build program executable!\n%s\n", err_code(err)); clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); printf("%s\n", buffer); // Determine the size of the log size_t log_size; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); // Allocate memory for the log char *log = (char *) malloc(log_size); // Get the log clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); // Print the log printf("%s\n", log); return EXIT_FAILURE; } // Create the compute kernel from the program ko_calculate_imagerowdots_iterations = clCreateKernel(program, "calculate_imagerowdots_iterations", &err); checkError(err, "Creating kernel"); // Create the compute kernel from the program ko_calculate_colorrow = clCreateKernel(program, "calculate_colorrow", &err); checkError(err, "Creating kernel"); int number_images = 0; do { //Get memory for image long* h_image = (long*) calloc(x_mon * y_mon, sizeof(long)); unsigned char* h_image_pixel = (unsigned char*) calloc( x_mon * y_mon * 3, sizeof(unsigned char)); //############################################### //############################################### // // Loop to calculate image dot iterations // //############################################### //############################################### float y_value = y_ebene_max; float delta_y = delta(y_ebene_min, y_ebene_max, y_mon); for (int row = 0; row < y_mon; ++row) { //############################################### // // Create and write buffer // //############################################### //Get memory for row long* h_image_row = (long*) calloc(x_mon, sizeof(long)); // a vector d_a = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(long) * x_mon, NULL, &err); checkError(err, "Creating buffer d_a"); // Write a vector into compute device memory err = clEnqueueWriteBuffer(commands, d_a, CL_TRUE, 0, sizeof(long) * x_mon, h_image_row, 0, NULL, NULL); checkError(err, "Copying h_a to device at d_a"); //############################################### // // Set the arguments to our compute kernel // //############################################### err = clSetKernelArg(ko_calculate_imagerowdots_iterations, 0, sizeof(float), &x_ebene_min); err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 1, sizeof(float), &x_ebene_max); err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 2, sizeof(float), &y_value); err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 3, sizeof(long), &x_mon); err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 4, sizeof(float), &abort_value); err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 5, sizeof(long), &itr); err |= clSetKernelArg(ko_calculate_imagerowdots_iterations, 6, sizeof(cl_mem), &d_a); checkError(err, "Setting kernel arguments"); /*__kernel void calculate_imagerowdots_iterations(const float x_min, const float x_max, const float y_value, const long x_mon, const float abort_value, const long itr, __global long * imagerow)*/ // Execute the kernel over the entire range of our 1d input data set // letting the OpenCL runtime choose the work-group size global = x_mon; err = clEnqueueNDRangeKernel(commands, ko_calculate_imagerowdots_iterations, 1, NULL, &global, NULL, 0, NULL, NULL); checkError(err, "Enqueueing kernel"); // Wait for the commands to complete err = clFinish(commands); checkError(err, "Waiting for kernel to finish"); // Read back the results from the compute device err = clEnqueueReadBuffer(commands, d_a, CL_TRUE, 0, sizeof(long) * x_mon, h_image_row, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to read output array!\n%s\n", err_code(err)); exit(1); } //reduce y y_value -= delta_y; //cope row to image memcpy(h_image + row * x_mon, h_image_row, sizeof(long) * x_mon); free(h_image_row); } // for (i = 0; i < x_mon * y_mon; ++i) { // printf("%ld ", h_image[i]); // } // fflush(stdout); //############################################### //############################################### // // End of loop to calculate image dot iterations // //############################################### //############################################### //############################################### //############################################### // // Beginn color calculation // //############################################### //############################################### for (int row = 0; row < y_mon; ++row) { //Get memory for row long* h_image_row = (long*) calloc(x_mon, sizeof(long)); // a vector memcpy(h_image_row, h_image + row * x_mon, sizeof(long) * x_mon); d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(long) * x_mon, NULL, &err); checkError(err, "Creating buffer d_a"); // Write a vector into compute device memory err = clEnqueueWriteBuffer(commands, d_a, CL_TRUE, 0, sizeof(long) * x_mon, h_image_row, 0, NULL, NULL); checkError(err, "Copying h_image_row to device at d_a"); unsigned char* h_imagepixel_row = (unsigned char*) calloc(x_mon * 3, sizeof(unsigned char)); // a vector d_b = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(unsigned char) * x_mon * 3, NULL, &err); checkError(err, "Creating buffer d_b"); // Write a vector into compute device memory err = clEnqueueWriteBuffer(commands, d_b, CL_TRUE, 0, sizeof(unsigned char) * x_mon * 3, h_imagepixel_row, 0, NULL, NULL); checkError(err, "Copying h_imagepixel_row to device at d_b"); //############################################### // // Set the arguments to our compute kernel // //############################################### err = clSetKernelArg(ko_calculate_colorrow, 0, sizeof(long), &x_mon); err |= clSetKernelArg(ko_calculate_colorrow, 1, sizeof(long), &itr); err |= clSetKernelArg(ko_calculate_colorrow, 2, sizeof(cl_mem), &d_a); err |= clSetKernelArg(ko_calculate_colorrow, 3, sizeof(cl_mem), &d_b); checkError(err, "Setting kernel arguments"); /*__kernel void calculate_colorrow(const long width, long itr, long * imagerowvalues, unsigned char * imagerow)*/ // Execute the kernel over the entire range of our 1d input data set // letting the OpenCL runtime choose the work-group size global = x_mon; err = clEnqueueNDRangeKernel(commands, ko_calculate_colorrow, 1, NULL, &global, NULL, 0, NULL, NULL); checkError(err, "Enqueueing kernel"); // Wait for the commands to complete err = clFinish(commands); checkError(err, "Waiting for kernel to finish"); // Read back the results from the compute device err = clEnqueueReadBuffer(commands, d_b, CL_TRUE, 0, sizeof(unsigned char) * x_mon * 3, h_imagepixel_row, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("Error: Failed to read output array!\n%s\n", err_code(err)); exit(1); } memcpy(h_image_pixel + row * x_mon * 3, h_imagepixel_row, sizeof(unsigned char) * x_mon * 3); free(h_image_row); free(h_imagepixel_row); } if (number_images == 0) { zoom_dot = find_dot_to_zoom(x_ebene_min, x_ebene_max, y_ebene_min, y_ebene_max, h_image, y_mon, x_mon, itr); } reduce_plane_section_focus_dot(&x_ebene_min, &x_ebene_max, &y_ebene_min, &y_ebene_max, reduction, zoom_dot); // save the image char filename[50]; sprintf(filename, "img-%d.bmp", number_images); safe_image_to_bmp(x_mon, y_mon, h_image_pixel, filename); free(h_image); free(h_image_pixel); number_images++; itr = (long) (itr + itr * reduction / 100); printf("%d\n", number_images); fflush(stdout); } while (number_images < (fps * video_duration)); //############################################### // // cleanup then shutdown // //############################################### clReleaseMemObject(d_a); clReleaseMemObject(d_b); clReleaseProgram(program); clReleaseKernel(ko_calculate_imagerowdots_iterations); clReleaseCommandQueue(commands); clReleaseContext(context); return 0; }