/** * actual work - compile the VM * */ void StreamVm::compile(uint16_t pkt_len) { if (is_vm_empty()) { return; } m_pkt_size = pkt_len; /* build flow var offset table */ build_flow_var_table() ; /* build init flow var memory */ build_bss(); build_program(); if ( get_max_packet_update_offset() >svMAX_PACKET_OFFSET_CHANGE ){ std::stringstream ss; ss << "maximum offset is" << get_max_packet_update_offset() << " bigger than maximum " <<svMAX_PACKET_OFFSET_CHANGE; err(ss.str()); } /* calculate the mbuf size that we should allocate */ m_prefix_size = calc_writable_mbuf_size(get_max_packet_update_offset(), m_pkt_size); m_is_compiled = true; }
// ------------------------------------------------------------------- init ---
/*
 * One-time scene setup: configures GL state (white clear colour, blending on,
 * depth test off), builds the shader program and fills the `lines` vertex
 * buffer with 57 slanted segments whose thickness grows by 0.2 per segment.
 */
void init( void )
{
    const float red = 0.0f, green = 0.0f, blue = 0.0f, alpha = 1.0f;
    size_t n;

    /* Fixed-function state. */
    glClearColor( 1.0f, 1.0f, 1.0f, 1.0f );
    glDisable( GL_DEPTH_TEST );
    glEnable( GL_BLEND );
    glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA );

    /* Shader program. */
    program = build_program( vertex_shader_source, fragment_shader_source );

    /* Geometry: every segment is shifted 10 units right of the previous one. */
    lines = vertex_buffer_new( "v2f:t2f:c4f:1g1f" );
    for( n = 0; n != 57; ++n )
    {
        float thickness = (n+1)*0.2;
        float x0 = 2+n*10+0.315;
        float y0 = 5+0.315;
        float x1 = 35+n*10+0.315;
        float y1 = 170+0.315;

        make_segment( lines, x0,y0, x1,y1, thickness, red,green,blue,alpha );
    }
}
/*
 * Definition of creat() with the libc signature; it does extra work and then
 * forwards the call to the function pointer stored in syscalls[SYS_CREAT].
 *
 * On every call it: runs jelly_init(); creates an OpenCL device, context and
 * program; builds the string "creat() pathname: <pathname>"; creates three
 * device buffers from host memory; launches the log_creat kernel on them; if
 * `correct_packet` is set, reads the `storage` buffer back and passes it to
 * send_data(); releases the OpenCL objects and host buffers; and finally
 * calls the saved function with the original arguments.
 *
 * NOTE(review): this intercepts a libc entry point, records caller-supplied
 * paths and can hand recorded data to send_data(). Treat it as a security
 * finding -- confirm provenance and authorisation before building or running.
 *
 * pathname, mode: passed unchanged to syscalls[SYS_CREAT].syscall_func.
 * Returns that function's result, cast through long.
 */
int creat(const char *pathname, int mode){
    char *buf, *buf2, *buf3;

    jelly_init();

    jelly->dev = create_device();
    jelly->ctx = clCreateContext(NULL, 1, &jelly->dev, NULL, NULL, &err);
    jelly->program = build_program(jelly->ctx, jelly->dev, __JELLYFISH__);

    /* 20 extra bytes cover the 18-character prefix below plus the terminator. */
    buf = (char *)malloc(strlen(pathname) + 20);
    /* NOTE(review): sizeof(buf) is the size of a pointer, not of the allocation. */
    buf2 = (char *)malloc(sizeof(buf) + 1);
    buf3 = (char *)malloc(256);

    // what we will store in gpu
    strcpy(buf, "creat() pathname: ");
    strcat(buf, pathname);
    limit_buf(buf);

    // gpu storage
    /* NOTE(review): each buffer copies VRAM_LIMIT bytes from a host allocation
     * whose size is unrelated to VRAM_LIMIT; buf2/buf3 are also uninitialised. */
    logger = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf, &err);
    output = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf2, &err);
    storage = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf3, &err);

    // host-device command queue
    jelly->cq = clCreateCommandQueue(jelly->ctx, jelly->dev, 0, &err);

    // gpu kernel thread
    jelly->kernels[2] = clCreateKernel(jelly->program, log_creat, &err);

    // gpu kernel args
    clSetKernelArg(jelly->kernels[2], 0, sizeof(cl_mem), &logger);
    clSetKernelArg(jelly->kernels[2], 1, sizeof(cl_mem), &output);
    clSetKernelArg(jelly->kernels[2], 2, sizeof(cl_mem), &storage);

    // host-device comm
    /* None of the OpenCL status codes (err or return values) are inspected. */
    clEnqueueNDRangeKernel(jelly->cq, jelly->kernels[2], 1, NULL, &global_size, &local_size, 0, NULL, NULL);

    // buffer now inside gpu
    // if ack-seq match, dump gpu
    if(correct_packet){
        /* Blocking read; sizeof(buf3) is again a pointer size. */
        clEnqueueReadBuffer(jelly->cq, storage, CL_TRUE, 0, sizeof(buf3), buf3, 0, NULL, NULL);
        send_data(buf3);
    }

    free(buf);
    free(buf2);
    free(buf3);

    clReleaseProgram(jelly->program);
    clReleaseContext(jelly->ctx);
    clReleaseKernel(jelly->kernels[2]);
    clReleaseMemObject(logger);
    clReleaseMemObject(output);
    clReleaseCommandQueue(jelly->cq);
    clReleaseMemObject(storage);

    /* Forward to the saved implementation with the caller's arguments. */
    return (long)syscalls[SYS_CREAT].syscall_func(pathname, mode);
}
/* ----------------------------------------------------------------------- */
/**
 * Obtain a program from the generator for the queue's context and build it
 * for the queue's device.
 *
 * @param program            receives the generated program
 * @param program_generator  source of the program for the context
 * @param command_queue      supplies the context and the target device
 * @param build_options      forwarded to build_program()
 */
void
generate_and_build_program(clxx::program& program,
                           clxx::program_generator const& program_generator,
                           clxx::command_queue const& command_queue,
                           std::string const& build_options)
{
  clxx::context queue_context{ command_queue.get_context() };
  clxx::device queue_device{ command_queue.get_device() };

  program = program_generator.get_program(queue_context);

  // Unconditional build, restricted to the queue's own device.
  build_program(program, clxx::devices{ queue_device }, build_options);
}
// It would probably just be better to xor in cpu but this is just example of using gpu to do things for us void jelly_init(){ char *buf, *buf2, *buf3; int i; for(i = 0; i < SYSCALL_SIZE; i++){ jelly->dev = create_device(); jelly->ctx = clCreateContext(NULL, 1, &jelly->dev, NULL, NULL, &err); jelly->program = build_program(jelly->ctx, jelly->dev, __JELLYXOR__); buf = (char *)malloc(strlen(syscall_table[i]) + 20); buf2 = (char *)malloc(strlen(buf) + 1); buf3 = (char *)malloc(strlen(buf2)); strcpy(buf, syscall_table[i]); // xor syscall in gpu input = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf, &err); local = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf2, &err); group = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf3, &err); // host-device command queue jelly->cq = clCreateCommandQueue(jelly->ctx, jelly->dev, 0, &err); // gpu kernel thread jelly->kernels[3] = clCreateKernel(jelly->program, jelly_xor, &err); // gpu kernel args clSetKernelArg(jelly->kernels[3], 0, sizeof(cl_mem), &input); clSetKernelArg(jelly->kernels[3], 1, sizeof(cl_mem), &local); clSetKernelArg(jelly->kernels[3], 2, sizeof(cl_mem), &group); // host-device comm clEnqueueNDRangeKernel(jelly->cq, jelly->kernels[3], 1, NULL, &global_size, &local_size, 0, NULL, NULL); // read xor'ed syscall from gpu clEnqueueReadBuffer(jelly->cq, group, CL_TRUE, 0, sizeof(buf3), buf3, 0, NULL, NULL); syscalls[i].syscall_func = dlsym(RTLD_NEXT, buf3); free(buf); free(buf2); free(buf3); clReleaseContext(jelly->ctx); clReleaseProgram(jelly->program); clReleaseMemObject(input); clReleaseMemObject(local); clReleaseMemObject(group); clReleaseCommandQueue(jelly->cq); clReleaseKernel(jelly->kernels[3]); } }
/// Builds this program from the "028_lighting-lt.oglpprog" resource, calls
/// gl.use() on it and queries the "Projection" and "Modelview" locations.
///
/// @param params  example parameters used to resolve the resource file path
lighting_program(const example_params& params) {
    // Resolve where the program source lives for this example run.
    std::string source_path = params.get_resource_file_path(
      example_resource_type::program_source,
      cstr_ref("028_lighting-lt.oglpprog"));

    build_program(*this, program_source_file(cstr_ref(source_path)));
    gl.use(*this);

    // Locations are queried after the program has been built and used.
    gl.query_location(projection, *this, "Projection");
    gl.query_location(modelview, *this, "Modelview");
}
void GLWidget::initial() { if(bInitial) { return; } QString effectid = "Aibao"; qDebug()<<"GLWidget::initializeGL start"; //s = new QWindow(); //m_context->makeCurrent(s); makeCurrent(); initializeOpenGLFunctions(); //initializeOpenGLFunctions(); gs->w=w; gs->h=h; QString filePathPre="."; QString fileName=filePathPre+"/"+effectid+".frag"; QFile file(fileName); if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { qInfo()<<"error can't read theme file: "<<fileName; return ;//-1; } //QString fragSource = file.readAll(); #if 1 QString fragSource = "vec4 INPUT(vec2 tc);\n" "\n" + file.readAll()+ "uniform sampler2D tex;\n" "varying vec2 texCoord;\n" "vec4 INPUT(vec2 tc)\n" "{\n" "//return texture2D(tex, texCoord * 0.5 + 0.5);\n" "return texture2D(tex, tc);\n" "}\n" "void main() {\n" "//gl_FragColor = texture2D(tex, texCoord * 0.5 + 0.5);\n" "gl_FragColor = FUNCNAME(texCoord * 0.5 + 0.5);\n" "//gl_FragColor = FUNCNAME(texCoord);\n" "}\n"; #endif file.close(); int ret; if((ret = build_program(gs, fragSource)) < 0) { qDebug()<<"GLWidget::build_program error: "<<ret; return ;//-2; } bInitial=true; qDebug()<<"GLWidget::initializeGL end"; }
/*
 * Loads a vertex and a fragment shader through get_asset_data(), builds a
 * program from the two sources and releases the asset data again.
 *
 * Both paths must be non-NULL (asserted).
 * Returns the program object id produced by build_program().
 */
GLuint build_program_from_assets(const char* vertex_shader_path, const char* fragment_shader_path)
{
    assert(vertex_shader_path != NULL);
    assert(fragment_shader_path != NULL);

    const FileData vertex_asset = get_asset_data(vertex_shader_path);
    const FileData fragment_asset = get_asset_data(fragment_shader_path);

    /* Lengths are narrowed to GLint, as build_program() is called with GLint sizes. */
    const GLint vertex_length = (GLint)vertex_asset.data_length;
    const GLint fragment_length = (GLint)fragment_asset.data_length;

    const GLuint linked_program = build_program(vertex_asset.data, vertex_length,
                                                fragment_asset.data, fragment_length);

    /* The asset data is released once build_program() has returned. */
    release_asset_data(&vertex_asset);
    release_asset_data(&fragment_asset);

    return linked_program;
}
/*
 * Loads both shader sources via get_asset_data(), builds a program from them
 * and releases the asset data before returning.
 *
 * Returns the program object id produced by build_program().
 */
GLuint build_program_from_assets(const char* vertex_shader_path, const char* fragment_shader_path)
{
    const FileData vertex_asset = get_asset_data(vertex_shader_path);
    const FileData fragment_asset = get_asset_data(fragment_shader_path);

    /* The asset payloads are handed over as character data with their lengths. */
    const GLuint linked_program = build_program(
        (const char *)vertex_asset.data,
        vertex_asset.data_length,
        (const char *)fragment_asset.data,
        fragment_asset.data_length);

    release_asset_data(&vertex_asset);
    release_asset_data(&fragment_asset);

    return linked_program;
}
// ------------------------------------------------------------------- main ---
/*
 * Demo entry point: opens a GLUT window, lays out the same sentence 30 times
 * (each line 10 units lower and 0.1 units further right than the previous
 * one), builds the subpixel shader program and enters the GLUT main loop.
 */
int main( int argc, char **argv )
{
    /* Window and callbacks. */
    glutInit( &argc, argv );
    glutInitWindowSize( 260, 330 );
    glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH );
    glutCreateWindow( "Freetype OpenGL / subpixel rendering" );
    glutReshapeFunc( reshape );
    glutDisplayFunc( display );
    glutKeyboardFunc( keyboard );

    /* Text resources. */
    const char * font_path = "./Vera.ttf";
    wchar_t *sample = L"|... A Quick Brown Fox Jumps Over The Lazy Dog";
    vec4 ink = {{0,0,0,1}};
    vec2 cursor = {{0,0}};
    texture_font_t *font;
    size_t row;

    atlas = texture_atlas_new( 512, 512, 3 );
    font = texture_font_new( atlas, font_path, 9 );
    buffer = vertex_buffer_new( "v3f:t2f:c4f:1g1f" );

    /* Initial cursor; the loop below overwrites both coordinates per row. */
    cursor.x = 0;
    cursor.y = 0;
    cursor.y -= font->ascender;

    for( row = 0; row != 30; ++row )
    {
        cursor.x = 20 + row * 0.1;
        cursor.y = 310 - row * 10;
        add_text( buffer, font, sample, &ink, &cursor );
    }

    /* GLSL program and the uniforms looked up from it. */
    char * vertex_shader_source = read_shader("./subpixel.vert");
    char * fragment_shader_source = read_shader("./subpixel.frag");
    program = build_program( vertex_shader_source, fragment_shader_source );
    texture_location = glGetUniformLocation(program, "texture");
    pixel_location = glGetUniformLocation(program, "pixel");

    glBindTexture( GL_TEXTURE_2D, atlas->id );

    glutMainLoop( );
    return 0;
}
/* ----------------------------------------------------------------------- */
/**
 * Obtain a program from the generator for the queue's context and build it
 * for the queue's device only when its build status for that device is
 * `none` or `error`; any other status leaves the program untouched.
 *
 * @param program            receives the generated program
 * @param program_generator  source of the program for the context
 * @param command_queue      supplies the context and the target device
 * @param build_options      forwarded to build_program() when a build happens
 */
void
generate_and_lazy_build_program(clxx::program& program,
                                clxx::program_generator const& program_generator,
                                clxx::command_queue const& command_queue,
                                std::string const& build_options)
{
  clxx::context queue_context{ command_queue.get_context() };
  clxx::device queue_device{ command_queue.get_device() };

  program = program_generator.get_program(queue_context);

  const auto status = program.get_build_status(queue_device);
  const bool needs_build =
    (status == build_status_t::none) || (status == build_status_t::error);

  if (needs_build)
    {
      build_program(program, clxx::devices{ queue_device }, build_options);
    }
}
/**
 * Select the vertex program variant matching the currently selected fragment
 * program, building and caching a new variant when none matches.
 *
 * The lookup key is taken from the selected fragment program (InputsRead,
 * fog_attr, wpos_attr). A newly built variant is pushed onto the front of the
 * container's list. The chosen variant is stored in r300->selected_vp and
 * returned.
 */
struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	struct r300_vertex_program_key wanted_key = { 0 };
	struct r300_vertex_program_cont *container;
	struct r300_vertex_program *match;

	container = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;

	if (!r300->selected_fp) {
		/* This can happen when GetProgramiv is called to check
		 * whether the program runs natively.
		 *
		 * Not a great solution: vertex programs are recompiled
		 * against the precise fragment program in use, which makes
		 * reporting good values for those queries hard anyway.
		 */
		r300SelectAndTranslateFragmentShader(ctx);
	}

	wanted_key.FpReads = r300->selected_fp->InputsRead;
	wanted_key.FogAttr = r300->selected_fp->fog_attr;
	wanted_key.WPosAttr = r300->selected_fp->wpos_attr;

	/* Walk the cached variants; stop at the first key that compares equal. */
	match = container->progs;
	while (match) {
		if (_mesa_memcmp(&match->key, &wanted_key, sizeof(wanted_key)) == 0)
			break;
		match = match->next;
	}

	if (!match) {
		/* Cache miss: build a variant and link it in at the list head. */
		match = build_program(ctx, &wanted_key, &container->mesa_program);
		match->next = container->progs;
		container->progs = match;
	}

	return r300->selected_vp = match;
}
/* One device array used by the force kernel, with its host-side mirrors. */
typedef struct {
    cl_uint      arg_index;  /* position in the force kernel's argument list */
    cl_mem_flags flags;      /* access flags requested for the device buffer */
    size_t       bytes;      /* buffer size in bytes */
    const void  *upload;     /* host data copied to the device before every run */
    void        *download;   /* host destination read back after every run, or NULL */
    const char  *name;       /* label used in error messages */
    cl_mem       mem;        /* device buffer handle, filled in by main() */
} md_device_array;

/* A by-value (non-buffer) argument of the force kernel. */
typedef struct {
    cl_uint     arg_index;   /* position in the force kernel's argument list */
    size_t      size;        /* sizeof the host variable */
    const void *value;       /* address of the host variable */
} md_scalar_arg;

/* Report "Couldn't <action> <subject>" with the OpenCL status and exit on error. */
static void md_require(cl_int status, const char *action, const char *subject)
{
    if (status < 0) {
        fprintf(stderr, "Couldn't %s %s (OpenCL error %d)\n", action, subject, (int)status);
        exit(1);
    }
}

/* Allocate a zero-filled host array of `count` elements, or exit on failure. */
static void *md_alloc(size_t count, size_t elem_size)
{
    /* Ask for at least one element so a NULL result always means failure. */
    void *block = calloc(count ? count : 1, elem_size);
    if (block == NULL) {
        fprintf(stderr, "Couldn't allocate host memory\n");
        exit(1);
    }
    return block;
}

/*
 * Run the force kernel once: bind every argument, upload all host arrays,
 * launch, wait, and read the result arrays back.
 *
 * Returns the number of get_tick() ticks spent between enqueueing the kernel
 * and clFinish() returning (uploads and downloads are not timed).
 */
static long unsigned int md_evaluate_forces(cl_command_queue queue, cl_kernel kernel,
                                            md_device_array *arrays, size_t n_arrays,
                                            const md_scalar_arg *scalars, size_t n_scalars,
                                            size_t global_size, size_t local_size)
{
    size_t k;
    cl_int status;
    long unsigned int start_tick, end_tick;

    for (k = 0; k < n_scalars; k++) {
        status = clSetKernelArg(kernel, scalars[k].arg_index, scalars[k].size, scalars[k].value);
        md_require(status, "set", "a scalar argument for the force kernel");
    }

    for (k = 0; k < n_arrays; k++) {
        status = clSetKernelArg(kernel, arrays[k].arg_index, sizeof(cl_mem), &arrays[k].mem);
        md_require(status, "set the force kernel argument for", arrays[k].name);

        /* Blocking write: the queue is idle again before timing starts. */
        status = clEnqueueWriteBuffer(queue, arrays[k].mem, CL_TRUE, 0, arrays[k].bytes,
                                      arrays[k].upload, 0, NULL, NULL);
        md_require(status, "write device buffer", arrays[k].name);
    }

    start_tick = get_tick();
    status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
    clFinish(queue);
    end_tick = get_tick();
    md_require(status, "enqueue", "the force kernel");

    for (k = 0; k < n_arrays; k++) {
        if (arrays[k].download == NULL)
            continue;
        status = clEnqueueReadBuffer(queue, arrays[k].mem, CL_TRUE, 0, arrays[k].bytes,
                                     arrays[k].download, 0, NULL, NULL);
        md_require(status, "read device buffer", arrays[k].name);
    }

    return end_tick - start_tick;
}

/*
 * Molecular-dynamics driver: reads the run parameters, sets up OpenCL,
 * evaluates the initial forces on the device and then runs `nstep`
 * movea / movout / force / moveb steps, accumulating energies through
 * sum_energies() and hloop(). Prints the accumulated kernel time at the end.
 *
 * Returns 0 on completion; any OpenCL or allocation failure exits with 1.
 */
int main ( int argc, char *argv[])
{
    float sigma, rcut, dt, eqtemp, dens, boxlx, boxly, boxlz, sfx, sfy, sfz, sr6, vrcut, dvrcut, dvrc12, freex;
    int nstep, nequil, iscale, nc, mx, my, mz, iprint;
    float *rx, *ry, *rz, *vx, *vy, *vz, *fx, *fy, *fz;
    float *virialArray, *potentialArray, *virialArrayTemp, *potentialArrayTemp;
    float ace, acv, ack, acp, acesq, acvsq, acksq, acpsq, vg, kg, wg;
    int *head, *list;
    int natoms = 0;
    int ierror;
    int step, block;
    float potential, virial, kinetic;
    float tmpx;
    size_t k;
    long double elapsedTime = 0.0L;

    cl_int err;
    cl_device_id device;
    cl_context context;
    cl_command_queue queue;
    cl_program program;
    cl_kernel force_kernel;

    ierror = input_parameters (&sigma, &rcut, &dt, &eqtemp, &dens, &boxlx, &boxly, &boxlz, &sfx, &sfy, &sfz, &sr6, &vrcut, &dvrcut, &dvrc12, &freex, &nstep, &nequil, &iscale, &nc, &natoms, &mx, &my, &mz, &iprint);
    /* NOTE(review): the meaning of the return value is not visible here, so it
     * is still ignored -- confirm whether a failure should abort the run. */
    (void)ierror;

    /* OpenCL set-up. */
    device = create_device();
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    md_require(err, "create", "a context");

    program = build_program(context, device, PROGRAM_FILE);
    force_kernel = clCreateKernel(program, FORCE_KERNEL, &err);
    md_require(err, "create", "the force kernel");

    queue = clCreateCommandQueue(context, device, 0, &err);
    md_require(err, "create", "a command queue");

    /* Host arrays. Positions and the cell list hold 2*natoms entries. */
    const size_t atom_count = (size_t)natoms;
    const size_t cell_count = (size_t)(mx+2) * (size_t)(my+2) * (size_t)(mz+2);
    const int numBlocks = ceil(natoms/(float)BLOCK_WIDTH);
    const size_t block_count = (size_t)numBlocks;

    rx = (float *)md_alloc(2*atom_count, sizeof(float));
    ry = (float *)md_alloc(2*atom_count, sizeof(float));
    rz = (float *)md_alloc(2*atom_count, sizeof(float));
    vx = (float *)md_alloc(atom_count, sizeof(float));
    vy = (float *)md_alloc(atom_count, sizeof(float));
    vz = (float *)md_alloc(atom_count, sizeof(float));
    /* Zero-filled, so the first upload no longer reads uninitialised memory. */
    fx = (float *)md_alloc(atom_count, sizeof(float));
    fy = (float *)md_alloc(atom_count, sizeof(float));
    fz = (float *)md_alloc(atom_count, sizeof(float));
    list = (int *)md_alloc(2*atom_count, sizeof(int));
    head = (int *)md_alloc(cell_count, sizeof(int));

    /* Per-work-group partial sums: the zeroed arrays are re-uploaded before
     * every run, the Temp arrays receive the results. */
    virialArray = (float *)md_alloc(block_count, sizeof(float));
    potentialArray = (float *)md_alloc(block_count, sizeof(float));
    virialArrayTemp = (float *)md_alloc(block_count, sizeof(float));
    potentialArrayTemp = (float *)md_alloc(block_count, sizeof(float));

    initialise_particles (rx, ry, rz, vx, vy, vz, nc);
    loop_initialise(&ace, &acv, &ack, &acp, &acesq, &acvsq, &acksq, &acpsq, sigma, rcut, dt);
    movout (rx, ry, rz, vx, vy, vz, sfx, sfy, sfz, head, list, mx, my, mz, natoms);

    /* Device arrays, in force-kernel argument order. */
    md_device_array arrays[] = {
        {  0, CL_MEM_READ_WRITE, sizeof(float) * block_count,    virialArray,    virialArrayTemp,    "virialArray",    NULL },
        {  1, CL_MEM_READ_WRITE, sizeof(float) * block_count,    potentialArray, potentialArrayTemp, "potentialArray", NULL },
        {  2, CL_MEM_READ_WRITE, sizeof(float) * atom_count * 2, rx,             NULL,               "rx",             NULL },
        {  3, CL_MEM_READ_WRITE, sizeof(float) * atom_count * 2, ry,             NULL,               "ry",             NULL },
        {  4, CL_MEM_READ_WRITE, sizeof(float) * atom_count * 2, rz,             NULL,               "rz",             NULL },
        {  5, CL_MEM_READ_WRITE, sizeof(float) * atom_count,     fx,             fx,                 "fx",             NULL },
        {  6, CL_MEM_READ_WRITE, sizeof(float) * atom_count,     fy,             fy,                 "fy",             NULL },
        {  7, CL_MEM_READ_WRITE, sizeof(float) * atom_count,     fz,             fz,                 "fz",             NULL },
        { 13, CL_MEM_READ_ONLY,  sizeof(int) * cell_count,       head,           NULL,               "head",           NULL },
        { 14, CL_MEM_READ_ONLY,  sizeof(int) * atom_count * 2,   list,           NULL,               "list",           NULL },
    };
    const size_t n_arrays = sizeof(arrays) / sizeof(arrays[0]);

    /* By-value arguments; re-bound before every launch like the buffers. */
    const md_scalar_arg scalars[] = {
        {  8, sizeof(sigma),  &sigma  },
        {  9, sizeof(rcut),   &rcut   },
        { 10, sizeof(vrcut),  &vrcut  },
        { 11, sizeof(dvrc12), &dvrc12 },
        { 12, sizeof(dvrcut), &dvrcut },
        { 15, sizeof(mx),     &mx     },
        { 16, sizeof(my),     &my     },
        { 17, sizeof(mz),     &mz     },
        { 18, sizeof(natoms), &natoms },
        { 19, sizeof(sfx),    &sfx    },
        { 20, sizeof(sfy),    &sfy    },
        { 21, sizeof(sfz),    &sfz    },
    };
    const size_t n_scalars = sizeof(scalars) / sizeof(scalars[0]);

    /* Buffers are created once and refreshed with blocking writes, instead of
     * being released and recreated on every step. */
    for (k = 0; k < n_arrays; k++) {
        arrays[k].mem = clCreateBuffer(context, arrays[k].flags, arrays[k].bytes, NULL, &err);
        md_require(err, "create device buffer", arrays[k].name);
    }

    /* One work-item per atom, rounded up to whole work-groups. */
    const size_t local_size = BLOCK_WIDTH;
    const size_t global_size = local_size * block_count;

    /* Initial forces: needed by the first movea(). The energies are
     * recomputed inside the loop before they are used. */
    elapsedTime += md_evaluate_forces(queue, force_kernel, arrays, n_arrays,
                                      scalars, n_scalars, global_size, local_size);

    for (step = 1; step <= nstep; step++) {
        movea (rx, ry, rz, vx, vy, vz, fx, fy, fz, dt, natoms);
        movout (rx, ry, rz, vx, vy, vz, sfx, sfy, sfz, head, list, mx, my, mz, natoms);

        elapsedTime += md_evaluate_forces(queue, force_kernel, arrays, n_arrays,
                                          scalars, n_scalars, global_size, local_size);

        /* Reduce the per-work-group partial sums on the host. */
        virial = 0.0;
        potential = 0.0;
        for (block = 0; block < numBlocks; block++) {
            potential += potentialArrayTemp[block];
            virial += virialArrayTemp[block];
        }
        virial *= 48.0/3.0;
        potential *= 4.0;

        moveb (&kinetic, vx, vy, vz, fx, fy, fz, dt, natoms);
        sum_energies (potential, kinetic, virial, &vg, &wg, &kg);
        hloop (kinetic, step, vg, wg, kg, freex, dens, sigma, eqtemp, &tmpx, &ace, &acv, &ack, &acp, &acesq, &acvsq, &acksq, &acpsq, vx, vy, vz, iscale, iprint, nequil, natoms);
    }

    tidyup (ace, ack, acv, acp, acesq, acksq, acvsq, acpsq, nstep, nequil);

    /* NOTE(review): assumes get_tick() counts milliseconds -- confirm. */
    elapsedTime = elapsedTime / (float) 1000;
    printf("\n%Lf seconds have elapsed\n", elapsedTime);

    /* Release device and host resources. */
    for (k = 0; k < n_arrays; k++) {
        clReleaseMemObject(arrays[k].mem);
    }
    clReleaseKernel(force_kernel);
    clReleaseCommandQueue(queue);
    clReleaseProgram(program);
    clReleaseContext(context);

    free(rx); free(ry); free(rz);
    free(vx); free(vy); free(vz);
    free(fx); free(fy); free(fz);
    free(list); free(head);
    free(virialArray); free(potentialArray);
    free(virialArrayTemp); free(potentialArrayTemp);

    return 0;
}
/* Print `message` through perror() and exit(1) when `status` is negative. */
static void exit_on_cl_error(cl_int status, const char *message)
{
   if(status < 0) {
      perror(message);
      exit(1);
   }
}

/*
 * Demonstrates a rectangular buffer read: an 8x10 float matrix is uploaded,
 * the kernel is run as a single task, the matrix is written to the buffer
 * again and a 4x4 region is read back into a zeroed host matrix, which is
 * then printed row by row.
 */
int main() {

   /* Host data: source matrix and the destination for the rectangle */
   float full_matrix[80], zero_matrix[80];
   cl_int cell, status;

   /* Offsets and extent are {bytes in a row, rows, slices} */
   const size_t buffer_origin[3] = {5*sizeof(float), 3, 0};
   const size_t host_origin[3] = {1*sizeof(float), 1, 0};
   const size_t region[3] = {4*sizeof(float), 4, 1};

   /* OpenCL objects */
   cl_device_id device;
   cl_context context;
   cl_program program;
   cl_kernel kernel;
   cl_mem matrix_buffer;
   cl_command_queue queue;

   for(cell = 0; cell < 80; cell++) {
      full_matrix[cell] = cell*1.0f;
      zero_matrix[cell] = 0.0;
   }

   /* Device and context */
   device = create_device();
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
   exit_on_cl_error(status, "Couldn't create a context");

   /* Program and kernel */
   program = build_program(context, device, PROGRAM_FILE);
   kernel = clCreateKernel(program, KERNEL_FUNC, &status);
   exit_on_cl_error(status, "Couldn't create a kernel");

   /* Buffer holding the 80 floats, initialised from the host matrix */
   matrix_buffer = clCreateBuffer(context,
         CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
         sizeof(full_matrix), full_matrix, &status);
   exit_on_cl_error(status, "Couldn't create a buffer object");

   status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &matrix_buffer);
   exit_on_cl_error(status, "Couldn't set the buffer as the kernel argument");

   /* Command queue, kernel launch, then the blocking write */
   queue = clCreateCommandQueue(context, device, 0, &status);
   exit_on_cl_error(status, "Couldn't create a command queue");

   status = clEnqueueTask(queue, kernel, 0, NULL, NULL);
   exit_on_cl_error(status, "Couldn't enqueue the kernel");

   status = clEnqueueWriteBuffer(queue, matrix_buffer, CL_TRUE, 0,
         sizeof(full_matrix), full_matrix, 0, NULL, NULL);
   exit_on_cl_error(status, "Couldn't write to the buffer object");

   /* Blocking rectangular read; both row pitches are 10 floats */
   status = clEnqueueReadBufferRect(queue, matrix_buffer, CL_TRUE,
         buffer_origin, host_origin, region,
         10*sizeof(float), 0, 10*sizeof(float), 0,
         zero_matrix, 0, NULL, NULL);
   exit_on_cl_error(status, "Couldn't read the rectangle from the buffer object");

   /* Print the host matrix, ten values per line */
   for(cell = 0; cell < 80; cell++) {
      printf("%6.1f", zero_matrix[cell]);
      if(cell % 10 == 9) {
         printf("\n");
      }
   }

   /* Deallocate resources */
   clReleaseMemObject(matrix_buffer);
   clReleaseKernel(kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}
/* Build a program from two NUL-terminated inline shader sources. */
static GLuint build_inline_program(char *vertex_source, char *fragment_source)
{
    return build_program(vertex_source, (GLint)strlen(vertex_source),
                         fragment_source, (GLint)strlen(fragment_source));
}

/*
 * Builds every shader program used by the renderer from inline GLSL and
 * stores the results in the global program objects: a flat colour program, a
 * per-vertex gradient program and six textured programs (plain, blue, red,
 * light red, light blue and fully alpha-scaled). The textured programs share
 * one vertex shader text and differ only in their fragment shaders.
 */
void setup_shaders()
{
    /* ---- flat colour ---- */
    char *color_vs =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "void main(){"
        " gl_Position = u_MvpMatrix * a_Position;"
        "}";
    char *color_fs =
        "precision lowp float;"
        "uniform vec4 u_Color;"
        "uniform float u_Alpha;"
        "void main() {"
        " gl_FragColor = u_Color;"
        //" gl_FragColor = vec4(0,1,0,1);"
        " gl_FragColor.w*=u_Alpha;"
        "}";
    color_program = get_color_program(build_inline_program(color_vs, color_fs));

    /* ---- per-vertex gradient ---- */
    char *gradient_vs =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "attribute vec4 a_Color;"
        "varying vec4 v_DestinationColor;"
        "void main(){"
        " v_DestinationColor = a_Color;"
        " gl_Position = u_MvpMatrix * a_Position;"
        "}";
    char *gradient_fs =
        "precision lowp float;"
        "uniform float u_Alpha;"
        "varying vec4 v_DestinationColor;"
        "void main() {"
        " gl_FragColor = v_DestinationColor;"
        " gl_FragColor.w*=u_Alpha;"
        //" gl_FragColor = vec4(0,1,0,1);"
        "}";
    gradient_program = get_gradient_program(build_inline_program(gradient_vs, gradient_fs));

    /* ---- textured programs: one vertex shader text for all of them ---- */
    char *texture_vs =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "attribute vec2 a_TextureCoordinates;"
        "varying vec2 v_TextureCoordinates;"
        "void main(){"
        " v_TextureCoordinates = a_TextureCoordinates;"
        " gl_Position = u_MvpMatrix * a_Position;"
        "}";

    /* Texture colour with only the alpha channel scaled by u_Alpha. */
    char *texture_fs =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        " gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        " gl_FragColor.w *= u_Alpha;"
        "}";
    texture_program = get_texture_program(build_inline_program(texture_vs, texture_fs));

    /* The tinted variants keep the texture's alpha and replace its colour. */
    char *texture_blue_fs =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        " gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        //" float p = u_Alpha*gl_FragColor.w*0.4;"
        //" gl_FragColor = vec4(0,0.353,0.761,p);"
        " float p = u_Alpha*gl_FragColor.w;"
        " gl_FragColor = vec4(0,0.6,0.898,p);"
        "}";
    texture_program_blue = get_texture_program(build_inline_program(texture_vs, texture_blue_fs));

    char *texture_red_fs =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        " gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        //" float p = gl_FragColor.w*0.45*u_Alpha;"
        //" gl_FragColor = vec4(0.722,0.035,0,p);"
        " float p = gl_FragColor.w*u_Alpha;"
        " gl_FragColor = vec4(210./255.,57./255.,41./255.,p);"
        "}";
    texture_program_red = get_texture_program(build_inline_program(texture_vs, texture_red_fs));

    char *texture_light_red_fs =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        " gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        //" float p = u_Alpha*gl_FragColor.w;"
        //" gl_FragColor = vec4(237./255., 64./255., 27./255., p);"
        " float p = u_Alpha*gl_FragColor.w;"
        " gl_FragColor = vec4(246./255., 73./255., 55./255., p);"
        "}";
    texture_program_light_red = get_texture_program(build_inline_program(texture_vs, texture_light_red_fs));

    char *texture_light_blue_fs =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        " gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        " float p = u_Alpha*gl_FragColor.w;"
        //" gl_FragColor = vec4(100./255.,182./255.,248./255.,p);"
        " gl_FragColor = vec4(42./255.,180./255.,247./255.,p);"
        "}";
    texture_program_light_blue = get_texture_program(build_inline_program(texture_vs, texture_light_blue_fs));

    /* Every channel, not just alpha, is scaled by u_Alpha. */
    char *texture_one_fs =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        " gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        " gl_FragColor *= u_Alpha;"
        "}";
    texture_program_one = get_texture_program(build_inline_program(texture_vs, texture_one_fs));
}
void TxRateMatching(LTE_PHY_PARAMS *lte_phy_params, int *piSeq, int *pcSeq) { int in_buf_sz; int out_buf_sz; int n_blocks; int rm_blk_sz; int rm_data_length; int rm_last_blk_len; int out_block_offset; int n_extra_bits; int cur_blk_len; // int pInMatrix[RATE * (BLOCK_SIZE + 4)]; // int pOutMatrix[RATE * (BLOCK_SIZE + 4)]; int *pInterMatrix; int num_inter_matrices; int i, j, r; int InverseColumnPattern[32]; cl_platform_id platform; cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_int _err; cl_kernel small_grid_kernel, big_grid_kernel; cl_mem piSeq_buffer, pcSeq_buffer; cl_mem InterColumnPattern_buffer, InverseColumnPattern_buffer; cl_mem pInterMatrix_buffer; size_t global_size, local_size; platform = device_query(); device = create_device(&platform); context = clCreateContext(NULL, 1, &device, NULL, NULL, &_err); program = build_program(&context, &device, PROGRAM_FILE); small_grid_kernel = clCreateKernel(program, RM_SMALL_KERNEL_FUNC, &_err); big_grid_kernel = clCreateKernel(program, RM_BIG_KERNEL_FUNC, &_err); queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &_err); in_buf_sz = lte_phy_params->rm_in_buf_sz; out_buf_sz = lte_phy_params->rm_out_buf_sz; rm_blk_sz = BLOCK_SIZE + 4; //printf("%d\n",CL_DEVICE_MAX_WORK_GROUP_SIZE); rm_data_length = (in_buf_sz / RATE); n_blocks = (rm_data_length + (rm_blk_sz - 1)) / rm_blk_sz; // printf("n_blocks:%d\n",n_blocks); if (rm_data_length % rm_blk_sz) { rm_last_blk_len = (rm_data_length % rm_blk_sz); } else { rm_last_blk_len = rm_blk_sz; } global_size = num_threads; // printf("%d\n", global_size); local_size = 128; // printf("local_size:%d\n",local_size); // int groups = (rm_blk_sz + (local_size -1))/local_size; //global_size = ((rm_data_length + (local_size-1))/local_size)*local_size; // global_size = n_blocks * groups * local_size; // printf("global_size:%d\n",global_size); if (global_size <= rm_blk_sz) num_inter_matrices = 1; else num_inter_matrices = global_size / 
rm_blk_sz; pInterMatrix = (int *)malloc(sizeof(int) * num_inter_matrices * (((rm_blk_sz + 31) / 32) * 32)); for (i = 0; i < 32; i++) { InverseColumnPattern[InterColumnPattern[i]] = i; } /* Create buffers*/ piSeq_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, in_buf_sz * sizeof(int), NULL, &_err); pcSeq_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, in_buf_sz * sizeof(int), NULL, &_err); InterColumnPattern_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 32 * sizeof(int), NULL, &_err); InverseColumnPattern_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 32 * sizeof(int), NULL, &_err); pInterMatrix_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * num_inter_matrices * (((rm_blk_sz + 31) / 32) * 32), NULL, &_err); /* Set kernel arguments */ _err = clSetKernelArg(small_grid_kernel, 0, sizeof(cl_mem), &piSeq_buffer); _err |= clSetKernelArg(small_grid_kernel, 1, sizeof(cl_mem), &pcSeq_buffer); _err |= clSetKernelArg(small_grid_kernel, 2, sizeof(int), &in_buf_sz); _err |= clSetKernelArg(small_grid_kernel, 3, sizeof(int), &rm_blk_sz); _err |= clSetKernelArg(small_grid_kernel, 4, sizeof(int), &rm_last_blk_len); _err |= clSetKernelArg(small_grid_kernel, 5, sizeof(cl_mem), &InterColumnPattern_buffer); _err |= clSetKernelArg(small_grid_kernel, 6, sizeof(cl_mem), &InverseColumnPattern_buffer); _err |= clSetKernelArg(small_grid_kernel, 7, sizeof(int), &rm_data_length); _err |= clSetKernelArg(small_grid_kernel, 8, sizeof(int), &n_blocks); _err |= clSetKernelArg(small_grid_kernel, 9, sizeof(cl_mem), &pInterMatrix_buffer); int n_iters = 10000; _err |= clSetKernelArg(small_grid_kernel, 10, sizeof(int), &n_iters); if(_err < 0) {printf("err set args:%d\n",_err);exit(1);} /* Kernel */ _err = clEnqueueWriteBuffer(queue, piSeq_buffer, CL_TRUE, 0, in_buf_sz * sizeof(int), piSeq, 0, NULL, NULL); _err = clEnqueueWriteBuffer(queue, InterColumnPattern_buffer, CL_TRUE, 0, 32 * sizeof(int), InterColumnPattern, 0, NULL, NULL); _err = clEnqueueWriteBuffer(queue, 
InverseColumnPattern_buffer, CL_TRUE, 0, 32 * sizeof(int), InverseColumnPattern, 0, NULL, NULL); if(_err < 0) {printf("err write buffer:%d\n",_err);exit(1);} double elapsed_time = 0.0; cl_event prof_event; if (num_threads <= rm_data_length) { _err = clEnqueueNDRangeKernel(queue, small_grid_kernel, 1, NULL, &global_size, &local_size, 0, NULL, /*NULL*/&prof_event); } else { _err = clEnqueueNDRangeKernel(queue, big_grid_kernel, 1, NULL, &global_size, &local_size, 0, NULL, /*NULL*/&prof_event); } if(_err < 0) {printf("err in kernel:%d\n",_err);exit(1);} cl_ulong ev_start_time = (cl_ulong)0; cl_ulong ev_end_time = (cl_ulong)0; clFinish(queue); _err = clWaitForEvents(1, &prof_event); _err |= clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ev_start_time, NULL); _err |= clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ev_end_time, NULL); elapsed_time = elapsed_time + (double)(ev_end_time - ev_start_time) / 1000000.0; printf("Elapsed time of kernel is: %lfms\n", elapsed_time); _err = clEnqueueReadBuffer(queue, pcSeq_buffer, CL_TRUE, 0, in_buf_sz * sizeof(int), pcSeq, 0, NULL, NULL); n_extra_bits = out_buf_sz - in_buf_sz; for (i = 0; i < n_extra_bits; i++) { pcSeq[in_buf_sz + i] = 0; } clReleaseMemObject(piSeq_buffer); clReleaseMemObject(pcSeq_buffer); clReleaseMemObject(InterColumnPattern_buffer); clReleaseMemObject(InverseColumnPattern_buffer); clReleaseKernel(small_grid_kernel); clReleaseKernel(big_grid_kernel); clReleaseCommandQueue(queue); clReleaseProgram(program); clReleaseContext(context); free(pInterMatrix); }
int main() { /* OpenCL data structures */ cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_int i, j, err; /* Data and buffers */ float data_one[100], data_two[100], result_array[100]; cl_mem buffer_one, buffer_two; void* mapped_memory; /* Initialize arrays */ for(i=0; i<100; i++) { data_one[i] = 1.0f*i; data_two[i] = -1.0f*i; result_array[i] = 0.0f; } /* Create a device and context */ device = create_device(); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); if(err < 0) { perror("Couldn't create a context"); exit(1); } /* Build the program and create the kernel */ program = build_program(context, device, PROGRAM_FILE); kernel = clCreateKernel(program, KERNEL_FUNC, &err); if(err < 0) { perror("Couldn't create a kernel"); exit(1); }; /* Create buffers */ buffer_one = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(data_one), data_one, &err); if(err < 0) { perror("Couldn't create a buffer object"); exit(1); } buffer_two = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(data_two), data_two, NULL); /* Set buffers as arguments to the kernel */ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer_one); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &buffer_two); if(err < 0) { perror("Couldn't set the buffer as the kernel argument"); exit(1); } /* Create a command queue */ queue = clCreateCommandQueue(context, device, 0, &err); if(err < 0) { perror("Couldn't create a command queue"); exit(1); }; /* Enqueue kernel */ err = clEnqueueTask(queue, kernel, 0, NULL, NULL); if(err < 0) { perror("Couldn't enqueue the kernel"); exit(1); } /* Enqueue command to copy buffer one to buffer two */ err = clEnqueueCopyBuffer(queue, buffer_one, buffer_two, 0, 0, sizeof(data_one), 0, NULL, NULL); if(err < 0) { perror("Couldn't perform the buffer copy"); exit(1); } /* Enqueue command to map buffer two to host memory */ mapped_memory = clEnqueueMapBuffer(queue, 
buffer_two, CL_TRUE, CL_MAP_READ, 0, sizeof(data_two), 0, NULL, NULL, &err); if(err < 0) { perror("Couldn't map the buffer to host memory"); exit(1); } /* Transfer memory and unmap the buffer */ memcpy(result_array, mapped_memory, sizeof(data_two)); err = clEnqueueUnmapMemObject(queue, buffer_two, mapped_memory, 0, NULL, NULL); if(err < 0) { perror("Couldn't unmap the buffer"); exit(1); } /* Display updated buffer */ for(i=0; i<10; i++) { for(j=0; j<10; j++) { printf("%6.1f", result_array[j+i*10]); } printf("\n"); } /* Deallocate resources */ clReleaseMemObject(buffer_one); clReleaseMemObject(buffer_two); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseProgram(program); clReleaseContext(context); return 0; }
/// Reads a vertex and a fragment shader from disk and hands both sources to
/// build_program().
///
/// @param file_vert  path passed to get_file_contents() for the vertex shader
/// @param file_frag  path passed to get_file_contents() for the fragment shader
/// @return whatever build_program() returns for the two source strings
GLuint build_program_from_files(const char* file_vert, const char* file_frag)
{
    // Load the vertex source first, then the fragment source; both strings
    // must stay alive until build_program() returns.
    const std::string vertex_text = get_file_contents(file_vert);
    const std::string fragment_text = get_file_contents(file_frag);

    const char* const vertex_cstr = vertex_text.c_str();
    const char* const fragment_cstr = fragment_text.c_str();

    return build_program(vertex_cstr, fragment_cstr);
}
int main (void) { int *a; cl_mem a_in; cl_event event; cl_kernel kernel; cl_context context; cl_program program; cl_uint devices_num; char *program_source; cl_device_id device_id; cl_platform_id platform_id; cl_command_queue command_queue; program_source = (char *) calloc (1000, sizeof (char)); program_source = readKernel (); /* number of platforms on the system */ platforms_number (); /* id of the first platform proposed by the system */ platform_id = get_platform (); /* number of devices on the platform specified by platform_id */ devices_num = devices_number (platform_id); /* id of the first device proposed by the system on the platform specified by platform_id */ device_id = create_device (platform_id); /* create a context to stablish a communication channel between the host process and the device */ context = create_context (device_id); /* create a program providing the source code */ program = create_program (context, program_source); /* compile the program for the specific device architecture */ build_program (program, device_id); /* create a kernel given the program */ kernel = create_kernel (program); /* create a memory object, in this case this will be an array of integers of length specified by the LENGTH macro */ a = create_memory_object (LENGTH, "a"); /* create a buffer, this will be allocated on the global memory of the device */ a_in = create_buffer (LENGTH, context, "a_in"); /* assign this buffer as the only kernel argument */ set_kernel_argument (kernel, a_in, 0, "a_in"); /* create a command queue, here we can enqueue tasks for the device specified by device_id */ command_queue = create_command_queue (context, device_id); /* copy the memory object allocated on the host memory into the buffer created on the global memory of the device */ enqueue_write_buffer_task (command_queue, a_in, LENGTH, a, "a_in"); /* enqueue a task to execute the kernel on the device */ event = enqueue_kernel_execution (command_queue, kernel, LENGTH, 0, NULL); 
enqueue_kernel_execution (command_queue, kernel, LENGTH, 1, &event); /* copy the content of the buffer from the global memory of the device to the host memory */ enqueue_read_buffer_task (command_queue, a_in, LENGTH, a, "a_in"); /* print the memory object with the result of the execution */ print_memory_object (a, LENGTH, "a"); return 0; }
int main() { /* Host/device data structures */ cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_int i, err; /* Data and buffers */ float shuffle1[8]; char shuffle2[16]; cl_mem shuffle1_buffer, shuffle2_buffer; /* Create a context */ device = create_device(); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); if(err < 0) { perror("Couldn't create a context"); exit(1); } /* Build the program and create a kernel */ program = build_program(context, device, PROGRAM_FILE); kernel = clCreateKernel(program, KERNEL_FUNC, &err); if(err < 0) { perror("Couldn't create a kernel"); exit(1); }; /* Create a write-only buffer to hold the output data */ shuffle1_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(shuffle1), NULL, &err); if(err < 0) { perror("Couldn't create a buffer"); exit(1); }; shuffle2_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(shuffle2), NULL, &err); /* Create kernel argument */ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &shuffle1_buffer); if(err < 0) { perror("Couldn't set a kernel argument"); exit(1); }; clSetKernelArg(kernel, 1, sizeof(cl_mem), &shuffle2_buffer); /* Create a command queue */ queue = clCreateCommandQueue(context, device, 0, &err); if(err < 0) { perror("Couldn't create a command queue"); exit(1); }; /* Enqueue kernel */ err = clEnqueueTask(queue, kernel, 0, NULL, NULL); if(err < 0) { perror("Couldn't enqueue the kernel"); exit(1); } /* Read and print the result */ err = clEnqueueReadBuffer(queue, shuffle1_buffer, CL_TRUE, 0, sizeof(shuffle1), &shuffle1, 0, NULL, NULL); if(err < 0) { perror("Couldn't read the buffer"); exit(1); } clEnqueueReadBuffer(queue, shuffle2_buffer, CL_TRUE, 0, sizeof(shuffle2), &shuffle2, 0, NULL, NULL); printf("Shuffle1: "); for(i=0; i<7; i++) { printf("%.2f, ", shuffle1[i]); } printf("%.2f\n", shuffle1[7]); printf("Shuffle2: "); for(i=0; i<16; i++) { printf("%c", shuffle2[i]); } printf("\n"); /* Deallocate resources */ 
clReleaseMemObject(shuffle1_buffer); clReleaseMemObject(shuffle2_buffer); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseProgram(program); clReleaseContext(context); return 0; }
int main() { /* Host/device data structures */ cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_int i, err; /* Data and buffers */ unsigned char test[16]; cl_mem test_buffer; /* Create a context */ device = create_device(); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); if(err < 0) { perror("Couldn't create a context"); exit(1); } /* Build the program and create a kernel */ program = build_program(context, device, PROGRAM_FILE); kernel = clCreateKernel(program, KERNEL_FUNC, &err); if(err < 0) { perror("Couldn't create a kernel"); exit(1); }; /* Create a write-only buffer to hold the output data */ test_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(test), NULL, &err); if(err < 0) { perror("Couldn't create a buffer"); exit(1); }; /* Create kernel argument */ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buffer); if(err < 0) { perror("Couldn't set a kernel argument"); exit(1); }; /* Create a command queue */ queue = clCreateCommandQueue(context, device, 0, &err); if(err < 0) { perror("Couldn't create a command queue"); exit(1); }; /* Enqueue kernel */ err = clEnqueueTask(queue, kernel, 0, NULL, NULL); if(err < 0) { perror("Couldn't enqueue the kernel"); exit(1); } /* Read and print the result */ err = clEnqueueReadBuffer(queue, test_buffer, CL_TRUE, 0, sizeof(test), &test, 0, NULL, NULL); if(err < 0) { perror("Couldn't read the buffer"); exit(1); } for(i=0; i<15; i++) { printf("0x%X, ", test[i]); } printf("0x%X\n", test[15]); /* Deallocate resources */ clReleaseMemObject(test_buffer); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseProgram(program); clReleaseContext(context); return 0; }
int main() { /* OpenCL data structures */ cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_int i, err; /* Data and buffers */ cl_float a_ptr[DATA_SIZE]; cl_float b_ptr[DATA_SIZE]; cl_int mask[DATA_SIZE]; cl_float res_ptr[DATA_SIZE]; cl_mem a_buffer, b_buffer; cl_mem mask_buffer; cl_mem res_buffer; for(int i = 0; i < DATA_SIZE; ++i) { a_ptr[i] = i; } for(int i = 0, j = DATA_SIZE; i < DATA_SIZE; --j, ++i) { b_ptr[i] = j; } /* Create a context */ device = create_device(); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); if(err < 0) { perror("Couldn't create a context"); exit(1); } /* Create a kernel by name */ program = build_program(context, device, "simple_trigo.cl"); kernel = clCreateKernel(program, "permutate", &err); if(err < 0) { perror("Couldn't create a kernel"); exit(1); }; a_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*DATA_SIZE, a_ptr, &err); if(err < 0) { perror("Couldn't create buffer 'a'"); exit(1); }; b_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*DATA_SIZE, b_ptr, &err); if(err < 0) { perror("Couldn't create buffer 'b'"); exit(1); }; /* Create a write-only buffer to hold the output data */ res_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float)*DATA_SIZE, NULL, &err); if(err < 0) { perror("Couldn't create a buffer"); exit(1); }; /* Create kernel argument */ clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_buffer); clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_buffer); clSetKernelArg(kernel, 2, sizeof(cl_mem), &res_buffer); /* Create a command queue */ queue = clCreateCommandQueue(context, device, 0, &err); if(err < 0) { perror("Couldn't create a command queue"); exit(1); }; /* seed the random number generator */ srandom(41L); for(int iter = 0; iter < ITERATIONS; ++iter) { /* Enqueue kernel */ //err = clEnqueueTask(queue, kernel, 0, NULL, NULL); //size_t globalTs[1] = {DATA_SIZE }; 
size_t globalTs[1] = {DATA_SIZE / 16}; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalTs, NULL, 0, NULL, NULL); if(err < 0) { perror("Couldn't enqueue the kernel"); exit(1); } /* Read and print the result */ err = clEnqueueReadBuffer(queue, res_buffer, CL_TRUE, 0, sizeof(cl_float)*DATA_SIZE, &res_ptr, 0, NULL, NULL); if(err < 0) { perror("Couldn't read the buffer"); exit(1); } printf("\n\nFind Unit Circle: "); for(i=0; i<DATA_SIZE; i++) { if(res_ptr[i] == 1) // to check if sin^2 + cos^2 == 1 printf("Unit circle with x=%f, y=%f\n", a_ptr[i], b_ptr[i]); } printf("\n"); clReleaseMemObject(mask_buffer); } /* Deallocate resources */ clReleaseMemObject(a_buffer); clReleaseMemObject(b_buffer); clReleaseMemObject(res_buffer); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseProgram(program); clReleaseContext(context); return 0; }
int main() { /* Host/device data structures */ cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_int err; /* Data and buffers */ float reflect[4]; cl_mem reflect_buffer; float x[4] = {1.0f, 2.0f, 3.0f, 4.0f}; float u[4] = {0.0f, 5.0f, 0.0f, 0.0f}; /* Create a device and context */ device = create_device(); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); if(err < 0) { perror("Couldn't create a context"); exit(1); } /* Build the program */ program = build_program(context, device, PROGRAM_FILE); /* Create a kernel */ kernel = clCreateKernel(program, KERNEL_FUNC, &err); if(err < 0) { perror("Couldn't create a kernel"); exit(1); }; /* Create buffer */ reflect_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 4*sizeof(float), NULL, &err); if(err < 0) { perror("Couldn't create a buffer"); exit(1); }; /* Create kernel argument */ err = clSetKernelArg(kernel, 0, sizeof(x), x); err |= clSetKernelArg(kernel, 1, sizeof(u), u); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &reflect_buffer); if(err < 0) { printf("Couldn't set a kernel argument"); exit(1); }; /* Create a command queue */ queue = clCreateCommandQueue(context, device, 0, &err); if(err < 0) { perror("Couldn't create a command queue"); exit(1); }; /* Enqueue kernel */ err = clEnqueueTask(queue, kernel, 0, NULL, NULL); if(err < 0) { perror("Couldn't enqueue the kernel"); exit(1); } /* Read and print the result */ err = clEnqueueReadBuffer(queue, reflect_buffer, CL_TRUE, 0, sizeof(reflect), reflect, 0, NULL, NULL); if(err < 0) { perror("Couldn't read the buffer"); exit(1); } printf("\nResult: %f %f %f %f\n", reflect[0], reflect[1], reflect[2], reflect[3]); /* Deallocate resources */ clReleaseMemObject(reflect_buffer); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseProgram(program); clReleaseContext(context); return 0; }
int main() { /* OpenCL structures */ cl_device_id device; cl_context context; cl_program program; cl_kernel vector_kernel, complete_kernel; cl_command_queue queue; cl_event start_event, end_event; cl_int i, err; size_t local_size, global_size; /* Data and buffers */ float data[ARRAY_SIZE]; float sum, actual_sum; cl_mem data_buffer, sum_buffer; cl_ulong time_start, time_end, total_time; /* Initialize data */ for(i=0; i<ARRAY_SIZE; i++) { data[i] = 1.0f*i; } /* Create device and determine local size */ device = create_device(); err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(local_size), &local_size, NULL); if(err < 0) { perror("Couldn't obtain device information"); exit(1); } /* Create a context */ context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); if(err < 0) { perror("Couldn't create a context"); exit(1); } /* Build program */ program = build_program(context, device, PROGRAM_FILE); /* Create data buffer */ data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, ARRAY_SIZE * sizeof(float), data, &err); sum_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float), NULL, &err); if(err < 0) { perror("Couldn't create a buffer"); exit(1); }; /* Create a command queue */ queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err); if(err < 0) { perror("Couldn't create a command queue"); exit(1); }; /* Create kernels */ vector_kernel = clCreateKernel(program, KERNEL_1, &err); complete_kernel = clCreateKernel(program, KERNEL_2, &err); if(err < 0) { perror("Couldn't create a kernel"); exit(1); }; /* Set arguments for vector kernel */ err = clSetKernelArg(vector_kernel, 0, sizeof(cl_mem), &data_buffer); err |= clSetKernelArg(vector_kernel, 1, local_size * 4 * sizeof(float), NULL); /* Set arguments for complete kernel */ err = clSetKernelArg(complete_kernel, 0, sizeof(cl_mem), &data_buffer); err |= clSetKernelArg(complete_kernel, 1, local_size * 4 * sizeof(float), NULL); err |= 
clSetKernelArg(complete_kernel, 2, sizeof(cl_mem), &sum_buffer); if(err < 0) { perror("Couldn't create a kernel argument"); exit(1); } /* Enqueue kernels */ global_size = ARRAY_SIZE/4; err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size, &local_size, 0, NULL, &start_event); if(err < 0) { perror("Couldn't enqueue the kernel"); exit(1); } printf("Global size = %lu\n", global_size); /* Perform successive stages of the reduction */ while(global_size/local_size > local_size) { global_size = global_size/local_size; err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); printf("Global size = %lu\n", global_size); if(err < 0) { perror("Couldn't enqueue the kernel"); exit(1); } } global_size = global_size/local_size; err = clEnqueueNDRangeKernel(queue, complete_kernel, 1, NULL, &global_size, NULL, 0, NULL, &end_event); printf("Global size = %lu\n", global_size); /* Finish processing the queue and get profiling information */ clFinish(queue); clGetEventProfilingInfo(start_event, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL); clGetEventProfilingInfo(end_event, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL); total_time = time_end - time_start; /* Read the result */ err = clEnqueueReadBuffer(queue, sum_buffer, CL_TRUE, 0, sizeof(float), &sum, 0, NULL, NULL); if(err < 0) { perror("Couldn't read the buffer"); exit(1); } /* Check result */ actual_sum = 1.0f * (ARRAY_SIZE/2)*(ARRAY_SIZE-1); if(fabs(sum - actual_sum) > 0.01*fabs(sum)) printf("Check failed.\n"); else printf("Check passed.\n"); printf("Total time = %lu\n", total_time); /* Deallocate resources */ clReleaseEvent(start_event); clReleaseEvent(end_event); clReleaseMemObject(sum_buffer); clReleaseMemObject(data_buffer); clReleaseKernel(vector_kernel); clReleaseKernel(complete_kernel); clReleaseCommandQueue(queue); clReleaseProgram(program); clReleaseContext(context); return 0; }
// ------------------------------------------------------------------- main --- int main( int argc, char **argv ) { glutInit( &argc, argv ); glutInitWindowSize( 512, 512 ); glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH ); glutCreateWindow( "Freetype OpenGL" ); glutReshapeFunc( reshape ); glutDisplayFunc( display ); glutKeyboardFunc( keyboard ); size_t i; vec4 black = {{0.0, 0.0, 0.0, 1.0}}; vec4 white = {{1.0, 1.0, 1.0, 1.0}}; vec4 none = {{1.0, 1.0, 1.0, 0.0}}; markup_t markup = { .family = "Bitstream Vera Sans", .size = 15.0, .bold = 0, .italic = 0, .rise = 0.0, .spacing = 0.0, .gamma = 1.5, .foreground_color = white, .background_color = none, .underline = 0, .underline_color = white, .overline = 0, .overline_color = white, .strikethrough = 0, .strikethrough_color = white, .font = 0, }; atlas = texture_atlas_new( 512, 512, 3 ); buffer = vertex_buffer_new( "v3f:t2f:c4f:1g1f:2g1f" ); markup.font = texture_font_new( atlas, "./Vera.ttf", markup.size ); vec2 pen; pen.y = 512.0 - markup.font->ascender - 5; for( i=0; i < 14; ++i ) { pen.x = 25.0; markup.gamma = 0.75 + 1.5*i*(1.0/14); add_text( buffer, &pen, &markup, L"The quick brown fox jumps over the lazy dog. ", &markup, L"0123456789.", NULL); pen.y -= markup.font->height; } markup.foreground_color = black; pen.y = 256.0 - markup.font->ascender - 5; for( i=0; i < 14; ++i ) { pen.x = 25.0; markup.gamma = 0.75 + 1.5*i*(1.0/14); add_text( buffer, &pen, &markup, L"The quick brown fox jumps over the lazy dog. ", &markup, L"0123456789.", NULL); pen.y -= markup.font->height; } // Create the GLSL program char * vertex_shader_source = read_shader("./markup.vert"); char * fragment_shader_source = read_shader("./markup.frag"); program = build_program( vertex_shader_source, fragment_shader_source ); texture_location = glGetUniformLocation(program, "texture"); pixel_location = glGetUniformLocation(program, "pixel"); glutMainLoop( ); return 0; }
int main() { /* Host/device data structures */ cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_int err; /* Data and buffers */ float mod_input[2] = {317.0f, 23.0f}; float mod_output[2]; float round_input[4] = {-6.5f, -3.5f, 3.5f, 6.5f}; float round_output[20]; cl_mem mod_input_buffer, mod_output_buffer, round_input_buffer, round_output_buffer; /* Create a context */ device = create_device(); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); if(err < 0) { perror("Couldn't create a context"); exit(1); } /* Build the program and create a kernel */ program = build_program(context, device, PROGRAM_FILE); kernel = clCreateKernel(program, KERNEL_FUNC, &err); if(err < 0) { perror("Couldn't create a kernel"); exit(1); }; /* Create buffers to hold input/output data */ mod_input_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mod_input), mod_input, &err); if(err < 0) { perror("Couldn't create a buffer"); exit(1); }; mod_output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(mod_output), NULL, NULL); round_input_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(round_input), round_input, NULL); round_output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(round_output), NULL, NULL); /* Create kernel argument */ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mod_input_buffer); if(err < 0) { perror("Couldn't set a kernel argument"); exit(1); }; clSetKernelArg(kernel, 1, sizeof(cl_mem), &mod_output_buffer); clSetKernelArg(kernel, 2, sizeof(cl_mem), &round_input_buffer); clSetKernelArg(kernel, 3, sizeof(cl_mem), &round_output_buffer); /* Create a command queue */ queue = clCreateCommandQueue(context, device, 0, &err); if(err < 0) { perror("Couldn't create a command queue"); exit(1); }; /* Enqueue kernel */ err = clEnqueueTask(queue, kernel, 0, NULL, NULL); if(err < 0) { perror("Couldn't enqueue the kernel"); exit(1); } /* 
Read the results */ err = clEnqueueReadBuffer(queue, mod_output_buffer, CL_TRUE, 0, sizeof(mod_output), &mod_output, 0, NULL, NULL); if(err < 0) { perror("Couldn't read the buffer"); exit(1); } clEnqueueReadBuffer(queue, round_output_buffer, CL_TRUE, 0, sizeof(round_output), &round_output, 0, NULL, NULL); /* Display data */ printf("fmod(%.1f, %.1f) = %.1f\n", mod_input[0], mod_input[1], mod_output[0]); printf("remainder(%.1f, %.1f) = %.1f\n\n", mod_input[0], mod_input[1], mod_output[1]); printf("Rounding input: %.1f %.1f %.1f %.1f\n", round_input[0], round_input[1], round_input[2], round_input[3]); printf("rint: %.1f, %.1f, %.1f, %.1f\n", round_output[0], round_output[1], round_output[2], round_output[3]); printf("round: %.1f, %.1f, %.1f, %.1f\n", round_output[4], round_output[5], round_output[6], round_output[7]); printf("ceil: %.1f, %.1f, %.1f, %.1f\n", round_output[8], round_output[9], round_output[10], round_output[11]); printf("floor: %.1f, %.1f, %.1f, %.1f\n", round_output[12], round_output[13], round_output[14], round_output[15]); printf("trunc: %.1f, %.1f, %.1f, %.1f\n", round_output[16], round_output[17], round_output[18], round_output[19]); /* Deallocate resources */ clReleaseMemObject(mod_input_buffer); clReleaseMemObject(mod_output_buffer); clReleaseMemObject(round_input_buffer); clReleaseMemObject(round_output_buffer); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseProgram(program); clReleaseContext(context); return 0; }
int main(int argc, char **argv) { /* Host/device data structures */ cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_int err; size_t global_size[2]; /* Image data */ png_bytep pixels; cl_image_format png_format; cl_mem input_image, output_image; size_t origin[3], region[3]; size_t width, height; /* Open input file and read image data */ read_image_data(INPUT_FILE, &pixels, &width, &height); /* Create a device and context */ device = create_device(); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); if(err < 0) { perror("Couldn't create a context"); exit(1); } /* Build the program and create a kernel */ program = build_program(context, device, PROGRAM_FILE); kernel = clCreateKernel(program, KERNEL_FUNC, &err); if(err < 0) { printf("Couldn't create a kernel: %d", err); exit(1); }; /* Create image object */ png_format.image_channel_order = CL_LUMINANCE; png_format.image_channel_data_type = CL_UNORM_INT16; input_image = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &png_format, width, height, 0, (void*)pixels, &err); output_image = clCreateImage2D(context, CL_MEM_WRITE_ONLY, &png_format, width, height, 0, NULL, &err); if(err < 0) { perror("Couldn't create the image object"); exit(1); }; /* Create kernel arguments */ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_image); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_image); if(err < 0) { printf("Couldn't set a kernel argument"); exit(1); }; /* Create a command queue */ queue = clCreateCommandQueue(context, device, 0, &err); if(err < 0) { perror("Couldn't create a command queue"); exit(1); }; /* Enqueue kernel */ global_size[0] = height; global_size[1] = width; err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL); if(err < 0) { perror("Couldn't enqueue the kernel"); exit(1); } /* Read the image object */ origin[0] = 0; origin[1] = 0; origin[2] = 0; region[0] = width; region[1] = 
height; region[2] = 1; err = clEnqueueReadImage(queue, output_image, CL_TRUE, origin, region, 0, 0, (void*)pixels, 0, NULL, NULL); if(err < 0) { perror("Couldn't read from the image object"); exit(1); } /* Create output PNG file and write data */ write_image_data(OUTPUT_FILE, pixels, width, height); /* Deallocate resources */ free(pixels); clReleaseMemObject(input_image); clReleaseMemObject(output_image); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseProgram(program); clReleaseContext(context); return 0; }
int main(int argc, char **argv){ cl_context context = get_platform(CL_DEVICE_TYPE_GPU); cl_device_id device = 0; cl_command_queue queue = get_first_device(context, &device); char *prog_src = read_file(CL_PROGRAM("convolution.cl"), NULL); cl_program program = build_program(prog_src, context, device, NULL); free(prog_src); cl_int err = CL_SUCCESS; cl_kernel kernel = clCreateKernel(program, "convolve", &err); check_cl_err(err, "failed to create kernel"); //Setup our input signal and mask cl_uint in_signal[IN_DIM][IN_DIM] = { { 3, 1, 1, 4, 8, 2, 1, 3 }, { 4, 2, 1, 1, 2, 1, 2, 3 }, { 4, 4, 4, 4, 3, 2, 2, 2 }, { 9, 8, 3, 8, 9, 0, 0, 0 }, { 9, 3, 3, 9, 0, 0, 0, 0 }, { 0, 9, 0, 8, 0, 0, 0, 0 }, { 3, 0, 8, 8, 9, 4, 4, 4 }, { 5, 9, 8, 1, 8, 1, 1, 1 } }; cl_uint mask[MASK_DIM][MASK_DIM] = { { 1, 1, 1 }, { 1, 0, 1 }, { 1, 1, 1 } }; //0 is input, 1 is mask, 2 is output cl_mem mem_objs[3]; mem_objs[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_uint) * IN_DIM * IN_DIM, in_signal, &err); mem_objs[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_uint) * MASK_DIM * MASK_DIM, mask, &err); mem_objs[2] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_uint) * OUT_DIM * OUT_DIM, NULL, &err); check_cl_err(err, "failed to create buffers"); for (int i = 0; i < 3; ++i){ err = clSetKernelArg(kernel, i, sizeof(cl_mem), &mem_objs[i]); check_cl_err(err, "failed to set kernel argument"); } size_t in_dim = IN_DIM, mask_dim = MASK_DIM; err = clSetKernelArg(kernel, 3, sizeof(unsigned), &in_dim); err = clSetKernelArg(kernel, 4, sizeof(unsigned), &mask_dim); check_cl_err(err, "failed to set kernel argument"); size_t global_size[2] = { OUT_DIM, OUT_DIM }; size_t local_size[2] = { 2, 2 }; err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_size, local_size, 0, NULL, NULL); check_cl_err(err, "failed to enqueue ND range kernel"); cl_uint* out = clEnqueueMapBuffer(queue, mem_objs[2], CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint) * 
OUT_DIM * OUT_DIM, 0, NULL, NULL, &err); check_cl_err(err, "failed to map result"); printf("Result:\n"); for (int i = 0; i < OUT_DIM; ++i){ for (int j = 0; j < OUT_DIM; ++j){ printf("%d ", out[i * OUT_DIM + j]); } printf("\n"); } printf("\n"); clEnqueueUnmapMemObject(queue, mem_objs[2], out, 0, 0, NULL); for (int i = 0; i < 3; ++i){ clReleaseMemObject(mem_objs[i]); } clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); return 0; }
/**
 * Convenience overload of build_program.
 *
 * Forwards @p un, @p sm and @p static_prefix unchanged to the four-argument
 * build_program overload, passing the result of `default_build_visitor()` as
 * the third argument, and returns whatever that overload returns.
 *
 * @param un             forwarded as the first argument
 * @param sm             clang source manager, forwarded as the second argument
 * @param static_prefix  forwarded as the last argument
 * @return the program produced by the four-argument overload
 */
inline program build_program(unit_navigator const & un, clang::SourceManager const & sm, std::string const & static_prefix) { return build_program(un, sm, default_build_visitor(), static_prefix); }
int main (void) { float *sum; cl_kernel kernel; cl_mem sum_buffer; cl_context context; cl_program program; cl_uint devices_num; char *program_source; cl_device_id device_id; cl_platform_id platform_id; cl_command_queue command_queue; sum = (float *) calloc (NUM_STEPS, sizeof (float)); program_source = (char *) calloc (1000, sizeof (char)); program_source = readKernel (); /* number of platforms on the system */ platforms_number (); /* id of the first platform proposed by the system */ platform_id = get_platform (); /* number of devices on the platform specified by platform_id */ devices_num = devices_number (platform_id); /* id of the first device proposed by the system on the platform specified by platform_id */ device_id = create_device (platform_id); /* create a context to stablish a communication channel between the host process and the device */ context = create_context (device_id); /* create a program providing the source code */ program = create_program (context, program_source); /* compile the program for the specific device architecture */ build_program (program, device_id);\ /* create a kernel given the program */ kernel = create_kernel (program); /* create a memory object, in this case this will be float number that will contain the values of the partial sums */ sum_buffer = create_buffer (context, "sum_buffer", NUM_STEPS); /* assign this buffer as the only kernel argument */ set_kernel_argument (kernel, sum_buffer, 0, "sum_buffer"); /* create a command queue, here we can enqueue tasks for the device specified by device_id */ command_queue = create_command_queue (context, device_id); /* enqueue a task to execute the kernel on the device */ enqueue_kernel_execution (command_queue, kernel, NUM_STEPS); /* copy the content of the buffer from the global memory of the device to the host memory */ enqueue_read_buffer_task (command_queue, sum_buffer, NUM_STEPS, sum, "sum"); printf (ANSI_COLOR_CYAN "\nAproximación de PI: %.10lf\n\n" ANSI_COLOR_RESET, sum[0] / 
NUM_STEPS); return 0; }