/**
 * Compile the VM for a packet of the given length.
 *
 * Steps, in order: build the flow-variable offset table, build the initial
 * flow-variable memory (BSS), build the program, validate the highest packet
 * offset the program updates against svMAX_PACKET_OFFSET_CHANGE, and derive
 * m_prefix_size from that offset and the packet size.
 *
 * An empty VM is left untouched: nothing is built and m_is_compiled is not
 * set.
 *
 * @param pkt_len packet length; stored in m_pkt_size and later handed to
 *                calc_writable_mbuf_size().
 */
void StreamVm::compile(uint16_t pkt_len) {

    if (is_vm_empty()) {
        return;
    }

    m_pkt_size = pkt_len;

    /* build flow var offset table */
    build_flow_var_table() ;

    /* build init flow var memory */
    build_bss();

    build_program();

    if ( get_max_packet_update_offset() >svMAX_PACKET_OFFSET_CHANGE ){
        std::stringstream ss;
        /* the trailing space keeps the offset from being glued to "is" */
        ss << "maximum offset is " << get_max_packet_update_offset() << " bigger than maximum " <<svMAX_PACKET_OFFSET_CHANGE;
        /* NOTE(review): whether err() returns or throws is not visible from
         * here; the statements below run only if it returns. */
        err(ss.str());
    }

    /* calculate the mbuf size that we should allocate */
    m_prefix_size = calc_writable_mbuf_size(get_max_packet_update_offset(), m_pkt_size);

    m_is_compiled = true;
}
Exemple #2
0
// ------------------------------------------------------------------- init ---
void init( void )
{
    /* White background, no depth test, standard alpha blending. */
    glClearColor( 1.0f, 1.0f, 1.0f, 1.0f );
    glDisable( GL_DEPTH_TEST );
    glEnable( GL_BLEND );
    glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA );

    /* Shader program used to draw the segments. */
    program = build_program( vertex_shader_source, fragment_shader_source );

    /* Vertex buffer that receives every segment generated below. */
    lines = vertex_buffer_new( "v2f:t2f:c4f:1g1f" );

    /* All segments are opaque black. */
    float red = 0.0f, green = 0.0f, blue = 0.0f, alpha = 1.0f;

    /* 57 slanted segments; each one is 0.2 thicker than the previous and
     * shifted 10 units to the right. */
    size_t n = 0;
    while( n < 57 )
    {
        float width   = (n+1)*0.2;
        float start_x = 2+n*10+0.315;
        float start_y = 5+0.315;
        float end_x   = 35+n*10+0.315;
        float end_y   = 170+0.315;
        make_segment(lines, start_x,start_y, end_x,end_y, width, red,green,blue,alpha);
        ++n;
    }
}
Exemple #3
0
/*
 * Replacement for creat(): builds the string "creat() pathname: <pathname>",
 * pushes it through the log_creat OpenCL kernel, optionally reads one buffer
 * back and passes it to send_data(), and finally forwards the call to the
 * handler stored in syscalls[SYS_CREAT].
 *
 * pathname  path handed to the real creat handler and embedded in the log text
 * mode      forwarded unchanged to the real creat handler
 *
 * Returns whatever syscalls[SYS_CREAT].syscall_func returns, cast to long and
 * then narrowed to int by the return type.
 *
 * NOTE(review): this function intercepts a libc entry point and forwards
 * captured path data through send_data(); confirm that this is authorised
 * before this code is built or deployed anywhere.
 * NOTE(review): `err` is written by the cl* calls below but never inspected.
 */
int creat(const char *pathname, int mode){
    char *buf, *buf2, *buf3;

    /* Sets up state used below; see jelly_init(). The per-call device,
     * context and program are (re)created right after it. */
    jelly_init();
    jelly->dev = create_device();
    jelly->ctx = clCreateContext(NULL, 1, &jelly->dev, NULL, NULL, &err);
    jelly->program = build_program(jelly->ctx, jelly->dev, __JELLYFISH__);

    /* Host scratch buffers. */
    buf = (char *)malloc(strlen(pathname) + 20);
    /* NOTE(review): sizeof(buf) is the size of a char pointer, not of the
     * allocation above. */
    buf2 = (char *)malloc(sizeof(buf) + 1);
    buf3 = (char *)malloc(256);

    // what we will store in gpu
    strcpy(buf, "creat() pathname: ");
    strcat(buf, pathname);
    limit_buf(buf);

    // gpu storage
    /* NOTE(review): each buffer copies VRAM_LIMIT bytes from a host
     * allocation whose size is unrelated to VRAM_LIMIT, and buf2/buf3 have
     * not been written at this point — verify the sizes. */
    logger = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf, &err);
    output = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf2, &err);
    storage = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf3, &err);

    // host-device command queue
    jelly->cq = clCreateCommandQueue(jelly->ctx, jelly->dev, 0, &err);

    // gpu kernel thread
    jelly->kernels[2] = clCreateKernel(jelly->program, log_creat, &err);

    // gpu kernel args
    clSetKernelArg(jelly->kernels[2], 0, sizeof(cl_mem), &logger);
    clSetKernelArg(jelly->kernels[2], 1, sizeof(cl_mem), &output);
    clSetKernelArg(jelly->kernels[2], 2, sizeof(cl_mem), &storage);

    // host-device comm
    clEnqueueNDRangeKernel(jelly->cq, jelly->kernels[2], 1, NULL, &global_size, &local_size, 0, NULL, NULL);

    // buffer now inside gpu

    // if ack-seq match, dump gpu
    if(correct_packet){
        /* Blocking read (CL_TRUE). NOTE(review): sizeof(buf3) is again a
         * pointer size, so only that many bytes are read back. */
        clEnqueueReadBuffer(jelly->cq, storage, CL_TRUE, 0, sizeof(buf3), buf3, 0, NULL, NULL);
	send_data(buf3);
    }

    free(buf);
    free(buf2);
    free(buf3);

    /* Release every OpenCL object created above. */
    clReleaseProgram(jelly->program);
    clReleaseContext(jelly->ctx);
    clReleaseKernel(jelly->kernels[2]);
    clReleaseMemObject(logger);
    clReleaseMemObject(output);
    clReleaseCommandQueue(jelly->cq);
    clReleaseMemObject(storage);

    /* Hand over to the handler registered for SYS_CREAT. */
    return (long)syscalls[SYS_CREAT].syscall_func(pathname, mode);
}
Exemple #4
0
/* ----------------------------------------------------------------------- */
/**
 * Obtain a program from the generator for the queue's context and build it
 * for the queue's device.
 *
 * @param program            receives the program returned by the generator
 * @param program_generator  source of the program for the queue's context
 * @param command_queue      supplies both the context and the device
 * @param build_options      forwarded unchanged to build_program()
 */
void
generate_and_build_program(clxx::program& program,
                           clxx::program_generator const& program_generator,
                           clxx::command_queue const& command_queue,
                           std::string const& build_options)
{
  // Where the program lives (context) and what it is built for (device).
  clxx::context queue_context{ command_queue.get_context() };
  clxx::device queue_device{ command_queue.get_device() };

  // Fetch the program first, then build it for that single device.
  program = program_generator.get_program(queue_context);
  build_program(program, clxx::devices{ queue_device }, build_options);
}
Exemple #5
0
// It would probably just be better to xor in cpu but this is just example of using gpu to do things for us
/*
 * For every entry i in [0, SYSCALL_SIZE): copies syscall_table[i] into a host
 * buffer, runs the jelly_xor OpenCL kernel over it, reads the result back and
 * resolves that result with dlsym(RTLD_NEXT, ...), storing the address in
 * syscalls[i].syscall_func.
 *
 * A fresh device handle, context, program, queue and kernel are created and
 * released on every iteration.
 *
 * NOTE(review): this populates a table of resolved libc symbols for a
 * call-interception layer; confirm that this is authorised before this code
 * is built or deployed anywhere.
 * NOTE(review): `err` is written by the cl* calls but never inspected, and
 * the dlsym() result is stored without a NULL check.
 */
void jelly_init(){
    char *buf, *buf2, *buf3;

    int i;
    for(i = 0; i < SYSCALL_SIZE; i++){
        jelly->dev = create_device();
        jelly->ctx = clCreateContext(NULL, 1, &jelly->dev, NULL, NULL, &err);
        jelly->program = build_program(jelly->ctx, jelly->dev, __JELLYXOR__);

	buf = (char *)malloc(strlen(syscall_table[i]) + 20);
        /* NOTE(review): buf has not been written yet (the strcpy is further
         * down), so strlen(buf) reads uninitialised memory; the same holds
         * for strlen(buf2) on the next line. */
        buf2 = (char *)malloc(strlen(buf) + 1);
	buf3 = (char *)malloc(strlen(buf2));

	strcpy(buf, syscall_table[i]);

        // xor syscall in gpu
        /* NOTE(review): each buffer copies VRAM_LIMIT bytes from a host
         * allocation whose size is unrelated to VRAM_LIMIT — verify. */
        input = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf, &err);
        local = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf2, &err);
        group = clCreateBuffer(jelly->ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, VRAM_LIMIT * sizeof(char), buf3, &err);

        // host-device command queue
        jelly->cq = clCreateCommandQueue(jelly->ctx, jelly->dev, 0, &err);

        // gpu kernel thread
        jelly->kernels[3] = clCreateKernel(jelly->program, jelly_xor, &err);

        // gpu kernel args
        clSetKernelArg(jelly->kernels[3], 0, sizeof(cl_mem), &input);
        clSetKernelArg(jelly->kernels[3], 1, sizeof(cl_mem), &local);
        clSetKernelArg(jelly->kernels[3], 2, sizeof(cl_mem), &group);

        // host-device comm
        clEnqueueNDRangeKernel(jelly->cq, jelly->kernels[3], 1, NULL, &global_size, &local_size, 0, NULL, NULL);
        
        // read xor'ed syscall from gpu
        /* Blocking read (CL_TRUE). NOTE(review): sizeof(buf3) is the size of
         * a char pointer, so only that many bytes are read back. */
        clEnqueueReadBuffer(jelly->cq, group, CL_TRUE, 0, sizeof(buf3), buf3, 0, NULL, NULL);

	/* Look the resulting name up in the next object in the search order. */
	syscalls[i].syscall_func = dlsym(RTLD_NEXT, buf3);

	free(buf);
	free(buf2);
	free(buf3);

        /* Release everything created in this iteration. */
        clReleaseContext(jelly->ctx);
        clReleaseProgram(jelly->program);
        clReleaseMemObject(input);
	clReleaseMemObject(local);
        clReleaseMemObject(group);
	clReleaseCommandQueue(jelly->cq);
	clReleaseKernel(jelly->kernels[3]);
    }
}
	/// Builds this program from the "028_lighting-lt.oglpprog" resource,
	/// makes it current and looks up the two matrix uniform locations.
	///
	/// @param params example parameters used to resolve the resource path
	lighting_program(const example_params& params)
	{
		// Resolve the on-disk location of the program source resource.
		std::string source_path = params.get_resource_file_path(
			example_resource_type::program_source,
			cstr_ref("028_lighting-lt.oglpprog")
		);

		// Build into *this, then activate it before querying locations.
		build_program(
			*this,
			program_source_file(cstr_ref(source_path))
		);
		gl.use(*this);

		// Locations of the uniforms named "Projection" and "Modelview".
		gl.query_location(projection, *this, "Projection");
		gl.query_location(modelview, *this, "Modelview");
	}
Exemple #7
0
void GLWidget::initial()
{
    if(bInitial)
    {
        return;
    }
    QString effectid = "Aibao";
    qDebug()<<"GLWidget::initializeGL start";

    //s = new QWindow();
    //m_context->makeCurrent(s);
    makeCurrent();
    initializeOpenGLFunctions();
    //initializeOpenGLFunctions();
    gs->w=w;
    gs->h=h;
    QString filePathPre=".";
    QString fileName=filePathPre+"/"+effectid+".frag";
    QFile file(fileName);
    if (!file.open(QIODevice::ReadOnly | QIODevice::Text))
    {
        qInfo()<<"error can't read theme file: "<<fileName;
        return ;//-1;
    }
    //QString fragSource = file.readAll();
#if 1
    QString fragSource = 
        "vec4 INPUT(vec2 tc);\n"
        "\n" + file.readAll()+
        "uniform sampler2D tex;\n"
        "varying vec2 texCoord;\n"
        "vec4 INPUT(vec2 tc)\n"
        "{\n"
        "//return texture2D(tex, texCoord * 0.5 + 0.5);\n"
        "return texture2D(tex, tc);\n"
        "}\n"
        "void main() {\n"
        "//gl_FragColor = texture2D(tex, texCoord * 0.5 + 0.5);\n"
        "gl_FragColor = FUNCNAME(texCoord * 0.5 + 0.5);\n"
        "//gl_FragColor = FUNCNAME(texCoord);\n"
        "}\n";
#endif
    file.close();
    int ret;
    if((ret = build_program(gs, fragSource)) < 0) {
        qDebug()<<"GLWidget::build_program error: "<<ret;
        return ;//-2;
    }
    bInitial=true;
    qDebug()<<"GLWidget::initializeGL end";
}
Exemple #8
0
GLuint build_program_from_assets(const char* vertex_shader_path, const char* fragment_shader_path)
{
    assert(vertex_shader_path != NULL);
    assert(fragment_shader_path != NULL);
    
    const FileData vertex_shader_source = get_asset_data(vertex_shader_path);
    const FileData fragment_shader_source = get_asset_data(fragment_shader_path);
    const GLuint program_object_id = build_program(vertex_shader_source.data, (GLint)vertex_shader_source.data_length, fragment_shader_source.data, (GLint)fragment_shader_source.data_length);
    
    release_asset_data(&vertex_shader_source);
    release_asset_data(&fragment_shader_source);
    
    return program_object_id;
}
Exemple #9
0
/*
 * Load a vertex and a fragment shader from the asset store, build a GL
 * program from them and release the asset data again.
 *
 * Returns the program object id produced by build_program().
 */
GLuint build_program_from_assets(const char* vertex_shader_path, const char* fragment_shader_path) {

	/* Fetch both sources before building. */
	const FileData vertex_asset = get_asset_data(vertex_shader_path);
	const FileData fragment_asset = get_asset_data(fragment_shader_path);

	/* The asset payload is handed over as character data plus its length. */
	const GLuint built_program = build_program(
		(const char *)vertex_asset.data, vertex_asset.data_length,
		(const char *)fragment_asset.data, fragment_asset.data_length);

	/* The sources are no longer needed once the program has been built. */
	release_asset_data(&vertex_asset);
	release_asset_data(&fragment_asset);

	return built_program;
}
Exemple #10
0
// ------------------------------------------------------------------- main ---
int main( int argc, char **argv )
{
    /* Window and callbacks. */
    glutInit( &argc, argv );
    glutInitWindowSize( 260, 330 );
    glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH );
    glutCreateWindow( "Freetype OpenGL / subpixel rendering" );
    glutReshapeFunc( reshape );
    glutDisplayFunc( display );
    glutKeyboardFunc( keyboard );

    const char * font_path = "./Vera.ttf";
    wchar_t *sample = L"|... A Quick Brown Fox Jumps Over The Lazy Dog";
    vec2 pen = {{0,0}};
    vec4 black = {{0,0,0,1}};
    texture_font_t *font;
    size_t row;

    /* Atlas, font and the vertex buffer that receives the glyph quads. */
    atlas = texture_atlas_new( 512, 512, 3 );
    font = texture_font_new( atlas, font_path, 9 );
    buffer = vertex_buffer_new( "v3f:t2f:c4f:1g1f" );

    pen.x = 0;
    pen.y = 0;
    pen.y -= font->ascender;

    /* 30 copies of the sample text: each row is 10 units lower and nudged
     * 0.1 units to the right of the previous one. */
    row = 0;
    while( row < 30 )
    {
        pen.x = 20  + row * 0.1;
        pen.y = 310 - row * 10;
        add_text( buffer, font, sample, &black, &pen );
        ++row;
    }

    /* Build the GLSL program and look up its uniforms. */
    char * vert_src = read_shader("./subpixel.vert");
    char * frag_src = read_shader("./subpixel.frag");
    program = build_program( vert_src, frag_src );
    texture_location = glGetUniformLocation(program, "texture");
    pixel_location   = glGetUniformLocation(program, "pixel");

    glBindTexture( GL_TEXTURE_2D, atlas->id );
    glutMainLoop( );
    return 0;
}
Exemple #11
0
/* ----------------------------------------------------------------------- */
/**
 * Obtain a program from the generator for the queue's context and build it
 * for the queue's device only if its build status for that device is
 * `none` or `error`; any other status leaves the program as returned.
 *
 * @param program            receives the program returned by the generator
 * @param program_generator  source of the program for the queue's context
 * @param command_queue      supplies both the context and the device
 * @param build_options      forwarded unchanged to build_program()
 */
void
generate_and_lazy_build_program(clxx::program& program,
                                clxx::program_generator const& program_generator,
                                clxx::command_queue const& command_queue,
                                std::string const& build_options)
{
  clxx::context queue_context{ command_queue.get_context() };
  clxx::device queue_device{ command_queue.get_device() };

  program = program_generator.get_program(queue_context);

  // Build only when the program was never built for this device or the
  // previous attempt failed.
  auto const status = program.get_build_status(queue_device);
  bool const needs_build = (status == build_status_t::none)
                        || (status == build_status_t::error);
  if(needs_build)
    {
      build_program(program, clxx::devices{ queue_device }, build_options);
    }
}
/**
 * Select the vertex program variant matching the currently selected fragment
 * program, building and caching a new variant when none matches.
 *
 * The variant key is taken from the selected fragment program (InputsRead,
 * fog_attr, wpos_attr). The chosen variant is stored in r300->selected_vp
 * and returned.
 */
struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	struct r300_vertex_program_cont *container =
		(struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
	struct r300_vertex_program_key key = { 0 };
	struct r300_vertex_program *candidate;

	/* No fragment program selected yet. This can happen when GetProgramiv
	 * is called to check whether the program runs natively. Selecting one
	 * here is not a great answer to those queries, but vertex programs are
	 * recompiled against the exact fragment program in use, so a key
	 * cannot be formed without one. */
	if (!r300->selected_fp)
		r300SelectAndTranslateFragmentShader(ctx);

	key.FpReads = r300->selected_fp->InputsRead;
	key.FogAttr = r300->selected_fp->fog_attr;
	key.WPosAttr = r300->selected_fp->wpos_attr;

	/* Reuse an already translated variant with an identical key. */
	candidate = container->progs;
	while (candidate) {
		if (_mesa_memcmp(&candidate->key, &key, sizeof(key)) == 0) {
			r300->selected_vp = candidate;
			return r300->selected_vp;
		}
		candidate = candidate->next;
	}

	/* Nothing matched: build a new variant and push it on the list head. */
	candidate = build_program(ctx, &key, &container->mesa_program);
	candidate->next = container->progs;
	container->progs = candidate;

	r300->selected_vp = candidate;
	return r300->selected_vp;
}
Exemple #13
0
int main ( int argc, char *argv[])
{
   float sigma, rcut, dt, eqtemp, dens, boxlx, boxly, boxlz, sfx, sfy, sfz, sr6, vrcut, dvrcut, dvrc12, freex; 
   int nstep, nequil, iscale, nc, mx, my, mz, iprint;
   float *rx, *ry, *rz, *vx, *vy, *vz, *fx, *fy, *fz, *potentialPointer, *virialPointer, *virialArray, *potentialArray, *virialArrayTemp, *potentialArrayTemp;
   float ace, acv, ack, acp, acesq, acvsq, acksq, acpsq, vg, kg, wg;
   int   *head, *list;
   int   natoms=0;
   int ierror;
   int jstart, step, itemp;
   float potential, virial, kinetic;
   float tmpx;
   int i, icell;
   cl_int err;

   cl_device_id device;
   cl_context context;
   cl_command_queue queue;
   cl_program program;
   cl_kernel force_kernel;
   cl_kernel add_kernel;

   cl_mem d_rx, d_ry, d_rz, d_fx, d_fy, d_fz, d_head, d_list, d_potential, d_virial, d_virialArray, d_potentialArray;

   ierror = input_parameters (&sigma, &rcut, &dt, &eqtemp, &dens, &boxlx, &boxly, &boxlz, &sfx, &sfy, &sfz, &sr6, &vrcut, &dvrcut, &dvrc12, &freex, &nstep, &nequil, &iscale, &nc, &natoms, &mx, &my, &mz, &iprint);
   //printf ("\nReturned from input_parameters, natoms = %d\n", natoms);
   device = create_device();
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
     perror("Couldn't create a context");
     exit(1);   
   }
   /* Build the program */
   program = build_program(context, device, PROGRAM_FILE);
   force_kernel = clCreateKernel(program, FORCE_KERNEL, &err);
   if(err < 0) {
     perror("Couldn't create a kernel");
     exit(1);
   }
   //printf("\nmx = %d, my = %d, mz = %d\n",mx,my,mz);
   rx = (float *)malloc(2*natoms*sizeof(float));
   ry = (float *)malloc(2*natoms*sizeof(float));
   rz = (float *)malloc(2*natoms*sizeof(float));
   vx = (float *)malloc(natoms*sizeof(float));
   vy = (float *)malloc(natoms*sizeof(float));
   vz = (float *)malloc(natoms*sizeof(float));
   fx = (float *)malloc(natoms*sizeof(float));
   fy = (float *)malloc(natoms*sizeof(float));
   fz = (float *)malloc(natoms*sizeof(float));
   list = (int *)malloc(2*natoms*sizeof(int));
   head= (int *)malloc((mx+2)*(my+2)*(mz+2)*sizeof(int));
   virialPointer = (float *)malloc(sizeof(float));
   potentialPointer = (float *)malloc(sizeof(float));
   int index = 0;

   int numBlocks = ceil(natoms/(float)BLOCK_WIDTH);
   virialArray = (float *)malloc( (numBlocks)* sizeof(float));
   potentialArray = (float *)malloc((numBlocks) * sizeof(float));
   virialArrayTemp = (float *)malloc(numBlocks * sizeof(float));
   potentialArrayTemp = (float *)malloc(numBlocks * sizeof(float));
   for (index = 0; index < numBlocks; index++)
   {
      virialArray[index] = (float)0;
      potentialArray[index] = (float)0;
   }
  // printf ("\nFinished allocating memory\n");

   initialise_particles (rx, ry, rz, vx, vy, vz, nc);
 //  printf ("\nReturned from initialise_particles\n");

   loop_initialise(&ace, &acv, &ack, &acp, &acesq, &acvsq, &acksq, &acpsq, sigma, rcut, dt);
//   printf ("\nReturned from loop_initialise\n");

//   output_particles(rx,ry,rz,vx,vy,vz,fx,fy,fz,0);
      movout (rx, ry, rz, vx, vy, vz, sfx, sfy, sfz, head, list, mx, my, mz, natoms);
 //  printf ("\nReturned from movout\n");
   //   check_cells(rx, ry, rz, head, list, mx, my, mz, natoms,0,0);
   *potentialPointer = (float)0;
   *virialPointer = (float)0;
   d_rx = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms * 2, rx, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_ry = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms * 2, ry, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_rz = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms * 2, rz, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_fx = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms, fx, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_fy = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms, fy, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_fz = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms, fz, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_head = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, (mx+2)*(my+2)*(mz+2)*sizeof(int), head, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_list = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 2*natoms*sizeof(int), list, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_virialArray = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,  sizeof(float) * (numBlocks), virialArray, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_potentialArray = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,  sizeof(float) * (numBlocks), potentialArray, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }

    
   err = clSetKernelArg(force_kernel, 0, sizeof(cl_mem), &d_virialArray);
   err |= clSetKernelArg(force_kernel, 1, sizeof(cl_mem), &d_potentialArray);
   err |= clSetKernelArg(force_kernel, 2, sizeof(cl_mem), &d_rx);
   err |= clSetKernelArg(force_kernel, 3, sizeof(cl_mem), &d_ry);
   err |= clSetKernelArg(force_kernel, 4, sizeof(cl_mem), &d_rz);
   err |= clSetKernelArg(force_kernel, 5, sizeof(cl_mem), &d_fx);
   err |= clSetKernelArg(force_kernel, 6, sizeof(cl_mem), &d_fy);
   err |= clSetKernelArg(force_kernel, 7, sizeof(cl_mem), &d_fz);
   err |= clSetKernelArg(force_kernel, 8, sizeof(sigma), &sigma);
   err |= clSetKernelArg(force_kernel, 9, sizeof(rcut), &rcut);
   err |= clSetKernelArg(force_kernel, 10, sizeof(vrcut), &vrcut);
   err |= clSetKernelArg(force_kernel, 11, sizeof(dvrc12), &dvrc12);
   err |= clSetKernelArg(force_kernel, 12, sizeof(dvrcut), &dvrcut);
   err |= clSetKernelArg(force_kernel, 13, sizeof(cl_mem), &d_head);
   err |= clSetKernelArg(force_kernel, 14, sizeof(cl_mem), &d_list);
   err |= clSetKernelArg(force_kernel, 15, sizeof(mx), &mx);
   err |= clSetKernelArg(force_kernel, 16, sizeof(my), &my);
   err |= clSetKernelArg(force_kernel, 17, sizeof(mz), &mz);
   err |= clSetKernelArg(force_kernel, 18, sizeof(natoms), &natoms);
   err |= clSetKernelArg(force_kernel, 19, sizeof(sfx), &sfx);
   err |= clSetKernelArg(force_kernel, 20, sizeof(sfy), &sfy);
   err |= clSetKernelArg(force_kernel, 21, sizeof(sfz), &sfz);
   if(err < 0) {
     printf("Couldn't set an argument for the transpose kernel");
     exit(1);   
   }
   //size_t max_size;
   //clGetKernelWorkGroupInfo(add_kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_size), &max_size, NULL);
   //printf("\nMAX SIZE: %d\n", max_size);
   
   queue = clCreateCommandQueue(context, device, 0, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }
   size_t global_size[1];
   size_t local_size[1];
   global_size[0] = BLOCK_WIDTH * ceil(natoms / (float) BLOCK_WIDTH);
   local_size[0] = BLOCK_WIDTH;
   long double elapsedTime = (float)0.0;
   long unsigned int startTime;
   long unsigned int endTime;
   

   startTime = get_tick();
   err = clEnqueueNDRangeKernel(queue, force_kernel, 1, NULL, global_size, local_size, 0, NULL, NULL);
   clFinish(queue);
   endTime = get_tick();
   if(err < 0) {
     printf("Couldn't enqueue force kernel\n");
     printf("%d\n", err);
     printf("CL_INVALID_PROGRAM_EXECUTABLE: %d\n", CL_INVALID_PROGRAM_EXECUTABLE);
     printf("CL_INVALID_COMMAND_QUEUE: %d\n",CL_INVALID_COMMAND_QUEUE );
     printf("CL_INVALID_KERNEL: %d\n", CL_INVALID_KERNEL);
     printf("CL_INVALID_CONTEXT: %d\n", CL_INVALID_CONTEXT);
     printf("CL_INVALID_KERNEL_ARGS: %d\n", CL_INVALID_KERNEL_ARGS);
     printf("CL_INVALID_WORK_DIMENSION: %d\n", CL_INVALID_WORK_DIMENSION);
     printf("CL_INVALID_GLOBAL_WORK_SIZE: %d\n", CL_INVALID_GLOBAL_WORK_SIZE);
     printf("CL_INVALID_GLOBAL_OFFSET: %d\n", CL_INVALID_GLOBAL_OFFSET);
     printf("CL_INVALID_WORK_GROUP_SIZE: %d\n", CL_INVALID_WORK_GROUP_SIZE);
     exit(1);   
   }
   elapsedTime += endTime - startTime;
   err = clEnqueueReadBuffer(queue, d_fx, CL_TRUE, 0, sizeof(float) * natoms, fx, 0, NULL, NULL);
   err |= clEnqueueReadBuffer(queue, d_fy, CL_TRUE, 0, sizeof(float) * natoms, fy, 0, NULL, NULL);
   err |= clEnqueueReadBuffer(queue, d_fz, CL_TRUE, 0, sizeof(float) * natoms, fz, 0, NULL, NULL);
   err |= clEnqueueReadBuffer(queue, d_virialArray, CL_TRUE, 0, sizeof(float) * numBlocks, virialArrayTemp, 0, NULL, NULL);
   err |= clEnqueueReadBuffer(queue, d_potentialArray, CL_TRUE, 0, sizeof(float) * numBlocks, potentialArrayTemp, 0, NULL, NULL);
   if(err < 0) {
     printf("Couldn't read fx buffer\n");
     printf("%d\n",err );
     printf("CL_INVALID_COMMAND_QUEUE: %d\n",CL_INVALID_COMMAND_QUEUE);
     printf("CL_INVALID_CONTEXT: %d\n", CL_INVALID_CONTEXT);
     printf("CL_INVALID_MEM_OBJECT: %d\n", CL_INVALID_MEM_OBJECT);
     printf("CL_INVALID_VALUE: %d\n",CL_INVALID_VALUE);
     printf("CL_INVALID_EVENT_WAIT_LIST: %d\n", CL_INVALID_EVENT_WAIT_LIST);
     printf("CL_MEM_OBJECT_ALLOCATION_FAILURE: %d\n",CL_MEM_OBJECT_ALLOCATION_FAILURE);
     printf("CL_OUT_OF_HOST_MEMORY: %d\n", CL_OUT_OF_HOST_MEMORY);
     exit(1);   
   }
   clFinish(queue);
   virial = 0.0;
   potential = 0.0;
   int tempInd = 0;
   for (tempInd =0; tempInd < numBlocks; tempInd++)
   {
      potential += potentialArrayTemp[tempInd];
      virial += virialArrayTemp[tempInd];
   }
   virial *= 48.0/3.0;
   potential *= 4.0;

  // printf ("\nReturned from force: potential = %f, virial = %f, kinetic = %f\n",potential, virial, kinetic);
//   output_particles(rx,ry,rz,vx,vy,vz,fx,fy,fz,0);


   for(step=1;step<=nstep;step++){
     // if(step>=85)printf ("\nStarted step %d\n",step);
      movea (rx, ry, rz, vx, vy, vz, fx, fy, fz, dt, natoms);
//      check_cells(rx, ry, rz, head, list, mx, my, mz, natoms,step,step);
    //  if(step>85)printf ("\nReturned from movea\n");
      movout (rx, ry, rz, vx, vy, vz, sfx, sfy, sfz, head, list, mx, my, mz, natoms);
   //  if(step>85) printf ("\nReturned from movout\n");
  //    check_cells(rx, ry, rz, head, list, mx, my, mz, natoms,step,step);
      clReleaseMemObject(d_rx);
      clReleaseMemObject(d_ry);
      clReleaseMemObject(d_rz);
      clReleaseMemObject(d_fx);
      clReleaseMemObject(d_fy);
      clReleaseMemObject(d_fz);
      clReleaseMemObject(d_head);
      clReleaseMemObject(d_list);
      clReleaseMemObject(d_virialArray);
      clReleaseMemObject(d_potentialArray);
   d_rx = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms * 2, rx, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_ry = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms * 2, ry, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_rz = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms * 2, rz, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_fx = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms, fx, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_fy = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms, fy, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_fz = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * natoms, fz, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_head = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, (mx+2)*(my+2)*(mz+2)*sizeof(int), head, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_list = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 2*natoms*sizeof(int), list, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }    
   d_virialArray = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,  sizeof(float) * (numBlocks), virialArray, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }   
   d_potentialArray = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,  sizeof(float) * (numBlocks), potentialArray, &err);
   if(err < 0) {
     perror("Couldn't create a command queue");
     exit(1);   
   }    
    
   err = clSetKernelArg(force_kernel, 0, sizeof(cl_mem), &d_virialArray);
   err |= clSetKernelArg(force_kernel, 1, sizeof(cl_mem), &d_potentialArray);
   err |= clSetKernelArg(force_kernel, 2, sizeof(cl_mem), &d_rx);
   err |= clSetKernelArg(force_kernel, 3, sizeof(cl_mem), &d_ry);
   err |= clSetKernelArg(force_kernel, 4, sizeof(cl_mem), &d_rz);
   err |= clSetKernelArg(force_kernel, 5, sizeof(cl_mem), &d_fx);
   err |= clSetKernelArg(force_kernel, 6, sizeof(cl_mem), &d_fy);
   err |= clSetKernelArg(force_kernel, 7, sizeof(cl_mem), &d_fz);
   err |= clSetKernelArg(force_kernel, 8, sizeof(sigma), &sigma);
   err |= clSetKernelArg(force_kernel, 9, sizeof(rcut), &rcut);
   err |= clSetKernelArg(force_kernel, 10, sizeof(vrcut), &vrcut);
   err |= clSetKernelArg(force_kernel, 11, sizeof(dvrc12), &dvrc12);
   err |= clSetKernelArg(force_kernel, 12, sizeof(dvrcut), &dvrcut);
   err |= clSetKernelArg(force_kernel, 13, sizeof(cl_mem), &d_head);
   err |= clSetKernelArg(force_kernel, 14, sizeof(cl_mem), &d_list);
   err |= clSetKernelArg(force_kernel, 15, sizeof(mx), &mx);
   err |= clSetKernelArg(force_kernel, 16, sizeof(my), &my);
   err |= clSetKernelArg(force_kernel, 17, sizeof(mz), &mz);
   err |= clSetKernelArg(force_kernel, 18, sizeof(natoms), &natoms);
   err |= clSetKernelArg(force_kernel, 19, sizeof(sfx), &sfx);
   err |= clSetKernelArg(force_kernel, 20, sizeof(sfy), &sfy);
   err |= clSetKernelArg(force_kernel, 21, sizeof(sfz), &sfz);

   if(err < 0) {
     printf("Couldn't set an argument for the transpose kernel");
     exit(1);   
   }


      global_size[0] = BLOCK_WIDTH * ceil(natoms / (float) BLOCK_WIDTH);
      local_size[0] = BLOCK_WIDTH;
      //printf("Global Size: %d\n", global_size[0]);
      //printf("Local Size: %d\n", local_size[0]);
      clFinish(queue);
      startTime = get_tick();
      err = clEnqueueNDRangeKernel(queue, force_kernel, 1, NULL, global_size, local_size, 0, NULL, NULL);
      clFinish(queue);
      endTime = get_tick();
      elapsedTime += endTime - startTime;
      if(err < 0) {
        printf("Couldn't enqueue force kernel\n");
        exit(1);   
      }
      //float fxTest[natoms];
      //size_t sizy = sizeof(float) * (natoms);
      err = clEnqueueReadBuffer(queue, d_fx, CL_TRUE, 0, sizeof(float) * natoms, fx, 0, NULL, NULL);
      err |= clEnqueueReadBuffer(queue, d_fy, CL_TRUE, 0, sizeof(float) * natoms, fy, 0, NULL, NULL);
      err |= clEnqueueReadBuffer(queue, d_fz, CL_TRUE, 0, sizeof(float) * natoms, fz, 0, NULL, NULL);
      err |= clEnqueueReadBuffer(queue, d_virialArray, CL_TRUE, 0, sizeof(float) * numBlocks, virialArrayTemp, 0, NULL, NULL);
      err |= clEnqueueReadBuffer(queue, d_potentialArray, CL_TRUE, 0, sizeof(float) * numBlocks, potentialArrayTemp, 0, NULL, NULL);
      if(err < 0) {
        printf("Couldn't read buffer\n");
        printf("%d\n",err );
        printf("CL_INVALID_COMMAND_QUEUE: %d\n",CL_INVALID_COMMAND_QUEUE);
        printf("CL_INVALID_CONTEXT: %d\n", CL_INVALID_CONTEXT);
        printf("CL_INVALID_MEM_OBJECT: %d\n", CL_INVALID_MEM_OBJECT);
        printf("CL_INVALID_VALUE: %d\n",CL_INVALID_VALUE);
        printf("CL_INVALID_EVENT_WAIT_LIST: %d\n", CL_INVALID_EVENT_WAIT_LIST);
        printf("CL_MEM_OBJECT_ALLOCATION_FAILURE: %d\n",CL_MEM_OBJECT_ALLOCATION_FAILURE);
        printf("CL_OUT_OF_HOST_MEMORY: %d\n", CL_OUT_OF_HOST_MEMORY);
        exit(1);   
      }
      clFinish(queue);
      //numInc = 0;
      //globalThreads = ceil(numBlocks / (float)BLOCK_WIDTH);
     // startTime = get_tick();
      virial = 0.0;
      potential = 0.0;
      int tempInd = 0;
      for (tempInd =0; tempInd < numBlocks; tempInd++)
      {
        potential += potentialArrayTemp[tempInd];
        virial += virialArrayTemp[tempInd];
      }
      virial *= 48.0/3.0;
      potential *= 4.0;

//      if(step>85)printf ("\nReturned from force: potential = %f, virial = %f, kinetic = %f\n",potential, virial, kinetic);
 //     fflush(stdout);
      moveb (&kinetic, vx, vy, vz, fx, fy, fz, dt, natoms);
 //     check_cells(rx, ry, rz, head, list, mx, my, mz, natoms,step,step);
  // if(step>85)   printf ("\nReturned from moveb: potential = %f, virial = %f, kinetic = %f\n",potential, virial, kinetic);
      sum_energies (potential, kinetic, virial, &vg, &wg, &kg);
      hloop (kinetic, step, vg, wg, kg, freex, dens, sigma, eqtemp, &tmpx, &ace, &acv, &ack, &acp, &acesq, &acvsq, &acksq, &acpsq, vx, vy, vz, iscale, iprint, nequil, natoms);

   }

   tidyup (ace, ack, acv, acp, acesq, acksq, acvsq, acpsq, nstep, nequil);
   elapsedTime = elapsedTime / (float) 1000;
   printf("\n%Lf seconds have elapsed\n", elapsedTime);
   
   return 0;
}
/*
 * Rectangular-read demo: creates an 80-float device buffer from full_matrix,
 * runs the kernel once, rewrites the buffer from full_matrix, then copies a
 * sub-rectangle of it into the zero-initialised host array with
 * clEnqueueReadBufferRect and prints that host array as 8 rows of 10 values.
 *
 * NOTE(review): failures are reported with perror(), which appends the errno
 * text; the OpenCL status code itself is never printed - confirm that this is
 * the intended diagnostic.
 */
int main() {

   /* OpenCL handles */
   cl_device_id dev;
   cl_context ctx;
   cl_command_queue cmd_queue;
   cl_program prog;
   cl_kernel kern;
   cl_mem matrix_mem;
   cl_int status, k;

   /* Host-side matrices */
   float full_matrix[80], zero_matrix[80];

   /* Geometry handed to clEnqueueReadBufferRect */
   const size_t src_origin[3] = {5*sizeof(float), 3, 0};
   const size_t dst_origin[3] = {1*sizeof(float), 1, 0};
   const size_t rect[3] = {4*sizeof(float), 4, 1};
   const size_t row_pitch = 10*sizeof(float);

   /* Source holds 0..79, destination starts out all zero */
   for(k=0; k<80; k++) {
      zero_matrix[k] = 0.0;
      full_matrix[k] = k*1.0f;
   }

   /* Device and context */
   dev = create_device();
   ctx = clCreateContext(NULL, 1, &dev, NULL, NULL, &status);
   if(status < 0) {
      perror("Couldn't create a context");
      exit(1);
   }

   /* Program and kernel */
   prog = build_program(ctx, dev, PROGRAM_FILE);
   kern = clCreateKernel(prog, KERNEL_FUNC, &status);
   if(status < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   }

   /* Device buffer, initialised with a copy of full_matrix */
   matrix_mem = clCreateBuffer(ctx,
         CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
         sizeof(full_matrix), full_matrix, &status);
   if(status < 0) {
      perror("Couldn't create a buffer object");
      exit(1);
   }

   /* The buffer is the kernel's only argument */
   status = clSetKernelArg(kern, 0, sizeof(cl_mem), &matrix_mem);
   if(status < 0) {
      perror("Couldn't set the buffer as the kernel argument");
      exit(1);
   }

   /* Command queue */
   cmd_queue = clCreateCommandQueue(ctx, dev, 0, &status);
   if(status < 0) {
      perror("Couldn't create a command queue");
      exit(1);
   }

   /* Run the kernel as a single task */
   status = clEnqueueTask(cmd_queue, kern, 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);
   }

   /* Blocking write: overwrite the whole buffer with full_matrix */
   status = clEnqueueWriteBuffer(cmd_queue, matrix_mem, CL_TRUE, 0,
         sizeof(full_matrix), full_matrix, 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't write to the buffer object");
      exit(1);
   }

   /* Blocking rectangular read into zero_matrix */
   status = clEnqueueReadBufferRect(cmd_queue, matrix_mem, CL_TRUE,
         src_origin, dst_origin, rect, row_pitch, 0,
         row_pitch, 0, zero_matrix, 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't read the rectangle from the buffer object");
      exit(1);
   }

   /* Print the host array, ten values per line */
   for(k=0; k<80; k++) {
      printf("%6.1f", zero_matrix[k]);
      if(k % 10 == 9) {
         printf("\n");
      }
   }

   /* Release everything that was created above */
   clReleaseMemObject(matrix_mem);
   clReleaseKernel(kern);
   clReleaseCommandQueue(cmd_queue);
   clReleaseProgram(prog);
   clReleaseContext(ctx);

   return 0;
}
Exemple #15
0
void setup_shaders()
{


    char *vshader =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "void main(){"
        "   gl_Position = u_MvpMatrix * a_Position;"
        "}";

    char *fshader =
        "precision lowp float;"
        "uniform vec4 u_Color;"
        "uniform float u_Alpha;"
        "void main() {"
        "   gl_FragColor = u_Color;"
        //"   gl_FragColor = vec4(0,1,0,1);"
        "   gl_FragColor.w*=u_Alpha;"
        "}";

    color_program = get_color_program(build_program(vshader, (GLint)strlen(vshader), fshader, (GLint)strlen(fshader)));


    char *vertex_gradient_shader =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "attribute vec4 a_Color;"
        "varying vec4 v_DestinationColor;"
        "void main(){"
        "   v_DestinationColor = a_Color;"
        "   gl_Position = u_MvpMatrix * a_Position;"
        "}";

    char *fragment_gradient_shader =
        "precision lowp float;"
        "uniform float u_Alpha;"
        "varying vec4 v_DestinationColor;"
        "void main() {"
        "   gl_FragColor = v_DestinationColor;"
        "   gl_FragColor.w*=u_Alpha;"
        //"   gl_FragColor = vec4(0,1,0,1);"
        "}";

    gradient_program = get_gradient_program(build_program(vertex_gradient_shader, (GLint)strlen(vertex_gradient_shader), fragment_gradient_shader, (GLint)strlen(fragment_gradient_shader)));



    char* vshader_texture  =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "attribute vec2 a_TextureCoordinates;"
        "varying vec2 v_TextureCoordinates;"
        "void main(){"
        "    v_TextureCoordinates = a_TextureCoordinates;"
        "    gl_Position = u_MvpMatrix * a_Position;"
        "}";

    char* fshader_texture  =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        "    gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        "    gl_FragColor.w *= u_Alpha;"
        "}";

    texture_program = get_texture_program(build_program(vshader_texture, (GLint)strlen(vshader_texture), fshader_texture, (GLint)strlen(fshader_texture)));






    char* vshader_texture_blue  =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "attribute vec2 a_TextureCoordinates;"
        "varying vec2 v_TextureCoordinates;"
        "void main(){"
        "    v_TextureCoordinates = a_TextureCoordinates;"
        "    gl_Position = u_MvpMatrix * a_Position;"
        "}";

    char* fshader_texture_blue  =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        "    gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        //"   float p = u_Alpha*gl_FragColor.w*0.4;"
        //"   gl_FragColor = vec4(0,0.353,0.761,p);"
        "   float p = u_Alpha*gl_FragColor.w;"
        "   gl_FragColor = vec4(0,0.6,0.898,p);"
        "}";

    texture_program_blue = get_texture_program(build_program(vshader_texture_blue, (GLint)strlen(vshader_texture_blue), fshader_texture_blue, (GLint)strlen(fshader_texture_blue)));





    char* vshader_texture_red  =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "attribute vec2 a_TextureCoordinates;"
        "varying vec2 v_TextureCoordinates;"
        "void main(){"
        "    v_TextureCoordinates = a_TextureCoordinates;"
        "    gl_Position = u_MvpMatrix * a_Position;"
        "}";

    char* fshader_texture_red  =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        "   gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        //"   float p = gl_FragColor.w*0.45*u_Alpha;"
        //"   gl_FragColor = vec4(0.722,0.035,0,p);"
        "   float p = gl_FragColor.w*u_Alpha;"
        "   gl_FragColor = vec4(210./255.,57./255.,41./255.,p);"
        "}";

    texture_program_red = get_texture_program(build_program(vshader_texture_red, (GLint)strlen(vshader_texture_red), fshader_texture_red, (GLint)strlen(fshader_texture_red)));




    vshader  =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "attribute vec2 a_TextureCoordinates;"
        "varying vec2 v_TextureCoordinates;"
        "void main(){"
        "    v_TextureCoordinates = a_TextureCoordinates;"
        "    gl_Position = u_MvpMatrix * a_Position;"
        "}";

    fshader  =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        "    gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        //"    float p = u_Alpha*gl_FragColor.w;"
        //"    gl_FragColor = vec4(237./255., 64./255., 27./255., p);"
        "    float p = u_Alpha*gl_FragColor.w;"
        "    gl_FragColor = vec4(246./255., 73./255., 55./255., p);"
        "}";

    texture_program_light_red = get_texture_program(build_program(vshader, (GLint)strlen(vshader), fshader, (GLint)strlen(fshader)));



    vshader  =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "attribute vec2 a_TextureCoordinates;"
        "varying vec2 v_TextureCoordinates;"
        "void main(){"
        "    v_TextureCoordinates = a_TextureCoordinates;"
        "    gl_Position = u_MvpMatrix * a_Position;"
        "}";

    fshader  =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        "    gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        "   float p = u_Alpha*gl_FragColor.w;"
        //"    gl_FragColor = vec4(100./255.,182./255.,248./255.,p);"
        "    gl_FragColor = vec4(42./255.,180./255.,247./255.,p);"
        "}";

    texture_program_light_blue = get_texture_program(build_program(vshader, (GLint)strlen(vshader), fshader, (GLint)strlen(fshader)));








    vshader  =
        "uniform mat4 u_MvpMatrix;"
        "attribute vec4 a_Position;"
        "attribute vec2 a_TextureCoordinates;"
        "varying vec2 v_TextureCoordinates;"
        "void main(){"
        "    v_TextureCoordinates = a_TextureCoordinates;"
        "    gl_Position = u_MvpMatrix * a_Position;"
        "}";

    fshader  =
        "precision lowp float;"
        "uniform sampler2D u_TextureUnit;"
        "varying vec2 v_TextureCoordinates;"
        "uniform float u_Alpha;"
        "void main(){"
        "    gl_FragColor = texture2D(u_TextureUnit, v_TextureCoordinates);"
        "    gl_FragColor *= u_Alpha;"
        "}";

    texture_program_one = get_texture_program(build_program(vshader, (GLint)strlen(vshader), fshader, (GLint)strlen(fshader)));
}
Exemple #16
0
/*
 * Runs the transmit rate-matching kernel on an OpenCL device.
 *
 * lte_phy_params : supplies rm_in_buf_sz and rm_out_buf_sz.
 * piSeq          : input sequence; rm_in_buf_sz ints are uploaded to the device.
 * pcSeq          : output sequence; rm_in_buf_sz ints are read back from the
 *                  device and the remaining (rm_out_buf_sz - rm_in_buf_sz)
 *                  entries are set to 0.
 *
 * The function creates its own context, program, kernels and queue, prints the
 * profiled kernel time, and releases every OpenCL object it created before
 * returning. Any failing argument-set, write, launch or read prints the status
 * code and calls exit(1).
 *
 * Reads the file-level names num_threads, InterColumnPattern, BLOCK_SIZE and
 * RATE, which are defined outside this function.
 *
 * NOTE(review): only small_grid_kernel has its arguments set here, yet
 * big_grid_kernel is launched when num_threads > rm_data_length - confirm that
 * path is either unreachable or configured elsewhere.
 */
void TxRateMatching(LTE_PHY_PARAMS *lte_phy_params, int *piSeq, int *pcSeq)
{
	int in_buf_sz;
	int out_buf_sz;
	int n_blocks;
	int rm_blk_sz;
	int rm_data_length;
	int rm_last_blk_len;
	int n_extra_bits;
	int num_inter_matrices;
	int n_iters = 10000;
	int i;

	int InverseColumnPattern[32];

	cl_platform_id platform;
	cl_device_id device;
	cl_context context;
	cl_command_queue queue;
	cl_program program;
	cl_int _err;

	cl_kernel small_grid_kernel, big_grid_kernel;
	cl_mem piSeq_buffer, pcSeq_buffer;
	cl_mem InterColumnPattern_buffer, InverseColumnPattern_buffer;
	cl_mem pInterMatrix_buffer;

	size_t global_size, local_size;
	size_t inter_matrix_bytes;

	double elapsed_time = 0.0;
	cl_event prof_event;
	cl_ulong ev_start_time = (cl_ulong)0;
	cl_ulong ev_end_time = (cl_ulong)0;

	/* Device, context, program, kernels and a profiling-enabled queue */
	platform = device_query();
	device = create_device(&platform);
	context = clCreateContext(NULL, 1, &device, NULL, NULL, &_err);
	program = build_program(&context, &device, PROGRAM_FILE);
	small_grid_kernel = clCreateKernel(program, RM_SMALL_KERNEL_FUNC, &_err);
	big_grid_kernel = clCreateKernel(program, RM_BIG_KERNEL_FUNC, &_err);
	queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &_err);

	in_buf_sz = lte_phy_params->rm_in_buf_sz;
	out_buf_sz = lte_phy_params->rm_out_buf_sz;
	rm_blk_sz = BLOCK_SIZE + 4;

	rm_data_length = (in_buf_sz / RATE);

	/* Number of blocks (rounded up) and the length of the final block */
	n_blocks = (rm_data_length + (rm_blk_sz - 1)) / rm_blk_sz;
	if (rm_data_length % rm_blk_sz)
	{
		rm_last_blk_len = (rm_data_length % rm_blk_sz);
	}
	else
	{
		rm_last_blk_len = rm_blk_sz;
	}

	global_size = num_threads;
	local_size = 128;

	/* One intermediate matrix per rm_blk_sz work-items, at least one */
	if (global_size <= rm_blk_sz)
		num_inter_matrices = 1;
	else
		num_inter_matrices = global_size / rm_blk_sz;

	/* Each intermediate matrix is rm_blk_sz ints rounded up to a multiple of 32 */
	inter_matrix_bytes = sizeof(int) * num_inter_matrices * (((rm_blk_sz + 31) / 32) * 32);

	/* Invert the column permutation */
	for (i = 0; i < 32; i++)
	{
		InverseColumnPattern[InterColumnPattern[i]] = i;
	}

	/* Create buffers*/
	piSeq_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, in_buf_sz * sizeof(int), NULL, &_err);
	pcSeq_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, in_buf_sz * sizeof(int), NULL, &_err);
	InterColumnPattern_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 32 * sizeof(int), NULL, &_err);
	InverseColumnPattern_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 32 * sizeof(int), NULL, &_err);
	pInterMatrix_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, inter_matrix_bytes, NULL, &_err);

	/* Set kernel arguments */
	_err = clSetKernelArg(small_grid_kernel, 0, sizeof(cl_mem), &piSeq_buffer);
	_err |= clSetKernelArg(small_grid_kernel, 1, sizeof(cl_mem), &pcSeq_buffer);
	_err |= clSetKernelArg(small_grid_kernel, 2, sizeof(int), &in_buf_sz);
	_err |= clSetKernelArg(small_grid_kernel, 3, sizeof(int), &rm_blk_sz);
	_err |= clSetKernelArg(small_grid_kernel, 4, sizeof(int), &rm_last_blk_len);
	_err |= clSetKernelArg(small_grid_kernel, 5, sizeof(cl_mem), &InterColumnPattern_buffer);
	_err |= clSetKernelArg(small_grid_kernel, 6, sizeof(cl_mem), &InverseColumnPattern_buffer);
	_err |= clSetKernelArg(small_grid_kernel, 7, sizeof(int), &rm_data_length);
	_err |= clSetKernelArg(small_grid_kernel, 8, sizeof(int), &n_blocks);
	_err |= clSetKernelArg(small_grid_kernel, 9, sizeof(cl_mem), &pInterMatrix_buffer);
	_err |= clSetKernelArg(small_grid_kernel, 10, sizeof(int), &n_iters);
	if(_err < 0) {printf("err set args:%d\n",_err);exit(1);}

	/* Upload inputs; statuses are OR-ed so a failure in any write is caught */
	_err = clEnqueueWriteBuffer(queue, piSeq_buffer, CL_TRUE, 0, in_buf_sz * sizeof(int), piSeq, 0, NULL, NULL);
	_err |= clEnqueueWriteBuffer(queue, InterColumnPattern_buffer, CL_TRUE, 0, 32 * sizeof(int), InterColumnPattern, 0, NULL, NULL);
	_err |= clEnqueueWriteBuffer(queue, InverseColumnPattern_buffer, CL_TRUE, 0, 32 * sizeof(int), InverseColumnPattern, 0, NULL, NULL);
	if(_err < 0) {printf("err write buffer:%d\n",_err);exit(1);}

	/* Launch whichever kernel matches the thread count */
	if (num_threads <= rm_data_length)
	{
		_err = clEnqueueNDRangeKernel(queue, small_grid_kernel, 1, NULL, &global_size, &local_size, 0, NULL, &prof_event);
	}
	else
	{
		_err = clEnqueueNDRangeKernel(queue, big_grid_kernel, 1, NULL, &global_size, &local_size, 0, NULL, &prof_event);
	}

	if(_err < 0) {printf("err in kernel:%d\n",_err);exit(1);}

	clFinish(queue);

	/* Profiling counters are divided by 1e6 to print milliseconds */
	_err = clWaitForEvents(1, &prof_event);
	_err |= clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ev_start_time, NULL);
	_err |= clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ev_end_time, NULL);

	elapsed_time += (double)(ev_end_time - ev_start_time) / 1000000.0;

	printf("Elapsed time of kernel is: %lfms\n", elapsed_time);

	/* Fetch the result; a failed read would leave pcSeq unfilled, so stop here */
	_err = clEnqueueReadBuffer(queue, pcSeq_buffer, CL_TRUE, 0, in_buf_sz * sizeof(int), pcSeq, 0, NULL, NULL);
	if(_err < 0) {printf("err read buffer:%d\n",_err);exit(1);}

	/* Zero the part of the output that lies beyond the input length */
	n_extra_bits = out_buf_sz - in_buf_sz;
	for (i = 0; i < n_extra_bits; i++)
	{
		pcSeq[in_buf_sz + i] = 0;
	}

	/* Release every object created above, including the event and the
	   intermediate-matrix buffer */
	clReleaseEvent(prof_event);
	clReleaseMemObject(piSeq_buffer);
	clReleaseMemObject(pcSeq_buffer);
	clReleaseMemObject(InterColumnPattern_buffer);
	clReleaseMemObject(InverseColumnPattern_buffer);
	clReleaseMemObject(pInterMatrix_buffer);
	clReleaseKernel(small_grid_kernel);
	clReleaseKernel(big_grid_kernel);
	clReleaseCommandQueue(queue);
	clReleaseProgram(program);
	clReleaseContext(context);
}
/*
 * Copy/map demo: creates two 100-float device buffers from host arrays, runs
 * the kernel once on them, copies buffer one over buffer two on the device,
 * maps buffer two for reading, memcpy's it into result_array and prints that
 * array as 10 rows of 10 values.
 *
 * NOTE(review): failures are reported with perror(), which appends the errno
 * text; the OpenCL status code itself is never printed - confirm that this is
 * the intended diagnostic.
 */
int main() {

   /* OpenCL handles */
   cl_device_id dev;
   cl_context ctx;
   cl_command_queue cmd_queue;
   cl_program prog;
   cl_kernel kern;
   cl_mem mem_one, mem_two;
   cl_int status, k;

   /* Host arrays and the pointer returned by the map call */
   float data_one[100], data_two[100], result_array[100];
   void* mapped;

   /* data_one = 0..99, data_two = its negation, result cleared */
   for(k=0; k<100; k++) {
      result_array[k] = 0.0f;
      data_one[k] = 1.0f*k;
      data_two[k] = -1.0f*k;
   }

   /* Device and context */
   dev = create_device();
   ctx = clCreateContext(NULL, 1, &dev, NULL, NULL, &status);
   if(status < 0) {
      perror("Couldn't create a context");
      exit(1);
   }

   /* Program and kernel */
   prog = build_program(ctx, dev, PROGRAM_FILE);
   kern = clCreateKernel(prog, KERNEL_FUNC, &status);
   if(status < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   }

   /* Buffers initialised from the host arrays; only the first is checked */
   mem_one = clCreateBuffer(ctx,
         CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
         sizeof(data_one), data_one, &status);
   if(status < 0) {
      perror("Couldn't create a buffer object");
      exit(1);
   }
   mem_two = clCreateBuffer(ctx,
         CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
         sizeof(data_two), data_two, NULL);

   /* Both buffers are kernel arguments */
   status = clSetKernelArg(kern, 0, sizeof(cl_mem), &mem_one);
   status |= clSetKernelArg(kern, 1, sizeof(cl_mem), &mem_two);
   if(status < 0) {
      perror("Couldn't set the buffer as the kernel argument");
      exit(1);
   }

   /* Command queue */
   cmd_queue = clCreateCommandQueue(ctx, dev, 0, &status);
   if(status < 0) {
      perror("Couldn't create a command queue");
      exit(1);
   }

   /* Run the kernel as a single task */
   status = clEnqueueTask(cmd_queue, kern, 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);
   }

   /* Device-side copy: buffer one -> buffer two */
   status = clEnqueueCopyBuffer(cmd_queue, mem_one, mem_two, 0, 0,
         sizeof(data_one), 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't perform the buffer copy");
      exit(1);
   }

   /* Blocking map of buffer two for reading */
   mapped = clEnqueueMapBuffer(cmd_queue, mem_two, CL_TRUE,
         CL_MAP_READ, 0, sizeof(data_two), 0, NULL, NULL, &status);
   if(status < 0) {
      perror("Couldn't map the buffer to host memory");
      exit(1);
   }

   /* Take a host copy, then give the mapping back */
   memcpy(result_array, mapped, sizeof(data_two));
   status = clEnqueueUnmapMemObject(cmd_queue, mem_two, mapped,
         0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't unmap the buffer");
      exit(1);
   }

   /* Print the copied data, ten values per line */
   for(k=0; k<100; k++) {
      printf("%6.1f", result_array[k]);
      if(k % 10 == 9) {
         printf("\n");
      }
   }

   /* Release everything that was created above */
   clReleaseMemObject(mem_one);
   clReleaseMemObject(mem_two);
   clReleaseKernel(kern);
   clReleaseCommandQueue(cmd_queue);
   clReleaseProgram(prog);
   clReleaseContext(ctx);

   return 0;
}
Exemple #18
0
/// Reads the vertex and fragment shader sources from the two given files via
/// get_file_contents and returns whatever build_program produces for them.
GLuint build_program_from_files(const char* file_vert, const char* file_frag)
{
	// Keep both strings alive until build_program has returned, since it
	// receives raw pointers into them.
	const std::string vertex_source = get_file_contents(file_vert);
	const std::string fragment_source = get_file_contents(file_frag);

	const char* const vertex_text = vertex_source.c_str();
	const char* const fragment_text = fragment_source.c_str();
	return build_program(vertex_text, fragment_text);
}
/*
 * Walks through a complete OpenCL run using this file's helper functions:
 * load the kernel source, pick a platform and device, build the program,
 * upload an integer array of LENGTH elements, execute the kernel twice (the
 * second launch is given the first launch's event), read the array back and
 * print it.
 */
int main (void) {
  int *a;
  cl_mem a_in;
  cl_event event;
  cl_kernel kernel;
  cl_context context;
  cl_program program;
  char *program_source;
  cl_device_id device_id;
  cl_platform_id platform_id;
  cl_command_queue command_queue;

  /* readKernel supplies the source buffer itself; allocating one here first
     only leaked it, because the pointer was overwritten immediately */
  program_source = readKernel ();

  /* number of platforms on the system */
  platforms_number ();

  /* id of the first platform proposed by the system */
  platform_id = get_platform ();

  /* number of devices on the platform specified by platform_id; the
     returned count is not needed by the rest of this function */
  (void) devices_number (platform_id);

  /* id of the first device proposed by the system on the platform
     specified by platform_id */
  device_id = create_device (platform_id);

  /* create a context to establish a communication channel between the
     host process and the device */
  context = create_context (device_id);

  /* create a program providing the source code */
  program = create_program (context, program_source);

  /* compile the program for the specific device architecture */
  build_program (program, device_id);

  /* create a kernel given the program */
  kernel = create_kernel (program);

  /* create a memory object, in this case this will be an array of
     integers of length specified by the LENGTH macro */
  a = create_memory_object (LENGTH, "a");

  /* create a buffer, this will be allocated on the global memory of
     the device */
  a_in = create_buffer (LENGTH, context, "a_in");

  /* assign this buffer as the only kernel argument */
  set_kernel_argument (kernel, a_in, 0, "a_in");

  /* create a command queue, here we can enqueue tasks for the device
     specified by device_id */
  command_queue = create_command_queue (context, device_id);

  /* copy the memory object allocated on the host memory into the
     buffer created on the global memory of the device */
  enqueue_write_buffer_task (command_queue, a_in, LENGTH, a, "a_in");

  /* enqueue the kernel twice; the second launch receives the event of
     the first one */
  event = enqueue_kernel_execution (command_queue, kernel, LENGTH, 0, NULL);
  enqueue_kernel_execution (command_queue, kernel, LENGTH, 1, &event);

  /* copy the content of the buffer from the global memory of the
     device to the host memory */
  enqueue_read_buffer_task (command_queue, a_in, LENGTH,  a, "a_in");

  /* print the memory object with the result of the execution */
  print_memory_object (a, LENGTH, "a");

  return 0;
}
/*
 * Shuffle demo: runs the kernel once with two write-only output buffers
 * (8 floats and 16 chars), reads both back and prints them.
 *
 * Only some of the OpenCL calls are checked; the creation of the second
 * buffer, the second clSetKernelArg and the second read are left unchecked.
 *
 * NOTE(review): failures are reported with perror(), which appends the errno
 * text; the OpenCL status code itself is never printed - confirm that this is
 * the intended diagnostic.
 */
int main() {

   /* OpenCL handles */
   cl_device_id dev;
   cl_context ctx;
   cl_command_queue cmd_queue;
   cl_program prog;
   cl_kernel kern;
   cl_mem out1_mem, out2_mem;
   cl_int status, k;

   /* Host destinations for the two kernel outputs */
   float shuffle1[8];
   char shuffle2[16];

   /* Device and context */
   dev = create_device();
   ctx = clCreateContext(NULL, 1, &dev, NULL, NULL, &status);
   if(status < 0) {
      perror("Couldn't create a context");
      exit(1);
   }

   /* Program and kernel */
   prog = build_program(ctx, dev, PROGRAM_FILE);
   kern = clCreateKernel(prog, KERNEL_FUNC, &status);
   if(status < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   }

   /* Write-only output buffers */
   out1_mem = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY,
         sizeof(shuffle1), NULL, &status);
   if(status < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }
   out2_mem = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY,
         sizeof(shuffle2), NULL, &status);

   /* Kernel arguments */
   status = clSetKernelArg(kern, 0, sizeof(cl_mem), &out1_mem);
   if(status < 0) {
      perror("Couldn't set a kernel argument");
      exit(1);
   }
   clSetKernelArg(kern, 1, sizeof(cl_mem), &out2_mem);

   /* Command queue */
   cmd_queue = clCreateCommandQueue(ctx, dev, 0, &status);
   if(status < 0) {
      perror("Couldn't create a command queue");
      exit(1);
   }

   /* Run the kernel as a single task */
   status = clEnqueueTask(cmd_queue, kern, 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);
   }

   /* Blocking reads of both outputs */
   status = clEnqueueReadBuffer(cmd_queue, out1_mem, CL_TRUE, 0,
      sizeof(shuffle1), shuffle1, 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't read the buffer");
      exit(1);
   }
   clEnqueueReadBuffer(cmd_queue, out2_mem, CL_TRUE, 0,
      sizeof(shuffle2), shuffle2, 0, NULL, NULL);

   /* Floats: comma separated, newline after the last one */
   printf("Shuffle1: ");
   for(k=0; k<8; k++) {
      printf("%.2f%s", shuffle1[k], (k < 7) ? ", " : "\n");
   }

   /* Chars: printed back to back */
   printf("Shuffle2: ");
   for(k=0; k<16; k++) {
      putchar(shuffle2[k]);
   }
   printf("\n");

   /* Release everything that was created above */
   clReleaseMemObject(out1_mem);
   clReleaseMemObject(out2_mem);
   clReleaseKernel(kern);
   clReleaseCommandQueue(cmd_queue);
   clReleaseProgram(prog);
   clReleaseContext(ctx);
   return 0;
}
/*
 * Runs the kernel once with a single 16-byte write-only buffer, reads the
 * buffer back and prints its bytes in hexadecimal on one line.
 *
 * NOTE(review): failures are reported with perror(), which appends the errno
 * text; the OpenCL status code itself is never printed - confirm that this is
 * the intended diagnostic.
 */
int main() {

   /* OpenCL handles */
   cl_device_id dev;
   cl_context ctx;
   cl_command_queue cmd_queue;
   cl_program prog;
   cl_kernel kern;
   cl_mem out_mem;
   cl_int status, k;

   /* Host destination for the kernel output */
   unsigned char test[16];

   /* Device and context */
   dev = create_device();
   ctx = clCreateContext(NULL, 1, &dev, NULL, NULL, &status);
   if(status < 0) {
      perror("Couldn't create a context");
      exit(1);
   }

   /* Program and kernel */
   prog = build_program(ctx, dev, PROGRAM_FILE);
   kern = clCreateKernel(prog, KERNEL_FUNC, &status);
   if(status < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   }

   /* Write-only output buffer */
   out_mem = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY,
         sizeof(test), NULL, &status);
   if(status < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }

   /* The buffer is the kernel's only argument */
   status = clSetKernelArg(kern, 0, sizeof(cl_mem), &out_mem);
   if(status < 0) {
      perror("Couldn't set a kernel argument");
      exit(1);
   }

   /* Command queue */
   cmd_queue = clCreateCommandQueue(ctx, dev, 0, &status);
   if(status < 0) {
      perror("Couldn't create a command queue");
      exit(1);
   }

   /* Run the kernel as a single task */
   status = clEnqueueTask(cmd_queue, kern, 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);
   }

   /* Blocking read of the output */
   status = clEnqueueReadBuffer(cmd_queue, out_mem, CL_TRUE, 0,
      sizeof(test), test, 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't read the buffer");
      exit(1);
   }

   /* Comma separated hex bytes, newline after the last one */
   for(k=0; k<16; k++) {
      printf("0x%X%s", test[k], (k < 15) ? ", " : "\n");
   }

   /* Release everything that was created above */
   clReleaseMemObject(out_mem);
   clReleaseKernel(kern);
   clReleaseCommandQueue(cmd_queue);
   clReleaseProgram(prog);
   clReleaseContext(ctx);
   return 0;
}
/*
 * Builds simple_trigo.cl, runs its "permutate" kernel ITERATIONS times over
 * two read-only input arrays (a = 0, 1, 2, ... and b = DATA_SIZE,
 * DATA_SIZE-1, ...) and, after each run, prints every index whose result
 * element equals 1.
 *
 * Each launch uses DATA_SIZE / 16 work-items and lets the runtime choose the
 * work-group size.
 */
int main() {

   /* OpenCL data structures */
   cl_device_id device;
   cl_context context;
   cl_command_queue queue;
   cl_program program;
   cl_kernel kernel;
   cl_int i, err;

   /* Data and buffers */
   cl_float a_ptr[DATA_SIZE];
   cl_float b_ptr[DATA_SIZE];
   cl_float res_ptr[DATA_SIZE];
   cl_mem a_buffer, b_buffer;
   cl_mem res_buffer;

   /* a counts up from 0, b counts down from DATA_SIZE */
   for(i = 0; i < DATA_SIZE; ++i) {
      a_ptr[i] = i;
      b_ptr[i] = DATA_SIZE - i;
   }

   /* Create a context */
   device = create_device();
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a context");
      exit(1);
   }

   /* Create a kernel by name */
   program = build_program(context, device, "simple_trigo.cl");
   kernel = clCreateKernel(program, "permutate", &err);
   if(err < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   }

   /* Read-only inputs, initialised from the host arrays */
   a_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*DATA_SIZE, a_ptr, &err);
   if(err < 0) {
      perror("Couldn't create buffer 'a'");
      exit(1);
   }
   b_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*DATA_SIZE, b_ptr, &err);
   if(err < 0) {
      perror("Couldn't create buffer 'b'");
      exit(1);
   }

   /* Create a write-only buffer to hold the output data */
   res_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float)*DATA_SIZE, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }

   /* Create kernel arguments; a failure here would otherwise only show up
      later as a failed launch */
   err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_buffer);
   err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_buffer);
   err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &res_buffer);
   if(err < 0) {
      perror("Couldn't set a kernel argument");
      exit(1);
   }

   /* Create a command queue */
   queue = clCreateCommandQueue(context, device, 0, &err);
   if(err < 0) {
      perror("Couldn't create a command queue");
      exit(1);
   }

   /* seed the random number generator
      NOTE(review): nothing in this function draws random numbers - confirm
      whether the seed is still needed */
   srandom(41L);
   for(int iter = 0; iter < ITERATIONS; ++iter) {

      /* Enqueue kernel */
      size_t globalTs[1] = {DATA_SIZE / 16};
      err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalTs, NULL, 0, NULL, NULL);
      if(err < 0) {
         perror("Couldn't enqueue the kernel");
         exit(1);
      }

      /* Read and print the result */
      err = clEnqueueReadBuffer(queue, res_buffer, CL_TRUE, 0, sizeof(cl_float)*DATA_SIZE, res_ptr, 0, NULL, NULL);
      if(err < 0) {
         perror("Couldn't read the buffer");
         exit(1);
      }

      printf("\n\nFind Unit Circle: ");
      for(i=0; i<DATA_SIZE; i++) {
         /* to check if sin^2 + cos^2 == 1
            NOTE(review): exact floating-point equality - confirm a tolerance
            is not wanted here */
         if(res_ptr[i] == 1)
            printf("Unit circle with x=%f, y=%f\n", a_ptr[i], b_ptr[i]);
      }
      printf("\n");
      /* No per-iteration release: the previous code released a mask buffer
         here that was never created, i.e. an uninitialized handle. */
   }

   /* Deallocate resources */
   clReleaseMemObject(a_buffer);
   clReleaseMemObject(b_buffer);
   clReleaseMemObject(res_buffer);
   clReleaseKernel(kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}
/*
 * Reflect demo: passes two 4-float vectors to the kernel by value, runs it
 * once, reads the 4-float result buffer back and prints it.
 *
 * NOTE(review): most failures are reported with perror(), which appends the
 * errno text; the OpenCL status code itself is never printed - confirm that
 * this is the intended diagnostic.
 */
int main() {

   /* OpenCL handles */
   cl_device_id dev;
   cl_context ctx;
   cl_command_queue cmd_queue;
   cl_program prog;
   cl_kernel kern;
   cl_mem result_mem;
   cl_int status;

   /* Kernel inputs (passed by value) and the host copy of its output */
   float vec_x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
   float vec_u[4] = {0.0f, 5.0f, 0.0f, 0.0f};
   float result[4];

   /* Device and context */
   dev = create_device();
   ctx = clCreateContext(NULL, 1, &dev, NULL, NULL, &status);
   if(status < 0) {
      perror("Couldn't create a context");
      exit(1);
   }

   /* Program */
   prog = build_program(ctx, dev, PROGRAM_FILE);

   /* Kernel */
   kern = clCreateKernel(prog, KERNEL_FUNC, &status);
   if(status < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   }

   /* Write-only buffer for four floats */
   result_mem = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY,
         4*sizeof(float), NULL, &status);
   if(status < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }

   /* Arguments 0 and 1 are the raw vectors, argument 2 is the buffer */
   status = clSetKernelArg(kern, 0, sizeof(vec_x), vec_x);
   status |= clSetKernelArg(kern, 1, sizeof(vec_u), vec_u);
   status |= clSetKernelArg(kern, 2, sizeof(cl_mem), &result_mem);
   if(status < 0) {
      printf("Couldn't set a kernel argument");
      exit(1);
   }

   /* Command queue */
   cmd_queue = clCreateCommandQueue(ctx, dev, 0, &status);
   if(status < 0) {
      perror("Couldn't create a command queue");
      exit(1);
   }

   /* Run the kernel as a single task */
   status = clEnqueueTask(cmd_queue, kern, 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);
   }

   /* Blocking read of the result, then print it */
   status = clEnqueueReadBuffer(cmd_queue, result_mem, CL_TRUE, 0,
      sizeof(result), result, 0, NULL, NULL);
   if(status < 0) {
      perror("Couldn't read the buffer");
      exit(1);
   }
   printf("\nResult: %f %f %f %f\n",
         result[0], result[1], result[2], result[3]);

   /* Release everything that was created above */
   clReleaseMemObject(result_mem);
   clReleaseKernel(kern);
   clReleaseCommandQueue(cmd_queue);
   clReleaseProgram(prog);
   clReleaseContext(ctx);
   return 0;
}
/*
 * Host driver for a multi-stage sum reduction on an OpenCL device.
 *
 * Fills an ARRAY_SIZE-element array with 0, 1, 2, ..., enqueues KERNEL_1
 * repeatedly (dividing the global size by the work-group size on each pass)
 * and finally KERNEL_2, whose third argument is a one-float output buffer.
 * The value read back is compared with the closed-form sum, and the
 * difference between the first kernel's start counter and the last kernel's
 * end counter is printed.
 *
 * Returns 0 on success; exits with status 1 on any OpenCL failure.
 */
int main() {

   /* OpenCL structures */
   cl_device_id device;
   cl_context context;
   cl_program program;
   cl_kernel vector_kernel, complete_kernel;
   cl_command_queue queue;
   cl_event start_event, end_event;
   cl_int i, err;
   size_t local_size, global_size;

   /* Data and buffers. The array is static so that a large ARRAY_SIZE
      (defined elsewhere) cannot overflow the stack. */
   static float data[ARRAY_SIZE];
   float sum, actual_sum;
   cl_mem data_buffer, sum_buffer;
   cl_ulong time_start, time_end, total_time;

   /* Initialize data */
   for(i=0; i<ARRAY_SIZE; i++) {
      data[i] = 1.0f*i;
   }

   /* Create device and determine local size */
   device = create_device();
   err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
         sizeof(local_size), &local_size, NULL);
   if(err < 0) {
      perror("Couldn't obtain device information");
      exit(1);
   }

   /* Create a context */
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a context");
      exit(1);
   }

   /* Build program */
   program = build_program(context, device, PROGRAM_FILE);

   /* Create data buffers; each creation is checked on its own so that a
      failure of the first is not hidden by the success of the second. */
   data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE |
         CL_MEM_USE_HOST_PTR, ARRAY_SIZE * sizeof(float), data, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }
   sum_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
         sizeof(float), NULL, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }

   /* Create a command queue (profiling enabled for the timing below) */
   queue = clCreateCommandQueue(context, device,
         CL_QUEUE_PROFILING_ENABLE, &err);
   if(err < 0) {
      perror("Couldn't create a command queue");
      exit(1);
   }

   /* Create kernels, checking each one individually */
   vector_kernel = clCreateKernel(program, KERNEL_1, &err);
   if(err < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   }
   complete_kernel = clCreateKernel(program, KERNEL_2, &err);
   if(err < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   }

   /* Set arguments for vector kernel */
   err = clSetKernelArg(vector_kernel, 0, sizeof(cl_mem), &data_buffer);
   err |= clSetKernelArg(vector_kernel, 1, local_size * 4 * sizeof(float), NULL);

   /* Set arguments for complete kernel; keep accumulating into err so the
      vector-kernel results above are not thrown away. */
   err |= clSetKernelArg(complete_kernel, 0, sizeof(cl_mem), &data_buffer);
   err |= clSetKernelArg(complete_kernel, 1, local_size * 4 * sizeof(float), NULL);
   err |= clSetKernelArg(complete_kernel, 2, sizeof(cl_mem), &sum_buffer);
   if(err < 0) {
      perror("Couldn't create a kernel argument");
      exit(1);
   }

   /* Enqueue kernels */
   global_size = ARRAY_SIZE/4;
   err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size,
         &local_size, 0, NULL, &start_event);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);
   }
   printf("Global size = %zu\n", global_size);

   /* Perform successive stages of the reduction */
   while(global_size/local_size > local_size) {
      global_size = global_size/local_size;
      err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size,
            &local_size, 0, NULL, NULL);
      printf("Global size = %zu\n", global_size);
      if(err < 0) {
         perror("Couldn't enqueue the kernel");
         exit(1);
      }
   }

   /* Final stage: the runtime picks the work-group size (local size NULL) */
   global_size = global_size/local_size;
   err = clEnqueueNDRangeKernel(queue, complete_kernel, 1, NULL, &global_size,
         NULL, 0, NULL, &end_event);
   printf("Global size = %zu\n", global_size);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);
   }

   /* Finish processing the queue and get profiling information */
   err = clFinish(queue);
   err |= clGetEventProfilingInfo(start_event, CL_PROFILING_COMMAND_START,
         sizeof(time_start), &time_start, NULL);
   err |= clGetEventProfilingInfo(end_event, CL_PROFILING_COMMAND_END,
         sizeof(time_end), &time_end, NULL);
   if(err < 0) {
      perror("Couldn't obtain profiling information");
      exit(1);
   }
   total_time = time_end - time_start;

   /* Read the result */
   err = clEnqueueReadBuffer(queue, sum_buffer, CL_TRUE, 0,
      sizeof(float), &sum, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't read the buffer");
      exit(1);
   }

   /* Check result against the closed form, within a 1% relative tolerance */
   actual_sum = 1.0f * (ARRAY_SIZE/2)*(ARRAY_SIZE-1);
   if(fabs(sum - actual_sum) > 0.01*fabs(sum))
      printf("Check failed.\n");
   else
      printf("Check passed.\n");
   printf("Total time = %llu\n", (unsigned long long)total_time);

   /* Deallocate resources */
   clReleaseEvent(start_event);
   clReleaseEvent(end_event);
   clReleaseMemObject(sum_buffer);
   clReleaseMemObject(data_buffer);
   clReleaseKernel(vector_kernel);
   clReleaseKernel(complete_kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}
Exemple #25
0
// ------------------------------------------------------------------- main ---
int main( int argc, char **argv )
{
    /* GLUT bring-up: one 512x512 double-buffered RGB window with a depth
       buffer, wired to the callbacks defined elsewhere in this file. */
    glutInit( &argc, argv );
    glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH );
    glutInitWindowSize( 512, 512 );
    glutCreateWindow( "Freetype OpenGL" );
    glutKeyboardFunc( keyboard );
    glutDisplayFunc( display );
    glutReshapeFunc( reshape );

    /* Colours used by the markup below */
    vec4 ink_white = {{1.0, 1.0, 1.0, 1.0}};
    vec4 ink_black = {{0.0, 0.0, 0.0, 1.0}};
    vec4 ink_clear = {{1.0, 1.0, 1.0, 0.0}};

    /* Text style shared by every line; gamma and foreground colour are
       overwritten inside the loops. */
    markup_t markup = {
        .family  = "Bitstream Vera Sans",
        .size    = 15.0,
        .bold    = 0,
        .italic  = 0,
        .rise    = 0.0,
        .spacing = 0.0,
        .gamma   = 1.5,
        .foreground_color    = ink_white,
        .background_color    = ink_clear,
        .underline           = 0,
        .underline_color     = ink_white,
        .overline            = 0,
        .overline_color      = ink_white,
        .strikethrough       = 0,
        .strikethrough_color = ink_white,
        .font = 0,
    };

    atlas = texture_atlas_new( 512, 512, 3 );
    buffer = vertex_buffer_new( "v3f:t2f:c4f:1g1f:2g1f" );
    markup.font = texture_font_new( atlas, "./Vera.ttf", markup.size );

    /* Two bands of 14 lines each: the first starts at the top of the window
       in white, the second starts at mid-height in black. Within a band the
       gamma value increases line by line. */
    vec2 pen;
    size_t band, row;
    for( band = 0; band < 2; ++band )
    {
        if( band == 0 )
        {
            pen.y = 512.0 - markup.font->ascender - 5;
        }
        else
        {
            markup.foreground_color = ink_black;
            pen.y = 256.0 - markup.font->ascender - 5;
        }

        for( row = 0; row < 14; ++row )
        {
            markup.gamma = 0.75 + 1.5*row*(1.0/14);
            pen.x = 25.0;
            add_text( buffer, &pen,
                      &markup, L"The quick brown fox jumps over the lazy dog. ",
                      &markup, L"0123456789.", NULL);
            pen.y -= markup.font->height;
        }
    }

    /* Load both shader sources, link them, and look up the two uniforms
       stored in file-level variables. */
    char * vert_text = read_shader("./markup.vert");
    char * frag_text = read_shader("./markup.frag");
    program = build_program( vert_text, frag_text );
    texture_location = glGetUniformLocation(program, "texture");
    pixel_location   = glGetUniformLocation(program, "pixel");

    glutMainLoop( );
    return 0;
}
Exemple #26
0
/*
 * Host driver that runs a single kernel (KERNEL_FUNC from PROGRAM_FILE) as
 * one task. The kernel receives a two-float input/output pair and a
 * four-float rounding input with a twenty-float output; the results are read
 * back and printed labelled as fmod/remainder and rint/round/ceil/floor/trunc.
 *
 * Returns 0 on success; exits with status 1 on any OpenCL failure.
 */
int main() {

   /* Host/device data structures */
   cl_device_id device;
   cl_context context;
   cl_command_queue queue;
   cl_program program;
   cl_kernel kernel;
   cl_int err;

   /* Data and buffers */
   float mod_input[2] = {317.0f, 23.0f};
   float mod_output[2];
   float round_input[4] = {-6.5f, -3.5f, 3.5f, 6.5f};
   float round_output[20];
   cl_mem mod_input_buffer, mod_output_buffer,
         round_input_buffer, round_output_buffer;

   /* Create a context */
   device = create_device();
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a context");
      exit(1);
   }

   /* Build the program and create a kernel */
   program = build_program(context, device, PROGRAM_FILE);
   kernel = clCreateKernel(program, KERNEL_FUNC, &err);
   if(err < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   }

   /* Create buffers to hold input/output data; every creation reports its
      status through err and is checked before moving on. */
   mod_input_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
         sizeof(mod_input), mod_input, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }
   mod_output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
         sizeof(mod_output), NULL, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }
   round_input_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
         sizeof(round_input), round_input, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }
   round_output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
         sizeof(round_output), NULL, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }

   /* Create kernel arguments; OpenCL error codes are negative, so OR-ing
      them together keeps err negative if any call failed. */
   err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mod_input_buffer);
   err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &mod_output_buffer);
   err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &round_input_buffer);
   err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &round_output_buffer);
   if(err < 0) {
      perror("Couldn't set a kernel argument");
      exit(1);
   }

   /* Create a command queue */
   queue = clCreateCommandQueue(context, device, 0, &err);
   if(err < 0) {
      perror("Couldn't create a command queue");
      exit(1);
   }

   /* Enqueue kernel */
   err = clEnqueueTask(queue, kernel, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);
   }

   /* Read the results (blocking reads) */
   err = clEnqueueReadBuffer(queue, mod_output_buffer, CL_TRUE, 0,
      sizeof(mod_output), mod_output, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't read the buffer");
      exit(1);
   }
   err = clEnqueueReadBuffer(queue, round_output_buffer, CL_TRUE, 0,
      sizeof(round_output), round_output, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't read the buffer");
      exit(1);
   }

   /* Display data */
   printf("fmod(%.1f, %.1f)      = %.1f\n", mod_input[0], mod_input[1], mod_output[0]);
   printf("remainder(%.1f, %.1f) = %.1f\n\n", mod_input[0], mod_input[1], mod_output[1]);

   printf("Rounding input: %.1f %.1f %.1f %.1f\n",
         round_input[0], round_input[1], round_input[2], round_input[3]);
   printf("rint:  %.1f, %.1f, %.1f, %.1f\n",
         round_output[0], round_output[1], round_output[2], round_output[3]);
   printf("round: %.1f, %.1f, %.1f, %.1f\n",
         round_output[4], round_output[5], round_output[6], round_output[7]);
   printf("ceil:  %.1f, %.1f, %.1f, %.1f\n",
         round_output[8], round_output[9], round_output[10], round_output[11]);
   printf("floor: %.1f, %.1f, %.1f, %.1f\n",
         round_output[12], round_output[13], round_output[14], round_output[15]);
   printf("trunc: %.1f, %.1f, %.1f, %.1f\n",
         round_output[16], round_output[17], round_output[18], round_output[19]);


   /* Deallocate resources */
   clReleaseMemObject(mod_input_buffer);
   clReleaseMemObject(mod_output_buffer);
   clReleaseMemObject(round_input_buffer);
   clReleaseMemObject(round_output_buffer);
   clReleaseKernel(kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}
Exemple #27
0
/*
 * Host driver that pushes an image through a single OpenCL kernel.
 *
 * Reads INPUT_FILE into a pixel buffer, creates a read-only input image and
 * a write-only output image of the same size (single-channel, 16-bit
 * normalized), runs KERNEL_FUNC over a height x width global range, reads
 * the output image back into the same pixel buffer and writes it to
 * OUTPUT_FILE.
 *
 * Returns 0 on success; exits with status 1 on any OpenCL failure.
 */
int main(int argc, char **argv) {

   /* Host/device data structures */
   cl_device_id device;
   cl_context context;
   cl_command_queue queue;
   cl_program program;
   cl_kernel kernel;
   cl_int err;
   size_t global_size[2];

   /* Image data */
   png_bytep pixels;
   cl_image_format png_format;
   cl_mem input_image, output_image;
   size_t origin[3], region[3];
   size_t width, height;

   /* Open input file and read image data */
   read_image_data(INPUT_FILE, &pixels, &width, &height);

   /* Create a device and context */
   device = create_device();
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a context");
      exit(1);
   }

   /* Build the program and create a kernel */
   program = build_program(context, device, PROGRAM_FILE);
   kernel = clCreateKernel(program, KERNEL_FUNC, &err);
   if(err < 0) {
      printf("Couldn't create a kernel: %d", err);
      exit(1);
   }

   /* Create image objects; each one is checked separately so an input-image
      failure is not masked by a successful output-image creation. */
   png_format.image_channel_order = CL_LUMINANCE;
   png_format.image_channel_data_type = CL_UNORM_INT16;
   input_image = clCreateImage2D(context,
         CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
         &png_format, width, height, 0, (void*)pixels, &err);
   if(err < 0) {
      perror("Couldn't create the image object");
      exit(1);
   }
   output_image = clCreateImage2D(context,
         CL_MEM_WRITE_ONLY, &png_format, width, height, 0, NULL, &err);
   if(err < 0) {
      perror("Couldn't create the image object");
      exit(1);
   }

   /* Create kernel arguments */
   err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_image);
   err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_image);
   if(err < 0) {
      printf("Couldn't set a kernel argument");
      exit(1);
   }

   /* Create a command queue */
   queue = clCreateCommandQueue(context, device, 0, &err);
   if(err < 0) {
      perror("Couldn't create a command queue");
      exit(1);
   }

   /* Enqueue kernel. Dimension 0 is the image height and dimension 1 the
      width; NOTE(review): confirm the kernel indexes rows with global id 0. */
   global_size[0] = height; global_size[1] = width;
   err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_size,
         NULL, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);
   }

   /* Read the whole output image back into the host pixel buffer (blocking) */
   origin[0] = 0; origin[1] = 0; origin[2] = 0;
   region[0] = width; region[1] = height; region[2] = 1;
   err = clEnqueueReadImage(queue, output_image, CL_TRUE, origin,
         region, 0, 0, (void*)pixels, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't read from the image object");
      exit(1);
   }

   /* Create output PNG file and write data */
   write_image_data(OUTPUT_FILE, pixels, width, height);

   /* Deallocate resources */
   free(pixels);
   clReleaseMemObject(input_image);
   clReleaseMemObject(output_image);
   clReleaseKernel(kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}
Exemple #28
0
int main(int argc, char **argv){
	cl_context context = get_platform(CL_DEVICE_TYPE_GPU);
	cl_device_id device = 0;
	cl_command_queue queue = get_first_device(context, &device);
	char *prog_src = read_file(CL_PROGRAM("convolution.cl"), NULL);
	cl_program program = build_program(prog_src, context, device, NULL);
	free(prog_src);
	cl_int err = CL_SUCCESS;
	cl_kernel kernel = clCreateKernel(program, "convolve", &err);
	check_cl_err(err, "failed to create kernel");

	//Setup our input signal and mask
	cl_uint in_signal[IN_DIM][IN_DIM] = {
		{ 3, 1, 1, 4, 8, 2, 1, 3 },
		{ 4, 2, 1, 1, 2, 1, 2, 3 },
		{ 4, 4, 4, 4, 3, 2, 2, 2 },
		{ 9, 8, 3, 8, 9, 0, 0, 0 },
		{ 9, 3, 3, 9, 0, 0, 0, 0 },
		{ 0, 9, 0, 8, 0, 0, 0, 0 },
		{ 3, 0, 8, 8, 9, 4, 4, 4 },
		{ 5, 9, 8, 1, 8, 1, 1, 1 }
	};
	cl_uint mask[MASK_DIM][MASK_DIM] = {
		{ 1, 1, 1 },
		{ 1, 0, 1 },
		{ 1, 1, 1 }
	};
	//0 is input, 1 is mask, 2 is output
	cl_mem mem_objs[3];
	mem_objs[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_uint) * IN_DIM * IN_DIM, in_signal, &err);
	mem_objs[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_uint) * MASK_DIM * MASK_DIM, mask, &err);
	mem_objs[2] = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
		sizeof(cl_uint) * OUT_DIM * OUT_DIM, NULL, &err);
	check_cl_err(err, "failed to create buffers");

	for (int i = 0; i < 3; ++i){
		err = clSetKernelArg(kernel, i, sizeof(cl_mem), &mem_objs[i]);
		check_cl_err(err, "failed to set kernel argument");
	}
	size_t in_dim = IN_DIM, mask_dim = MASK_DIM;
	err = clSetKernelArg(kernel, 3, sizeof(unsigned), &in_dim);
	err = clSetKernelArg(kernel, 4, sizeof(unsigned), &mask_dim);
	check_cl_err(err, "failed to set kernel argument");

	size_t global_size[2] = { OUT_DIM, OUT_DIM };
	size_t local_size[2] = { 2, 2 };
	err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_size, local_size, 0,
		NULL, NULL);
	check_cl_err(err, "failed to enqueue ND range kernel");
	
	cl_uint* out = clEnqueueMapBuffer(queue, mem_objs[2], CL_TRUE, CL_MAP_READ, 0,
		sizeof(cl_uint) * OUT_DIM * OUT_DIM, 0, NULL, NULL, &err);
	check_cl_err(err, "failed to map result");

	printf("Result:\n");
	for (int i = 0; i < OUT_DIM; ++i){
		for (int j = 0; j < OUT_DIM; ++j){
			printf("%d ", out[i * OUT_DIM + j]);
		}
		printf("\n");
	}
	printf("\n");
	clEnqueueUnmapMemObject(queue, mem_objs[2], out, 0, 0, NULL);

	for (int i = 0; i < 3; ++i){
		clReleaseMemObject(mem_objs[i]);
	}
	clReleaseKernel(kernel);
	clReleaseProgram(program);
	clReleaseCommandQueue(queue);
	clReleaseContext(context);
	return 0;
}
Exemple #29
0
/**
 * Convenience overload of build_program that supplies the visitor argument.
 *
 * Forwards `un`, `sm` and `static_prefix` unchanged to the four-argument
 * build_program, passing the result of `default_build_visitor()` as the
 * third argument, and returns whatever that overload returns.
 */
inline program build_program(unit_navigator const & un,
	clang::SourceManager const & sm, std::string const & static_prefix)
{
	return build_program(un, sm, default_build_visitor(), static_prefix);
}
/*
 * Host driver that approximates pi on an OpenCL device.
 *
 * Sets up platform, device, context, program, kernel, buffer and command
 * queue through the helper functions defined elsewhere in this file, runs
 * the kernel over NUM_STEPS work-items, reads the NUM_STEPS-float buffer
 * back and prints sum[0] / NUM_STEPS.
 *
 * Returns 0 on success, 1 if the host result array cannot be allocated.
 */
int main (void) {
  float *sum;
  cl_kernel kernel;
  cl_mem sum_buffer;
  cl_context context;
  cl_program program;
  cl_uint devices_num;
  char *program_source;
  cl_device_id device_id;
  cl_platform_id platform_id;
  cl_command_queue command_queue;

  sum = (float *) calloc (NUM_STEPS, sizeof (float));
  if (sum == NULL) {
    perror ("calloc");
    return 1;
  }

  /* readKernel supplies the source text itself, so no buffer is allocated
     here (one allocated beforehand would be leaked by this assignment) */
  program_source = readKernel ();

  /* number of platforms on the system */
  platforms_number ();

  /* id of the first platform proposed by the system */
  platform_id = get_platform ();

  /* number of devices on the platform specified by platform_id */
  devices_num = devices_number (platform_id);
  (void) devices_num; /* value itself is not used further in this function */

  /* id of the first device proposed by the system on the platform
     specified by platform_id */
  device_id = create_device (platform_id);

  /* create a context to establish a communication channel between the
     host process and the device */
  context = create_context (device_id);

  /* create a program providing the source code */
  program = create_program (context, program_source);

  /* compile the program for the specific device architecture */
  build_program (program, device_id);

  /* create a kernel given the program */
  kernel = create_kernel (program);

  /* create a memory object, in this case this will be float number
     that will contain the values of the partial sums */
  sum_buffer = create_buffer (context, "sum_buffer", NUM_STEPS);

  /* assign this buffer as the only kernel argument */
  set_kernel_argument (kernel, sum_buffer, 0, "sum_buffer");

  /* create a command queue, here we can enqueue tasks for the device
     specified by device_id */
  command_queue = create_command_queue (context, device_id);

  /* enqueue a task to execute the kernel on the device */
  enqueue_kernel_execution (command_queue, kernel, NUM_STEPS);

  /* copy the content of the buffer from the global memory of the
     device to the host memory */
  enqueue_read_buffer_task (command_queue, sum_buffer, NUM_STEPS, sum, "sum");

  /* whether the read helper blocks is not visible from here, so wait for
     the queue to drain before touching the host copy */
  clFinish (command_queue);

  printf (ANSI_COLOR_CYAN "\nAproximación de PI: %.10lf\n\n" ANSI_COLOR_RESET, sum[0] / NUM_STEPS);

  /* release device objects and host memory */
  clReleaseMemObject (sum_buffer);
  clReleaseKernel (kernel);
  clReleaseCommandQueue (command_queue);
  clReleaseProgram (program);
  clReleaseContext (context);
  free (sum);

  return 0;
}