Ejemplo n.º 1
0
/**
 * Entry point: parses the file named by argv[1], builds a linear program
 * from the parsed data and prints the value returned by Program::solve().
 *
 * @param argc number of command-line arguments; at least 2 is required.
 * @param argv argv[1] is the name of the file handed to SiteMan::parse().
 * @return 1 when no filename was given, otherwise the non-zero code returned
 *         by SiteMan::parse() or Program::build() on failure, or 0 on success.
 */
int main(int argc, const char* argv[]) 
{
	if (argc < 2) {
		std::cerr << "Please specify the filename !" << std::endl;
		return 1;
	}

	SiteMan* siteman = new SiteMan();
	Program* p = new Program();

	const char* filename = argv[1];

	// Every outcome below falls through to the single cleanup point at the
	// end, so neither object is leaked when parsing or building fails.
	int ret = siteman->parse(filename);
	if (ret != 0) {
		std::cerr << "An error occured during file parsing." << std::endl;
	} else {
		std::cout << "Parsing successfully done !" << std::endl;

		std::cout << "Creating linear program..." << std::endl;
		ret = p->build(siteman);
		if (ret != 0) {
			std::cerr << "An error occured during program building." << std::endl;
		} else {
			std::cout << "Linear program successfully built !" << std::endl;

			std::cout << "Solving program..." << std::endl;
			std::cout << "Result: [" << p->solve() << "]" << std::endl;
		}
	}

	delete p;
	delete siteman;

	// ret is 0 here only when both parse() and build() succeeded.
	return ret;
}
Ejemplo n.º 2
0
void setup( int dev )
{
    std::vector<Device> devices = _context.getInfo<CAL_CONTEXT_DEVICES>();
    std::string source;
    Program     program;

    // create kernel A
    source  = create_kernel(devices[dev]);
    std::cout << source; // uncomment to output il source 
    program = Program( _context, source.c_str(), source.length() );
    program.build(devices);
    program.disassemble(std::cout); // uncomment to emit ISA
    _kernel = Kernel(program,"main");
    _kernel.setArgBind(0,"cb0",0,16);
    _kernel.setArgBind(1,"i0");
    _kernel.setArgBind(2,"o0");

    // create queue
    _queue = CommandQueue(_context,devices[dev]);

    // create buffers
    _input  = Image2D( _context, 256, 64, CAL_FORMAT_UINT_4, 0 );
    _output = Image2D( _context, 256, 64, CAL_FORMAT_UINT_4, 0 );

    fill_data(_input);
}
Ejemplo n.º 3
0
int init() {
    cl_int status = 0;
	const char* buildOption ="-x clc++ ";
	std::vector<Platform> platforms;
	status = Platform::get(&platforms);
	if (status != CL_SUCCESS){
		std::cout<<"Error: Getting platforms!"<<std::endl;
		return FAILURE;
	}
	std::vector<cl::Platform>::iterator iter;
	for(iter = platforms.begin(); iter != platforms.end(); ++iter)
		if(!strcmp((*iter).getInfo<CL_PLATFORM_VENDOR>().c_str(), "Advanced Micro Devices, Inc."))
            break;
	cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(*iter) (), 0};
	bool gpuNotFound = false;
	try{
		context = cl::Context(CL_DEVICE_TYPE_GPU, cps, NULL, NULL, &status);
	}
	catch(std::exception e){
		gpuNotFound = true;
	}
	if(gpuNotFound){
		std::cout<<"GPU not found, falling back to CPU!"<<std::endl;
		context = cl::Context(CL_DEVICE_TYPE_CPU, cps, NULL, NULL, &status);
		if (status != CL_SUCCESS){
			std::cout<<"Error: Creating context!"<<std::endl;
			return FAILURE;
		}
	}
	Program program;

    std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
    try{
		for(unsigned int i=0; i < devices.size(); i++){
            CommandQueue tempQueue = CommandQueue(context, devices[i]);
            std::ifstream sourceFile("Rationals.cl");
            std::string sourceCode(
                std::istreambuf_iterator<char>(sourceFile),
                (std::istreambuf_iterator<char>()));
            Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1));
            program = Program(context, source);
            program.build(devices, buildOption);
            Kernel tempKernel = Kernel(program, "countRationals");
            Buffer tempInputBuffer = Buffer(context, CL_MEM_READ_WRITE, NUM_ELEMENTS * sizeof(unsigned long long));
            Buffer tempOutputBuffer = Buffer(context, CL_MEM_READ_WRITE, NUM_ELEMENTS * sizeof(unsigned long long));
            queue.push_back(tempQueue);
            kernels.push_back(tempKernel);
            inputBuffer.push_back(tempInputBuffer);
            outputBuffer.push_back(tempOutputBuffer);
		}
    }catch(cl::Error e){
        std::cout << e.what() << std::endl;
        std::cout << "Build Status: " << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(cl::Device::getDefault()) << std::endl;
        std::cout << "Build Options:\t" << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(cl::Device::getDefault()) << std::endl;
        std::cout << "Build Log:\t " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(cl::Device::getDefault()) << std::endl;
        return FAILURE;
    }
	return SUCCESS;
}
Ejemplo n.º 4
0
/**
 * Initialises @p program and builds it from in-memory shader sources.
 *
 * The vertex shader is added first, then the fragment shader, then the
 * program is built. After each step the program state is checked; on the
 * first failure the error is logged, the program is released and no further
 * step is attempted.
 *
 * @param program             program object to initialise and build.
 * @param vtx_shader          vertex shader source text.
 * @param frg_shader          fragment shader source text.
 * @param vtx_shader_filename name of the vertex shader file (log output only).
 * @param frg_shader_filename name of the fragment shader file (log output only).
 * @return true when the program reports ready() after the build, false
 *         otherwise.
 */
bool
Scene::load_shaders_from_strings(Program &program,
                                 const std::string &vtx_shader,
                                 const std::string &frg_shader,
                                 const std::string &vtx_shader_filename,
                                 const std::string &frg_shader_filename)
{
    const char *vtx_name = vtx_shader_filename.c_str();
    const char *frg_name = frg_shader_filename.c_str();
    bool loaded = false;

    program.init();

    Log::debug("Loading vertex shader from file %s:\n%s",
               vtx_name, vtx_shader.c_str());
    program.addShader(GL_VERTEX_SHADER, vtx_shader);

    if (program.valid()) {
        Log::debug("Loading fragment shader from file %s:\n%s",
                   frg_name, frg_shader.c_str());
        program.addShader(GL_FRAGMENT_SHADER, frg_shader);

        if (program.valid()) {
            program.build();

            if (program.ready()) {
                loaded = true;
            } else {
                Log::error("Failed to link program created from files %s and %s:  %s\n",
                           vtx_name,
                           frg_name,
                           program.errorMessage().c_str());
            }
        } else {
            Log::error("Failed to add fragment shader from file %s:\n  %s\n",
                       frg_name,
                       program.errorMessage().c_str());
        }
    } else {
        Log::error("Failed to add vertex shader from file %s:\n  %s\n",
                   vtx_name,
                   program.errorMessage().c_str());
    }

    // Any failed step leaves the program unusable, so drop it.
    if (!loaded)
        program.release();

    return loaded;
}
Ejemplo n.º 5
0
/**
 * Builds the program returned by create_kernel_peekperf() and prepares the
 * kernel, command queue and output image for the device at index `dev`.
 *
 * @param dev            index into the device list reported by _context.
 * @param workgroup_size work-group size handed to the kernel generator; it
 *                       also determines the width of the output image.
 */
void setup( int dev, int workgroup_size )
{
    std::vector<Device> deviceList = _context.getInfo<CAL_CONTEXT_DEVICES>();

    // Program: generated source, built for every device of the context.
    std::string ilSource = create_kernel_peekperf(workgroup_size);
    //std::cout << ilSource; // Uncomment to emit IL code
    _program = Program( _context, ilSource.c_str(), ilSource.length() );
    _program.build(deviceList);
    //_program.disassemble(std::cout); // Uncomment to emit ISA code

    // Kernel: entry point "main" with a single bound argument.
    _kernel = Kernel(_program,"main");
    _kernel.setArgBind(0,"g[]");

    // The device's CAL_DEVICE_NUMBEROFSIMD value is used as the group count.
    _nr_groups = deviceList[dev].getInfo<CAL_DEVICE_NUMBEROFSIMD>();

    _queue = CommandQueue(_context,deviceList[dev]);

    // Output image: one row per group, width is the work-group size rounded
    // up to the next multiple of 64.
    int padded_width = ((workgroup_size + 63)/64) * 64;
    _output = Image2D(_context, padded_width, _nr_groups, CAL_FORMAT_FLOAT_4, CAL_RESALLOC_GLOBAL_BUFFER );
}
Ejemplo n.º 6
0
/**
 * Creates a cl::Program from the file-level `source` in `context` and builds
 * it for `devices`.
 *
 * `errorMessage` is set to a description of the step in progress before each
 * step and cleared once the build has succeeded, so it still names the
 * failing step when an exception leaves this function.
 *
 * @param option build options passed to Program::build(); defaults to "".
 * @return the built program.
 * @throws cl::Error rethrown unchanged after the diagnostics are printed
 *         when the build fails.
 */
static inline cl::Program getProgram(const char * option = "")
{
    using namespace std;
    using namespace cl;
#if defined(DEBUG)
    cout << "start get program" << endl;
#endif
    cl_int err = 0;
    errorMessage = "create program failed";
    Program program = Program(context, source, &err);
#if defined(DEBUG)
    cout << "start build" << endl;
#endif
    errorMessage = "program build failed";
    try {
        program.build(devices, option);
    } catch (const cl::Error& e) {
        // Caught by const reference and rethrown with a bare `throw;` so the
        // original exception object propagates instead of a copy.
        if (e.err() == CL_BUILD_PROGRAM_FAILURE) {
            std::string str
                = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]);
            cout << "compile error:" << endl;
            cout << str << endl;
        } else if (e.err() != CL_SUCCESS) {
            cout << "build error but not program failure err:"
                 << dec << e.err()
                 << " " << e.what() << endl;
        }
        throw;
    }
    errorMessage = "";
#if defined(DEBUG)
    cout << "end get program" << endl;
#endif
    return program;
}
Ejemplo n.º 7
0
int main(int argc, char **argv)
{
	srand((unsigned)time(NULL));

	Kernel kernel;
	CommandQueue queue;
	Context context;

	{
		std::vector<Platform> platformList;
		Platform::get(&platformList);

		clog << "Platform number is: " << platformList.size() << endl;

		std::string platformVendor;
		platformList[0].getInfo((cl_platform_info)CL_PLATFORM_VENDOR, &platformVendor);
		clog << "Platform is by: " << platformVendor << "\n";

		cl_context_properties cprops[] = {
			CL_CONTEXT_PLATFORM, (cl_context_properties) platformList[0](),
			0
		};
		context = Context(GET_TARGET_PLATFORM, cprops);

		std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
		queue = CommandQueue(context, devices[0]);

		std::string sourceCode = "#include \"es.cl\"\n";
		Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1));
		Program program = Program(context, source);

		try
		{
			program.build(devices, "-I.");
		}
		catch (Error &)
		{
			std::string errors;
			program.getBuildInfo(devices[0], CL_PROGRAM_BUILD_LOG, &errors);
			std::cerr << "Build log: " << endl << errors << endl;
			return 1;
		}

		kernel = Kernel(program, "es");
	}

	individual *individuals = new individual[LAMBDA];
	for (int i = 0; i < LAMBDA; i++)
	{
		for (int j = 0; j < DIM; ++j)
		{
			individuals[i].x[j] = (rand()/((float)RAND_MAX)) * (XMAX-XMIN) + XMIN;
			individuals[i].s[j] = (XMAX-XMIN) / 6.f;
		}
		for (int j = 0; j < DIM_A; ++j)
		{
			individuals[i].a[j] = (rand()/((float)RAND_MAX)) * (2*PI) - PI;
		}
		
		individuals[i].fitness = 0;
	}

	float gbest = std::numeric_limits<float>::infinity(), xbest[DIM];
	
	Buffer esBuffer = Buffer(context, 0, INDIVIDUALS_SIZE);
	Event ev;
	queue.enqueueMapBuffer(esBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, INDIVIDUALS_SIZE);
	
	for (int i = 0; i < 1000; i++)
	{
		queue.enqueueWriteBuffer(esBuffer, CL_TRUE, 0, INDIVIDUALS_SIZE, individuals);
		kernel.setArg(1, (cl_ulong)rand());
		kernel.setArg(0, esBuffer);
		queue.enqueueNDRangeKernel(kernel, NullRange, NDRange(LAMBDA), NDRange(1), NULL, &ev);
		ev.wait();
		queue.enqueueReadBuffer(esBuffer, CL_TRUE, 0, INDIVIDUALS_SIZE, individuals);
		
		std::sort(individuals, individuals + LAMBDA, individual_comp);
		individual mean = get_mean(individuals);
		for (int j = 0; j < LAMBDA; ++j)
		{
			individuals[j] = mean;
		}
	}
	gbest = individuals[0].fitness;
	for (int i = 0; i < DIM; ++i) xbest[i] = individuals[0].x[i];
	clog << "Best value " << gbest << " found at (";
	for (int i = 0; i < DIM; ++i) clog << xbest[i] << (i == DIM-1 ? ")" : ", ");
	clog << "\n";
	clog << "Our computation estemates it: f(" << xbest[0] << ", ..., " << xbest[DIM-1] << ") = " << es_f(xbest) << endl;

	delete[] individuals;

	return 0;
}
Ejemplo n.º 8
0
        /**
         * Creates the window and GL context, lists the PortAudio devices,
         * runs the constant-Q transform on a synthetic test signal, sets up
         * shaders, textures and vertex buffers, opens the audio input stream
         * and then runs the render loop until the window is closed.
         *
         * Returns early (after printing a message) when the window cannot be
         * created.
         */
        Main() 
        {
            mScale = 0.1f;
            mOffset = 1.0f;
            mDataOffset = 0;
            isClicked = false;
            screen = Vec2<unsigned int>(800, 600);
            std::setlocale(LC_ALL, "en_US.UTF-8");
            glfwInit();
            glfwWindowHint(GLFW_CLIENT_API, GLFW_OPENGL_API);
            glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
            //glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT, GL_TRUE);
            glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
            GLFWwindow* window = glfwCreateWindow(screen.x, screen.y, "test", nullptr, nullptr);
            if (window == nullptr)
            {
                printf("cant create window");
                return;
            }
            
            glfwSetWindowSizeCallback(window, windowSizeCallback);
            glfwSetKeyCallback(window, keyCallback);
            glfwSetMouseButtonCallback(window, clickCallback);
            glfwSetCursorPosCallback(window, mouseCallback);
            glfwMakeContextCurrent(window);
            glewExperimental = true;
            glewInit();
            
            int tmp;

            glGetIntegerv(GL_MAX_ELEMENTS_VERTICES , &tmp);

            std::cout << "GL_MAX_ELEMENTS_VERTICES: " << tmp << std::endl;
            
            int err = Pa_Initialize();
            if (err != paNoError)
                printf("error");
            
            // List every audio device together with its host API.
            int num = Pa_GetDeviceCount();
            const PaDeviceInfo* devInfo;
            const PaHostApiInfo* apiInfo;
            for (int i = 0; i < num; ++i) {
                devInfo = Pa_GetDeviceInfo(i);
                apiInfo = Pa_GetHostApiInfo(devInfo->hostApi);
                printf("%i, %s on %s\n", i, devInfo->name, apiInfo->name);
            }
            
            
            float sampleRate = 44100.0f;
            
            
            // Build the transform kernel and a test signal made of eight
            // waves at 55 Hz doubling up to 7040 Hz.
            double t = glfwGetTime();
            Kern k(sampleRate, 12, 4 * 16.352f, sampleRate / 2);
            BlockMatrix<std::complex<double>> b(k.K, k.mN0, k.mB, 0.01);
            mAudioData = new double[b.getWidth()];
            mAudioLength = b.getWidth();
            for (unsigned int i = 0; i < mAudioLength; ++i) {
                mAudioData[i] = wave(55, sampleRate, i) + wave(110, sampleRate, i) + wave(220, sampleRate, i)
                        + wave(440, sampleRate, i) + wave(880, sampleRate, i) + wave(1760, sampleRate, i)
                        + wave(3520, sampleRate, i) + wave(7040, sampleRate, i);
            }
            
            printf("kernel time:%f\n", glfwGetTime() - t);
            // NOTE(review): variable-length arrays are a compiler extension,
            // not standard C++; kept because the rest of the block relies on
            // plain arrays here.
            float drawArray[k.mB * 2];
            std::complex<double> out[k.mB];

            // Restart the timer BEFORE the transform so the printed value
            // covers the transform itself (it was restarted afterwards and
            // therefore always measured nothing).
            t = glfwGetTime();
            CQT::transform(mAudioData, out, b, mAudioLength);
            printf("transform time:%f\n", glfwGetTime() - t);
            
            //glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
            glDebugMessageCallback(debugCallback, nullptr);
            //glEnable(GL_DEBUG_OUTPUT);
                        
            printf("%s\n", glGetString(GL_VERSION));
            Shader fs("res/shader/fragment.c", true, GL_FRAGMENT_SHADER);
            Shader vs("res/shader/vertex.c", true, GL_VERTEX_SHADER);
            Program* p = new Program();
            p->attach(fs);
            p->attach(vs);
            p->build();
            p->use();
            
            Program p2;
            Shader fs2("res/shader/fragment2.c", true, GL_FRAGMENT_SHADER);
            Shader vs2("res/shader/vertex2.c", true, GL_VERTEX_SHADER);
            p2.attach(fs2);
            p2.attach(vs2);
            p2.build();
            p2.use();
            
            int uniformData = p2.getUniformLocation("data");
            
            unsigned int waterfallSize = 512;
            
            tm = new TextureManager();
            
            unsigned int waterfallTexture;
            unsigned int waterfallId = tm->getFreeTexture();
            
            glGenTextures(1, &waterfallTexture);
            glActiveTexture(GL_TEXTURE0 + waterfallId);
            
            // The storage is defined with glTexImage2D, so the texture must
            // be bound to and configured on the GL_TEXTURE_2D target.
            // GL_TEXTURE0 is a texture-unit enum, not a bind target, and
            // GL_TEXTURE_2D_ARRAY is not accepted by glTexImage2D.
            glBindTexture( GL_TEXTURE_2D, waterfallTexture );

            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
            
            // Value-initialised so the texture starts out all zero instead
            // of holding indeterminate heap contents.
            unsigned char* textureTmp = new unsigned char[waterfallSize * b.getWidth()]();
            
            // Rows are tightly packed single bytes; use an unpack alignment
            // of 1 for the upload so GL does not read past the buffer when
            // the width is not a multiple of the current alignment, then
            // restore the previous setting.
            int unpackAlignment = 4;
            glGetIntegerv(GL_UNPACK_ALIGNMENT, &unpackAlignment);
            glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
            glTexImage2D( GL_TEXTURE_2D, 0, GL_R8, b.getWidth(), waterfallSize, 0, GL_RED, GL_UNSIGNED_BYTE, textureTmp);
            glPixelStorei(GL_UNPACK_ALIGNMENT, unpackAlignment);
            
            // Allocated with new[], so it must be released with delete[].
            delete[] textureTmp;
            
            // Interleave x in [-1, 1) with the magnitude of each bin.
            float max = 0;
            for (unsigned int i = 0; i < k.mB; ++i) {
                drawArray[2 * i + 0] = (float)i / k.mB * 2.0f - 1.0f;
                float tmp = std::abs(out[i]);
                drawArray[2 * i + 1] = tmp;
                max = std::max(tmp, max);
            }
            
            font = new Font(512, "res/font/DroidSans.woff", 32, tm);
            print = new Print(font);
            //print.set(&font, "res/shader/fontVertex.c", "res/shader/fontFragment.c");
            print->setScreenSize(screen);
            
            // Vertices for the raw-signal plot; x is fixed, y is refreshed
            // every frame.
            glm::vec2* debug = new glm::vec2[b.getWidth()];
            for (unsigned int i = 0; i < b.getWidth(); ++i) {
                debug[i].x = (float)i / b.getWidth() * 2.0f - 1.0f;
            }
            uint32_t vao;
            glGenVertexArrays(1, &vao);
            glBindVertexArray(vao);
            uint32_t vbo[2];
            glGenBuffers(1, vbo);
            glBindBuffer(GL_ARRAY_BUFFER, vbo[0]);
            glEnableVertexAttribArray(0);
            glBufferData(GL_ARRAY_BUFFER, k.mB * sizeof(glm::vec2), drawArray, GL_DYNAMIC_DRAW);
            glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, 0);
            glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
            glEnable(GL_BLEND);
            glfwSetWindowUserPointer(window, this);
            glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
            
            // Both timestamps start at "now" so the first frame does not
            // read an uninitialised value.
            double time = glfwGetTime();
            double timeo = time;
            glfwSwapInterval(1);
            PaStream* stream = nullptr;
            PaStreamParameters params;
            // NOTE(review): hard-coded device index; it is only meaningful on
            // a machine whose device list matches the one printed above.
            params.device = 21;
            params.channelCount = 1;
            params.sampleFormat = paFloat32;
            params.hostApiSpecificStreamInfo = nullptr;
            params.suggestedLatency = 0.5;
            
            
            err =  Pa_OpenStream(&stream, &params, nullptr, sampleRate, paFramesPerBufferUnspecified, 0, paCallback, this);
            if (err != paNoError) {
                printf("error %i", err);
                // Make sure the stream is never started/stopped/closed below.
                stream = nullptr;
            } else {
                Pa_StartStream(stream);
            }
            while(!glfwWindowShouldClose(window))
            {
                timeo = time;
                time = glfwGetTime();
                CQT::transform(mAudioData, out, b, mAudioLength);
            
            
                max = 0.0f;
                for (unsigned int i = 0; i < k.mB; ++i) {
                    drawArray[2 * i + 0] = (float)i / k.mB * 2.0f - 1.0f;
                    float tmp = std::abs(out[i]);
                    drawArray[2 * i + 1] = tmp;
                    max = std::max(tmp, max);
                }
                // Log-scale the magnitudes for display.
                for (unsigned int i = 0; i < k.mB; ++i) {
                    drawArray[2 * i + 1] = std::log(drawArray[2 * i +1]) * mScale + mOffset;
                }
                //printf("%f\n", drawArray[1]);
                glBindVertexArray(vao);
                glBindBuffer(GL_ARRAY_BUFFER, vbo[0]);
                glBufferData(GL_ARRAY_BUFFER, k.mB * sizeof(glm::vec2), drawArray, GL_DYNAMIC_DRAW);
                p->use();
                glDrawArrays(GL_LINE_STRIP, 0, k.mB);
                for (unsigned int i = 0; i < b.getWidth(); ++i) {
                    debug[i].y = mAudioData[i] / 15.0;
                }
                glBufferData(GL_ARRAY_BUFFER, b.getWidth() * sizeof(glm::vec2), debug, GL_DYNAMIC_DRAW);
                glDrawArrays(GL_LINE_STRIP, 0, b.getWidth());
                print->printfAt(-300.0f, 100.0f, 16.0f, 16.0f, u8"Fps:%03.3f", 1/(time-timeo));
                
                glfwSwapBuffers(window);
                glClear(GL_COLOR_BUFFER_BIT);
                glfwPollEvents();
                
            }
            if (stream != nullptr) {
                Pa_StopStream(stream);
                Pa_CloseStream(stream);
            }
            Pa_Terminate();

            // Release the objects that are owned only by this function.
            delete[] debug;
            delete p;

            std::cout << "Hello World. I'm Peach." << std::endl;

        }
Ejemplo n.º 9
0
        /**
         * Creates the window and a debug GL 3.3 context, builds the shader
         * program, sets up text and tile rendering and then runs the render
         * loop until the window is closed.
         *
         * Returns early (after printing a message) when the window cannot be
         * created.
         */
        Main() 
        {
            isClicked = false;
            screen = Vec2<unsigned int>(800, 600);
            std::setlocale(LC_ALL, "en_US.UTF-8");
            glfwInit();
            glfwWindowHint(GLFW_CLIENT_API, GLFW_OPENGL_API);
            glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
            glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
            glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT, GL_TRUE);
            GLFWwindow* window = glfwCreateWindow(screen.x, screen.y, "test", nullptr, nullptr);
            if (window == nullptr)
            {
                printf("cant create window");
                return;
            }
            
            glfwSetWindowSizeCallback(window, windowSizeCallback);
            glfwSetKeyCallback(window, keyCallback);
            glfwSetMouseButtonCallback(window, clickCallback);
            glfwSetCursorPosCallback(window, mouseCallback);
            glfwMakeContextCurrent(window);
            glewExperimental = true;
            glewInit();
            
            //glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
            glDebugMessageCallback(debugCallback, nullptr);
            glEnable(GL_DEBUG_OUTPUT);
            
            
            printf("%s\n", glGetString(GL_VERSION));
            // NOTE(review): fs, vs and p are never deleted here; whether
            // Program keeps the attached shader pointers is not visible in
            // this block, so they are left alone.
            Shader* fs = new Shader("res/shader/fragment.c", true, GL_FRAGMENT_SHADER);
            Shader* vs = new Shader("res/shader/vertex.c", true, GL_VERTEX_SHADER);
            Program* p = new Program();
            p->attach(fs);
            p->attach(vs);
            p->build();
            p->use();
            
            tm = new TextureManager();
            font = new Font(512, "res/font/DroidSans.woff", 32, tm);
            print = new Print(font);
            //print.set(&font, "res/shader/fontVertex.c", "res/shader/fontFragment.c");
            print->setScreenSize(screen);

            // Staging data for the initial buffer contents, value-initialised
            // rather than left indeterminate.
            glm::vec2* vert = new glm::vec2[1024]();
            uint32_t vao;
            glGenVertexArrays(1, &vao);
            glBindVertexArray(vao);
            uint32_t vbo;
            glGenBuffers(1, &vbo);
            glBindBuffer(GL_ARRAY_BUFFER, vbo);
            glEnableVertexAttribArray(0);
            glBufferData(GL_ARRAY_BUFFER, 1024 * sizeof(glm::vec2), vert, GL_DYNAMIC_DRAW);
            // glBufferData copies the data into the buffer object's store,
            // so the staging array can be released right away instead of
            // being leaked.
            delete[] vert;
            glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, 0);
            glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
            glEnable(GL_BLEND);
            glfwSetWindowUserPointer(window, this);
            glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
            
            
            tr = new TileRenderer();
            d = new Drawing(tr);
            tr->setScreenSize(screen);
            //glPolygonMode( GL_FRONT_AND_BACK, GL_LINE );

            // Both timestamps start at "now" so the first frame does not
            // read an uninitialised value.
            double time = glfwGetTime();
            double timeo = time;
            glfwSwapInterval(0);
            
            while(!glfwWindowShouldClose(window))
            {
                timeo = time;
                time = glfwGetTime();
                glClear(GL_COLOR_BUFFER_BIT);
                
                glBindVertexArray(vao);
                glBindBuffer(GL_ARRAY_BUFFER, vbo);
                d->render();
                //tr->renderTile(t);
                print->printfAt(-0.3f, 0.7f, 16.0f, 16.0f, u8"Fps:%03.3f", 1/(time-timeo));
                glfwSwapBuffers(window);
                // Blocks until an event arrives, so a frame is only drawn in
                // response to input/window events.
                glfwWaitEvents();
                
            }

            std::cout << "Hello World. I'm Peach." << std::endl;

        }
Ejemplo n.º 10
0
/*
 * Vector-add test: fills srcA/srcB and the expected result Golden, runs the
 * "vadd_wrapper" kernel as a task on the first accelerator device, reads the
 * result into dst and compares it element-wise against Golden within
 * EPISILON.
 *
 * Reports "PASS!" and returns 0 only when no OpenCL error was raised and no
 * element mismatched; otherwise reports the failure and returns -1.
 *
 * NOTE(review): the preprocessor conditional that opens before the first
 * signature below lies outside this excerpt; the #else/#endif pair is kept
 * exactly as found.
 */
void ocl_main(UArg arg0, UArg arg1)
{
   int    argc = (int)     arg0;
   char **argv = (char **) arg1;
#else
#define RETURN(x) return x
int main(int argc, char *argv[])
{
#endif
   int    bufsize = sizeof(Golden);
   int    num_errors = 0;
   const int    print_nerrors = 12;
   // Set when an OpenCL exception is caught, so the run cannot be reported
   // as passing when the kernel never produced verified results.
   bool   ocl_failed = false;

   for (int i=0; i < NumElements; ++i) 
   {
       srcA[i] = i * 1.0; 
       srcB[i] = ((i+7) % 257 )* 1.0; 
       Golden[i]   =   srcA[i] + srcB[i];
   }

   try 
   {
     Context context(CL_DEVICE_TYPE_ACCELERATOR);

     std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
      
     int d = 0;
     std::string str;
     devices[d].getInfo(CL_DEVICE_NAME, &str);
     cout << "DEVICE: " << str << endl << endl;

     Buffer bufA   (context, CL_MEM_READ_ONLY,  bufsize);
     Buffer bufB   (context, CL_MEM_READ_ONLY,  bufsize);
     Buffer bufDst (context, CL_MEM_WRITE_ONLY, bufsize);

#ifndef _TI_RTOS
     // Build from source; the build "options" string names the object file
     // that is passed along with the kernel source.
     ifstream t("vadd_wrapper.cl");
     if (!t)
     {
         std::cout << "Could not open Kernel Source file ([file].cl)\n";
         exit(-1);
     }

     std::string kSrc((istreambuf_iterator<char>(t)),
                      istreambuf_iterator<char>());
     Program::Sources    source(1, make_pair(kSrc.c_str(), kSrc.length()));
     Program             program = Program(context, source);
     program.build(devices, "vadd_openmp.obj"); 
#else
     // Build from the binary embedded in vadd_wrapper_dsp_bin.
     Program::Binaries binary(1, make_pair(vadd_wrapper_dsp_bin,
                                              sizeof(vadd_wrapper_dsp_bin)));
     Program           program = Program(context, devices, binary);
     program.build(devices);
#endif

     Kernel kernel(program, "vadd_wrapper");
     kernel.setArg(0, bufA);
     kernel.setArg(1, bufB);
     kernel.setArg(2, bufDst);
     kernel.setArg(3, NumChunks);
     kernel.setArg(4, ChunkSize);

     Event ev1, ev2, ev6;

     // In Order Command Queue, only one kernel pushed to device at a time
     // OpenMP c code should use: In Order Command Queue + Task
     CommandQueue InO_Q(context, devices[d], CL_QUEUE_PROFILING_ENABLE);

     InO_Q.enqueueWriteBuffer(bufA, CL_FALSE, 0, bufsize, srcA, NULL, &ev1);
     InO_Q.enqueueWriteBuffer(bufB, CL_FALSE, 0, bufsize, srcB, NULL, &ev2);

     std::vector<Event> vec_ev5(1);
     InO_Q.enqueueTask(kernel, NULL, &vec_ev5[0]);

     // Blocking read that additionally waits on the kernel's event.
     InO_Q.enqueueReadBuffer(bufDst, CL_TRUE, 0, bufsize, dst, &vec_ev5, &ev6);

     for (int i=0; i < NumElements; ++i)
     {
       if (Golden[i] - dst[i] < -EPISILON || Golden[i] - dst[i] > EPISILON) 
       { 
           // Count every mismatch but only print the first few.
           if((num_errors += 1) < print_nerrors)
               printf("Error %d: %f <==> %f\n", i, Golden[i], dst[i]);
       }
     }

     ocl_event_times(ev1, "Write   BufA ");
     ocl_event_times(ev2, "Write   BufB ");
     ocl_event_times(vec_ev5[0], "Kernel       ");
     ocl_event_times(ev6, "Read   BufDst");
   }
   catch (const Error& e) 
   {
      // Caught by const reference; the name no longer shadows anything.
      cerr << "ERROR: " << e.what() << "(" << e.err() << ")" << endl;
      ocl_failed = true;
   }

   if (ocl_failed)
   {
      cout << "FAIL due to OpenCL error!\n";
      RETURN (-1);
   }

   if (num_errors > 0)
   { 
      cout << "FAIL with " << num_errors << " errors!\n";
      RETURN (-1);
   }
   else cout << "PASS!" << endl; 

   RETURN (0);
}