Example #1
0
/**
 * Allocate and initialise a new window.
 *
 * The window starts with a fresh buffer from queue(), autocommit enabled,
 * an initialised mutex and a semaphore whose initial count is 0.
 *
 * @param type      Window type. For WINDOW_TYPE_SNW the capacity is forced
 *                  to 1 regardless of max_size.
 * @param max_size  Requested capacity for every other window type.
 * @return The newly allocated window, or NULL when the allocation fails.
 */
window_t* window(
        window_type_t type,
        uint8_t max_size)
{
    debug_print("window()\n");

    window_t* new_window = (window_t*) calloc(1, sizeof(window_t));
    if (new_window == NULL) {
        /* Out of memory: report it to the caller instead of dereferencing NULL. */
        return NULL;
    }

    new_window->buffer = queue();
    new_window->type = type;
    new_window->max_size = (type == WINDOW_TYPE_SNW) ? 1 : max_size;
    new_window->autocommit = true;

    pthread_mutex_init(&new_window->lock, 0);
    sem_init(&new_window->available, 0, 0);

    success_print("window() succeed\n");
    return new_window;
}
Example #2
0
/** Find a path with sufficient unused residual capacity.
 *  @return true if a path was found from source to sink.
 */
bool mflo_ffs::findPath() {
	vertex u,v; edge e;
	List queue(g->n());

	while (scale > 0) {
		for (u = 1; u <= g->n(); u++) pEdge[u] = 0;
		queue.addLast(g->src());
		while (!queue.empty()) {
			u = queue.first(); queue.removeFirst();
			for (e = g->firstAt(u); e != 0; e=g->nextAt(u,e)) {
				v = g->mate(u,e);
				if (g->res(u,e) >= scale && pEdge[v] == 0 
				    && v != g->src()) {
					pEdge[v] = e; 
					if (v == g->snk()) return true;
					queue.addLast(v);
				}
			}
		}
		scale /= 2;
	}
	return false;
}
Example #3
0
// this example demonstrates how to use the mapped_view class to map
// an array of numbers to device memory and use the reduce() algorithm
// to calculate the sum.
int main()
{
    // get default device and setup context
    compute::device gpu = compute::system::default_device();
    compute::context context(gpu);
    compute::command_queue queue(context, gpu);
    std::cout << "device: " << gpu.name() << std::endl;

    // create data on host
    int data[] = { 4, 2, 3, 7, 8, 9, 1, 6 };

    // create mapped view on device
    compute::mapped_view<int> view(data, 8, context);

    // use reduce() to calculate sum on the device
    int sum = 0;
    compute::reduce(view.begin(), view.end(), &sum, queue);

    // print the sum on the host
    std::cout << "sum: " << sum << std::endl;

    return 0;
}
Example #4
0
// Builds the render queue for a label: one text primitive using the label's
// "FontName", "FontSize" and "Color" properties. When line wrapping is off,
// the text is offset inside the free space of the allocation according to
// the label's alignment.
std::unique_ptr<RenderQueue> BREW::CreateLabelDrawable( std::shared_ptr<const Label> label ) const {
	// Styling properties of this label.
	const auto& name = GetProperty<std::string>( "FontName", label );
	const auto& font = GetResourceManager().GetFont( name );
	auto size = GetProperty<unsigned int>( "FontSize", label );
	auto color = GetProperty<sf::Color>( "Color", label );

	std::unique_ptr<RenderQueue> queue( new RenderQueue );

	sf::Text text( label->GetWrappedText(), *font, size );
	text.setColor( color );

	if( !label->GetLineWrap() ) {
		// Space left over once the requisition is subtracted from the allocation.
		const float free_width = label->GetAllocation().width - label->GetRequisition().x;
		const float free_height = label->GetAllocation().height - label->GetRequisition().y;

		// Scale the free space by the alignment to get the text offset.
		const float offset_x = free_width * label->GetAlignment().x;
		const float offset_y = free_height * label->GetAlignment().y;

		text.setPosition( offset_x, offset_y );
	}

	queue->Add( Renderer::Get().CreateText( text ) );

	return queue;
}
void dijkstra(int source) {
	fill_range(dist, dist + graph.vertex_num, INF); // \SourceRef{source:utility}
	fill_range(prev, prev + graph.vertex_num, -1);
	fill_range<Edge *>(path, path + graph.vertex_num, NULL);
	dist[source] = 0;
	std::set<int, bool(*)(int ,int)> queue(dijkstra_compare); // use binary heap
	for (int vi = 0; vi < graph.vertex_num; ++vi) {
		queue.insert(vi);
	}
	for (; !queue.empty(); ) {
		int u = *queue.begin();
		queue.erase(u);
		for (SPEdge * edge = graph.head[u]; edge != NULL; edge = edge->next) {
			if (queue.count(edge->v) > 0 && dist[edge->u] + edge->w < dist[edge->v]) {
				queue.erase(edge->v);
				dist[edge->v] = dist[edge->u] + edge->w;
				prev[edge->v] = edge->u;
				path[edge->v] = edge;
				queue.insert(edge->v);
			}
		}
	}
}
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    compute::device device = compute::system::default_device();
    compute::context context(device);
    compute::command_queue queue(context, device);

    compute::vector<compute::uint_> vector(PERF_N, context);

    compute::default_random_engine rng(queue);
    compute::uniform_int_distribution<compute::uint_> dist(0, 1);

    perf_timer t;
    t.start();
    dist.generate(vector.begin(), vector.end(), rng, queue);
    queue.finish();
    t.stop();
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    return 0;
}
Example #7
0
/* Creates a new task: takes the next free entry of YKTCBArray, marks it
 * READY with the given priority, inserts it into the ready list, prepares
 * its stack through initStack() and, when a task is already running,
 * calls the scheduler. */
void YKNewTask(void (*task)(void), void*taskStack, unsigned char priority) {
	TCBptr tcb = &YKTCBArray[activeTasks++];
	/* only the low 16 bits of the entry point and stack address are kept */
	int entry = (int) task & 0xFFFF;
	int stack = (int) taskStack & 0xFFFF;

	tcb->next = NULL;
	tcb->prev = NULL;
	tcb->delay = 0;
	tcb->state = READY;
	tcb->priority = priority;

	YKRdyList = queue(YKRdyList, tcb);

	/* initStack() returns the stack pointer value stored in the TCB */
	stack = initStack(entry, stack);
	tcb->sp = (void*)stack;

	if (runningTask != NULL) {
		YKScheduler(0);
	}
}
Example #8
0
/* Sets the bits of `mask` in the event's flags, moves every task on the
 * event's task list to the ready list and, if at least one task was moved,
 * calls the scheduler after leaving the mutex. */
void YKEventSet(YKEVENT* e, unsigned mask){
	TCBptr waiter;
	int moved = 0;

	YKEnterMutex();

	e->flags |= mask;     /* turn on the requested bits, keep the rest */
	e->sFlags = e->flags; /* Save flags that caused this pend */

	/* detach the tasks one by one and hand each to the ready list */
	while (e->tasks != NULL) {
		waiter = e->tasks;
		e->tasks = waiter->next;
		waiter->next = NULL;
		YKRdyList = queue(YKRdyList, waiter);
		moved = 1;
	}

	YKExitMutex();

	if (moved) {
		YKScheduler(0);
	}
}
Example #9
0
/**
 * Build the rlpr command line for the given printer.
 *
 * The host and queue are read from the KMPrinter registered under the
 * printer's name. If the "RLPR" configuration group has a "ProxyHost"
 * entry, "-X <host>" is appended, followed by "--port=<port>" when
 * "ProxyPort" is set as well.
 *
 * @param cmd      Receives the command line on success.
 * @param printer  Printer being set up; also receives the error message when
 *                 the rlpr executable is missing or the printer definition
 *                 is incomplete.
 * @return true when cmd was built, false otherwise.
 */
bool KRlprPrinterImpl::setupCommand(TQString& cmd, KPrinter *printer)
{
	// retrieve the KMPrinter object, to get host and queue name
	KMPrinter	*rpr = KMFactory::self()->manager()->findPrinter(printer->printerName());
	if (!rpr)
		return false;

	TQString	host(rpr->option("host")), queue(rpr->option("queue"));
	if (host.isEmpty() || queue.isEmpty())
	{
		printer->setErrorMessage(i18n("The printer is incompletely defined. Try to reinstall it."));
		return false;
	}

	TQString	exestr = TDEStandardDirs::findExe("rlpr");
	if (exestr.isEmpty())
	{
		printer->setErrorMessage(i18n("The <b>%1</b> executable could not be found in your path. Check your installation.").arg("rlpr"));
		return false;
	}

	cmd = TQString::fromLatin1("%1 -H %2 -P %3 -\\#%4").arg(exestr).arg(quote(host)).arg(quote(queue)).arg(printer->numCopies());

	// proxy settings (named separately so they cannot be confused with the
	// printer host above)
	TDEConfig	*conf = KMFactory::self()->printConfig();
	conf->setGroup("RLPR");
	TQString	proxyHost = conf->readEntry("ProxyHost",TQString::null), proxyPort = conf->readEntry("ProxyPort",TQString::null);
	if (!proxyHost.isEmpty())
	{
		cmd.append(" -X ").append(quote(proxyHost));
		if (!proxyPort.isEmpty()) cmd.append(" --port=").append(proxyPort);
	}

	return true;
}
Example #10
0
// Exercises SKKInputQueue against a TestInputQueueObserver, using the
// conversion rules loaded from "kana-rule.conf". Each assert checks the
// pair of strings the observer saw after the preceding queue operation.
int main() {
    SKKRomanKanaConverter::theInstance().Initialize("kana-rule.conf");

    TestInputQueueObserver observer;
    SKKInputQueue queue(&observer);

    // a single character
    queue.AddChar('a');
    assert(observer.Test("あ", ""));

    // build up "ky", remove one character, then finish with 'i'
    observer.Clear();
    queue.AddChar('k');
    assert(observer.Test("", "k"));
    queue.AddChar('y');
    assert(observer.Test("", "ky"));
    queue.RemoveChar();
    assert(observer.Test("", "k"));
    queue.AddChar('i');
    assert(observer.Test("き", ""));

    // a pending 'n' followed by Terminate()
    observer.Clear();
    queue.AddChar('n');
    assert(observer.Test("", "n"));
    queue.Terminate();
    assert(observer.Test("ん", ""));

    // CanConvert() with a pending 'n'
    queue.AddChar('n');
    assert(queue.CanConvert('i'));

    // feed the sequence o, w, s, a after resetting
    queue.Terminate();
    observer.Clear();
    const char sequence[] = { 'o', 'w', 's', 'a' };
    for (char c : sequence) {
        queue.AddChar(c);
    }
    assert(observer.Test("おさ", ""));
}
Example #11
0
/** Compute exact distance labels and return in distance vector.
 *  For vertices that can't reach sink, compute labels to source.
 *
 *  Every vertex 1..n first gets the label 2n. A breadth-first pass
 *  from the sink then assigns labels starting at 0, and a second pass from
 *  the source assigns labels starting at n to the vertices the first pass
 *  did not improve. Calls Util::fatal if the first pass gives the source
 *  a label below n.
 */
void mflo_pp::initdist() {
	vertex u,v; edge e;
	List queue(g->n());

	// vertices are numbered 1..n, so the bound must include n itself
	for (u = 1; u <= g->n(); u++) d[u] = 2*g->n();

	// compute distance labels for vertices that have path to sink
	d[g->snk()] = 0;
	queue.addLast(g->snk());
	while (!queue.empty()) {
		u = queue.first(); queue.removeFirst();
		for (e = g->firstAt(u); e != 0; e = g->nextAt(u,e)) {
			v = g->mate(u,e);
			if (g->res(v,e) > 0 && d[v] > d[u] + 1) {
				d[v] = d[u] + 1;
				queue.addLast(v);
			}
		}
	}

	if (d[g->src()] < g->n()) 
		Util::fatal("initdist: path present from source to sink");

	// compute distance labels for remaining vertices
	d[g->src()] = g->n();
	queue.addLast(g->src());
	while (!queue.empty()) {
		u = queue.first(); queue.removeFirst();
		for (e = g->firstAt(u); e != 0; e = g->nextAt(u,e)) {
			v = g->mate(u,e);
			if (g->res(v,e) > 0 && d[v] > d[u] + 1) {
				d[v] = d[u] + 1;
				queue.addLast(v);
			}
		}
	}
}
Example #12
0
void
intel_wait_engine_idle(void)
{
	TRACE(("intel_wait_engine_idle()\n"));

	{
		QueueCommands queue(gInfo->shared_info->primary_ring_buffer);
		queue.PutFlush();
	}

	// TODO: this should only be a temporary solution!
	// a better way to do this would be to acquire the engine's lock and
	// sync to the latest token

	bigtime_t start = system_time();

	ring_buffer &ring = gInfo->shared_info->primary_ring_buffer;
	uint32 head, tail;
	while (true) {
		head = read32(ring.register_base + RING_BUFFER_HEAD)
			& INTEL_RING_BUFFER_HEAD_MASK;
		tail = read32(ring.register_base + RING_BUFFER_TAIL)
			& INTEL_RING_BUFFER_HEAD_MASK;

		if (head == tail)
			break;

		if (system_time() > start + 1000000LL) {
			// the engine seems to be locked up!
			TRACE(("intel_extreme: engine locked up, head %lx!\n", head));
			break;
		}

		spin(10);
	}
}
Example #13
0
// this example demonstrates how to print the values in a vector
//
// The device vector is filled with 1..10 by iota() and then written to
// std::cout by copying it into a std::ostream_iterator.
int main()
{
    // get default device and setup context
    compute::device gpu = compute::system::default_device();
    compute::context context(gpu);
    compute::command_queue queue(context, gpu);
    std::cout << "device: " << gpu.name() << std::endl;

    // create vector on the device and fill with the sequence 1..10
    compute::vector<int> vector(10, context);
    compute::iota(vector.begin(), vector.end(), 1, queue);

// NOTE(review): the "//[print_vector_example" / "//]" pair below looks like a
// documentation snippet marker - presumably the enclosed lines are extracted
// verbatim into the docs, so keep them unchanged; confirm with the doc build.
//[print_vector_example
    std::cout << "vector: [ ";
    boost::compute::copy(
        vector.begin(), vector.end(),
        std::ostream_iterator<int>(std::cout, ", "),
        queue
    );
    std::cout << "]" << std::endl;
//]

    return 0;
}
Example #14
0
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        boost::compute::vector<int> device_vector(
            host_vector.begin(), host_vector.end(), queue
        );

        t.start();
        device_vector.erase(
            boost::compute::remove(
                device_vector.begin(), device_vector.end(), 4, queue
            ),
            device_vector.end(),
            queue
        );
        queue.finish();
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    return 0;
}
Example #15
0
std::deque<Address> GPU::find(std::vector<FindArgs> &requests) const
{
    cl::CommandQueue queue(context,dev);
//    std::clog<<"Queue and kernel constructed"<<std::endl;
    std::deque<Address> result;
    cl_ulong2 *output=new cl_ulong2[requests.size()];
    cl::Buffer bufOutput(context, CL_MEM_WRITE_ONLY, requests.size()*sizeof(cl_ulong2));
//    std::clog<<"Output buffer ready"<<std::endl;
    cl::Buffer bufArgs(context, requests.data(), requests.data()+requests.size(), true, true);
//    std::clog<<"Args buffer ready"<<std::endl;
    cl::make_kernel<cl::Buffer&,cl::Buffer&,cl::Buffer&> find(kfind);
    find(cl::EnqueueArgs(queue, cl::NDRange(requests.size())), *bufData, bufArgs, bufOutput);
    queue.finish();
//    std::clog<<"Kernels executed"<<std::endl;
    cl::copy(queue, bufOutput, output, output+requests.size());
    queue.finish();
    for(size_t i=0;i<requests.size();++i)
    {
        if(output[i].s[0]!=-1)
            result.push_back(Address(output[i].s[0],output[i].s[1]));
    }
    delete []output;
    return result;
}
bool find_dijkstra() {
	fill_range(cost, cost + graph.vertex_num, INF); // \SourceRef{source:utility}
	fill_range(prev, prev + graph.vertex_num, -1);
	fill_range<Edge *>(path, path + graph.vertex_num, NULL);
	cost[source] = 0;
	std::set<int, bool(*)(int ,int)> queue(dijkstra_compare);
	for (int vi = 0; vi < graph.vertex_num; ++vi) {
		queue.insert(vi);
	}
	for (; !queue.empty(); ) {
		int u = *queue.begin();
		queue.erase(u);
		for (Edge * edge = graph.head[u]; edge != NULL; edge = edge->next) {
			if (queue.count(edge->v) > 0 && edge->flow < edge->capacity && cost[edge->u] + edge->cost < cost[edge->v]) {
				queue.erase(edge->v);
				cost[edge->v] = cost[edge->u] + edge->cost;
				prev[edge->v] = edge->u;
				path[edge->v] = edge;
				queue.insert(edge->v);
			}
		}
	}
	return cost[sink] != INF;
}
Example #17
0
// Creates a serial queue whose callback collects work into a shared
// ser_queue_comm and makes sure a drain of that collected work is scheduled.
//
// NOTE(review): when the queue invokes the callback passed to its
// constructor is not visible here - presumably whenever work is submitted
// to it; confirm against the queue class.
queue
ser_queue( )
{
  // State shared between the callback below and the drain task it schedules.
  auto comm = std::make_shared< ser_queue_comm >( );

  // Note: the callback's parameter `q` shadows the local `q` being declared.
  queue q( queue_type::serial, [comm = std::move( comm )]( queue & q ) {
    // mt_qu guards comm->qu and comm->cor_sched for the rest of this callback.
    boost::lock_guard< boost::mutex > lock( comm->mt_qu );

    comm->qu.append_queue( { steal_work, q } );

    // A drain is already scheduled; it will pick up what was just appended.
    if ( comm->cor_sched ) {
      return;
    }

    // Helper queue carrying the single drain task.
    queue q_ser( queue_type::serial );

    q_ser.submit_work( [comm]( ) mutable {
      // mt_exec is held for the whole drain; mt_qu only while the pending
      // queue is swapped out.
      boost::lock_guard< event::mutex > lock_exec( comm->mt_exec );
      boost::unique_lock< boost::mutex > lock( comm->mt_qu );

      assert( comm->cor_sched );

      // Clear the flag and take over the collected work, leaving a fresh
      // serial queue in its place, then release mt_qu before running it.
      comm->cor_sched = false;
      auto q_work     = std::move( comm->qu );
      comm->qu        = queue( queue_type::serial );
      lock.unlock( );

      q_work.run_until_empty( );
    } );

    // mt_qu is still held here, so the drain task cannot reach its assert
    // before cor_sched has been set below.
    schedule_queue( std::move( q_ser ) );
    comm->cor_sched = true;
  } );

  return q;
}
Example #18
0
//--------------------------------------------------------------------------------------------------
// Breadth-first test of whether y can be reached from x by following
// neighbor lists. visited[] is stamped with a per-call queryId, and
// `expanded` counts the vertices whose neighbors were examined.
bool Index::reachable_bfs(unsigned x, unsigned y) {
  if (x == y)
    return true;
  ++queryId;

  std::deque<unsigned> frontier;
  frontier.push_back(x);
  while (!frontier.empty()) {
    const unsigned current = frontier.front();
    frontier.pop_front();

    // only vertices not yet stamped during this query are expanded
    if (visited[current] != queryId) {
      visited[current] = queryId;
      ++expanded;

      const std::vector<unsigned> &neighbors = *g->get_neighbors(current);
      for (std::vector<unsigned>::size_type i = 0; i < neighbors.size(); ++i) {
        if (neighbors[i] == y)
          return true;
        frontier.push_back(neighbors[i]);
      }
    }
  }
  return false;
}
Example #19
0
// Small exercise of videoQueue_t: prints its entry size and row stride,
// moves every empty entry to the full side while stamping it with a
// decreasing counter, then moves every full entry back while printing
// its stamp.
int main( void )
{
   videoQueue_t queue( 320, 240 );
   printf( "entry size %u\n", queue.entrySize_ );
   printf( "row stride %u\n", queue.rowStride_ );

   unsigned idx = NUMENTRIES ;

   // empty -> full, until getEmpty() has nothing left to return
   for( videoQueue_t::entry_t *entry = queue.getEmpty(); 0 != entry ; entry = queue.getEmpty() )
   {
      printf( "empty %u, %p\n", idx, entry );
      entry->when_ms_ = idx-- ;
      queue.putFull( entry );
   }

   // full -> empty, until getFull() has nothing left to return
   for( videoQueue_t::entry_t *entry = queue.getFull(); 0 != entry ; entry = queue.getFull() )
   {
      printf( "full " I64FMT ", %p\n", entry->when_ms_, entry );
      queue.putEmpty( entry );
   }

   return 0 ;
}
void LLEventQueue::flush()
{
	if(!mSignal) return;
		
    // Consider the case when a given listener on this LLEventQueue posts yet
    // another event on the same queue. If we loop over mEventQueue directly,
    // we'll end up processing all those events during the same flush() call
    // -- rather like an EventStream. Instead, copy mEventQueue and clear it,
    // so that any new events posted to this LLEventQueue during flush() will
    // be processed in the *next* flush() call.
    EventQueue queue(mEventQueue);
    mEventQueue.clear();
    // NOTE NOTE NOTE: Any new access to member data beyond this point should
    // cause us to move our LLStandardSignal object to a pimpl class along
    // with said member data. Then the local shared_ptr will preserve both.

    // DEV-43463: capture a local copy of mSignal. See LLEventStream::post()
    // for detailed comments.
    boost::shared_ptr<LLStandardSignal> signal(mSignal);
    for ( ; ! queue.empty(); queue.pop_front())
    {
        (*signal)(queue.front());
    }
}
Example #21
0
// Reports size() of the queue selected by inputQueue.
size_t SegmentedInputStorage::queueSize(InputQueue inputQueue) const
{
    auto&& selected = queue(inputQueue);
    return selected.size();
}
Example #22
0
int main(int argc, char *argv[])
{
    float *h_psum;					// vector to hold partial sum
    int in_nsteps = INSTEPS;		// default number of steps (updated later to device prefereable)
    int niters = ITERS;				// number of iterations
    int nsteps;
    float step_size;
    ::size_t nwork_groups;
    ::size_t max_size, work_group_size = 8;
    float pi_res;

    cl::Buffer d_partial_sums;

    try
    {
        cl_uint deviceIndex = 0;
        parseArguments(argc, argv, &deviceIndex);

        // Get list of devices
        std::vector<cl::Device> devices;
        unsigned numDevices = getDeviceList(devices);

        // Check device index in range
        if (deviceIndex >= numDevices)
        {
          std::cout << "Invalid device index (try '--list')\n";
          return EXIT_FAILURE;
        }

        cl::Device device = devices[deviceIndex];

        std::string name;
        getDeviceName(device, name);
        std::cout << "\nUsing OpenCL device: " << name << "\n";

        std::vector<cl::Device> chosen_device;
        chosen_device.push_back(device);
        cl::Context context(chosen_device);
        cl::CommandQueue queue(context, device);

        // Create the program object
        cl::Program program(context, util::loadProgram("../pi_ocl.cl"), true);

        // Create the kernel object for quering information
        cl::Kernel ko_pi(program, "pi");

        // Get the work group size
        work_group_size = ko_pi.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(device);
        //printf("wgroup_size = %lu\n", work_group_size);

        cl::make_kernel<int, float, cl::LocalSpaceArg, cl::Buffer> pi(program, "pi");

        // Now that we know the size of the work_groups, we can set the number of work
        // groups, the actual number of steps, and the step size
        nwork_groups = in_nsteps/(work_group_size*niters);

        if ( nwork_groups < 1) {
            nwork_groups = device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
            work_group_size=in_nsteps / (nwork_groups*niters);
        }

        nsteps = work_group_size * niters * nwork_groups;
        step_size = 1.0f/static_cast<float>(nsteps);
        std::vector<float> h_psum(nwork_groups);

        printf(
            " %d work groups of size %d.  %d Integration steps\n",
            (int)nwork_groups,
            (int)work_group_size,
            nsteps);

        d_partial_sums = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * nwork_groups);

        util::Timer timer;

        // Execute the kernel over the entire range of our 1d input data set
        // using the maximum number of work group items for this device
        pi(
            cl::EnqueueArgs(
                    queue,
                    cl::NDRange(nsteps / niters),
                    cl::NDRange(work_group_size)),
                    niters,
                    step_size,
                    cl::Local(sizeof(float) * work_group_size),
                    d_partial_sums);

        cl::copy(queue, d_partial_sums, h_psum.begin(), h_psum.end());

        // complete the sum and compute final integral value
        pi_res = 0.0f;
        for (unsigned int i = 0; i< nwork_groups; i++) {
                pi_res += h_psum[i];
        }
        pi_res = pi_res * step_size;

        //rtime = wtime() - rtime;
        double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.;
        printf("\nThe calculation ran in %lf seconds\n", rtime);
        printf(" pi = %f for %d steps\n", pi_res, nsteps);

        }
        catch (cl::Error err) {
            std::cout << "Exception\n";
            std::cerr
            << "ERROR: "
            << err.what()
            << "("
            << err_code(err.err())
            << ")"
            << std::endl;
        }
}
Example #23
0
void
vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
{
        struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c);
        bool discard = false;
        uint32_t inputs_remaining = c->num_inputs;
        uint32_t vpm_read_fifo_count = 0;
        uint32_t vpm_read_offset = 0;
        int last_vpm_read_index = -1;
        /* Map from the QIR ops enum order to QPU unpack bits. */
        static const uint32_t unpack_map[] = {
                QPU_UNPACK_8A,
                QPU_UNPACK_8B,
                QPU_UNPACK_8C,
                QPU_UNPACK_8D,
                QPU_UNPACK_16A_TO_F32,
                QPU_UNPACK_16B_TO_F32,
        };

        list_inithead(&c->qpu_inst_list);

        switch (c->stage) {
        case QSTAGE_VERT:
        case QSTAGE_COORD:
                /* There's a 4-entry FIFO for VPMVCD reads, each of which can
                 * load up to 16 dwords (4 vec4s) per vertex.
                 */
                while (inputs_remaining) {
                        uint32_t num_entries = MIN2(inputs_remaining, 16);
                        queue(c, qpu_load_imm_ui(qpu_vrsetup(),
                                                 vpm_read_offset |
                                                 0x00001a00 |
                                                 ((num_entries & 0xf) << 20)));
                        inputs_remaining -= num_entries;
                        vpm_read_offset += num_entries;
                        vpm_read_fifo_count++;
                }
                assert(vpm_read_fifo_count <= 4);

                queue(c, qpu_load_imm_ui(qpu_vwsetup(), 0x00001a00));
                break;
        case QSTAGE_FRAG:
                break;
        }

        list_for_each_entry(struct qinst, qinst, &c->instructions, link) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                qir_dump_inst(qinst);
                fprintf(stderr, "\n");
#endif

                static const struct {
                        uint32_t op;
                } translate[] = {
#define A(name) [QOP_##name] = {QPU_A_##name}
#define M(name) [QOP_##name] = {QPU_M_##name}
                        A(FADD),
                        A(FSUB),
                        A(FMIN),
                        A(FMAX),
                        A(FMINABS),
                        A(FMAXABS),
                        A(FTOI),
                        A(ITOF),
                        A(ADD),
                        A(SUB),
                        A(SHL),
                        A(SHR),
                        A(ASR),
                        A(MIN),
                        A(MAX),
                        A(AND),
                        A(OR),
                        A(XOR),
                        A(NOT),

                        M(FMUL),
                        M(MUL24),
                };

                struct qpu_reg src[4];
                for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_NULL:
                                src[i] = qpu_rn(0);
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_UNIF:
                                src[i] = qpu_unif();
                                break;
                        case QFILE_VARY:
                                src[i] = qpu_vary();
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].mux = QPU_MUX_SMALL_IMM;
                                src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
                                /* This should only have returned a valid
                                 * small immediate field, not ~0 for failure.
                                 */
                                assert(src[i].addr <= 47);
                                break;
                        case QFILE_VPM:
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;
                                src[i] = qpu_ra(QPU_R_VPM);
                                break;
                        }
                }

                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_ra(QPU_W_NOP);
                        break;
                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;
                case QFILE_VPM:
                        dst = qpu_ra(QPU_W_VPM);
                        break;
                case QFILE_VARY:
                case QFILE_UNIF:
                case QFILE_SMALL_IMM:
                        assert(!"not reached");
                        break;
                }

                switch (qinst->op) {
                case QOP_MOV:
                        /* Skip emitting the MOV if it's a no-op. */
                        if (dst.mux == QPU_MUX_A || dst.mux == QPU_MUX_B ||
                            dst.mux != src[0].mux || dst.addr != src[0].addr) {
                                queue(c, qpu_a_MOV(dst, src[0]));
                        }
                        break;

                case QOP_SEL_X_0_ZS:
                case QOP_SEL_X_0_ZC:
                case QOP_SEL_X_0_NS:
                case QOP_SEL_X_0_NC:
                case QOP_SEL_X_0_CS:
                case QOP_SEL_X_0_CC:
                        queue(c, qpu_a_MOV(dst, src[0]));
                        set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
                                          QPU_COND_ZS);

                        queue(c, qpu_a_XOR(dst, qpu_r0(), qpu_r0()));
                        set_last_cond_add(c, ((qinst->op - QOP_SEL_X_0_ZS) ^
                                              1) + QPU_COND_ZS);
                        break;

                case QOP_SEL_X_Y_ZS:
                case QOP_SEL_X_Y_ZC:
                case QOP_SEL_X_Y_NS:
                case QOP_SEL_X_Y_NC:
                case QOP_SEL_X_Y_CS:
                case QOP_SEL_X_Y_CC:
                        queue(c, qpu_a_MOV(dst, src[0]));
                        set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
                                          QPU_COND_ZS);

                        queue(c, qpu_a_MOV(dst, src[1]));
                        set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
                                              1) + QPU_COND_ZS);

                        break;

                case QOP_RCP:
                case QOP_RSQ:
                case QOP_EXP2:
                case QOP_LOG2:
                        switch (qinst->op) {
                        case QOP_RCP:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP),
                                                   src[0]));
                                break;
                        case QOP_RSQ:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIPSQRT),
                                                   src[0]));
                                break;
                        case QOP_EXP2:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_EXP),
                                                   src[0]));
                                break;
                        case QOP_LOG2:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_LOG),
                                                   src[0]));
                                break;
                        default:
                                abort();
                        }

                        if (dst.mux != QPU_MUX_R4)
                                queue(c, qpu_a_MOV(dst, qpu_r4()));

                        break;

                case QOP_PACK_8888_F:
                        queue(c, qpu_m_MOV(dst, src[0]));
                        *last_inst(c) |= QPU_PM;
                        *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
                                                       QPU_PACK);
                        break;

                case QOP_PACK_8A_F:
                case QOP_PACK_8B_F:
                case QOP_PACK_8C_F:
                case QOP_PACK_8D_F:
                        queue(c,
                              qpu_m_MOV(dst, src[0]) |
                              QPU_PM |
                              QPU_SET_FIELD(QPU_PACK_MUL_8A +
                                            qinst->op - QOP_PACK_8A_F,
                                            QPU_PACK));
                        break;

                case QOP_FRAG_X:
                        queue(c, qpu_a_ITOF(dst,
                                            qpu_ra(QPU_R_XY_PIXEL_COORD)));
                        break;

                case QOP_FRAG_Y:
                        queue(c, qpu_a_ITOF(dst,
                                            qpu_rb(QPU_R_XY_PIXEL_COORD)));
                        break;

                case QOP_FRAG_REV_FLAG:
                        queue(c, qpu_a_ITOF(dst,
                                            qpu_rb(QPU_R_MS_REV_FLAGS)));
                        break;

                case QOP_FRAG_Z:
                case QOP_FRAG_W:
                        /* QOP_FRAG_Z/W don't emit instructions, just allocate
                         * the register to the Z/W payload.
                         */
                        break;

                case QOP_TLB_DISCARD_SETUP:
                        discard = true;
                        queue(c, qpu_a_MOV(src[0], src[0]));
                        *last_inst(c) |= QPU_SF;
                        break;

                case QOP_TLB_STENCIL_SETUP:
                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), src[0]));
                        break;

                case QOP_TLB_Z_WRITE:
                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0]));
                        if (discard) {
                                set_last_cond_add(c, QPU_COND_ZS);
                        }
                        break;

                case QOP_TLB_COLOR_READ:
                        queue(c, qpu_NOP());
                        *last_inst(c) = qpu_set_sig(*last_inst(c),
                                                    QPU_SIG_COLOR_LOAD);

                        if (dst.mux != QPU_MUX_R4)
                                queue(c, qpu_a_MOV(dst, qpu_r4()));
                        break;

                case QOP_TLB_COLOR_WRITE:
                        queue(c, qpu_a_MOV(qpu_tlbc(), src[0]));
                        if (discard) {
                                set_last_cond_add(c, QPU_COND_ZS);
                        }
                        break;

                case QOP_VARY_ADD_C:
                        queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
                        break;

                case QOP_TEX_S:
                case QOP_TEX_T:
                case QOP_TEX_R:
                case QOP_TEX_B:
                        queue(c, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
                                                  (qinst->op - QOP_TEX_S)),
                                           src[0]));
                        break;

                case QOP_TEX_DIRECT:
                        fixup_raddr_conflict(c, dst, &src[0], &src[1]);
                        queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1]));
                        break;

                case QOP_TEX_RESULT:
                        queue(c, qpu_NOP());
                        *last_inst(c) = qpu_set_sig(*last_inst(c),
                                                    QPU_SIG_LOAD_TMU0);
                        if (dst.mux != QPU_MUX_R4)
                                queue(c, qpu_a_MOV(dst, qpu_r4()));
                        break;

                case QOP_UNPACK_8A_F:
                case QOP_UNPACK_8B_F:
                case QOP_UNPACK_8C_F:
                case QOP_UNPACK_8D_F:
                case QOP_UNPACK_16A_F:
                case QOP_UNPACK_16B_F: {
                        if (src[0].mux == QPU_MUX_R4) {
                                queue(c, qpu_a_MOV(dst, src[0]));
                                *last_inst(c) |= QPU_PM;
                                *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
                                                               (qinst->op -
                                                                QOP_UNPACK_8A_F),
                                                               QPU_UNPACK);
                        } else {
                                assert(src[0].mux == QPU_MUX_A);

                                /* Since we're setting the pack bits, if the
                                 * destination is in A it would get re-packed.
                                 */
                                queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
                                                     qpu_rb(31) : dst),
                                                    src[0], src[0]));
                                *last_inst(c) |=
                                        QPU_SET_FIELD(unpack_map[qinst->op -
                                                                 QOP_UNPACK_8A_F],
                                                      QPU_UNPACK);

                                if (dst.mux == QPU_MUX_A) {
                                        queue(c, qpu_a_MOV(dst, qpu_rb(31)));
                                }
                        }
                }
                        break;

                case QOP_UNPACK_8A_I:
                case QOP_UNPACK_8B_I:
                case QOP_UNPACK_8C_I:
                case QOP_UNPACK_8D_I:
                case QOP_UNPACK_16A_I:
                case QOP_UNPACK_16B_I: {
                        assert(src[0].mux == QPU_MUX_A);

                        /* Since we're setting the pack bits, if the
                         * destination is in A it would get re-packed.
                         */
                        queue(c, qpu_a_MOV((dst.mux == QPU_MUX_A ?
                                            qpu_rb(31) : dst), src[0]));
                        *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
                                                                  QOP_UNPACK_8A_I],
                                                       QPU_UNPACK);

                        if (dst.mux == QPU_MUX_A) {
                                queue(c, qpu_a_MOV(dst, qpu_rb(31)));
                        }
                }
                        break;

                default:
                        assert(qinst->op < ARRAY_SIZE(translate));
                        assert(translate[qinst->op].op != 0); /* NOPs */

                        /* If we have only one source, put it in the second
                         * argument slot as well so that we don't take up
                         * another raddr just to get unused data.
                         */
                        if (qir_get_op_nsrc(qinst->op) == 1)
                                src[1] = src[0];

                        fixup_raddr_conflict(c, dst, &src[0], &src[1]);

                        if (qir_is_mul(qinst)) {
                                queue(c, qpu_m_alu2(translate[qinst->op].op,
                                                    dst,
                                                    src[0], src[1]));
                                if (qinst->dst.pack) {
                                        *last_inst(c) |= QPU_PM;
                                        *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
                                                                       QPU_PACK);
                                }
                        } else {
                                queue(c, qpu_a_alu2(translate[qinst->op].op,
                                                    dst,
                                                    src[0], src[1]));
                                if (qinst->dst.pack) {
                                        assert(dst.mux == QPU_MUX_A);
                                        *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
                                                                       QPU_PACK);
                                }
                        }

                        break;
                }

                if (qinst->sf) {
                        assert(!qir_is_multi_instruction(qinst));
                        *last_inst(c) |= QPU_SF;
                }
        }

        qpu_schedule_instructions(c);

        /* thread end can't have VPM write or read */
        if (QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_WADDR_ADD) == QPU_W_VPM ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_WADDR_MUL) == QPU_W_VPM ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_A) == QPU_R_VPM ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_B) == QPU_R_VPM) {
                qpu_serialize_one_inst(c, qpu_NOP());
        }

        /* thread end can't have uniform read */
        if (QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_A) == QPU_R_UNIF ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_B) == QPU_R_UNIF) {
                qpu_serialize_one_inst(c, qpu_NOP());
        }

        /* thread end can't have TLB operations */
        if (qpu_inst_is_tlb(c->qpu_insts[c->qpu_inst_count - 1]))
                qpu_serialize_one_inst(c, qpu_NOP());

        c->qpu_insts[c->qpu_inst_count - 1] =
                qpu_set_sig(c->qpu_insts[c->qpu_inst_count - 1],
                            QPU_SIG_PROG_END);
        qpu_serialize_one_inst(c, qpu_NOP());
        qpu_serialize_one_inst(c, qpu_NOP());

        switch (c->stage) {
        case QSTAGE_VERT:
        case QSTAGE_COORD:
                break;
        case QSTAGE_FRAG:
                c->qpu_insts[c->qpu_inst_count - 1] =
                        qpu_set_sig(c->qpu_insts[c->qpu_inst_count - 1],
                                    QPU_SIG_SCOREBOARD_UNLOCK);
                break;
        }

        if (vc4_debug & VC4_DEBUG_QPU)
                vc4_dump_program(c);

        vc4_qpu_validate(c->qpu_insts, c->qpu_inst_count);

        free(temp_registers);
}
Example #24
0
File: vadd.cpp Project: pelmer/esc
int main(void)
{
    std::vector<float> h_a(LENGTH);              // a vector 
    std::vector<float> h_b(LENGTH);              // b vector 	
    std::vector<float> h_c (LENGTH, 0xdeadbeef); // c = a + b, from compute device

    cl::Buffer d_a;      // device memory used for the input  a vector
    cl::Buffer d_b;      // device memory used for the input  b vector
    cl::Buffer d_c;      // device memory used for the output c vector

    // Fill vectors a and b with random float values
    int count = LENGTH;
    for(int i = 0; i < count; i++)
    {
        h_a[i]  = rand() / (float)RAND_MAX;
        h_b[i]  = rand() / (float)RAND_MAX;
    }

    try 
    {
    	// Create a context
        cl::Context context(DEVICE);

        // Load in kernel source, creating a program object for the context

        cl::Program program(context, util::loadProgram("vadd.cl"), true);

        // Get the command queue
        cl::CommandQueue queue(context);

        // Create the kernel functor
 
        auto vadd = cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int>(program, "vadd");

        d_a   = cl::Buffer(context, begin(h_a), end(h_a), true);
        d_b   = cl::Buffer(context, begin(h_b), end(h_b), true);

        d_c  = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH);

        util::Timer timer;

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)), 
            d_a,
            d_b,
            d_c,
            count);

        queue.finish();

        double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;
        printf("\nThe kernels ran in %lf seconds\n", rtime);

        cl::copy(queue, d_c, begin(h_c), end(h_c));

        // Test the results
        int correct = 0;
        float tmp;
        for(int i = 0; i < count; i++) {
            tmp = h_a[i] + h_b[i]; // expected value for d_c[i]
            tmp -= h_c[i];                      // compute errors
            if(tmp*tmp < TOL*TOL) {      // correct if square deviation is less 
                correct++;                         //  than tolerance squared
            }
            else {

                printf(
                    " tmp %f h_a %f h_b %f  h_c %f \n",
                    tmp, 
                    h_a[i], 
                    h_b[i], 
                    h_c[i]);
            }
        }

        // summarize results
        printf(
            "vector add to find C = A+B:  %d out of %d results were correct.\n", 
            correct, 
            count);
    }
    catch (cl::Error err) {
        std::cout << "Exception\n";
        std::cerr 
            << "ERROR: "
            << err.what()
            << "("
            << err_code(err.err())
           << ")"
           << std::endl;
    }
}
Example #25
0
// Builds the render queue for a spin button: the background pane, the up and
// down stepper panes with their arrow triangles, the visible text, and, when
// the widget has focus and its cursor is visible, the text cursor.
std::unique_ptr<RenderQueue> BREW::CreateSpinButtonDrawable( std::shared_ptr<const SpinButton> spinbutton ) const {
	// Style properties.
	auto border_color = GetProperty<sf::Color>( "BorderColor", spinbutton );
	auto background_color = GetProperty<sf::Color>( "BackgroundColor", spinbutton );
	auto text_color = GetProperty<sf::Color>( "Color", spinbutton );
	auto cursor_color = GetProperty<sf::Color>( "Color", spinbutton );
	auto text_padding = GetProperty<float>( "Padding", spinbutton );
	auto cursor_thickness = GetProperty<float>( "Thickness", spinbutton );
	auto border_width = GetProperty<float>( "BorderWidth", spinbutton );
	auto border_color_shift = GetProperty<int>( "BorderColorShift", spinbutton );
	const auto& font_name = GetProperty<std::string>( "FontName", spinbutton );
	const auto& font = GetResourceManager().GetFont( font_name );
	auto font_size = GetProperty<unsigned int>( "FontSize", spinbutton );
	auto stepper_aspect_ratio = GetProperty<float>( "StepperAspectRatio", spinbutton );
	auto stepper_color = GetProperty<sf::Color>( "StepperBackgroundColor", spinbutton );
	auto stepper_border_color = GetProperty<sf::Color>( "BorderColor", spinbutton );
	auto stepper_arrow_color = GetProperty<sf::Color>( "StepperArrowColor", spinbutton );

	// Widget geometry, looked up once.
	const auto& allocation = spinbutton->GetAllocation();
	const auto width = allocation.width;
	const auto height = allocation.height;

	// Stepper geometry shared by the up and down buttons.
	const auto stepper_width = ( height / 2.f ) * stepper_aspect_ratio;
	const auto stepper_left = width - stepper_width - border_width;
	const auto stepper_height = height / 2.f - border_width;
	const auto arrow_center_x = width - stepper_width / 2.f - border_width;
	const auto arrow_left_x = width - stepper_width / 4.f * 3.f - border_width;
	const auto arrow_right_x = width - stepper_width / 4.f - border_width;

	// A pressed stepper inverts its border shading and nudges its arrow down by one unit.
	const bool increase_pressed = spinbutton->IsIncreaseStepperPressed();
	const bool decrease_pressed = spinbutton->IsDecreaseStepperPressed();
	const auto increase_nudge = increase_pressed ? 1.f : 0.f;
	const auto decrease_nudge = decrease_pressed ? 1.f : 0.f;

	std::unique_ptr<RenderQueue> drawable( new RenderQueue );

	// Pane.
	drawable->Add(
		Renderer::Get().CreatePane(
			sf::Vector2f( 0.f, 0.f ),
			sf::Vector2f( width, height ),
			border_width,
			background_color,
			border_color,
			-border_color_shift
		)
	);

	// Up Stepper.
	drawable->Add(
		Renderer::Get().CreatePane(
			sf::Vector2f( stepper_left, border_width ),
			sf::Vector2f( stepper_width, stepper_height ),
			border_width,
			stepper_color,
			stepper_border_color,
			increase_pressed ? -border_color_shift : border_color_shift
		)
	);

	// Up Stepper Triangle.
	drawable->Add(
		Renderer::Get().CreateTriangle(
			sf::Vector2f( arrow_center_x, increase_nudge + border_width + height / 6.f ),
			sf::Vector2f( arrow_left_x, increase_nudge + border_width + height / 3.f ),
			sf::Vector2f( arrow_right_x, increase_nudge + border_width + height / 3.f ),
			stepper_arrow_color
		)
	);

	// Down Stepper.
	drawable->Add(
		Renderer::Get().CreatePane(
			sf::Vector2f( stepper_left, height / 2.f ),
			sf::Vector2f( stepper_width, stepper_height ),
			border_width,
			stepper_color,
			stepper_border_color,
			decrease_pressed ? -border_color_shift : border_color_shift
		)
	);

	// Down Stepper Triangle (vertex order mirrors the up arrow).
	drawable->Add(
		Renderer::Get().CreateTriangle(
			sf::Vector2f( arrow_center_x, decrease_nudge + height - border_width - height / 6.f ),
			sf::Vector2f( arrow_right_x, decrease_nudge + height - border_width - height / 3.f ),
			sf::Vector2f( arrow_left_x, decrease_nudge + height - border_width - height / 3.f ),
			stepper_arrow_color
		)
	);

	// Visible text, vertically centred on the widget.
	auto line_height = GetFontLineHeight( *font, font_size );
	const auto text_top = height / 2.f - line_height / 2.f;

	sf::Text label( spinbutton->GetVisibleText(), *font, font_size );
	label.setFillColor( text_color );
	label.setPosition( text_padding, text_top );

	drawable->Add( Renderer::Get().CreateText( label ) );

	// Draw cursor if spinbutton is active and cursor is visible.
	if( spinbutton->HasFocus() && spinbutton->IsCursorVisible() ) {
		// Keep only the visible text in front of the cursor; its rendered
		// width gives the cursor's horizontal position.
		sf::String text_before_cursor( spinbutton->GetVisibleText() );
		const auto cursor_index = spinbutton->GetCursorPosition() - spinbutton->GetVisibleOffset();

		if( cursor_index < static_cast<int>( text_before_cursor.getSize() ) ) {
			text_before_cursor.erase( static_cast<std::size_t>( cursor_index ), text_before_cursor.getSize() );
		}

		// Get metrics.
		sf::Vector2f cursor_metrics( GetTextStringMetrics( text_before_cursor, *font, font_size ) );

		drawable->Add(
			Renderer::Get().CreateRect(
				sf::FloatRect(
					cursor_metrics.x + text_padding,
					text_top,
					cursor_thickness,
					line_height
				),
				cursor_color
			)
		);
	}

	return drawable;
}
Example #26
0
	// Calls clear() on the object returned by queue(), then returns false
	// unconditionally.
	// NOTE(review): what a false return means to the caller is not visible
	// in this fragment — confirm against the interface this implements.
	bool execute()
	{
		queue().clear();
		return false;
	}
Example #27
0
// Reset hook for the null modem device: calls update_serial(0) and then
// queue().
// NOTE(review): the meaning of the 0 argument and what queue() does are
// defined outside this fragment — confirm before reordering these calls.
void null_modem_device::device_reset()
{
	update_serial(0);
	queue();
}
Example #28
0
// Calls queue() and does nothing else.
// NOTE(review): judging by the name this is presumably a transmit-complete
// callback — confirm against the base class declaration.
void null_modem_device::tra_complete()
{
	queue();
}
Example #29
0
 // Downcasts this worker's queue() to io_looper_task_queue and runs its
 // loop_worker().
 // NOTE(review): the dynamic_cast result is dereferenced without a null
 // check; this is only safe if queue() is always an io_looper_task_queue
 // (or derived). Confirm that invariant, or assert on it.
 void io_looper_task_worker::loop()
 {
     io_looper_task_queue* looper = dynamic_cast<io_looper_task_queue*>(queue());
     looper->loop_worker();
 }
Example #30
0
 // Constructs the worker by forwarding every argument to the task_worker
 // base, then downcasts queue() to io_looper_task_queue and calls
 // start(nullptr, 0) on it.
 // NOTE(review): the dynamic_cast result is dereferenced without a null
 // check; this is only safe if the queue is always an io_looper_task_queue
 // (or derived). The meaning of the (nullptr, 0) arguments to start() is
 // defined elsewhere — confirm before changing them.
 io_looper_task_worker::io_looper_task_worker(task_worker_pool* pool, task_queue* q, int index, task_worker* inner_provider)
     : task_worker(pool, q, index, inner_provider)
 {
     io_looper_task_queue* looper = dynamic_cast<io_looper_task_queue*>(queue());
     looper->start(nullptr, 0);
 }