/**
 * Allocate and initialise a new window.
 *
 * The window starts with a fresh buffer obtained from queue(), autocommit
 * enabled, an initialised mutex and a semaphore whose initial value is 0.
 *
 * @param type     Kind of window to create.
 * @param max_size Requested maximum size. Overridden with 1 when type is
 *                 WINDOW_TYPE_SNW.
 * @return The newly allocated window, or NULL when the allocation fails.
 */
window_t* window(window_type_t type, uint8_t max_size)
{
    debug_print("window()\n");

    window_t* new_window = (window_t*) calloc(1, sizeof(window_t));
    if (new_window == NULL) {
        /* Out of memory: report failure instead of dereferencing NULL. */
        return NULL;
    }

    new_window->buffer = queue();
    new_window->type = type;
    /* WINDOW_TYPE_SNW windows always have a size of one, whatever was asked. */
    new_window->max_size = (type == WINDOW_TYPE_SNW) ? 1 : max_size;
    new_window->autocommit = true;

    pthread_mutex_init(&new_window->lock, 0);
    sem_init(&new_window->available, 0, 0);

    success_print("window() succeed\n");
    return new_window;
}
/** Find a path with sufficient unused residual capacity. * @return true if a path was found from source to sink. */ bool mflo_ffs::findPath() { vertex u,v; edge e; List queue(g->n()); while (scale > 0) { for (u = 1; u <= g->n(); u++) pEdge[u] = 0; queue.addLast(g->src()); while (!queue.empty()) { u = queue.first(); queue.removeFirst(); for (e = g->firstAt(u); e != 0; e=g->nextAt(u,e)) { v = g->mate(u,e); if (g->res(u,e) >= scale && pEdge[v] == 0 && v != g->src()) { pEdge[v] = e; if (v == g->snk()) return true; queue.addLast(v); } } } scale /= 2; } return false; }
// this example demonstrates how to use the mapped_view class to map // an array of numbers to device memory and use the reduce() algorithm // to calculate the sum. int main() { // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); std::cout << "device: " << gpu.name() << std::endl; // create data on host int data[] = { 4, 2, 3, 7, 8, 9, 1, 6 }; // create mapped view on device compute::mapped_view<int> view(data, 8, context); // use reduce() to calculate sum on the device int sum = 0; compute::reduce(view.begin(), view.end(), &sum, queue); // print the sum on the host std::cout << "sum: " << sum << std::endl; return 0; }
std::unique_ptr<RenderQueue> BREW::CreateLabelDrawable( std::shared_ptr<const Label> label ) const { const auto& font_name = GetProperty<std::string>( "FontName", label ); const auto& font = GetResourceManager().GetFont( font_name ); auto font_size = GetProperty<unsigned int>( "FontSize", label ); auto font_color = GetProperty<sf::Color>( "Color", label ); std::unique_ptr<RenderQueue> queue( new RenderQueue ); sf::Text vis_label( label->GetWrappedText(), *font, font_size ); vis_label.setColor( font_color ); if( !label->GetLineWrap() ) { // Calculate alignment when word wrap is disabled. sf::Vector2f avail_space( label->GetAllocation().width - label->GetRequisition().x, label->GetAllocation().height - label->GetRequisition().y ); sf::Vector2f position( avail_space.x * label->GetAlignment().x, avail_space.y * label->GetAlignment().y ); vis_label.setPosition( position.x, position.y ); } queue->Add( Renderer::Get().CreateText( vis_label ) ); return queue; }
void dijkstra(int source) { fill_range(dist, dist + graph.vertex_num, INF); // \SourceRef{source:utility} fill_range(prev, prev + graph.vertex_num, -1); fill_range<Edge *>(path, path + graph.vertex_num, NULL); dist[source] = 0; std::set<int, bool(*)(int ,int)> queue(dijkstra_compare); // use binary heap for (int vi = 0; vi < graph.vertex_num; ++vi) { queue.insert(vi); } for (; !queue.empty(); ) { int u = *queue.begin(); queue.erase(u); for (SPEdge * edge = graph.head[u]; edge != NULL; edge = edge->next) { if (queue.count(edge->v) > 0 && dist[edge->u] + edge->w < dist[edge->v]) { queue.erase(edge->v); dist[edge->v] = dist[edge->u] + edge->w; prev[edge->v] = edge->u; path[edge->v] = edge; queue.insert(edge->v); } } } }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); compute::vector<compute::uint_> vector(PERF_N, context); compute::default_random_engine rng(queue); compute::uniform_int_distribution<compute::uint_> dist(0, 1); perf_timer t; t.start(); dist.generate(vector.begin(), vector.end(), rng, queue); queue.finish(); t.stop(); std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; }
void YKNewTask(void (*task)(void), void*taskStack, unsigned char priority) { /* Creates a new task */ int ip,sp; TCBptr new_task = &YKTCBArray[activeTasks]; activeTasks++; new_task->priority = priority; new_task->state = READY; new_task->delay = 0; new_task->next = NULL; new_task->prev = NULL; YKRdyList = queue(YKRdyList,new_task); ip = (int) task & 0xFFFF; sp = (int) taskStack & 0xFFFF; sp = initStack(ip,sp); new_task->sp = (void*)sp; if(runningTask != NULL){ YKScheduler(0); } }
/*
 * Sets the bits given in mask on event e and moves every task waiting on
 * the event to the ready list. The scheduler is called afterwards, outside
 * the mutex, if at least one task was released.
 */
void YKEventSet(YKEVENT *e, unsigned mask)
{
    TCBptr cur;
    TCBptr released;
    int needSchedule = 0;

    YKEnterMutex();

    e->flags |= mask;       /* set the mask bits, leave the others unchanged */
    e->sFlags = e->flags;   /* save flags that caused this pend */

    /* unblock all tasks associated with this event */
    for (cur = e->tasks; cur != NULL; ) {
        released = cur;
        cur = cur->next;
        e->tasks = cur;
        released->next = NULL;
        YKRdyList = queue(YKRdyList, released);
        needSchedule = 1;
    }

    YKExitMutex();

    if (needSchedule) {
        YKScheduler(0);
    }
}
// Build the rlpr command line used to print on the given printer.
// Returns false (setting an error message on the printer where one is
// available) when the printer is unknown, its host/queue options are missing
// or the rlpr executable cannot be found; otherwise stores the command in
// cmd and returns true.
bool KRlprPrinterImpl::setupCommand(TQString& cmd, KPrinter *printer)
{
	// the KMPrinter object carries the remote host and queue name
	KMPrinter *rpr = KMFactory::self()->manager()->findPrinter(printer->printerName());
	if (!rpr)
		return false;

	QString host(rpr->option("host")), queue(rpr->option("queue"));
	if (host.isEmpty() || queue.isEmpty())
	{
		printer->setErrorMessage(i18n("The printer is incompletely defined. Try to reinstall it."));
		return false;
	}

	QString exestr = TDEStandardDirs::findExe("rlpr");
	if (exestr.isEmpty())
	{
		printer->setErrorMessage(i18n("The <b>%1</b> executable could not be found in your path. Check your installation.").arg("rlpr"));
		return false;
	}

	cmd = TQString::fromLatin1("%1 -H %2 -P %3 -\\#%4").arg(exestr).arg(quote(host)).arg(quote(queue)).arg(printer->numCopies());

	// proxy settings
	TDEConfig *conf = KMFactory::self()->printConfig();
	conf->setGroup("RLPR");
	QString proxyHost = conf->readEntry("ProxyHost",TQString::null);
	QString proxyPort = conf->readEntry("ProxyPort",TQString::null);
	if (!proxyHost.isEmpty())
	{
		cmd.append(" -X ").append(quote(proxyHost));
		if (!proxyPort.isEmpty())
			cmd.append(" --port=").append(proxyPort);
	}

	return true;
}
// Test driver for SKKInputQueue.
// Characters are fed into the queue one at a time and, after each step, the
// pair of strings reported by TestInputQueueObserver is checked.
// NOTE(review): the first string looks like the converted kana and the second
// like the romaji still waiting for conversion -- confirm against
// TestInputQueueObserver::Test.
int main() {
    // load the romaji-to-kana rules used by the queue
    auto& converter = SKKRomanKanaConverter::theInstance();
    converter.Initialize("kana-rule.conf");

    TestInputQueueObserver observer;
    SKKInputQueue queue(&observer);

    // a single vowel is reported immediately
    queue.AddChar('a');
    assert(observer.Test("あ", ""));

    // "ky" stays pending; removing a character goes back to "k",
    // and the following 'i' completes it
    observer.Clear();
    queue.AddChar('k');
    assert(observer.Test("", "k"));
    queue.AddChar('y');
    assert(observer.Test("", "ky"));
    queue.RemoveChar();
    assert(observer.Test("", "k"));
    queue.AddChar('i');
    assert(observer.Test("き", ""));

    // a lone 'n' is pending until Terminate() is called
    observer.Clear();
    queue.AddChar('n');
    assert(observer.Test("", "n"));
    queue.Terminate();
    assert(observer.Test("ん", ""));

    // with 'n' pending, 'i' is accepted by CanConvert()
    queue.AddChar('n');
    assert(queue.CanConvert('i'));

    // after 'o', the 'w' is dropped once 's' 'a' follow: only "おさ" is reported
    queue.Terminate();
    observer.Clear();
    queue.AddChar('o');
    queue.AddChar('w');
    queue.AddChar('s');
    queue.AddChar('a');
    assert(observer.Test("おさ", ""));
}
/** Compute exact distance labels and return in distance vector. * For vertices that can't reach sink, compute labels to source. */ void mflo_pp::initdist() { vertex u,v; edge e; List queue(g->n()); for (u = 1; u < g->n(); u++) d[u] = 2*g->n(); // compute distance labels for vertices that have path to sink d[g->snk()] = 0; queue.addLast(g->snk()); while (!queue.empty()) { u = queue.first(); queue.removeFirst(); for (e = g->firstAt(u); e != 0; e = g->nextAt(u,e)) { v = g->mate(u,e); if (g->res(v,e) > 0 && d[v] > d[u] + 1) { d[v] = d[u] + 1; queue.addLast(v); } } } if (d[g->src()] < g->n()) Util::fatal("initdist: path present from source to sink"); // compute distance labels for remaining vertices d[g->src()] = g->n(); queue.addLast(g->src()); while (!queue.empty()) { u = queue.first(); queue.removeFirst(); for (e = g->firstAt(u); e != 0; e = g->nextAt(u,e)) { v = g->mate(u,e); if (g->res(v,e) > 0 && d[v] > d[u] + 1) { d[v] = d[u] + 1; queue.addLast(v); } } } }
void intel_wait_engine_idle(void) { TRACE(("intel_wait_engine_idle()\n")); { QueueCommands queue(gInfo->shared_info->primary_ring_buffer); queue.PutFlush(); } // TODO: this should only be a temporary solution! // a better way to do this would be to acquire the engine's lock and // sync to the latest token bigtime_t start = system_time(); ring_buffer &ring = gInfo->shared_info->primary_ring_buffer; uint32 head, tail; while (true) { head = read32(ring.register_base + RING_BUFFER_HEAD) & INTEL_RING_BUFFER_HEAD_MASK; tail = read32(ring.register_base + RING_BUFFER_TAIL) & INTEL_RING_BUFFER_HEAD_MASK; if (head == tail) break; if (system_time() > start + 1000000LL) { // the engine seems to be locked up! TRACE(("intel_extreme: engine locked up, head %lx!\n", head)); break; } spin(10); } }
// this example demonstrates how to print the values in a vector int main() { // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); std::cout << "device: " << gpu.name() << std::endl; // create vector on the device and fill with the sequence 1..10 compute::vector<int> vector(10, context); compute::iota(vector.begin(), vector.end(), 1, queue); //[print_vector_example std::cout << "vector: [ "; boost::compute::copy( vector.begin(), vector.end(), std::ostream_iterator<int>(std::cout, ", "), queue ); std::cout << "]" << std::endl; //] return 0; }
int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector<int> host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::vector<int> device_vector( host_vector.begin(), host_vector.end(), queue ); t.start(); device_vector.erase( boost::compute::remove( device_vector.begin(), device_vector.end(), 4, queue ), device_vector.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; }
std::deque<Address> GPU::find(std::vector<FindArgs> &requests) const { cl::CommandQueue queue(context,dev); // std::clog<<"Queue and kernel constructed"<<std::endl; std::deque<Address> result; cl_ulong2 *output=new cl_ulong2[requests.size()]; cl::Buffer bufOutput(context, CL_MEM_WRITE_ONLY, requests.size()*sizeof(cl_ulong2)); // std::clog<<"Output buffer ready"<<std::endl; cl::Buffer bufArgs(context, requests.data(), requests.data()+requests.size(), true, true); // std::clog<<"Args buffer ready"<<std::endl; cl::make_kernel<cl::Buffer&,cl::Buffer&,cl::Buffer&> find(kfind); find(cl::EnqueueArgs(queue, cl::NDRange(requests.size())), *bufData, bufArgs, bufOutput); queue.finish(); // std::clog<<"Kernels executed"<<std::endl; cl::copy(queue, bufOutput, output, output+requests.size()); queue.finish(); for(size_t i=0;i<requests.size();++i) { if(output[i].s[0]!=-1) result.push_back(Address(output[i].s[0],output[i].s[1])); } delete []output; return result; }
bool find_dijkstra() { fill_range(cost, cost + graph.vertex_num, INF); // \SourceRef{source:utility} fill_range(prev, prev + graph.vertex_num, -1); fill_range<Edge *>(path, path + graph.vertex_num, NULL); cost[source] = 0; std::set<int, bool(*)(int ,int)> queue(dijkstra_compare); for (int vi = 0; vi < graph.vertex_num; ++vi) { queue.insert(vi); } for (; !queue.empty(); ) { int u = *queue.begin(); queue.erase(u); for (Edge * edge = graph.head[u]; edge != NULL; edge = edge->next) { if (queue.count(edge->v) > 0 && edge->flow < edge->capacity && cost[edge->u] + edge->cost < cost[edge->v]) { queue.erase(edge->v); cost[edge->v] = cost[edge->u] + edge->cost; prev[edge->v] = edge->u; path[edge->v] = edge; queue.insert(edge->v); } } } return cost[sink] != INF; }
// Creates a serial queue whose callback funnels work through a shared
// ser_queue_comm object.
//
// The callback appends a { steal_work, q } entry to comm->qu and, unless a
// drain task is already outstanding (comm->cor_sched), schedules one. The
// drain task takes over everything accumulated in comm->qu and runs it until
// empty while holding comm->mt_exec.
// NOTE(review): when the callback passed to the queue constructor is invoked
// is not visible here -- presumably whenever work is submitted to q; confirm.
queue ser_queue( )
{
    // state shared between the callback and the drain tasks it schedules
    auto comm = std::make_shared< ser_queue_comm >( );
    queue q( queue_type::serial, [comm = std::move( comm )]( queue & q ) {
        // mt_qu guards comm->qu and comm->cor_sched
        boost::lock_guard< boost::mutex > lock( comm->mt_qu );
        comm->qu.append_queue( { steal_work, q } );
        if ( comm->cor_sched ) {
            // a drain task is already pending and will pick this entry up
            return;
        }
        queue q_ser( queue_type::serial );
        q_ser.submit_work( [comm]( ) mutable {
            // mt_exec is taken first and held for the whole drain
            boost::lock_guard< event::mutex > lock_exec( comm->mt_exec );
            boost::unique_lock< boost::mutex > lock( comm->mt_qu );
            assert( comm->cor_sched );
            // clearing the flag lets the next callback schedule a new drain
            comm->cor_sched = false;
            // take the pending work and leave a fresh serial queue behind
            auto q_work = std::move( comm->qu );
            comm->qu = queue( queue_type::serial );
            // run the work without holding mt_qu
            lock.unlock( );
            q_work.run_until_empty( );
        } );
        schedule_queue( std::move( q_ser ) );
        comm->cor_sched = true;
    } );
    return q;
}
//-------------------------------------------------------------------------------------------------- bool Index::reachable_bfs(unsigned x, unsigned y) { if (x == y) return true; ++queryId; std::deque<unsigned> queue(1, x); unsigned v; const std::vector<unsigned> *nb; while (!queue.empty()) { v = queue.front(); queue.pop_front(); if (visited[v] == queryId) continue; visited[v] = queryId; ++expanded; nb = g->get_neighbors(v); for (std::vector<unsigned>::const_iterator it = nb->begin(); it != nb->end(); ++it) { if (y == *it) return true; queue.push_back(*it); } } return false; }
// Stand-alone exercise of videoQueue_t: prints the queue geometry, moves
// every empty entry to the full side (stamping each with a decreasing
// counter that starts at NUMENTRIES), then hands every full entry back.
int main( void )
{
   videoQueue_t queue( 320, 240 );
   printf( "entry size %u\n", queue.entrySize_ );
   printf( "row stride %u\n", queue.rowStride_ );

   unsigned idx = NUMENTRIES ;

   // fill: take entries from the empty side until none is left
   for( videoQueue_t::entry_t *entry = queue.getEmpty(); 0 != entry ; entry = queue.getEmpty() ){
      printf( "empty %u, %p\n", idx, entry );
      entry->when_ms_ = idx ;
      --idx ;
      queue.putFull( entry );
   }

   // drain: return every full entry to the empty side
   for( videoQueue_t::entry_t *entry = queue.getFull(); 0 != entry ; entry = queue.getFull() ){
      printf( "full " I64FMT ", %p\n", entry->when_ms_, entry );
      queue.putEmpty( entry );
   }

   return 0 ;
}
void LLEventQueue::flush() { if(!mSignal) return; // Consider the case when a given listener on this LLEventQueue posts yet // another event on the same queue. If we loop over mEventQueue directly, // we'll end up processing all those events during the same flush() call // -- rather like an EventStream. Instead, copy mEventQueue and clear it, // so that any new events posted to this LLEventQueue during flush() will // be processed in the *next* flush() call. EventQueue queue(mEventQueue); mEventQueue.clear(); // NOTE NOTE NOTE: Any new access to member data beyond this point should // cause us to move our LLStandardSignal object to a pimpl class along // with said member data. Then the local shared_ptr will preserve both. // DEV-43463: capture a local copy of mSignal. See LLEventStream::post() // for detailed comments. boost::shared_ptr<LLStandardSignal> signal(mSignal); for ( ; ! queue.empty(); queue.pop_front()) { (*signal)(queue.front()); } }
// Returns the size() of the queue selected by inputQueue, as reported by
// the object that queue(inputQueue) yields.
size_t SegmentedInputStorage::queueSize(InputQueue inputQueue) const
{
    return queue(inputQueue).size();
}
int main(int argc, char *argv[]) { float *h_psum; // vector to hold partial sum int in_nsteps = INSTEPS; // default number of steps (updated later to device prefereable) int niters = ITERS; // number of iterations int nsteps; float step_size; ::size_t nwork_groups; ::size_t max_size, work_group_size = 8; float pi_res; cl::Buffer d_partial_sums; try { cl_uint deviceIndex = 0; parseArguments(argc, argv, &deviceIndex); // Get list of devices std::vector<cl::Device> devices; unsigned numDevices = getDeviceList(devices); // Check device index in range if (deviceIndex >= numDevices) { std::cout << "Invalid device index (try '--list')\n"; return EXIT_FAILURE; } cl::Device device = devices[deviceIndex]; std::string name; getDeviceName(device, name); std::cout << "\nUsing OpenCL device: " << name << "\n"; std::vector<cl::Device> chosen_device; chosen_device.push_back(device); cl::Context context(chosen_device); cl::CommandQueue queue(context, device); // Create the program object cl::Program program(context, util::loadProgram("../pi_ocl.cl"), true); // Create the kernel object for quering information cl::Kernel ko_pi(program, "pi"); // Get the work group size work_group_size = ko_pi.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(device); //printf("wgroup_size = %lu\n", work_group_size); cl::make_kernel<int, float, cl::LocalSpaceArg, cl::Buffer> pi(program, "pi"); // Now that we know the size of the work_groups, we can set the number of work // groups, the actual number of steps, and the step size nwork_groups = in_nsteps/(work_group_size*niters); if ( nwork_groups < 1) { nwork_groups = device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(); work_group_size=in_nsteps / (nwork_groups*niters); } nsteps = work_group_size * niters * nwork_groups; step_size = 1.0f/static_cast<float>(nsteps); std::vector<float> h_psum(nwork_groups); printf( " %d work groups of size %d. 
%d Integration steps\n", (int)nwork_groups, (int)work_group_size, nsteps); d_partial_sums = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * nwork_groups); util::Timer timer; // Execute the kernel over the entire range of our 1d input data set // using the maximum number of work group items for this device pi( cl::EnqueueArgs( queue, cl::NDRange(nsteps / niters), cl::NDRange(work_group_size)), niters, step_size, cl::Local(sizeof(float) * work_group_size), d_partial_sums); cl::copy(queue, d_partial_sums, h_psum.begin(), h_psum.end()); // complete the sum and compute final integral value pi_res = 0.0f; for (unsigned int i = 0; i< nwork_groups; i++) { pi_res += h_psum[i]; } pi_res = pi_res * step_size; //rtime = wtime() - rtime; double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.; printf("\nThe calculation ran in %lf seconds\n", rtime); printf(" pi = %f for %d steps\n", pi_res, nsteps); } catch (cl::Error err) { std::cout << "Exception\n"; std::cerr << "ERROR: " << err.what() << "(" << err_code(err.err()) << ")" << std::endl; } }
/**
 * Translates the QIR instructions in c->instructions into QPU instructions.
 *
 * Steps, in order:
 *  - obtain the temp register assignment from vc4_register_allocate();
 *  - for the vertex and coordinate stages, queue the VPM read/write setup;
 *  - for each QIR instruction, resolve its sources and destination to QPU
 *    registers and queue one or more QPU instructions for it;
 *  - schedule the queued instructions with qpu_schedule_instructions();
 *  - pad the end of the program with NOPs where the last instruction is not
 *    allowed to carry the program-end signal, then add the end-of-program
 *    signals;
 *  - optionally dump, then validate, the resulting program.
 */
void
vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
{
        struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c);
        /* Set once a QOP_TLB_DISCARD_SETUP has been seen; later TLB Z and
         * color writes are then made conditional on QPU_COND_ZS.
         */
        bool discard = false;
        uint32_t inputs_remaining = c->num_inputs;
        uint32_t vpm_read_fifo_count = 0;
        uint32_t vpm_read_offset = 0;
        /* Only used to assert that VPM reads happen in increasing order. */
        int last_vpm_read_index = -1;
        /* Map from the QIR ops enum order to QPU unpack bits. */
        static const uint32_t unpack_map[] = {
                QPU_UNPACK_8A,
                QPU_UNPACK_8B,
                QPU_UNPACK_8C,
                QPU_UNPACK_8D,
                QPU_UNPACK_16A_TO_F32,
                QPU_UNPACK_16B_TO_F32,
        };

        list_inithead(&c->qpu_inst_list);

        switch (c->stage) {
        case QSTAGE_VERT:
        case QSTAGE_COORD:
                /* There's a 4-entry FIFO for VPMVCD reads, each of which can
                 * load up to 16 dwords (4 vec4s) per vertex.
                 */
                while (inputs_remaining) {
                        uint32_t num_entries = MIN2(inputs_remaining, 16);
                        queue(c, qpu_load_imm_ui(qpu_vrsetup(),
                                                 vpm_read_offset |
                                                 0x00001a00 |
                                                 ((num_entries & 0xf) << 20)));
                        inputs_remaining -= num_entries;
                        vpm_read_offset += num_entries;
                        vpm_read_fifo_count++;
                }
                assert(vpm_read_fifo_count <= 4);

                queue(c, qpu_load_imm_ui(qpu_vwsetup(), 0x00001a00));
                break;
        case QSTAGE_FRAG:
                break;
        }

        list_for_each_entry(struct qinst, qinst, &c->instructions, link) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                qir_dump_inst(qinst);
                fprintf(stderr, "\n");
#endif

                /* QIR ops that map one-to-one onto a QPU add (A) or mul (M)
                 * ALU op, indexed by QIR opcode. Used by the default case of
                 * the opcode switch below.
                 */
                static const struct {
                        uint32_t op;
                } translate[] = {
#define A(name) [QOP_##name] = {QPU_A_##name}
#define M(name) [QOP_##name] = {QPU_M_##name}
                        A(FADD),
                        A(FSUB),
                        A(FMIN),
                        A(FMAX),
                        A(FMINABS),
                        A(FMAXABS),
                        A(FTOI),
                        A(ITOF),
                        A(ADD),
                        A(SUB),
                        A(SHL),
                        A(SHR),
                        A(ASR),
                        A(MIN),
                        A(MAX),
                        A(AND),
                        A(OR),
                        A(XOR),
                        A(NOT),

                        M(FMUL),
                        M(MUL24),
                };

                /* Resolve each QIR source to a QPU register. */
                struct qpu_reg src[4];
                for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_NULL:
                                src[i] = qpu_rn(0);
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_UNIF:
                                src[i] = qpu_unif();
                                break;
                        case QFILE_VARY:
                                src[i] = qpu_vary();
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].mux = QPU_MUX_SMALL_IMM;
                                src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
                                /* This should only have returned a valid
                                 * small immediate field, not ~0 for failure.
                                 */
                                assert(src[i].addr <= 47);
                                break;
                        case QFILE_VPM:
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;
                                src[i] = qpu_ra(QPU_R_VPM);
                                break;
                        }
                }

                /* Resolve the QIR destination to a QPU register. */
                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_ra(QPU_W_NOP);
                        break;
                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;
                case QFILE_VPM:
                        dst = qpu_ra(QPU_W_VPM);
                        break;
                case QFILE_VARY:
                case QFILE_UNIF:
                case QFILE_SMALL_IMM:
                        assert(!"not reached");
                        break;
                }

                switch (qinst->op) {
                case QOP_MOV:
                        /* Skip emitting the MOV if it's a no-op. */
                        if (dst.mux == QPU_MUX_A || dst.mux == QPU_MUX_B ||
                            dst.mux != src[0].mux || dst.addr != src[0].addr) {
                                queue(c, qpu_a_MOV(dst, src[0]));
                        }
                        break;

                case QOP_SEL_X_0_ZS:
                case QOP_SEL_X_0_ZC:
                case QOP_SEL_X_0_NS:
                case QOP_SEL_X_0_NC:
                case QOP_SEL_X_0_CS:
                case QOP_SEL_X_0_CC:
                        /* Two instructions with opposite conditions: move
                         * src[0] under the op's condition, and write
                         * r0 XOR r0 (zero) under the inverted one.
                         */
                        queue(c, qpu_a_MOV(dst, src[0]));
                        set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
                                          QPU_COND_ZS);

                        queue(c, qpu_a_XOR(dst, qpu_r0(), qpu_r0()));
                        set_last_cond_add(c, ((qinst->op - QOP_SEL_X_0_ZS) ^
                                              1) + QPU_COND_ZS);
                        break;

                case QOP_SEL_X_Y_ZS:
                case QOP_SEL_X_Y_ZC:
                case QOP_SEL_X_Y_NS:
                case QOP_SEL_X_Y_NC:
                case QOP_SEL_X_Y_CS:
                case QOP_SEL_X_Y_CC:
                        /* Same pattern, selecting between src[0] and src[1]. */
                        queue(c, qpu_a_MOV(dst, src[0]));
                        set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
                                          QPU_COND_ZS);

                        queue(c, qpu_a_MOV(dst, src[1]));
                        set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
                                              1) + QPU_COND_ZS);

                        break;

                case QOP_RCP:
                case QOP_RSQ:
                case QOP_EXP2:
                case QOP_LOG2:
                        /* Write the operand to the matching SFU register,
                         * then copy the result out of r4 unless the
                         * destination already is r4.
                         */
                        switch (qinst->op) {
                        case QOP_RCP:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP),
                                                   src[0]));
                                break;
                        case QOP_RSQ:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIPSQRT),
                                                   src[0]));
                                break;
                        case QOP_EXP2:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_EXP),
                                                   src[0]));
                                break;
                        case QOP_LOG2:
                                queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_LOG),
                                                   src[0]));
                                break;
                        default:
                                abort();
                        }

                        if (dst.mux != QPU_MUX_R4)
                                queue(c, qpu_a_MOV(dst, qpu_r4()));

                        break;

                case QOP_PACK_8888_F:
                        queue(c, qpu_m_MOV(dst, src[0]));
                        *last_inst(c) |= QPU_PM;
                        *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
                                                       QPU_PACK);
                        break;

                case QOP_PACK_8A_F:
                case QOP_PACK_8B_F:
                case QOP_PACK_8C_F:
                case QOP_PACK_8D_F:
                        queue(c,
                              qpu_m_MOV(dst, src[0]) |
                              QPU_PM |
                              QPU_SET_FIELD(QPU_PACK_MUL_8A +
                                            qinst->op - QOP_PACK_8A_F,
                                            QPU_PACK));
                        break;

                case QOP_FRAG_X:
                        queue(c, qpu_a_ITOF(dst,
                                            qpu_ra(QPU_R_XY_PIXEL_COORD)));
                        break;

                case QOP_FRAG_Y:
                        queue(c, qpu_a_ITOF(dst,
                                            qpu_rb(QPU_R_XY_PIXEL_COORD)));
                        break;

                case QOP_FRAG_REV_FLAG:
                        queue(c, qpu_a_ITOF(dst,
                                            qpu_rb(QPU_R_MS_REV_FLAGS)));
                        break;

                case QOP_FRAG_Z:
                case QOP_FRAG_W:
                        /* QOP_FRAG_Z/W don't emit instructions, just allocate
                         * the register to the Z/W payload.
                         */
                        break;

                case QOP_TLB_DISCARD_SETUP:
                        discard = true;
                        /* MOV of src[0] onto itself, emitted only so that
                         * QPU_SF can be set on it.
                         */
                        queue(c, qpu_a_MOV(src[0], src[0]));
                        *last_inst(c) |= QPU_SF;
                        break;

                case QOP_TLB_STENCIL_SETUP:
                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), src[0]));
                        break;

                case QOP_TLB_Z_WRITE:
                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0]));
                        if (discard) {
                                set_last_cond_add(c, QPU_COND_ZS);
                        }
                        break;

                case QOP_TLB_COLOR_READ:
                        queue(c, qpu_NOP());
                        *last_inst(c) = qpu_set_sig(*last_inst(c),
                                                    QPU_SIG_COLOR_LOAD);

                        if (dst.mux != QPU_MUX_R4)
                                queue(c, qpu_a_MOV(dst, qpu_r4()));
                        break;

                case QOP_TLB_COLOR_WRITE:
                        queue(c, qpu_a_MOV(qpu_tlbc(), src[0]));
                        if (discard) {
                                set_last_cond_add(c, QPU_COND_ZS);
                        }
                        break;

                case QOP_VARY_ADD_C:
                        queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
                        break;

                case QOP_TEX_S:
                case QOP_TEX_T:
                case QOP_TEX_R:
                case QOP_TEX_B:
                        queue(c, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
                                                  (qinst->op - QOP_TEX_S)),
                                           src[0]));
                        break;

                case QOP_TEX_DIRECT:
                        fixup_raddr_conflict(c, dst, &src[0], &src[1]);
                        queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1]));
                        break;

                case QOP_TEX_RESULT:
                        queue(c, qpu_NOP());
                        *last_inst(c) = qpu_set_sig(*last_inst(c),
                                                    QPU_SIG_LOAD_TMU0);
                        if (dst.mux != QPU_MUX_R4)
                                queue(c, qpu_a_MOV(dst, qpu_r4()));
                        break;

                case QOP_UNPACK_8A_F:
                case QOP_UNPACK_8B_F:
                case QOP_UNPACK_8C_F:
                case QOP_UNPACK_8D_F:
                case QOP_UNPACK_16A_F:
                case QOP_UNPACK_16B_F:
                        {
                                if (src[0].mux == QPU_MUX_R4) {
                                        queue(c, qpu_a_MOV(dst, src[0]));
                                        *last_inst(c) |= QPU_PM;
                                        *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
                                                                       (qinst->op -
                                                                        QOP_UNPACK_8A_F),
                                                                       QPU_UNPACK);
                                } else {
                                        assert(src[0].mux == QPU_MUX_A);

                                        /* Since we're setting the pack bits, if the
                                         * destination is in A it would get re-packed.
                                         */
                                        queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
                                                             qpu_rb(31) : dst),
                                                            src[0], src[0]));
                                        *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
                                                                                  QOP_UNPACK_8A_F],
                                                                       QPU_UNPACK);

                                        if (dst.mux == QPU_MUX_A) {
                                                queue(c, qpu_a_MOV(dst, qpu_rb(31)));
                                        }
                                }
                        }
                        break;

                case QOP_UNPACK_8A_I:
                case QOP_UNPACK_8B_I:
                case QOP_UNPACK_8C_I:
                case QOP_UNPACK_8D_I:
                case QOP_UNPACK_16A_I:
                case QOP_UNPACK_16B_I:
                        {
                                assert(src[0].mux == QPU_MUX_A);

                                /* Since we're setting the pack bits, if the
                                 * destination is in A it would get re-packed.
                                 */
                                queue(c, qpu_a_MOV((dst.mux == QPU_MUX_A ?
                                                    qpu_rb(31) : dst), src[0]));
                                *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
                                                                          QOP_UNPACK_8A_I],
                                                               QPU_UNPACK);

                                if (dst.mux == QPU_MUX_A) {
                                        queue(c, qpu_a_MOV(dst, qpu_rb(31)));
                                }
                        }
                        break;

                default:
                        assert(qinst->op < ARRAY_SIZE(translate));
                        assert(translate[qinst->op].op != 0); /* NOPs */

                        /* If we have only one source, put it in the second
                         * argument slot as well so that we don't take up
                         * another raddr just to get unused data.
                         */
                        if (qir_get_op_nsrc(qinst->op) == 1)
                                src[1] = src[0];

                        fixup_raddr_conflict(c, dst, &src[0], &src[1]);

                        if (qir_is_mul(qinst)) {
                                queue(c, qpu_m_alu2(translate[qinst->op].op,
                                                    dst,
                                                    src[0], src[1]));
                                if (qinst->dst.pack) {
                                        *last_inst(c) |= QPU_PM;
                                        *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
                                                                       QPU_PACK);
                                }
                        } else {
                                queue(c, qpu_a_alu2(translate[qinst->op].op,
                                                    dst,
                                                    src[0], src[1]));
                                if (qinst->dst.pack) {
                                        assert(dst.mux == QPU_MUX_A);
                                        *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
                                                                       QPU_PACK);
                                }
                        }

                        break;
                }

                /* The set-flags bit goes on the last instruction queued for
                 * this QIR instruction, so it must have been a single one.
                 */
                if (qinst->sf) {
                        assert(!qir_is_multi_instruction(qinst));
                        *last_inst(c) |= QPU_SF;
                }
        }

        qpu_schedule_instructions(c);

        /* thread end can't have VPM write or read */
        if (QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_WADDR_ADD) == QPU_W_VPM ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_WADDR_MUL) == QPU_W_VPM ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_A) == QPU_R_VPM ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_B) == QPU_R_VPM) {
                qpu_serialize_one_inst(c, qpu_NOP());
        }

        /* thread end can't have uniform read */
        if (QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_A) == QPU_R_UNIF ||
            QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
                          QPU_RADDR_B) == QPU_R_UNIF) {
                qpu_serialize_one_inst(c, qpu_NOP());
        }

        /* thread end can't have TLB operations */
        if (qpu_inst_is_tlb(c->qpu_insts[c->qpu_inst_count - 1]))
                qpu_serialize_one_inst(c, qpu_NOP());

        /* Mark the current last instruction as the program end, then append
         * two NOPs after it.
         */
        c->qpu_insts[c->qpu_inst_count - 1] =
                qpu_set_sig(c->qpu_insts[c->qpu_inst_count - 1],
                            QPU_SIG_PROG_END);
        qpu_serialize_one_inst(c, qpu_NOP());
        qpu_serialize_one_inst(c, qpu_NOP());

        switch (c->stage) {
        case QSTAGE_VERT:
        case QSTAGE_COORD:
                break;
        case QSTAGE_FRAG:
                /* Fragment programs carry the scoreboard unlock signal on
                 * their final instruction.
                 */
                c->qpu_insts[c->qpu_inst_count - 1] =
                        qpu_set_sig(c->qpu_insts[c->qpu_inst_count - 1],
                                    QPU_SIG_SCOREBOARD_UNLOCK);
                break;
        }

        if (vc4_debug & VC4_DEBUG_QPU)
                vc4_dump_program(c);

        vc4_qpu_validate(c->qpu_insts, c->qpu_inst_count);

        free(temp_registers);
}
int main(void) { std::vector<float> h_a(LENGTH); // a vector std::vector<float> h_b(LENGTH); // b vector std::vector<float> h_c (LENGTH, 0xdeadbeef); // c = a + b, from compute device cl::Buffer d_a; // device memory used for the input a vector cl::Buffer d_b; // device memory used for the input b vector cl::Buffer d_c; // device memory used for the output c vector // Fill vectors a and b with random float values int count = LENGTH; for(int i = 0; i < count; i++) { h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX; } try { // Create a context cl::Context context(DEVICE); // Load in kernel source, creating a program object for the context cl::Program program(context, util::loadProgram("vadd.cl"), true); // Get the command queue cl::CommandQueue queue(context); // Create the kernel functor auto vadd = cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int>(program, "vadd"); d_a = cl::Buffer(context, begin(h_a), end(h_a), true); d_b = cl::Buffer(context, begin(h_b), end(h_b), true); d_c = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH); util::Timer timer; vadd( cl::EnqueueArgs( queue, cl::NDRange(count)), d_a, d_b, d_c, count); queue.finish(); double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0; printf("\nThe kernels ran in %lf seconds\n", rtime); cl::copy(queue, d_c, begin(h_c), end(h_c)); // Test the results int correct = 0; float tmp; for(int i = 0; i < count; i++) { tmp = h_a[i] + h_b[i]; // expected value for d_c[i] tmp -= h_c[i]; // compute errors if(tmp*tmp < TOL*TOL) { // correct if square deviation is less correct++; // than tolerance squared } else { printf( " tmp %f h_a %f h_b %f h_c %f \n", tmp, h_a[i], h_b[i], h_c[i]); } } // summarize results printf( "vector add to find C = A+B: %d out of %d results were correct.\n", correct, count); } catch (cl::Error err) { std::cout << "Exception\n"; std::cerr << "ERROR: " << err.what() << "(" << err_code(err.err()) << ")" << std::endl; } }
std::unique_ptr<RenderQueue> BREW::CreateSpinButtonDrawable( std::shared_ptr<const SpinButton> spinbutton ) const { auto border_color = GetProperty<sf::Color>( "BorderColor", spinbutton ); auto background_color = GetProperty<sf::Color>( "BackgroundColor", spinbutton ); auto text_color = GetProperty<sf::Color>( "Color", spinbutton ); auto cursor_color = GetProperty<sf::Color>( "Color", spinbutton ); auto text_padding = GetProperty<float>( "Padding", spinbutton ); auto cursor_thickness = GetProperty<float>( "Thickness", spinbutton ); auto border_width = GetProperty<float>( "BorderWidth", spinbutton ); auto border_color_shift = GetProperty<int>( "BorderColorShift", spinbutton ); const auto& font_name = GetProperty<std::string>( "FontName", spinbutton ); const auto& font = GetResourceManager().GetFont( font_name ); auto font_size = GetProperty<unsigned int>( "FontSize", spinbutton ); auto stepper_aspect_ratio = GetProperty<float>( "StepperAspectRatio", spinbutton ); auto stepper_color = GetProperty<sf::Color>( "StepperBackgroundColor", spinbutton ); auto stepper_border_color = GetProperty<sf::Color>( "BorderColor", spinbutton ); auto stepper_arrow_color = GetProperty<sf::Color>( "StepperArrowColor", spinbutton ); std::unique_ptr<RenderQueue> queue( new RenderQueue ); // Pane. queue->Add( Renderer::Get().CreatePane( sf::Vector2f( 0.f, 0.f ), sf::Vector2f( spinbutton->GetAllocation().width, spinbutton->GetAllocation().height ), border_width, background_color, border_color, -border_color_shift ) ); auto button_width = ( spinbutton->GetAllocation().height / 2.f ) * stepper_aspect_ratio; // Up Stepper. queue->Add( Renderer::Get().CreatePane( sf::Vector2f( spinbutton->GetAllocation().width - button_width - border_width, border_width ), sf::Vector2f( button_width, spinbutton->GetAllocation().height / 2.f - border_width ), border_width, stepper_color, stepper_border_color, spinbutton->IsIncreaseStepperPressed() ? 
-border_color_shift : border_color_shift ) ); // Up Stepper Triangle. queue->Add( Renderer::Get().CreateTriangle( sf::Vector2f( spinbutton->GetAllocation().width - button_width / 2.f - border_width, ( spinbutton->IsIncreaseStepperPressed() ? 1.f : 0.f ) + border_width + spinbutton->GetAllocation().height / 6.f ), sf::Vector2f( spinbutton->GetAllocation().width - button_width / 4.f * 3.f - border_width, ( spinbutton->IsIncreaseStepperPressed() ? 1.f : 0.f ) + border_width + spinbutton->GetAllocation().height / 3.f ), sf::Vector2f( spinbutton->GetAllocation().width - button_width / 4.f - border_width, ( spinbutton->IsIncreaseStepperPressed() ? 1.f : 0.f ) + border_width + spinbutton->GetAllocation().height / 3.f ), stepper_arrow_color ) ); // Down Stepper. queue->Add( Renderer::Get().CreatePane( sf::Vector2f( spinbutton->GetAllocation().width - button_width - border_width, spinbutton->GetAllocation().height / 2.f ), sf::Vector2f( button_width, spinbutton->GetAllocation().height / 2.f - border_width ), border_width, stepper_color, stepper_border_color, spinbutton->IsDecreaseStepperPressed() ? -border_color_shift : border_color_shift ) ); // Down Stepper Triangle. queue->Add( Renderer::Get().CreateTriangle( sf::Vector2f( spinbutton->GetAllocation().width - button_width / 2.f - border_width, ( spinbutton->IsDecreaseStepperPressed() ? 1.f : 0.f ) + spinbutton->GetAllocation().height - border_width - spinbutton->GetAllocation().height / 6.f ), sf::Vector2f( spinbutton->GetAllocation().width - button_width / 4.f - border_width, ( spinbutton->IsDecreaseStepperPressed() ? 1.f : 0.f ) + spinbutton->GetAllocation().height - border_width - spinbutton->GetAllocation().height / 3.f ), sf::Vector2f( spinbutton->GetAllocation().width - button_width / 4.f * 3.f - border_width, ( spinbutton->IsDecreaseStepperPressed() ? 
1.f : 0.f ) + spinbutton->GetAllocation().height - border_width - spinbutton->GetAllocation().height / 3.f ), stepper_arrow_color ) ); auto line_height = GetFontLineHeight( *font, font_size ); sf::Text vis_label( spinbutton->GetVisibleText(), *font, font_size ); vis_label.setFillColor( text_color ); vis_label.setPosition( text_padding, spinbutton->GetAllocation().height / 2.f - line_height / 2.f ); queue->Add( Renderer::Get().CreateText( vis_label ) ); // Draw cursor if spinbutton is active and cursor is visible. if( spinbutton->HasFocus() && spinbutton->IsCursorVisible() ) { sf::String cursor_string( spinbutton->GetVisibleText() ); if( spinbutton->GetCursorPosition() - spinbutton->GetVisibleOffset() < static_cast<int>( cursor_string.getSize() ) ) { cursor_string.erase( static_cast<std::size_t>( spinbutton->GetCursorPosition() - spinbutton->GetVisibleOffset() ), cursor_string.getSize() ); } // Get metrics. sf::Vector2f metrics( GetTextStringMetrics( cursor_string, *font, font_size ) ); queue->Add( Renderer::Get().CreateRect( sf::FloatRect( metrics.x + text_padding, spinbutton->GetAllocation().height / 2.f - line_height / 2.f, cursor_thickness, line_height ), cursor_color ) ); } return queue; }
// Empties the container returned by queue() and always returns false.
// NOTE(review): what the false result means to the caller is not visible
// here -- confirm against the interface this implements.
bool execute()
{
    queue().clear();
    return false;
}
// Reset handler: calls update_serial(0), then queue().
// NOTE(review): queue() presumably starts sending whatever is pending --
// confirm against its definition.
void null_modem_device::device_reset()
{
    update_serial(0);
    queue();
}
// Transmit-complete handler: simply calls queue() again.
// NOTE(review): presumably this lines up the next item to send -- confirm
// against the definition of queue().
void null_modem_device::tra_complete()
{
    queue();
}
// Runs the worker loop of the io_looper_task_queue this worker is attached to.
// NOTE(review): the result of the dynamic_cast is used unchecked, so queue()
// is expected to always be an io_looper_task_queue here.
void io_looper_task_worker::loop()
{
    dynamic_cast<io_looper_task_queue*>(queue())->loop_worker();
}
// Constructs the worker through task_worker and immediately starts the
// io_looper_task_queue it is attached to, passing start(nullptr, 0).
// NOTE(review): the result of the dynamic_cast is used unchecked, so q is
// expected to always be an io_looper_task_queue here.
io_looper_task_worker::io_looper_task_worker(
    task_worker_pool* pool,
    task_queue* q,
    int index,
    task_worker* inner_provider
    )
    : task_worker(pool, q, index, inner_provider)
{
    dynamic_cast<io_looper_task_queue*>(queue())->start(nullptr, 0);
}