/**
 * Rasterize/execute all bins within a scene.
 * Called per thread.
 */
static void
rasterize_scene(struct lp_rasterizer_task *task,
                struct lp_scene *scene)
{
   task->scene = scene;

   if (!task->rast->no_rast && !scene->discard) {
      /* loop over scene bins, rasterize each */
      {
         struct cmd_bin *bin;
         int i, j;

         assert(scene);
         while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
            if (!is_empty_bin( bin ))
               rasterize_bin(task, bin, i, j);
         }
      }
   }

   if (scene->fence) {
      lp_fence_signal(scene->fence);
   }

   task->scene = NULL;
}
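
/*
 * A minimal sketch of the per-scene completion fence signalled above --
 * hypothetical, not Mesa's actual lp_fence implementation.  Each rasterizer
 * thread calls sketch_fence_signal() once when it finishes its share of a
 * scene; a waiter blocks until "count" reaches "rank" (the number of threads
 * the fence was created for).  All sketch_* names are assumptions.
 */
#include <pthread.h>

struct sketch_fence {
   pthread_mutex_t mutex;       /* init with pthread_mutex_init() */
   pthread_cond_t signalled;    /* init with pthread_cond_init() */
   unsigned count;              /* signals received so far */
   unsigned rank;               /* signals required for completion */
};

static void
sketch_fence_signal(struct sketch_fence *fence)
{
   pthread_mutex_lock(&fence->mutex);
   fence->count++;
   if (fence->count >= fence->rank)
      pthread_cond_broadcast(&fence->signalled);
   pthread_mutex_unlock(&fence->mutex);
}

static void
sketch_fence_wait(struct sketch_fence *fence)
{
   pthread_mutex_lock(&fence->mutex);
   while (fence->count < fence->rank)
      pthread_cond_wait(&fence->signalled, &fence->mutex);
   pthread_mutex_unlock(&fence->mutex);
}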
/**
 * Rasterize/execute all bins within a scene.
 * Called per thread.
 */
static void
rasterize_scene(struct lp_rasterizer_task *task,
                struct lp_scene *scene)
{
   task->scene = scene;

   /* loop over scene bins, rasterize each */
#if 0
   {
      unsigned i, j;
      for (i = 0; i < scene->tiles_x; i++) {
         for (j = 0; j < scene->tiles_y; j++) {
            struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
            rasterize_bin(task, bin, i, j);
         }
      }
   }
#else
   {
      struct cmd_bin *bin;

      assert(scene);
      while ((bin = lp_scene_bin_iter_next(scene))) {
         if (!is_empty_bin( bin ))
            rasterize_bin(task, bin);
      }
   }
#endif

   if (scene->fence) {
      lp_fence_signal(scene->fence);
   }

   task->scene = NULL;
}
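
/*
 * A sketch of the dynamic bin iterator the #else branch above relies on,
 * under the assumption that lp_scene_bin_iter_next() hands each calling
 * thread the next unclaimed bin.  Unlike the disabled nested loop, which
 * would make every thread walk all tiles_x * tiles_y bins, an atomic counter
 * load-balances scenes whose bins carry uneven amounts of work.  The struct
 * layout and sketch_* names are illustrative, not the Mesa code.
 */
#include <stdatomic.h>
#include <stddef.h>

struct sketch_cmd_bin { void *head; };       /* stand-in for struct cmd_bin */

struct sketch_scene {
   unsigned tiles_x, tiles_y;
   atomic_uint curr_bin;                      /* reset to 0 for each scene */
   struct sketch_cmd_bin *bins;               /* tiles_x * tiles_y, row-major */
};

static struct sketch_cmd_bin *
sketch_bin_iter_next(struct sketch_scene *scene)
{
   /* fetch_add hands every caller a distinct bin index */
   unsigned idx = atomic_fetch_add(&scene->curr_bin, 1);
   if (idx >= scene->tiles_x * scene->tiles_y)
      return NULL;                            /* all bins claimed */
   return &scene->bins[idx];
}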
/**
 * Rasterize/execute all bins within a scene.
 * Called per thread.
 */
static void
rasterize_scene(struct lp_rasterizer_task *task,
                struct lp_scene *scene)
{
   task->scene = scene;

   /* Clear the cache tags. This should not always be necessary but
      simpler for now. */
#if LP_USE_TEXTURE_CACHE
   memset(task->thread_data.cache->cache_tags, 0,
          sizeof(task->thread_data.cache->cache_tags));
#if LP_BUILD_FORMAT_CACHE_DEBUG
   task->thread_data.cache->cache_access_total = 0;
   task->thread_data.cache->cache_access_miss = 0;
#endif
#endif

   if (!task->rast->no_rast && !scene->discard) {
      /* loop over scene bins, rasterize each */
      {
         struct cmd_bin *bin;
         int i, j;

         assert(scene);
         while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
            if (!is_empty_bin( bin ))
               rasterize_bin(task, bin, i, j);
         }
      }
   }

#if LP_BUILD_FORMAT_CACHE_DEBUG
   {
      uint64_t total, miss;
      total = task->thread_data.cache->cache_access_total;
      miss = task->thread_data.cache->cache_access_miss;
      if (total) {
         debug_printf("thread %d cache access %llu miss %llu hit rate %f\n",
                      task->thread_index,
                      (long long unsigned)total,
                      (long long unsigned)miss,
                      (float)(total - miss)/(float)total);
      }
   }
#endif

   if (scene->fence) {
      lp_fence_signal(scene->fence);
   }

   task->scene = NULL;
}
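
/*
 * A sketch of the direct-mapped texel cache that the cache_tags array above
 * indexes -- an illustration under assumed names (sketch_*), not the actual
 * LP_USE_TEXTURE_CACHE implementation.  It shows why the memset() acts as an
 * invalidation: once every tag is zeroed, the first lookup in each slot
 * mismatches and refills, and the two debug counters yield the hit rate
 * printed above.
 */
#include <stdint.h>

#define SKETCH_CACHE_SIZE 64   /* entries, power of two */

struct sketch_cache {
   uint64_t cache_tags[SKETCH_CACHE_SIZE];
   uint32_t cache_data[SKETCH_CACHE_SIZE];
   uint64_t cache_access_total;
   uint64_t cache_access_miss;
};

static uint32_t
sketch_cache_lookup(struct sketch_cache *cache, uint64_t tag,
                    uint32_t (*decode)(uint64_t tag))
{
   unsigned slot = (unsigned)(tag % SKETCH_CACHE_SIZE);

   cache->cache_access_total++;
   if (cache->cache_tags[slot] != tag) {
      /* miss: decode the block and refill this slot */
      cache->cache_access_miss++;
      cache->cache_tags[slot] = tag;
      cache->cache_data[slot] = decode(tag);
   }
   return cache->cache_data[slot];
}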
/**
 * Try to allocate an entire bin. Flows that are not entirely allocated are
 * written out to the corresponding outgoing_bin for this core.
 */
static void try_allocation_bin(struct bin *in_bin,
                               struct admission_core_state *core,
                               struct admissible_status *status)
{
    int rc;
    while (!is_empty_bin(in_bin)) {
        struct backlog_edge *edge = peek_head_bin(in_bin);
        assert(edge != NULL);
        uint16_t src = edge->src;
        uint16_t dst = edge->dst;

        rc = try_allocation(src, dst, core, status);
        if (rc == 1) {
            // There is remaining backlog in this flow
            uint16_t bin_index = bin_index_from_src_dst(status, src, dst);
            enqueue_bin(core->outgoing_bins[bin_index], src, dst);
        }

        dequeue_bin(in_bin);
    }
}
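
/*
 * A sketch of the bin FIFO that try_allocation_bin() drains, assuming the
 * head/tail layout implied by the calls above: peek_head_bin() reads without
 * removing, dequeue_bin() advances head, enqueue_bin() appends at tail, and
 * an empty bin has head == tail (consistent with the head == 0 asserts
 * below).  The capacity and exact field layout are assumptions, not the
 * actual Fastpass definitions.
 */
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_BIN_MAX_EDGES 4096

struct sketch_backlog_edge {
    uint16_t src;
    uint16_t dst;
};

struct sketch_bin {
    uint32_t head;    /* index of next edge to dequeue */
    uint32_t tail;    /* index of next free slot */
    struct sketch_backlog_edge edges[SKETCH_BIN_MAX_EDGES];
};

static inline bool sketch_is_empty_bin(struct sketch_bin *bin)
{
    return bin->head == bin->tail;
}

static inline struct sketch_backlog_edge *
sketch_peek_head_bin(struct sketch_bin *bin)
{
    return &bin->edges[bin->head];
}

static inline void sketch_dequeue_bin(struct sketch_bin *bin)
{
    bin->head++;
}

static inline void
sketch_enqueue_bin(struct sketch_bin *bin, uint16_t src, uint16_t dst)
{
    bin->edges[bin->tail].src = src;
    bin->edges[bin->tail].dst = dst;
    bin->tail++;
}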
// Determine admissible traffic for one timeslot from queue_in
// Puts unallocated traffic in queue_out
// Allocate BATCH_SIZE timeslots at once
void get_admissible_traffic(struct admission_core_state *core,
                            struct admissible_status *status,
                            struct admitted_traffic **admitted,
                            uint64_t first_timeslot, uint32_t tslot_mul,
                            uint32_t tslot_shift)
{
    assert(status != NULL);

    // TODO: use multiple cores
    struct fp_ring *queue_in = core->q_bin_in;
    struct fp_ring *queue_out = core->q_bin_out;

    // Initialize this core for a new batch of processing
    alloc_core_reset(core, status, admitted);

    struct bin *bin_in;
    uint16_t bin;
    uint16_t i;

    for (i = 0; i < NUM_BINS; i++) {
        struct bin *out_bin = core->outgoing_bins[i];
        assert(is_empty_bin(out_bin) && (out_bin->head == 0));
        struct bin *new_request_bin = core->new_request_bins[i];
        assert(is_empty_bin(new_request_bin) && (new_request_bin->head == 0));
    }

    process_new_requests(status, core, 0);

    for (bin = 0; bin < NUM_BINS; bin++) {
        /* process new requests until bin_in arrives */
        while (fp_ring_dequeue(queue_in, (void **)&bin_in) != 0) {
            process_new_requests(status, core, bin);
            status->stat.wait_for_q_bin_in++;
        }

        try_allocation_bin(bin_in, core, status);

        // process new requests of this size
        try_allocation_bin(core->new_request_bins[bin], core, status);

        // pass outgoing bin along to next core
        if (bin >= BATCH_SIZE) {
            while (fp_ring_enqueue(queue_out,
                    core->outgoing_bins[bin - BATCH_SIZE]) == -ENOBUFS)
                status->stat.wait_for_space_in_q_bin_out++;
            core->outgoing_bins[bin - BATCH_SIZE] = NULL;
        }

        // store bin_in in temporary_bins
        core->temporary_bins[bin] = bin_in;
    }

    /* Output admitted traffic, but continue to process new requests until
     * time to output */
    for (bin = 0; bin < BATCH_SIZE; bin++) {
        /* wait for start time */
        if (bin % 4 == 0) {
            uint64_t start_timeslot = first_timeslot + bin;
            uint64_t now_timeslot;

            do {
                /* process requests */
                process_new_requests(status, core, bin);
                /* at least until the next core finishes allocating */
                /* and we reach the start time */
                now_timeslot = (fp_get_time_ns() * tslot_mul) >> tslot_shift;
                status->stat.pacing_wait++;
            } while (!core->is_head || (now_timeslot < start_timeslot));
        }

        /* enqueue the allocated traffic for this timeslot */
        while (fp_ring_enqueue(status->q_admitted_out,
                core->admitted[bin]) == -ENOBUFS)
            status->stat.wait_for_space_in_q_admitted_out++;

        /* disallow further allocations to that timeslot */
        core->batch_state.allowed_mask <<= 1;
    }
}
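
/*
 * A sketch of how the tslot_mul / tslot_shift pair used in the pacing loop
 * can be derived.  now_timeslot = (now_ns * mul) >> shift approximates
 * now_ns / tslot_len_ns without a hardware divide.  The helper name and the
 * fixed 32-bit shift are assumptions for illustration, not the Fastpass
 * code.
 */
#include <stdint.h>

static void
sketch_tslot_params(uint64_t tslot_len_ns, uint32_t *tslot_mul,
                    uint32_t *tslot_shift)
{
    /* with shift = 32, mul = floor(2^32 / tslot_len_ns): the quotient is
     * exact for power-of-two slot lengths and slightly low otherwise */
    *tslot_shift = 32;
    *tslot_mul = (uint32_t)(((uint64_t)1 << 32) / tslot_len_ns);
}

/*
 * Usage, e.g. for 65536 ns timeslots (mul becomes 65536, shift 32):
 *
 *     uint32_t mul, shift;
 *     sketch_tslot_params(65536, &mul, &shift);
 *     uint64_t now_tslot = (fp_get_time_ns() * mul) >> shift;
 *
 * Caveat: the 64-bit product overflows once now_ns exceeds 2^64 / mul, so
 * this form suits relative timestamps rather than absolute wall-clock ns.
 */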