static void
clear_counters (perfmon_main_t * pm)
{
  int i, j;
  vlib_main_t *vm = pm->vlib_main;
  vlib_main_t *stat_vm;
  vlib_node_main_t *nm;
  vlib_node_t *n;

  vlib_worker_thread_barrier_sync (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
	continue;

      nm = &stat_vm->node_main;

      /*
       * Fold the per-runtime perfmon counters into the nodes;
       * this also clears the runtime copies
       */
      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  vlib_node_sync_stats (stat_vm, n);
	}

      /* And clear the node perfmon counters */
      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  n->stats_total.perf_counter0_ticks = 0;
	  n->stats_total.perf_counter1_ticks = 0;
	  n->stats_total.perf_counter_vectors = 0;
	  n->stats_last_clear.perf_counter0_ticks = 0;
	  n->stats_last_clear.perf_counter1_ticks = 0;
	  n->stats_last_clear.perf_counter_vectors = 0;
	}
    }
  vlib_worker_thread_barrier_release (vm);
}
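/*
 * A minimal sketch of how clear_counters () might be wired to a debug
 * CLI.  The "clear pmc" path, the handler name, and the use of a
 * perfmon_main global instance are illustrative assumptions, not the
 * plugin's actual CLI registration.
 */
static clib_error_t *
clear_pmc_sketch_fn (vlib_main_t * vm, unformat_input_t * input,
		     vlib_cli_command_t * cmd)
{
  perfmon_main_t *pm = &perfmon_main;	/* assumed global instance */

  clear_counters (pm);
  return 0;
}

/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_pmc_sketch, static) =
{
  .path = "clear pmc",
  .short_help = "clear performance monitor counters",
  .function = clear_pmc_sketch_fn,
};
/* *INDENT-ON* */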
static int
nsim_configure (nsim_main_t * nsm, f64 bandwidth, f64 delay,
		f64 packet_size, f64 drop_fraction)
{
  u64 total_buffer_size_in_bytes, per_worker_buffer_size;
  u64 wheel_slots_per_worker;
  int i;
  int num_workers = vlib_num_workers ();
  u32 pagesize = getpagesize ();
  vlib_main_t *vm = nsm->vlib_main;

  if (bandwidth == 0.0)
    return VNET_API_ERROR_INVALID_VALUE;

  if (delay == 0.0)
    return VNET_API_ERROR_INVALID_VALUE_2;

  if (packet_size < 64.0 || packet_size > 9000.0)
    return VNET_API_ERROR_INVALID_VALUE_3;

  /* Toss the old wheel(s)... */
  if (nsm->is_configured)
    {
      for (i = 0; i < vec_len (nsm->wheel_by_thread); i++)
	{
	  nsim_wheel_t *wp = nsm->wheel_by_thread[i];
	  munmap (wp, nsm->mmap_size);
	  nsm->wheel_by_thread[i] = 0;
	}
    }

  nsm->delay = delay;
  nsm->drop_fraction = drop_fraction;

  /*
   * Delay in seconds, bandwidth in bits/sec.  Round before truncating,
   * and keep the intermediate in 64 bits: the original (u32) cast both
   * dropped the intended +0.5 rounding and could overflow on large
   * bandwidth-delay products.
   */
  total_buffer_size_in_bytes = (u64) (((delay * bandwidth) / 8.0) + 0.5);

  /*
   * Work out how much buffering each worker needs, assuming decent
   * RSS behavior.
   */
  if (num_workers)
    per_worker_buffer_size = total_buffer_size_in_bytes / num_workers;
  else
    per_worker_buffer_size = total_buffer_size_in_bytes;

  wheel_slots_per_worker = per_worker_buffer_size / packet_size;
  wheel_slots_per_worker++;	/* round up */

  /* Save these for the show command */
  nsm->bandwidth = bandwidth;
  nsm->packet_size = packet_size;

  vec_validate (nsm->wheel_by_thread, num_workers);

  /* Initialize the output scheduler wheels */
  for (i = num_workers ? 1 : 0; i < num_workers + 1; i++)
    {
      nsim_wheel_t *wp;

      nsm->mmap_size = sizeof (nsim_wheel_t)
	+ wheel_slots_per_worker * sizeof (nsim_wheel_entry_t);

      /* Round the allocation up to a whole number of pages */
      nsm->mmap_size += pagesize - 1;
      nsm->mmap_size &= ~(pagesize - 1);

      wp = clib_mem_vm_alloc (nsm->mmap_size);
      ASSERT (wp != 0);
      wp->wheel_size = wheel_slots_per_worker;
      wp->cursize = 0;
      wp->head = 0;
      wp->tail = 0;
      wp->entries = (void *) (wp + 1);
      nsm->wheel_by_thread[i] = wp;
    }

  vlib_worker_thread_barrier_sync (vm);

  /* turn on the ring scrapers */
  for (i = num_workers ? 1 : 0; i < num_workers + 1; i++)
    {
      vlib_main_t *this_vm = vlib_mains[i];

      vlib_node_set_state (this_vm, nsim_input_node.index,
			   VLIB_NODE_STATE_POLLING);
    }
  vlib_worker_thread_barrier_release (vm);

  nsm->is_configured = 1;
  return 0;
}
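/*
 * A minimal sketch of driving nsim_configure () with concrete numbers,
 * e.g. from a "set nsim ..."-style handler.  The values illustrate the
 * sizing arithmetic above: 1 Gbit/s with 10 ms of delay implies
 * (0.01 * 1e9) / 8 = 1.25e6 bytes of buffering, so with one worker and
 * 1500-byte packets the wheel gets 1.25e6 / 1500 + 1 = 834 slots.
 * The nsim_main global and this wrapper are assumptions for
 * illustration, not the plugin's actual configuration path.
 */
static clib_error_t *
nsim_configure_sketch (vlib_main_t * vm)
{
  nsim_main_t *nsm = &nsim_main;	/* assumed global instance */
  int rv;

  /* 1 Gbit/s link, 10 ms one-way delay, 1500-byte packets, no drops */
  rv = nsim_configure (nsm, 1e9 /* bandwidth */ , 10e-3 /* delay */ ,
		       1500.0 /* packet_size */ , 0.0 /* drop_fraction */ );
  if (rv)
    return clib_error_return (0, "nsim_configure returned %d", rv);
  return 0;
}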
void
scrape_and_clear_counters (perfmon_main_t * pm)
{
  int i, j, k;
  vlib_main_t *vm = pm->vlib_main;
  vlib_main_t *stat_vm;
  vlib_node_main_t *nm;
  vlib_node_t ***node_dups = 0;
  vlib_node_t **nodes;
  vlib_node_t *n;
  perfmon_capture_t *c;
  perfmon_event_config_t *current_event;
  uword *p;
  u8 *counter_name;
  u64 vectors_this_counter;

  /* Snapshot the nodes, including pm counters */
  vlib_worker_thread_barrier_sync (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
	continue;

      nm = &stat_vm->node_main;

      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  vlib_node_sync_stats (stat_vm, n);
	}

      nodes = 0;
      vec_validate (nodes, vec_len (nm->nodes) - 1);
      vec_add1 (node_dups, nodes);

      /* Snapshot and clear the per-node perfmon counters */
      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  nodes[i] = clib_mem_alloc (sizeof (*n));
	  clib_memcpy_fast (nodes[i], n, sizeof (*n));
	  n->stats_total.perf_counter0_ticks = 0;
	  n->stats_total.perf_counter1_ticks = 0;
	  n->stats_total.perf_counter_vectors = 0;
	  n->stats_last_clear.perf_counter0_ticks = 0;
	  n->stats_last_clear.perf_counter1_ticks = 0;
	  n->stats_last_clear.perf_counter_vectors = 0;
	}
    }
  vlib_worker_thread_barrier_release (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
	continue;

      nodes = node_dups[j];

      for (i = 0; i < vec_len (nodes); i++)
	{
	  u8 *capture_name;

	  n = nodes[i];

	  if (n->stats_total.perf_counter0_ticks == 0 &&
	      n->stats_total.perf_counter1_ticks == 0)
	    goto skip_this_node;

	  for (k = 0; k < 2; k++)
	    {
	      u64 counter_value, counter_last_clear;

	      /*
	       * We collect 2 counters at once, except for the
	       * last counter when the user asks for an odd number of
	       * counters
	       */
	      if ((pm->current_event + k)
		  >= vec_len (pm->single_events_to_collect))
		break;

	      if (k == 0)
		{
		  counter_value = n->stats_total.perf_counter0_ticks;
		  counter_last_clear =
		    n->stats_last_clear.perf_counter0_ticks;
		}
	      else
		{
		  counter_value = n->stats_total.perf_counter1_ticks;
		  counter_last_clear =
		    n->stats_last_clear.perf_counter1_ticks;
		}

	      capture_name = format (0, "t%d-%v%c", j, n->name, 0);

	      p = hash_get_mem (pm->capture_by_thread_and_node_name,
				capture_name);
	      if (p == 0)
		{
		  pool_get (pm->capture_pool, c);
		  memset (c, 0, sizeof (*c));
		  c->thread_and_node_name = capture_name;
		  hash_set_mem (pm->capture_by_thread_and_node_name,
				capture_name, c - pm->capture_pool);
		}
	      else
		{
		  c = pool_elt_at_index (pm->capture_pool, p[0]);
		  vec_free (capture_name);
		}

	      /* Snapshot counters, etc. into the capture */
	      current_event = pm->single_events_to_collect
		+ pm->current_event + k;
	      counter_name = (u8 *) current_event->name;
	      vectors_this_counter = n->stats_total.perf_counter_vectors -
		n->stats_last_clear.perf_counter_vectors;

	      vec_add1 (c->counter_names, counter_name);
	      vec_add1 (c->counter_values,
			counter_value - counter_last_clear);
	      vec_add1 (c->vectors_this_counter, vectors_this_counter);
	    }

	skip_this_node:
	  clib_mem_free (n);
	}
      vec_free (nodes);
    }
  vec_free (node_dups);
}
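/*
 * A minimal sketch of consuming the capture pool built above, e.g. from
 * a "show pmc"-style handler.  It assumes the old-style three-argument
 * pool_foreach of this code's vintage and derives ticks/packet from the
 * fields populated by scrape_and_clear_counters (); the handler name and
 * output layout are illustrative only.
 */
static void
dump_captures_sketch (vlib_main_t * vm, perfmon_main_t * pm)
{
  perfmon_capture_t *c;
  int i;

  /* *INDENT-OFF* */
  pool_foreach (c, pm->capture_pool,
  ({
    vlib_cli_output (vm, "%v:", c->thread_and_node_name);
    for (i = 0; i < vec_len (c->counter_names); i++)
      {
	f64 ticks_per_pkt = 0.0;

	/* Guard against captures that saw no vectors */
	if (c->vectors_this_counter[i])
	  ticks_per_pkt = (f64) c->counter_values[i]
	    / (f64) c->vectors_this_counter[i];

	vlib_cli_output (vm, "  %s: %llu ticks, %.2f ticks/pkt",
			 c->counter_names[i], c->counter_values[i],
			 ticks_per_pkt);
      }
  }));
  /* *INDENT-ON* */
}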