static int
nsim_configure (nsim_main_t * nsm, f64 bandwidth, f64 delay,
                f64 packet_size, f64 drop_fraction)
{
  u64 total_buffer_size_in_bytes, per_worker_buffer_size;
  u64 wheel_slots_per_worker;
  int i;
  int num_workers = vlib_num_workers ();
  u32 pagesize = getpagesize ();
  vlib_main_t *vm = nsm->vlib_main;

  if (bandwidth == 0.0)
    return VNET_API_ERROR_INVALID_VALUE;

  if (delay == 0.0)
    return VNET_API_ERROR_INVALID_VALUE_2;

  if (packet_size < 64.0 || packet_size > 9000.0)
    return VNET_API_ERROR_INVALID_VALUE_3;

  /* Toss the old wheel(s)... */
  if (nsm->is_configured)
    {
      for (i = 0; i < vec_len (nsm->wheel_by_thread); i++)
        {
          nsim_wheel_t *wp = nsm->wheel_by_thread[i];
          munmap (wp, nsm->mmap_size);
          nsm->wheel_by_thread[i] = 0;
        }
    }

  nsm->delay = delay;
  nsm->drop_fraction = drop_fraction;

  /* delay in seconds, bandwidth in bits/sec */
  total_buffer_size_in_bytes = (u32) ((delay * bandwidth) / 8.0) + 0.5;

  /*
   * Work out how much buffering each worker needs, assuming decent
   * RSS behavior.
   */
  if (num_workers)
    per_worker_buffer_size = total_buffer_size_in_bytes / num_workers;
  else
    per_worker_buffer_size = total_buffer_size_in_bytes;

  wheel_slots_per_worker = per_worker_buffer_size / packet_size;
  wheel_slots_per_worker++;

  /* Save these for the show command */
  nsm->bandwidth = bandwidth;
  nsm->packet_size = packet_size;

  vec_validate (nsm->wheel_by_thread, num_workers);

  /* Initialize the output scheduler wheels */
  for (i = num_workers ? 1 : 0; i < num_workers + 1; i++)
    {
      nsim_wheel_t *wp;

      nsm->mmap_size = sizeof (nsim_wheel_t)
        + wheel_slots_per_worker * sizeof (nsim_wheel_entry_t);

      /* Round the allocation up to a whole number of pages */
      nsm->mmap_size += pagesize - 1;
      nsm->mmap_size &= ~(pagesize - 1);

      wp = clib_mem_vm_alloc (nsm->mmap_size);
      ASSERT (wp != 0);
      wp->wheel_size = wheel_slots_per_worker;
      wp->cursize = 0;
      wp->head = 0;
      wp->tail = 0;
      wp->entries = (void *) (wp + 1);
      nsm->wheel_by_thread[i] = wp;
    }

  vlib_worker_thread_barrier_sync (vm);

  /* turn on the ring scrapers */
  for (i = num_workers ? 1 : 0; i < num_workers + 1; i++)
    {
      vlib_main_t *this_vm = vlib_mains[i];

      vlib_node_set_state (this_vm, nsim_input_node.index,
                           VLIB_NODE_STATE_POLLING);
    }

  vlib_worker_thread_barrier_release (vm);

  nsm->is_configured = 1;
  return 0;
}
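/*
 * For reference, the sizing above is just the bandwidth-delay product split
 * across workers.  A minimal standalone sketch of that arithmetic (plain C,
 * no VPP dependencies; wheel_slots_for() and its parameters are illustrative
 * names, not part of the plugin):
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t
wheel_slots_for (double bandwidth_bps, double delay_sec,
                 double packet_size_bytes, int num_workers)
{
  /* Buffer the full bandwidth-delay product (bits/sec * sec / 8)... */
  uint64_t total_bytes = (uint64_t) (delay_sec * bandwidth_bps / 8.0 + 0.5);
  /* ...split it across workers (assuming reasonable RSS spread)... */
  uint64_t per_worker = num_workers ? total_bytes / num_workers : total_bytes;
  /* ...and round up to whole packet-sized wheel slots, plus one. */
  return per_worker / (uint64_t) packet_size_bytes + 1;
}

int
main (void)
{
  /* 10 Gbit/s link, 10 ms delay, 1500-byte packets, 4 workers -> 2084 slots */
  printf ("slots per worker: %llu\n",
          (unsigned long long) wheel_slots_for (1e10, 10e-3, 1500.0, 4));
  return 0;
}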
void
scrape_and_clear_counters (perfmon_main_t * pm)
{
  int i, j, k;
  vlib_main_t *vm = pm->vlib_main;
  vlib_main_t *stat_vm;
  vlib_node_main_t *nm;
  vlib_node_t ***node_dups = 0;
  vlib_node_t **nodes;
  vlib_node_t *n;
  perfmon_capture_t *c;
  perfmon_event_config_t *current_event;
  uword *p;
  u8 *counter_name;
  u64 vectors_this_counter;

  /* Snapshot the nodes, including pm counters */
  vlib_worker_thread_barrier_sync (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
        continue;

      nm = &stat_vm->node_main;

      /* Pull per-runtime stats into the per-node totals */
      for (i = 0; i < vec_len (nm->nodes); i++)
        {
          n = nm->nodes[i];
          vlib_node_sync_stats (stat_vm, n);
        }

      nodes = 0;
      vec_validate (nodes, vec_len (nm->nodes) - 1);
      vec_add1 (node_dups, nodes);

      /* Snapshot and clear the per-node perfmon counters */
      for (i = 0; i < vec_len (nm->nodes); i++)
        {
          n = nm->nodes[i];
          nodes[i] = clib_mem_alloc (sizeof (*n));
          clib_memcpy_fast (nodes[i], n, sizeof (*n));
          n->stats_total.perf_counter0_ticks = 0;
          n->stats_total.perf_counter1_ticks = 0;
          n->stats_total.perf_counter_vectors = 0;
          n->stats_last_clear.perf_counter0_ticks = 0;
          n->stats_last_clear.perf_counter1_ticks = 0;
          n->stats_last_clear.perf_counter_vectors = 0;
        }
    }

  vlib_worker_thread_barrier_release (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
        continue;

      nodes = node_dups[j];

      for (i = 0; i < vec_len (nodes); i++)
        {
          u8 *capture_name;

          n = nodes[i];
          if (n->stats_total.perf_counter0_ticks == 0 &&
              n->stats_total.perf_counter1_ticks == 0)
            goto skip_this_node;

          for (k = 0; k < 2; k++)
            {
              u64 counter_value, counter_last_clear;

              /*
               * We collect 2 counters at once, except for the
               * last counter when the user asks for an odd number of
               * counters
               */
              if ((pm->current_event + k)
                  >= vec_len (pm->single_events_to_collect))
                break;

              if (k == 0)
                {
                  counter_value = n->stats_total.perf_counter0_ticks;
                  counter_last_clear =
                    n->stats_last_clear.perf_counter0_ticks;
                }
              else
                {
                  counter_value = n->stats_total.perf_counter1_ticks;
                  counter_last_clear =
                    n->stats_last_clear.perf_counter1_ticks;
                }

              capture_name = format (0, "t%d-%v%c", j, n->name, 0);

              p = hash_get_mem (pm->capture_by_thread_and_node_name,
                                capture_name);

              if (p == 0)
                {
                  pool_get (pm->capture_pool, c);
                  memset (c, 0, sizeof (*c));
                  c->thread_and_node_name = capture_name;
                  hash_set_mem (pm->capture_by_thread_and_node_name,
                                capture_name, c - pm->capture_pool);
                }
              else
                {
                  c = pool_elt_at_index (pm->capture_pool, p[0]);
                  vec_free (capture_name);
                }

              /* Snapshot counters, etc. into the capture */
              current_event = pm->single_events_to_collect
                + pm->current_event + k;
              counter_name = (u8 *) current_event->name;
              vectors_this_counter = n->stats_total.perf_counter_vectors -
                n->stats_last_clear.perf_counter_vectors;

              vec_add1 (c->counter_names, counter_name);
              vec_add1 (c->counter_values,
                        counter_value - counter_last_clear);
              vec_add1 (c->vectors_this_counter, vectors_this_counter);
            }

        skip_this_node:
          clib_mem_free (n);
        }
      vec_free (nodes);
    }
  vec_free (node_dups);
}
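/*
 * Each capture boils down to "key by thread index and node name, record the
 * delta since the last clear".  A hypothetical stripped-down version of that
 * bookkeeping (plain C, no vppinfra vectors/pools/hashes; the struct and
 * function names below are illustrative only):
 */
#include <stdint.h>
#include <stdio.h>

typedef struct
{
  uint64_t perf_counter0_ticks;
  uint64_t perf_counter1_ticks;
  uint64_t perf_counter_vectors;
} node_perf_stats_t;

static void
report_deltas (int thread_index, const char *node_name,
               node_perf_stats_t total, node_perf_stats_t last_clear)
{
  /* Same deltas the scraper records into a capture */
  uint64_t ticks0 = total.perf_counter0_ticks - last_clear.perf_counter0_ticks;
  uint64_t ticks1 = total.perf_counter1_ticks - last_clear.perf_counter1_ticks;
  uint64_t vectors =
    total.perf_counter_vectors - last_clear.perf_counter_vectors;

  /* The real code builds the key with format (0, "t%d-%v%c", j, n->name, 0) */
  printf ("t%d-%s: counter0 %llu, counter1 %llu, vectors %llu\n",
          thread_index, node_name,
          (unsigned long long) ticks0, (unsigned long long) ticks1,
          (unsigned long long) vectors);
}

int
main (void)
{
  node_perf_stats_t total = { 12000, 8000, 256 };
  node_perf_stats_t last_clear = { 2000, 1000, 56 };
  report_deltas (0, "ip4-lookup", total, last_clear);
  return 0;
}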
/* The given next-hop vector is overwritten with a normalized one: weights are
   sorted and rewritten as the number of adjacencies assigned to each next hop.
   Returns the number of adjacencies in the block. */
static u32
ip_multipath_normalize_next_hops (ip_lookup_main_t * lm,
                                  ip_multipath_next_hop_t * raw_next_hops,
                                  ip_multipath_next_hop_t ** normalized_next_hops)
{
  ip_multipath_next_hop_t *nhs;
  uword n_nhs, n_adj, n_adj_left, i;
  f64 sum_weight, norm, error;

  n_nhs = vec_len (raw_next_hops);
  ASSERT (n_nhs > 0);
  if (n_nhs == 0)
    return 0;

  /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
  nhs = *normalized_next_hops;
  vec_validate (nhs, 2 * n_nhs - 1);

  /* Fast path: 1 next hop in block. */
  n_adj = n_nhs;
  if (n_nhs == 1)
    {
      nhs[0] = raw_next_hops[0];
      nhs[0].weight = 1;
      _vec_len (nhs) = 1;
      goto done;
    }
  else if (n_nhs == 2)
    {
      int cmp = next_hop_sort_by_weight (&raw_next_hops[0],
                                         &raw_next_hops[1]) < 0;

      /* Fast sort. */
      nhs[0] = raw_next_hops[cmp];
      nhs[1] = raw_next_hops[cmp ^ 1];

      /* Fast path: equal cost multipath with 2 next hops. */
      if (nhs[0].weight == nhs[1].weight)
        {
          nhs[0].weight = nhs[1].weight = 1;
          _vec_len (nhs) = 2;
          goto done;
        }
    }
  else
    {
      memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
      qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
    }

  /* Find total weight to normalize weights. */
  sum_weight = 0;
  for (i = 0; i < n_nhs; i++)
    sum_weight += nhs[i].weight;

  /* In the unlikely case that all weights are given as 0, set them all to 1. */
  if (sum_weight == 0)
    {
      for (i = 0; i < n_nhs; i++)
        nhs[i].weight = 1;
      sum_weight = n_nhs;
    }

  /* Save copies of all next hop weights to avoid being overwritten in loop below. */
  for (i = 0; i < n_nhs; i++)
    nhs[n_nhs + i].weight = nhs[i].weight;

  /* Try larger and larger power of 2 sized adjacency blocks until we find
     one where traffic flows to within the configured error tolerance of the
     specified weights. */
  for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
    {
      error = 0;

      norm = n_adj / sum_weight;
      n_adj_left = n_adj;
      for (i = 0; i < n_nhs; i++)
        {
          f64 nf = nhs[n_nhs + i].weight * norm; /* use saved weights */
          word n = flt_round_nearest (nf);

          n = n > n_adj_left ? n_adj_left : n;
          n_adj_left -= n;
          error += fabs (nf - n);
          nhs[i].weight = n;
        }

      nhs[0].weight += n_adj_left;

      /* Average error per adjacency within tolerance for this size
         adjacency block? */
      if (error <= lm->multipath_next_hop_error_tolerance * n_adj)
        {
          /* Truncate any next hops with zero weight. */
          _vec_len (nhs) = i;
          break;
        }
    }

done:
  /* Save vector for next call. */
  *normalized_next_hops = nhs;
  return n_adj;
}
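/*
 * A hypothetical standalone reduction of the normalization loop above:
 * scale the weights to a power-of-2 adjacency block, round to integers,
 * and double the block size until the average rounding error per adjacency
 * is within the tolerance.  next_pow2() stands in for max_pow2(), and all
 * names here are illustrative, not from the VPP source.
 */
#include <math.h>
#include <stdio.h>

static unsigned
next_pow2 (unsigned x)
{
  unsigned p = 1;
  while (p < x)
    p <<= 1;
  return p;
}

/* Distribute n_adj adjacency slots across n weights; returns the block size. */
static unsigned
normalize_weights (const double *weights, unsigned n, unsigned *out,
                   double tol)
{
  double sum = 0;
  for (unsigned i = 0; i < n; i++)
    sum += weights[i];

  for (unsigned n_adj = next_pow2 (n); ; n_adj *= 2)
    {
      double norm = (double) n_adj / sum, error = 0;
      unsigned left = n_adj;
      for (unsigned i = 0; i < n; i++)
        {
          double nf = weights[i] * norm;
          unsigned ni = (unsigned) (nf + 0.5); /* round to nearest */
          if (ni > left)
            ni = left;
          left -= ni;
          error += fabs (nf - (double) ni);
          out[i] = ni;
        }
      out[0] += left;          /* leftover slots go to the heaviest hop */
      if (error <= tol * n_adj)
        return n_adj;
    }
}

int
main (void)
{
  double weights[] = { 3, 2, 1 };   /* sorted heaviest-first */
  unsigned out[3];
  unsigned n_adj = normalize_weights (weights, 3, out, 0.01);
  /* prints "128 adjacencies: 64 43 21" for a 1% tolerance */
  printf ("%u adjacencies: %u %u %u\n", n_adj, out[0], out[1], out[2]);
  return 0;
}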
uword
ssvm_eth_interface_tx (ssvm_private_t * intfc, char *buf_to_send,
                       int len_to_send) // ,
                       // vlib_frame_t * f)
{
  ssvm_eth_main_t *em = &ssvm_eth_main;
  ssvm_shared_header_t *sh = intfc->sh;
  unix_shared_memory_queue_t *q;
  u32 *from;
  u32 n_left;
  ssvm_eth_queue_elt_t *elts, *elt, *prev_elt;
  u32 my_pid = intfc->my_pid;
  vlib_buffer_t *b0;
  u32 bi0;
  u32 size_this_buffer;
  u32 chunks_this_buffer;
  u8 i_am_master = intfc->i_am_master;
  u32 elt_index;
  int is_ring_full, interface_down;
  int i;
  volatile u32 *queue_lock;
  u32 n_to_alloc = VLIB_FRAME_SIZE;
  u32 n_allocated, n_present_in_cache, n_available;
  u32 *elt_indices;

  if (i_am_master)
    q = (unix_shared_memory_queue_t *) sh->opaque[TO_SLAVE_Q_INDEX];
  else
    q = (unix_shared_memory_queue_t *) sh->opaque[TO_MASTER_Q_INDEX];

  queue_lock = (u32 *) q;

  // from = vlib_frame_vector_args (f);
  // n_left = f->n_vectors;
  n_left = 1;
  is_ring_full = 0;
  interface_down = 0;

  n_present_in_cache = vec_len (em->chunk_cache);

#ifdef XXX
  /* admin / link up/down check */
  if (sh->opaque[MASTER_ADMIN_STATE_INDEX] == 0 ||
      sh->opaque[SLAVE_ADMIN_STATE_INDEX] == 0)
    {
      interface_down = 1;
      goto out;
    }
#endif

  ssvm_lock (sh, my_pid, 1);

  elts = (ssvm_eth_queue_elt_t *) (sh->opaque[CHUNK_POOL_INDEX]);
  elt_indices = (u32 *) (sh->opaque[CHUNK_POOL_FREELIST_INDEX]);
  n_available = (u32) pointer_to_uword (sh->opaque[CHUNK_POOL_NFREE]);

  printf ("AYXX: n_left: %d, n_present_in_cache: %d\n",
          n_left, n_present_in_cache);

  if (n_present_in_cache < n_left * 2)
    {
      vec_validate (em->chunk_cache, n_to_alloc + n_present_in_cache - 1);

      n_allocated = n_to_alloc < n_available ? n_to_alloc : n_available;

      printf ("AYXX: n_allocated: %d, n_to_alloc: %d, n_available: %d\n",
              n_allocated, n_to_alloc, n_available);

      if (PREDICT_TRUE (n_allocated > 0))
        {
          memcpy (&em->chunk_cache[n_present_in_cache],
                  &elt_indices[n_available - n_allocated],
                  sizeof (u32) * n_allocated);
        }

      n_present_in_cache += n_allocated;
      n_available -= n_allocated;
      sh->opaque[CHUNK_POOL_NFREE] = uword_to_pointer (n_available, void *);
      _vec_len (em->chunk_cache) = n_present_in_cache;
    }
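/*
 * The excerpt above ends right after the chunk-cache refill step.  That step
 * is simply: if the local cache holds fewer than two chunk indices per
 * pending packet, pull up to a frame's worth of indices off the tail of the
 * shared freelist.  A hypothetical array-only sketch of that refill (no SSVM
 * or vppinfra types; FRAME_SIZE and refill_chunk_cache() are illustrative):
 */
#include <stdint.h>
#include <string.h>

#define FRAME_SIZE 256          /* stand-in for VLIB_FRAME_SIZE */

/* Append up to FRAME_SIZE indices from the end of the shared freelist to
   the local cache and shrink the shared free count; returns the new cache
   occupancy.  The caller is assumed to hold the shared-memory lock. */
static uint32_t
refill_chunk_cache (uint32_t * cache, uint32_t n_cached,
                    uint32_t * freelist, uint32_t * n_free,
                    uint32_t n_needed)
{
  if (n_cached >= n_needed * 2)
    return n_cached;            /* enough chunks cached already */

  uint32_t n_alloc = FRAME_SIZE < *n_free ? FRAME_SIZE : *n_free;
  if (n_alloc > 0)
    {
      /* Indices come off the tail of the freelist, as in the code above */
      memcpy (&cache[n_cached], &freelist[*n_free - n_alloc],
              sizeof (uint32_t) * n_alloc);
      n_cached += n_alloc;
      *n_free -= n_alloc;
    }
  return n_cached;
}

int
main (void)
{
  uint32_t freelist[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  uint32_t n_free = 8;
  uint32_t cache[FRAME_SIZE];
  uint32_t n_cached = refill_chunk_cache (cache, 0, freelist, &n_free, 1);
  /* With this tiny freelist: n_cached == 8, n_free == 0 */
  return (n_cached == 8 && n_free == 0) ? 0 : 1;
}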