Example #1
File: nsim.c Project: chrisy/vpp
static int
nsim_configure (nsim_main_t * nsm, f64 bandwidth, f64 delay, f64 packet_size,
		f64 drop_fraction)
{
  u64 total_buffer_size_in_bytes, per_worker_buffer_size;
  u64 wheel_slots_per_worker;
  int i;
  int num_workers = vlib_num_workers ();
  u32 pagesize = getpagesize ();
  vlib_main_t *vm = nsm->vlib_main;

  if (bandwidth == 0.0)
    return VNET_API_ERROR_INVALID_VALUE;

  if (delay == 0.0)
    return VNET_API_ERROR_INVALID_VALUE_2;

  if (packet_size < 64.0 || packet_size > 9000.0)
    return VNET_API_ERROR_INVALID_VALUE_3;

  /* Toss the old wheel(s)... */
  if (nsm->is_configured)
    {
      for (i = 0; i < vec_len (nsm->wheel_by_thread); i++)
	{
	  nsim_wheel_t *wp = nsm->wheel_by_thread[i];
	  munmap (wp, nsm->mmap_size);
	  nsm->wheel_by_thread[i] = 0;
	}
    }

  nsm->delay = delay;
  nsm->drop_fraction = drop_fraction;

  /* delay in seconds, bandwidth in bits/sec */
  total_buffer_size_in_bytes = (u64) ((delay * bandwidth) / 8.0 + 0.5);

  /*
   * Work out how much buffering each worker needs, assuming decent
   * RSS behavior.
   */
  if (num_workers)
    per_worker_buffer_size = total_buffer_size_in_bytes / num_workers;
  else
    per_worker_buffer_size = total_buffer_size_in_bytes;

  wheel_slots_per_worker = per_worker_buffer_size / packet_size;
  wheel_slots_per_worker++;

  /* Save these for the show command */
  nsm->bandwidth = bandwidth;
  nsm->packet_size = packet_size;

  vec_validate (nsm->wheel_by_thread, num_workers);

  /* Initialize the output scheduler wheels */
  for (i = num_workers ? 1 : 0; i < num_workers + 1; i++)
    {
      nsim_wheel_t *wp;

      nsm->mmap_size = sizeof (nsim_wheel_t)
	+ wheel_slots_per_worker * sizeof (nsim_wheel_entry_t);

      nsm->mmap_size += pagesize - 1;
      nsm->mmap_size &= ~(pagesize - 1);

      wp = clib_mem_vm_alloc (nsm->mmap_size);
      ASSERT (wp != 0);
      wp->wheel_size = wheel_slots_per_worker;
      wp->cursize = 0;
      wp->head = 0;
      wp->tail = 0;
      wp->entries = (void *) (wp + 1);
      nsm->wheel_by_thread[i] = wp;
    }

  vlib_worker_thread_barrier_sync (vm);

  /* turn on the ring scrapers */
  for (i = num_workers ? 1 : 0; i < num_workers + 1; i++)
    {
      vlib_main_t *this_vm = vlib_mains[i];

      vlib_node_set_state (this_vm, nsim_input_node.index,
			   VLIB_NODE_STATE_POLLING);
    }

  vlib_worker_thread_barrier_release (vm);

  nsm->is_configured = 1;
  return 0;
}
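The sizing above is just the link's bandwidth-delay product, split across workers and rounded up to whole wheel slots and then to whole pages for each mmap'd wheel. Below is a minimal standalone sketch of that arithmetic, with illustrative values (1 Gbit/s, 10 ms delay, 1500-byte packets, two workers, 4 KiB pages) and stand-in structure sizes; none of these values or names come from the plugin itself.

/* Standalone sketch of the wheel-sizing arithmetic in nsim_configure ().
 * The traffic parameters, worker count, page size and structure sizes
 * below are illustrative stand-ins only. */
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  double bandwidth = 1e9;	/* bits/sec */
  double delay = 10e-3;		/* seconds */
  double packet_size = 1500.0;	/* bytes */
  int num_workers = 2;
  uint64_t pagesize = 4096;
  uint64_t wheel_hdr_size = 64;	/* stand-in for sizeof (nsim_wheel_t) */
  uint64_t entry_size = 16;	/* stand-in for sizeof (nsim_wheel_entry_t) */

  /* bandwidth-delay product, in bytes */
  uint64_t total_bytes = (uint64_t) ((delay * bandwidth) / 8.0 + 0.5);
  uint64_t per_worker = num_workers ? total_bytes / num_workers : total_bytes;
  uint64_t slots = (uint64_t) (per_worker / packet_size) + 1;

  /* round each per-worker mapping up to a whole page, as the plugin does */
  uint64_t mmap_size = wheel_hdr_size + slots * entry_size;
  mmap_size = (mmap_size + pagesize - 1) & ~(pagesize - 1);

  printf ("%llu bytes buffered, %llu slots per worker, %llu-byte mapping\n",
	  (unsigned long long) total_bytes, (unsigned long long) slots,
	  (unsigned long long) mmap_size);
  return 0;
}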
Example #2
void
scrape_and_clear_counters (perfmon_main_t * pm)
{
  int i, j, k;
  vlib_main_t *vm = pm->vlib_main;
  vlib_main_t *stat_vm;
  vlib_node_main_t *nm;
  vlib_node_t ***node_dups = 0;
  vlib_node_t **nodes;
  vlib_node_t *n;
  perfmon_capture_t *c;
  perfmon_event_config_t *current_event;
  uword *p;
  u8 *counter_name;
  u64 vectors_this_counter;

  /* snapshot the nodes, including pm counters */
  vlib_worker_thread_barrier_sync (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
	continue;

      nm = &stat_vm->node_main;

      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  vlib_node_sync_stats (stat_vm, n);
	}

      nodes = 0;
      vec_validate (nodes, vec_len (nm->nodes) - 1);
      vec_add1 (node_dups, nodes);

      /* Snapshot and clear the per-node perfmon counters */
      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  nodes[i] = clib_mem_alloc (sizeof (*n));
	  clib_memcpy_fast (nodes[i], n, sizeof (*n));
	  n->stats_total.perf_counter0_ticks = 0;
	  n->stats_total.perf_counter1_ticks = 0;
	  n->stats_total.perf_counter_vectors = 0;
	  n->stats_last_clear.perf_counter0_ticks = 0;
	  n->stats_last_clear.perf_counter1_ticks = 0;
	  n->stats_last_clear.perf_counter_vectors = 0;
	}
    }

  vlib_worker_thread_barrier_release (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
	continue;

      nodes = node_dups[j];

      for (i = 0; i < vec_len (nodes); i++)
	{
	  u8 *capture_name;

	  n = nodes[i];

	  if (n->stats_total.perf_counter0_ticks == 0 &&
	      n->stats_total.perf_counter1_ticks == 0)
	    goto skip_this_node;

	  for (k = 0; k < 2; k++)
	    {
	      u64 counter_value, counter_last_clear;

	      /*
	       * We collect 2 counters at once, except for the
	       * last counter when the user asks for an odd number of
	       * counters
	       */
	      if ((pm->current_event + k)
		  >= vec_len (pm->single_events_to_collect))
		break;

	      if (k == 0)
		{
		  counter_value = n->stats_total.perf_counter0_ticks;
		  counter_last_clear =
		    n->stats_last_clear.perf_counter0_ticks;
		}
	      else
		{
		  counter_value = n->stats_total.perf_counter1_ticks;
		  counter_last_clear =
		    n->stats_last_clear.perf_counter1_ticks;
		}

	      capture_name = format (0, "t%d-%v%c", j, n->name, 0);

	      p = hash_get_mem (pm->capture_by_thread_and_node_name,
				capture_name);

	      if (p == 0)
		{
		  pool_get (pm->capture_pool, c);
		  memset (c, 0, sizeof (*c));
		  c->thread_and_node_name = capture_name;
		  hash_set_mem (pm->capture_by_thread_and_node_name,
				capture_name, c - pm->capture_pool);
		}
	      else
		{
		  c = pool_elt_at_index (pm->capture_pool, p[0]);
		  vec_free (capture_name);
		}

	      /* Snapshot counters, etc. into the capture */
	      current_event = pm->single_events_to_collect
		+ pm->current_event + k;
	      counter_name = (u8 *) current_event->name;
	      vectors_this_counter = n->stats_total.perf_counter_vectors -
		n->stats_last_clear.perf_counter_vectors;

	      vec_add1 (c->counter_names, counter_name);
	      vec_add1 (c->counter_values,
			counter_value - counter_last_clear);
	      vec_add1 (c->vectors_this_counter, vectors_this_counter);
	    }
	skip_this_node:
	  clib_mem_free (n);
	}
      vec_free (nodes);
    }
  vec_free (node_dups);
}
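The capture bookkeeping above keys each sample by thread index and node name: format (0, "t%d-%v%c", j, n->name, 0) builds a NUL-terminated key from the node-name vector, a capture is created on first sight of that key, and the counter delta since the last clear is appended to it. The following is a plain-C analogue of that get-or-create pattern, using a fixed array and linear scan in place of VPP's hash_get_mem () / pool_get (); every name in it is hypothetical.

/* Plain-C analogue of the get-or-create capture lookup above.  A fixed
 * array and linear scan stand in for VPP's hash_get_mem () / pool_get ();
 * all names are hypothetical. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

typedef struct
{
  char key[64];			/* "t<thread>-<node-name>" */
  uint64_t ticks;		/* accumulated counter delta */
  uint64_t vectors;		/* vectors processed while counting */
} capture_t;

static capture_t captures[128];
static int n_captures;

static capture_t *
capture_get (int thread_index, const char *node_name)
{
  char key[sizeof (captures[0].key)];
  int i;

  snprintf (key, sizeof (key), "t%d-%s", thread_index, node_name);

  for (i = 0; i < n_captures; i++)
    if (strcmp (captures[i].key, key) == 0)
      return &captures[i];

  /* not found: create a new capture, as the pool_get () branch does */
  strcpy (captures[n_captures].key, key);
  return &captures[n_captures++];
}

int
main (void)
{
  capture_t *c = capture_get (1, "ip4-lookup");

  c->ticks += 12345;		/* counter_value - counter_last_clear */
  c->vectors += 256;
  printf ("%s: %llu ticks, %llu vectors\n", c->key,
	  (unsigned long long) c->ticks, (unsigned long long) c->vectors);
  return 0;
}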
Example #3
/* The given next-hop vector is overwritten with a normalized copy: next hops
   are sorted by weight, and each weight is replaced by the number of
   adjacencies assigned to that next hop.
   Returns the number of adjacencies in the block. */
static u32 ip_multipath_normalize_next_hops (ip_lookup_main_t * lm,
					     ip_multipath_next_hop_t * raw_next_hops,
					     ip_multipath_next_hop_t ** normalized_next_hops)
{
  ip_multipath_next_hop_t * nhs;
  uword n_nhs, n_adj, n_adj_left, i;
  f64 sum_weight, norm, error;

  n_nhs = vec_len (raw_next_hops);
  ASSERT (n_nhs > 0);
  if (n_nhs == 0)
    return 0;

  /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
  nhs = *normalized_next_hops;
  vec_validate (nhs, 2*n_nhs - 1);

  /* Fast path: 1 next hop in block. */
  n_adj = n_nhs;
  if (n_nhs == 1)
    {
      nhs[0] = raw_next_hops[0];
      nhs[0].weight = 1;
      _vec_len (nhs) = 1;
      goto done;
    }

  else if (n_nhs == 2)
    {
      int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;

      /* Fast sort. */
      nhs[0] = raw_next_hops[cmp];
      nhs[1] = raw_next_hops[cmp ^ 1];

      /* Fast path: equal cost multipath with 2 next hops. */
      if (nhs[0].weight == nhs[1].weight)
	{
	  nhs[0].weight = nhs[1].weight = 1;
	  _vec_len (nhs) = 2;
	  goto done;
	}
    }
  else
    {
      memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
      qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
    }

  /* Find total weight to normalize weights. */
  sum_weight = 0;
  for (i = 0; i < n_nhs; i++)
    sum_weight += nhs[i].weight;

  /* In the unlikely case that all weights are given as 0, set them all to 1. */
  if (sum_weight == 0)
    {
      for (i = 0; i < n_nhs; i++)
	nhs[i].weight = 1;
      sum_weight = n_nhs;
    }

  /* Save copies of all next hop weights to avoid being overwritten in loop below. */
  for (i = 0; i < n_nhs; i++)
    nhs[n_nhs + i].weight = nhs[i].weight;

  /* Try larger and larger power-of-2 sized adjacency blocks until we
     find one where traffic flows to within the configured error tolerance
     of the specified weights. */
  for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
    {
      error = 0;

      norm = n_adj / sum_weight;
      n_adj_left = n_adj;
      for (i = 0; i < n_nhs; i++)
	{
	  f64 nf = nhs[n_nhs + i].weight * norm; /* use saved weights */
	  word n = flt_round_nearest (nf);

	  n = n > n_adj_left ? n_adj_left : n;
	  n_adj_left -= n;
	  error += fabs (nf - n);
	  nhs[i].weight = n;
	}
	
      nhs[0].weight += n_adj_left;

      /* Is the average error per adjacency within the configured tolerance
         for this block size? */
      if (error <= lm->multipath_next_hop_error_tolerance*n_adj)
	{
	  /* Truncate any next hops with zero weight. */
	  _vec_len (nhs) = i;
	  break;
	}
    }

 done:
  /* Save vector for next call. */
  *normalized_next_hops = nhs;
  return n_adj;
}
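To see how the block-size search behaves, the sketch below replays the loop above for two next hops with weights 2 and 1, assuming a 1% error tolerance (the assumed default; the real value comes from lm->multipath_next_hop_error_tolerance). With a 1:2 split the rounding error stays near 2/3 of an adjacency per pass, so the search only settles once the block is large enough for that to be within tolerance, at 128 entries split 85/43.

/* Replay of the power-of-2 block search for weights { 2, 1 }.
 * The 1% tolerance is assumed here; the real code reads it from
 * lm->multipath_next_hop_error_tolerance. */
#include <stdio.h>
#include <math.h>
#include <stdint.h>

int
main (void)
{
  double weights[2] = { 2.0, 1.0 };	/* already sorted by weight */
  double tolerance = 0.01;
  double sum_weight = weights[0] + weights[1];
  uint32_t n_adj;

  for (n_adj = 2;; n_adj *= 2)	/* max_pow2 (2) == 2 */
    {
      double norm = n_adj / sum_weight;
      double error = 0.0;
      uint32_t n_adj_left = n_adj, alloc[2], i;

      for (i = 0; i < 2; i++)
	{
	  double nf = weights[i] * norm;
	  uint32_t n = (uint32_t) (nf + 0.5);	/* round to nearest */

	  n = n > n_adj_left ? n_adj_left : n;
	  n_adj_left -= n;
	  error += fabs (nf - n);
	  alloc[i] = n;
	}
      alloc[0] += n_adj_left;	/* leftover slots go to the first hop */

      printf ("n_adj %u: %u + %u adjacencies, error %.2f (limit %.2f)\n",
	      n_adj, alloc[0], alloc[1], error, tolerance * n_adj);
      if (error <= tolerance * n_adj)
	break;
    }
  return 0;
}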
Example #4
uword
ssvm_eth_interface_tx (ssvm_private_t * intfc, char *buf_to_send, int len_to_send)
  /* , vlib_frame_t * f) */
{
  ssvm_eth_main_t * em = &ssvm_eth_main;
  ssvm_shared_header_t * sh = intfc->sh;
  unix_shared_memory_queue_t * q;
  u32 * from;
  u32 n_left;
  ssvm_eth_queue_elt_t * elts, * elt, * prev_elt;
  u32 my_pid = intfc->my_pid;
  vlib_buffer_t * b0;
  u32 bi0;
  u32 size_this_buffer;
  u32 chunks_this_buffer;
  u8 i_am_master = intfc->i_am_master;
  u32 elt_index;
  int is_ring_full, interface_down;
  int i;
  volatile u32 *queue_lock;
  u32 n_to_alloc = VLIB_FRAME_SIZE;
  u32 n_allocated, n_present_in_cache, n_available;
  u32 * elt_indices;
  
  if (i_am_master)
    q = (unix_shared_memory_queue_t *)sh->opaque [TO_SLAVE_Q_INDEX];
  else
    q = (unix_shared_memory_queue_t *)sh->opaque [TO_MASTER_Q_INDEX];

  queue_lock = (u32 *) q;

  // from = vlib_frame_vector_args (f);
  //n_left = f->n_vectors;
  n_left = 1;

  is_ring_full = 0;
  interface_down = 0;

  n_present_in_cache = vec_len (em->chunk_cache);

#ifdef XXX
  /* admin / link up/down check */
  if (sh->opaque [MASTER_ADMIN_STATE_INDEX] == 0 ||
      sh->opaque [SLAVE_ADMIN_STATE_INDEX] == 0)
    {
      interface_down = 1;
      goto out;
    }
#endif

  ssvm_lock (sh, my_pid, 1);

  elts = (ssvm_eth_queue_elt_t *) (sh->opaque [CHUNK_POOL_INDEX]);
  elt_indices = (u32 *) (sh->opaque [CHUNK_POOL_FREELIST_INDEX]);
  n_available = (u32) pointer_to_uword(sh->opaque [CHUNK_POOL_NFREE]);

  printf("AYXX: n_left: %d, n_present_in_cache: %d\n", n_left, n_present_in_cache);

  if (n_present_in_cache < n_left*2)
    {
      vec_validate (em->chunk_cache, 
                    n_to_alloc + n_present_in_cache - 1);

      n_allocated = n_to_alloc < n_available ? n_to_alloc : n_available;
      printf("AYXX: n_allocated: %d, n_to_alloc: %d, n_available: %d\n", n_allocated, n_to_alloc, n_available);

      if (PREDICT_TRUE(n_allocated > 0))
	{
	  memcpy (&em->chunk_cache[n_present_in_cache],
		  &elt_indices[n_available - n_allocated],
		  sizeof(u32) * n_allocated);
	}

      n_present_in_cache += n_allocated;
      n_available -= n_allocated;
      sh->opaque [CHUNK_POOL_NFREE] = uword_to_pointer(n_available, void*);
      _vec_len (em->chunk_cache) = n_present_in_cache;
    }