u8 *
format_dpdk_flow (u8 * s, va_list * args)
{
  u32 dev_instance = va_arg (*args, u32);
  u32 flow_index = va_arg (*args, u32);
  uword private_data = va_arg (*args, uword);
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
  dpdk_flow_entry_t *fe;

  if (flow_index == ~0)
    {
      s = format (s, "%-25s: %U\n", "supported flow actions",
		  format_flow_actions, xd->supported_flow_actions);
      s = format (s, "%-25s: %d\n", "last DPDK error type",
		  xd->last_flow_error.type);
      s = format (s, "%-25s: %s\n", "last DPDK error message",
		  xd->last_flow_error.message ?
		  xd->last_flow_error.message : "n/a");
      return s;
    }

  if (private_data >= vec_len (xd->flow_entries))
    return format (s, "unknown flow");

  fe = vec_elt_at_index (xd->flow_entries, private_data);
  s = format (s, "mark %u", fe->mark);
  return s;
}
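/*
 * Usage sketch (illustrative, not part of the original file): the
 * formatter follows vppinfra's %U convention, so a caller supplies the
 * three va_args in order (dev_instance, flow_index, private_data).
 * Passing ~0 as flow_index selects the per-device summary branch:
 *
 *   u8 *s = format (0, "%U", format_dpdk_flow,
 *                   dev_instance, ~0u, (uword) 0);
 */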
void
vlib_clear_combined_counters (vlib_combined_counter_main_t * cm)
{
  uword i, j;
  vlib_mini_counter_t *my_minis;

  for (i = 0; i < vec_len (cm->minis); i++)
    {
      my_minis = cm->minis[i];

      for (j = 0; j < vec_len (my_minis); j++)
	{
	  cm->maxi[j].packets += my_minis[j].packets;
	  cm->maxi[j].bytes += my_minis[j].bytes;
	  my_minis[j].packets = 0;
	  my_minis[j].bytes = 0;
	}
    }

  j = vec_len (cm->maxi);
  if (j > 0)
    vec_validate (cm->value_at_last_clear, j - 1);

  for (i = 0; i < j; i++)
    {
      vlib_counter_t *c = vec_elt_at_index (cm->value_at_last_clear, i);

      c[0] = cm->maxi[i];
    }
}
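/*
 * Read-side sketch (illustrative; the helper name is hypothetical).
 * Note that "clearing" above does not zero the maxi counters: it folds
 * the per-thread mini counters into maxi and snapshots maxi into
 * value_at_last_clear. A reader therefore reports the delta since the
 * last clear. This sketch ignores minis not yet folded into maxi:
 */
static u64
example_packets_since_clear (vlib_combined_counter_main_t * cm, u32 index)
{
  u64 packets = cm->maxi[index].packets;

  /* Subtract the snapshot taken at the last clear, if one exists. */
  if (index < vec_len (cm->value_at_last_clear))
    packets -= cm->value_at_last_clear[index].packets;
  return packets;
}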
/* Reserves given number of error codes for given node. */
void
vlib_register_errors (vlib_main_t * vm,
		      u32 node_index, u32 n_errors, char *error_strings[])
{
  vlib_error_main_t *em = &vm->error_main;
  vlib_node_t *n = vlib_get_node (vm, node_index);
  uword l;

  ASSERT (os_get_cpu_number () == 0);

  /* Free up any previous error strings. */
  if (n->n_errors > 0)
    heap_dealloc (em->error_strings_heap, n->error_heap_handle);

  n->n_errors = n_errors;
  n->error_strings = error_strings;

  if (n_errors == 0)
    return;

  n->error_heap_index =
    heap_alloc (em->error_strings_heap, n_errors, n->error_heap_handle);

  l = vec_len (em->error_strings_heap);

  clib_memcpy (vec_elt_at_index (em->error_strings_heap,
				 n->error_heap_index),
	       error_strings, n_errors * sizeof (error_strings[0]));

  /* Allocate a counter/elog type for each error. */
  vec_validate (em->counters, l - 1);
  vec_validate (vm->error_elog_event_types, l - 1);

  /* Zero counters for re-registrations of errors. */
  if (n->error_heap_index + n_errors <= vec_len (em->counters_last_clear))
    clib_memcpy (em->counters + n->error_heap_index,
		 em->counters_last_clear + n->error_heap_index,
		 n_errors * sizeof (em->counters[0]));
  else
    memset (em->counters + n->error_heap_index, 0,
	    n_errors * sizeof (em->counters[0]));

  {
    elog_event_type_t t;
    uword i;

    memset (&t, 0, sizeof (t));
    for (i = 0; i < n_errors; i++)
      {
	t.format = (char *) format (0, "%v %s: %%d",
				    n->name, error_strings[i]);
	vm->error_elog_event_types[n->error_heap_index + i] = t;
      }
  }
}
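/*
 * Typical caller pattern (illustrative; all names here are
 * hypothetical). Nodes conventionally generate the error-string table
 * from a single "foreach" macro so the enum and the strings cannot
 * drift apart, then register the table for their node index:
 *
 *   #define foreach_example_error            \
 *     _(NO_BUFFER, "out of buffers")         \
 *     _(BAD_LENGTH, "bad packet length")
 *
 *   typedef enum
 *   {
 *   #define _(sym, str) EXAMPLE_ERROR_##sym,
 *     foreach_example_error
 *   #undef _
 *     EXAMPLE_N_ERROR,
 *   } example_error_t;
 *
 *   static char *example_error_strings[] = {
 *   #define _(sym, str) str,
 *     foreach_example_error
 *   #undef _
 *   };
 *
 *   vlib_register_errors (vm, node_index,
 *                         EXAMPLE_N_ERROR, example_error_strings);
 */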
static void
notify_value (svmdb_value_t * v, svmdb_action_t a)
{
  int i;
  int rv;
  union sigval sv;
  u32 value;
  u32 *dead_registrations = 0;
  svmdb_notify_t *np;

  for (i = 0; i < vec_len (v->notifications); i++)
    {
      np = vec_elt_at_index (v->notifications, i);
      if (np->action == a)
	{
	  value = (np->action << 28) | (np->opaque);
	  sv.sival_ptr = (void *) (uword) value;
	  do
	    {
	      rv = 0;
	      if (sigqueue (np->pid, np->signum, sv) == 0)
		break;
	      rv = errno;
	    }
	  while (rv == EAGAIN);

	  if (rv == 0)
	    continue;

	  vec_add1 (dead_registrations, i);
	}
    }

  for (i = 0; i < vec_len (dead_registrations); i++)
    {
      np = vec_elt_at_index (v->notifications, dead_registrations[i]);
      clib_warning ("dead reg pid %d sig %d action %d opaque %x",
		    np->pid, np->signum, np->action, np->opaque);
      vec_delete (v->notifications, 1, dead_registrations[i]);
    }

  vec_free (dead_registrations);
}
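/*
 * Receiver-side sketch (illustrative; the handler name is
 * hypothetical, and a real client would install it with sigaction()
 * and SA_SIGINFO). notify_value() packs the action into the top 4 bits
 * of the sigval and the registration's opaque cookie into the low 28
 * bits, so the client unpacks them symmetrically:
 */
static void
example_svmdb_signal_handler (int signum, siginfo_t * si, void *uctx)
{
  u32 value = (u32) (uword) si->si_value.sival_ptr;
  u32 action = value >> 28;		/* svmdb_action_t */
  u32 opaque = value & ((1 << 28) - 1);	/* caller-chosen cookie */

  /* ... dispatch on (action, opaque) ... */
  (void) signum;
  (void) action;
  (void) opaque;
  (void) uctx;
}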
clib_error_t *
dpdk_set_mc_filter (vnet_hw_interface_t * hi,
		    struct ether_addr mc_addr_vec[], int naddr)
{
  int error;
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  error = rte_eth_dev_set_mc_addr_list (xd->device_index,
					mc_addr_vec, naddr);
  if (error)
    return clib_error_return (0, "mc addr list failed: %d", error);

  return NULL;
}
clib_error_t *
dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address)
{
  int error;
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  error = rte_eth_dev_default_mac_addr_set (xd->device_index,
					    (struct ether_addr *) address);
  if (error)
    return clib_error_return (0, "mac address set failed: %d", error);

  return NULL;
}
int
svmdb_local_add_del_notification (svmdb_client_t * client,
				  svmdb_notification_args_t * a)
{
  uword *h;
  void *oldheap;
  hash_pair_t *hp;
  svmdb_shm_hdr_t *shm;
  u8 *dummy_value = 0;
  svmdb_value_t *value;
  svmdb_notify_t *np;
  int i;
  int rv = 0;

  ASSERT (a->elsize);

  region_lock (client->db_rp, 18);
  shm = client->shm;
  oldheap = svm_push_data_heap (client->db_rp);

  h = shm->namespaces[a->nspace];
  hp = hash_get_pair_mem (h, a->var);
  if (hp == 0)
    {
      local_set_variable_nolock (client, a->nspace, (u8 *) a->var,
				 dummy_value, a->elsize);
      /* might have moved */
      h = shm->namespaces[a->nspace];
      hp = hash_get_pair_mem (h, a->var);
      ASSERT (hp);
    }

  value = pool_elt_at_index (shm->values, hp->value[0]);

  for (i = 0; i < vec_len (value->notifications); i++)
    {
      np = vec_elt_at_index (value->notifications, i);
      if ((np->pid == client->pid)
	  && (np->signum == a->signum)
	  && (np->action == a->action) && (np->opaque == a->opaque))
	{
	  if (a->add_del == 0 /* delete */ )
	    {
	      vec_delete (value->notifications, 1, i);
	      goto out;
	    }
	  else
	    {
	      /* add */
	      clib_warning
		("%s: ignore dup reg pid %d signum %d action %d opaque %x",
		 a->var, client->pid, a->signum, a->action, a->opaque);
	      rv = -2;
	      goto out;
	    }
	}
    }

  if (a->add_del == 0)
    {
      rv = -3;
      goto out;
    }

  vec_add2 (value->notifications, np, 1);
  np->pid = client->pid;
  np->signum = a->signum;
  np->action = a->action;
  np->opaque = a->opaque;

out:
  svm_pop_heap (oldheap);
  region_unlock (client->db_rp);
  return rv;
}
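/*
 * Registration sketch (illustrative; the enum names below are assumed
 * from the svmdb headers and the field values are hypothetical). A
 * client asks to be sent SIGUSR1 whenever "my_var" is set; the opaque
 * cookie comes back in the low 28 bits of the sigval, as unpacked in
 * the handler sketch after notify_value():
 *
 *   svmdb_notification_args_t args = {
 *     .nspace = SVMDB_NAMESPACE_STRING,
 *     .var = "my_var",
 *     .elsize = 1,
 *     .signum = SIGUSR1,
 *     .action = SVMDB_ACTION_SET,
 *     .opaque = 0x123,
 *     .add_del = 1,            (1 = add, 0 = delete)
 *   };
 *   svmdb_local_add_del_notification (client, &args);
 */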
clib_error_t *
pcap_write (pcap_main_t * pm)
{
  clib_error_t *error = 0;

  if (!(pm->flags & PCAP_MAIN_INIT_DONE))
    {
      pcap_file_header_t fh;
      int n;

      if (!pm->file_name)
	pm->file_name = "/tmp/vnet.pcap";

      pm->file_descriptor =
	open (pm->file_name, O_CREAT | O_TRUNC | O_WRONLY, 0664);
      if (pm->file_descriptor < 0)
	{
	  error = clib_error_return_unix (0, "failed to open `%s'",
					  pm->file_name);
	  goto done;
	}

      pm->flags |= PCAP_MAIN_INIT_DONE;
      pm->n_packets_captured = 0;
      pm->n_pcap_data_written = 0;

      /* Write file header. */
      memset (&fh, 0, sizeof (fh));
      fh.magic = 0xa1b2c3d4;
      fh.major_version = 2;
      fh.minor_version = 4;
      fh.time_zone = 0;
      fh.max_packet_size_in_bytes = 1 << 16;
      fh.packet_type = pm->packet_type;

      n = write (pm->file_descriptor, &fh, sizeof (fh));
      if (n != sizeof (fh))
	{
	  if (n < 0)
	    error = clib_error_return_unix (0, "write file header `%s'",
					    pm->file_name);
	  else
	    error = clib_error_return (0, "short write of file header `%s'",
				       pm->file_name);
	  goto done;
	}
    }

  do
    {
      int n = vec_len (pm->pcap_data) - pm->n_pcap_data_written;

      if (n > 0)
	{
	  n = write (pm->file_descriptor,
		     vec_elt_at_index (pm->pcap_data,
				       pm->n_pcap_data_written), n);
	  if (n < 0 && unix_error_is_fatal (errno))
	    {
	      error = clib_error_return_unix (0, "write `%s'",
					      pm->file_name);
	      goto done;
	    }
	}

      /* Advance only on a successful (possibly partial) write; don't
         rewind the write pointer on a non-fatal write error. */
      if (n > 0)
	pm->n_pcap_data_written += n;

      if (pm->n_pcap_data_written >= vec_len (pm->pcap_data))
	{
	  vec_reset_length (pm->pcap_data);
	  break;
	}
    }
  while (pm->n_packets_captured >= pm->n_packets_to_capture);

  if (pm->n_packets_captured >= pm->n_packets_to_capture)
    {
      close (pm->file_descriptor);
      pm->flags &= ~PCAP_MAIN_INIT_DONE;
      pm->file_descriptor = -1;
    }

done:
  if (error)
    {
      if (pm->file_descriptor >= 0)
	close (pm->file_descriptor);
    }
  return error;
}
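/*
 * Capture-loop sketch (illustrative; assumes the pcap_add_packet()
 * inline helper from this module's header, and the file path is
 * hypothetical). A caller buffers packets into pm->pcap_data and calls
 * pcap_write() periodically; the function lazily opens the file and
 * emits the pcap header on first use, then closes the file once
 * n_packets_to_capture packets have been captured:
 *
 *   pcap_main_t pm = { 0 };
 *   pm.file_name = "/tmp/example.pcap";
 *   pm.n_packets_to_capture = 100;
 *   pm.packet_type = PCAP_PACKET_TYPE_ethernet;
 *
 *   void *d = pcap_add_packet (&pm, now, len, len);
 *   clib_memcpy (d, packet_data, len);
 *   pcap_write (&pm);
 */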
always_inline vhash_overflow_search_bucket_t *
get_overflow_search_bucket (vhash_overflow_buckets_t * obs, u32 i,
			    u32 n_key_u32s)
{
  return ((vhash_overflow_search_bucket_t *)
	  vec_elt_at_index (obs->search_buckets, i));
}
  {
    copy_adj = ip_get_adjacency (lm, nh->next_hop_adj_index);
    for (j = 0; j < nh->weight; j++)
      {
	adj[i] = copy_adj[0];
	adj[i].heap_handle = adj_heap_handle;
	adj[i].n_adj = n_adj;
	i++;
      }
  }

  /* All adjacencies should have been initialized. */
  ASSERT (i == n_adj);

  vec_validate (lm->multipath_adjacencies, adj_heap_handle);

  madj = vec_elt_at_index (lm->multipath_adjacencies, adj_heap_handle);
  madj->adj_index = adj_index;
  madj->n_adj_in_block = n_adj;
  madj->reference_count = 0;	/* caller will set to one. */

  madj->normalized_next_hops.count = vec_len (nhs);
  madj->normalized_next_hops.heap_offset =
    heap_alloc (lm->next_hop_heap, vec_len (nhs),
		madj->normalized_next_hops.heap_handle);
  memcpy (lm->next_hop_heap + madj->normalized_next_hops.heap_offset,
	  nhs, vec_bytes (nhs));

  hash_set (lm->multipath_adjacency_by_next_hops,
	    ip_next_hop_hash_key_from_handle
	    (madj->normalized_next_hops.heap_handle),
	    madj - lm->multipath_adjacencies);
static int
dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
{
  struct rte_flow_item_ipv4 ip4[2] = { };
  struct rte_flow_item_ipv6 ip6[2] = { };
  struct rte_flow_item_udp udp[2] = { };
  struct rte_flow_item_tcp tcp[2] = { };
  struct rte_flow_action_mark mark = { 0 };
  struct rte_flow_item *item, *items = 0;
  struct rte_flow_action *action, *actions = 0;

  enum
  {
    vxlan_hdr_sz = sizeof (vxlan_header_t),
    raw_sz = sizeof (struct rte_flow_item_raw)
  };

  union
  {
    struct rte_flow_item_raw item;
    u8 val[raw_sz + vxlan_hdr_sz];
  } raw[2];

  u16 src_port, dst_port, src_port_mask, dst_port_mask;
  u8 protocol;
  int rv = 0;

  if (f->actions & (~xd->supported_flow_actions))
    return VNET_FLOW_ERROR_NOT_SUPPORTED;

  /* Match items */
  /* Ethernet */
  vec_add2 (items, item, 1);
  item->type = RTE_FLOW_ITEM_TYPE_ETH;
  item->spec = any_eth;
  item->mask = any_eth + 1;

  /* VLAN */
  if (f->type != VNET_FLOW_TYPE_IP4_VXLAN)
    {
      vec_add2 (items, item, 1);
      item->type = RTE_FLOW_ITEM_TYPE_VLAN;
      item->spec = any_vlan;
      item->mask = any_vlan + 1;
    }

  /* IP */
  vec_add2 (items, item, 1);
  if (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE)
    {
      vnet_flow_ip6_n_tuple_t *t6 = &f->ip6_n_tuple;
      clib_memcpy_fast (ip6[0].hdr.src_addr, &t6->src_addr.addr, 16);
      clib_memcpy_fast (ip6[1].hdr.src_addr, &t6->src_addr.mask, 16);
      clib_memcpy_fast (ip6[0].hdr.dst_addr, &t6->dst_addr.addr, 16);
      clib_memcpy_fast (ip6[1].hdr.dst_addr, &t6->dst_addr.mask, 16);
      item->type = RTE_FLOW_ITEM_TYPE_IPV6;
      item->spec = ip6;
      item->mask = ip6 + 1;

      src_port = t6->src_port.port;
      dst_port = t6->dst_port.port;
      src_port_mask = t6->src_port.mask;
      dst_port_mask = t6->dst_port.mask;
      protocol = t6->protocol;
    }
  else if (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE)
    {
      vnet_flow_ip4_n_tuple_t *t4 = &f->ip4_n_tuple;
      ip4[0].hdr.src_addr = t4->src_addr.addr.as_u32;
      ip4[1].hdr.src_addr = t4->src_addr.mask.as_u32;
      ip4[0].hdr.dst_addr = t4->dst_addr.addr.as_u32;
      ip4[1].hdr.dst_addr = t4->dst_addr.mask.as_u32;
      item->type = RTE_FLOW_ITEM_TYPE_IPV4;
      item->spec = ip4;
      item->mask = ip4 + 1;

      src_port = t4->src_port.port;
      dst_port = t4->dst_port.port;
      src_port_mask = t4->src_port.mask;
      dst_port_mask = t4->dst_port.mask;
      protocol = t4->protocol;
    }
  else if (f->type == VNET_FLOW_TYPE_IP4_VXLAN)
    {
      vnet_flow_ip4_vxlan_t *v4 = &f->ip4_vxlan;
      ip4[0].hdr.src_addr = v4->src_addr.as_u32;
      ip4[1].hdr.src_addr = -1;
      ip4[0].hdr.dst_addr = v4->dst_addr.as_u32;
      ip4[1].hdr.dst_addr = -1;
      item->type = RTE_FLOW_ITEM_TYPE_IPV4;
      item->spec = ip4;
      item->mask = ip4 + 1;

      dst_port = v4->dst_port;
      dst_port_mask = -1;
      src_port = 0;
      src_port_mask = 0;
      protocol = IP_PROTOCOL_UDP;
    }
  else
    {
      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
      goto done;
    }

  /* Layer 4 */
  vec_add2 (items, item, 1);
  if (protocol == IP_PROTOCOL_UDP)
    {
      udp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
      udp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
      udp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
      udp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
      item->type = RTE_FLOW_ITEM_TYPE_UDP;
      item->spec = udp;
      item->mask = udp + 1;
    }
  else if (protocol == IP_PROTOCOL_TCP)
    {
      tcp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
      tcp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
      tcp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
      tcp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
      item->type = RTE_FLOW_ITEM_TYPE_TCP;
      item->spec = tcp;
      item->mask = tcp + 1;
    }
  else
    {
      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
      goto done;
    }

  /* Tunnel header match */
  if (f->type == VNET_FLOW_TYPE_IP4_VXLAN)
    {
      u32 vni = f->ip4_vxlan.vni;
      vxlan_header_t spec_hdr = {
	.flags = VXLAN_FLAGS_I,
	.vni_reserved = clib_host_to_net_u32 (vni << 8)
      };
      vxlan_header_t mask_hdr = {
	.flags = 0xff,
	.vni_reserved = clib_host_to_net_u32 (((u32) - 1) << 8)
      };

      clib_memset (raw, 0, sizeof raw);
      raw[0].item.relative = 1;
      raw[0].item.length = vxlan_hdr_sz;

      clib_memcpy_fast (raw[0].val + raw_sz, &spec_hdr, vxlan_hdr_sz);
      raw[0].item.pattern = raw[0].val + raw_sz;
      clib_memcpy_fast (raw[1].val + raw_sz, &mask_hdr, vxlan_hdr_sz);
      raw[1].item.pattern = raw[1].val + raw_sz;

      vec_add2 (items, item, 1);
      item->type = RTE_FLOW_ITEM_TYPE_RAW;
      item->spec = raw;
      item->mask = raw + 1;
    }

  vec_add2 (items, item, 1);
  item->type = RTE_FLOW_ITEM_TYPE_END;

  /* Actions */
  vec_add2 (actions, action, 1);
  action->type = RTE_FLOW_ACTION_TYPE_PASSTHRU;

  vec_add2 (actions, action, 1);
  mark.id = fe->mark;
  action->type = RTE_FLOW_ACTION_TYPE_MARK;
  action->conf = &mark;

  vec_add2 (actions, action, 1);
  action->type = RTE_FLOW_ACTION_TYPE_END;

  fe->handle = rte_flow_create (xd->device_index, &ingress, items,
				actions, &xd->last_flow_error);

  if (!fe->handle)
    rv = VNET_FLOW_ERROR_NOT_SUPPORTED;

done:
  vec_free (items);
  vec_free (actions);
  return rv;
}

int
dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
		  u32 flow_index, uword * private_data)
{
  dpdk_main_t *dm = &dpdk_main;
  vnet_flow_t *flow = vnet_get_flow (flow_index);
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
  dpdk_flow_entry_t *fe;
  dpdk_flow_lookup_entry_t *fle = 0;
  int rv;

  /* recycle old flow lookup entries only after the main loop counter
     increases - i.e. previously DMA'ed packets were handled */
  if (vec_len (xd->parked_lookup_indexes) > 0 &&
      xd->parked_loop_count != dm->vlib_main->main_loop_count)
    {
      u32 *fl_index;

      vec_foreach (fl_index, xd->parked_lookup_indexes)
	pool_put_index (xd->flow_lookup_entries, *fl_index);
      vec_reset_length (xd->parked_lookup_indexes);
    }

  if (op == VNET_FLOW_DEV_OP_DEL_FLOW)
    {
      ASSERT (*private_data < vec_len (xd->flow_entries));
      fe = vec_elt_at_index (xd->flow_entries, *private_data);

      if ((rv = rte_flow_destroy (xd->device_index, fe->handle,
				  &xd->last_flow_error)))
	return VNET_FLOW_ERROR_INTERNAL;

      if (fe->mark)
	{
	  /* make sure no action is taken for in-flight (marked) packets */
	  fle = pool_elt_at_index (xd->flow_lookup_entries, fe->mark);
	  clib_memset (fle, -1, sizeof (*fle));
	  vec_add1 (xd->parked_lookup_indexes, fe->mark);
	  xd->parked_loop_count = dm->vlib_main->main_loop_count;
	}

      clib_memset (fe, 0, sizeof (*fe));
      pool_put (xd->flow_entries, fe);

      goto disable_rx_offload;
    }

  if (op != VNET_FLOW_DEV_OP_ADD_FLOW)
    return VNET_FLOW_ERROR_NOT_SUPPORTED;

  pool_get (xd->flow_entries, fe);
  fe->flow_index = flow->index;

  if (flow->actions == 0)
    {
      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
      goto done;
    }

  /* if we need to mark packets, assign one mark */
  if (flow->actions & (VNET_FLOW_ACTION_MARK |
		       VNET_FLOW_ACTION_REDIRECT_TO_NODE |
		       VNET_FLOW_ACTION_BUFFER_ADVANCE))
    {
      /* reserve slot 0 */
      if (xd->flow_lookup_entries == 0)
	pool_get_aligned (xd->flow_lookup_entries, fle,
			  CLIB_CACHE_LINE_BYTES);
      pool_get_aligned (xd->flow_lookup_entries, fle, CLIB_CACHE_LINE_BYTES);
      fe->mark = fle - xd->flow_lookup_entries;

      /* install entry in the lookup table */
      clib_memset (fle, -1, sizeof (*fle));
      if (flow->actions & VNET_FLOW_ACTION_MARK)
	fle->flow_id = flow->mark_flow_id;
      if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_NODE)
	fle->next_index = flow->redirect_device_input_next_index;
      if (flow->actions & VNET_FLOW_ACTION_BUFFER_ADVANCE)
	fle->buffer_advance = flow->buffer_advance;
    }
  else
    fe->mark = 0;

  if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) == 0)
    {
      xd->flags |= DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD;
      dpdk_device_setup (xd);
    }

  switch (flow->type)
    {
    case VNET_FLOW_TYPE_IP4_N_TUPLE:
    case VNET_FLOW_TYPE_IP6_N_TUPLE:
    case VNET_FLOW_TYPE_IP4_VXLAN:
      if ((rv = dpdk_flow_add (xd, flow, fe)))
	goto done;
      break;
    default:
      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
      goto done;
    }

  *private_data = fe - xd->flow_entries;

done:
  if (rv)
    {
      clib_memset (fe, 0, sizeof (*fe));
      pool_put (xd->flow_entries, fe);
      if (fle)
	{
	  clib_memset (fle, -1, sizeof (*fle));
	  pool_put (xd->flow_lookup_entries, fle);
	}
    }

disable_rx_offload:
  if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0
      && pool_elts (xd->flow_entries) == 0)
    {
      xd->flags &= ~DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD;
      dpdk_device_setup (xd);
    }

  return rv;
}
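/*
 * Note on the item layout used by dpdk_flow_add() above (descriptive
 * comment, not original source): every two-element array (ip4[2],
 * ip6[2], udp[2], tcp[2], raw[2]) stores the rte_flow "spec" in
 * element 0 and the corresponding "mask" in element 1, which is why
 * each match item is wired up as item->spec = arr and
 * item->mask = arr + 1.
 */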
static void
enable_current_events (perfmon_main_t * pm)
{
  struct perf_event_attr pe;
  int fd;
  struct perf_event_mmap_page *p = 0;
  perfmon_event_config_t *c;
  vlib_main_t *vm = vlib_get_main ();
  u32 my_thread_index = vm->thread_index;
  u32 index;
  int i, limit = 1;
  int cpu;

  if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
    limit = 2;

  for (i = 0; i < limit; i++)
    {
      c = vec_elt_at_index (pm->single_events_to_collect,
			    pm->current_event + i);

      memset (&pe, 0, sizeof (struct perf_event_attr));
      pe.type = c->pe_type;
      pe.size = sizeof (struct perf_event_attr);
      pe.config = c->pe_config;
      pe.disabled = 1;
      pe.pinned = 1;
      /*
       * Note: excluding the kernel makes the
       * (software) context-switch counter read 0...
       */
      if (pe.type != PERF_TYPE_SOFTWARE)
	{
	  /* Exclude kernel and hypervisor */
	  pe.exclude_kernel = 1;
	  pe.exclude_hv = 1;
	}

      cpu = vm->cpu_id;

      fd = perf_event_open (&pe, 0, cpu, -1, 0);
      if (fd == -1)
	{
	  clib_unix_warning ("event open: type %d config %d", c->pe_type,
			     c->pe_config);
	  return;
	}

      if (pe.type != PERF_TYPE_SOFTWARE)
	{
	  p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
	  if (p == MAP_FAILED)
	    {
	      clib_unix_warning ("mmap");
	      close (fd);
	      return;
	    }
	}
      else
	p = 0;

      if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
	clib_unix_warning ("reset ioctl");

      if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
	clib_unix_warning ("enable ioctl");

      /*
       * Software event counters - and others not capable of being
       * read via the "rdpmc" instruction - will be read
       * by system calls.
       */
      if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0)
	index = ~0;		/* can't use rdpmc instruction */
      else
	index = p->index - 1;

      pm->rdpmc_indices[i][my_thread_index] = index;
      pm->perf_event_pages[i][my_thread_index] = (void *) p;
      pm->pm_fds[i][my_thread_index] = fd;
    }

  pm->n_active = i;
  /* Enable the main loop counter snapshot mechanism */
  vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counters;
}
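/*
 * Read-side sketch (illustrative, x86 only; the helper name is
 * hypothetical). When the mmap'ed perf_event page advertises
 * cap_user_rdpmc, the value stored above (p->index - 1) can be fed
 * straight to the rdpmc instruction, so the counter is read from user
 * space without a system call:
 */
static inline u64
example_read_rdpmc (u32 rdpmc_index)
{
  u32 lo, hi;

  /* rdpmc returns the counter selected by ECX in EDX:EAX */
  asm volatile ("rdpmc" : "=a" (lo), "=d" (hi) : "c" (rdpmc_index));
  return (u64) hi << 32 | lo;
}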