/***************************************************************************
 * The receive thread doesn't transmit packets. Instead, it queues them
 * up on the transmit thread. Every so often, the transmit thread needs
 * to flush this transmit queue and send everything.
 *
 * This is an inherent tradeoff of sending packets in batches rather than
 * individually: batching increases latency but improves throughput. We
 * don't really care about latency.
 ***************************************************************************/
void
flush_packets(struct Adapter *adapter,
              PACKET_QUEUE *packet_buffers,
              PACKET_QUEUE *transmit_queue,
              struct Throttler *throttler,
              uint64_t *packets_sent)
{
    uint64_t batch_size;
    unsigned is_queue_empty = 0;

    while (!is_queue_empty) {
        /*
         * Only send a few packets at a time, throttled according to the
         * --max-rate set by the user.
         */
        batch_size = throttler_next_batch(throttler, *packets_sent);

        /*
         * Send a batch of queued packets.
         */
        for ( ; batch_size; batch_size--) {
            int err;
            struct PacketBuffer *p;

            /*
             * Get the next packet from the transmit queue. This packet was
             * put there by a receive thread, and will contain things like
             * an ACK or an HTTP request.
             */
            err = rte_ring_sc_dequeue(transmit_queue, (void**)&p);
            if (err) {
                is_queue_empty = 1;
                break; /* queue is empty, nothing to send */
            }

            /*
             * Actually send the packet.
             */
            rawsock_send_packet(adapter, p->px, (unsigned)p->length, 1);

            /*
             * Now that we are done with the packet, put it on the free list
             * of buffers that the transmit thread can reuse.
             */
            for (err = 1; err; ) {
                err = rte_ring_sp_enqueue(packet_buffers, p);
                if (err) {
                    LOG(0, "transmit queue full (should be impossible)\n");
                    pixie_usleep(10000);
                }
            }

            /*
             * Remember that we sent a packet, which will be used in
             * throttling.
             */
            (*packets_sent)++;
        }
    }
}
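/*
 * A minimal sketch (not part of the original sources) of the producer side
 * of the queue flushed above: a receive thread hands a prepared response
 * packet to the transmit thread by enqueueing it, single-producer, onto the
 * same transmit ring. The function name enqueue_response() is hypothetical;
 * only rte_ring_sp_enqueue() and the retry-on-full pattern mirror the code
 * above.
 */
static void
enqueue_response(PACKET_QUEUE *transmit_queue, struct PacketBuffer *p)
{
    int err;

    /* Keep retrying if the transmit thread has fallen behind. */
    do {
        err = rte_ring_sp_enqueue(transmit_queue, p);
        if (err)
            pixie_usleep(1000); /* ring full: back off briefly */
    } while (err);
}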
void
app_ping(void)
{
    unsigned i;
    uint64_t timestamp, diff_tsc;
    const uint64_t timeout = rte_get_tsc_hz() * APP_PING_TIMEOUT_SEC;

    for (i = 0; i < RTE_MAX_LCORE; i++) {
        struct app_core_params *p = &app.cores[i];
        struct rte_ring *ring_req, *ring_resp;
        void *msg;
        struct app_msg_req *req;
        int status;

        if ((p->core_type != APP_CORE_FC) &&
            (p->core_type != APP_CORE_FW) &&
            (p->core_type != APP_CORE_RT) &&
            (p->core_type != APP_CORE_RX))
            continue;

        ring_req = app_get_ring_req(p->core_id);
        ring_resp = app_get_ring_resp(p->core_id);

        /* Fill request message */
        msg = (void *)rte_ctrlmbuf_alloc(app.msg_pool);
        if (msg == NULL)
            rte_panic("Unable to allocate new message\n");

        req = (struct app_msg_req *)
            rte_ctrlmbuf_data((struct rte_mbuf *)msg);
        req->type = APP_MSG_REQ_PING;

        /* Send request */
        do {
            status = rte_ring_sp_enqueue(ring_req, msg);
        } while (status == -ENOBUFS);

        /* Wait for response */
        timestamp = rte_rdtsc();
        do {
            status = rte_ring_sc_dequeue(ring_resp, &msg);
            diff_tsc = rte_rdtsc() - timestamp;

            if (unlikely(diff_tsc > timeout))
                rte_panic("Core %u of type %d does not respond "
                          "to requests\n", p->core_id, p->core_type);
        } while (status != 0);

        /* Free message buffer */
        rte_ctrlmbuf_free(msg);
    }
}
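/*
 * Hedged sketch (not from the original sources) of the responder side that
 * app_ping() above expects: each worker core periodically drains its request
 * ring, recognises a ping request, and enqueues the same control mbuf back
 * on its response ring. The function name worker_handle_msg_request() is an
 * assumption; the ring and ctrlmbuf calls mirror the ones used above.
 */
static void
worker_handle_msg_request(struct rte_ring *ring_req, struct rte_ring *ring_resp)
{
    void *msg;
    struct app_msg_req *req;
    int status;

    /* Nothing to do if no request is pending. */
    if (rte_ring_sc_dequeue(ring_req, &msg) != 0)
        return;

    req = (struct app_msg_req *)
        rte_ctrlmbuf_data((struct rte_mbuf *)msg);

    if (req->type == APP_MSG_REQ_PING) {
        /* Reuse the request buffer as the response. */
        do {
            status = rte_ring_sp_enqueue(ring_resp, msg);
        } while (status == -ENOBUFS);
    }
}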
int
onvm_nf_send_msg(uint16_t dest, uint8_t msg_type, void *msg_data)
{
    int ret;
    struct onvm_nf_msg *msg;

    ret = rte_mempool_get(nf_msg_pool, (void**)(&msg));
    if (ret != 0) {
        RTE_LOG(INFO, APP,
            "Oh the huge manatee! Unable to allocate msg from pool :(\n");
        return ret;
    }

    msg->msg_type = msg_type;
    msg->msg_data = msg_data;

    return rte_ring_sp_enqueue(nfs[dest].msg_q, (void*)msg);
}
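/*
 * Hedged sketch (not part of the original code) of the consuming side of
 * onvm_nf_send_msg(): the destination NF drains its message ring, acts on
 * msg_type/msg_data, and returns the message object to the shared mempool so
 * it can be reused. handle_nf_msgs() is a hypothetical name; the ring and
 * mempool calls are the standard DPDK counterparts of the ones used above.
 */
static void
handle_nf_msgs(struct rte_ring *msg_q, struct rte_mempool *msg_pool)
{
    struct onvm_nf_msg *msg;

    while (rte_ring_sc_dequeue(msg_q, (void **)&msg) == 0) {
        /* ... dispatch on msg->msg_type and use msg->msg_data here ... */

        /* Give the message buffer back to the pool once processed. */
        rte_mempool_put(msg_pool, (void *)msg);
    }
}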
/*
 * Send a reply message to the vswitchd.
 */
static void
send_reply_to_vswitchd(struct dpdk_message *reply)
{
    int rslt = 0;
    struct rte_mbuf *mbuf = NULL;
    void *ctrlmbuf_data = NULL;
    struct client *vswd = NULL;
    struct statistics *vswd_stat = NULL;

    vswd = &clients[VSWITCHD];
    vswd_stat = &vport_stats[VSWITCHD];

    /* Prepare the buffer to send */
    mbuf = rte_ctrlmbuf_alloc(pktmbuf_pool);
    if (!mbuf) {
        RTE_LOG(WARNING, APP,
            "Error : Unable to allocate an mbuf : %s : %d\n",
            __FUNCTION__, __LINE__);
        switch_tx_drop++;
        vswd_stat->rx_drop++;
        return;
    }

    ctrlmbuf_data = rte_ctrlmbuf_data(mbuf);
    rte_memcpy(ctrlmbuf_data, reply, sizeof(*reply));
    rte_ctrlmbuf_len(mbuf) = sizeof(*reply);

    /* Send the buffer to vswitchd */
    rslt = rte_ring_sp_enqueue(vswd->rx_q, mbuf);
    if (rslt < 0) {
        if (rslt == -ENOBUFS) {
            rte_ctrlmbuf_free(mbuf);
            switch_tx_drop++;
            vswd_stat->rx_drop++;
            /* Don't count a dropped reply as transmitted */
            return;
        } else {
            overruns++;
        }
    }

    vswd_stat->tx++;
}
/*
 * Function sends unmatched packets to vswitchd.
 */
static void
send_packet_to_vswitchd(struct rte_mbuf *mbuf, struct dpdk_upcall *info)
{
    int rslt = 0;
    struct statistics *vswd_stat = NULL;
    void *mbuf_ptr = NULL;

    vswd_stat = &vport_stats[VSWITCHD];

    /* send one packet, delete information about segments */
    rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);

    /* allocate space before the packet for the upcall info */
    mbuf_ptr = rte_pktmbuf_prepend(mbuf, sizeof(*info));
    if (mbuf_ptr == NULL) {
        printf("Cannot prepend upcall info\n");
        rte_pktmbuf_free(mbuf);
        switch_tx_drop++;
        vswd_stat->tx_drop++;
        return;
    }

    rte_memcpy(mbuf_ptr, info, sizeof(*info));

    /* send the packet and the upcall info to the daemon */
    rslt = rte_ring_sp_enqueue(vswitch_packet_ring, mbuf);
    if (rslt < 0) {
        if (rslt == -ENOBUFS) {
            rte_pktmbuf_free(mbuf);
            switch_tx_drop++;
            vswd_stat->tx_drop++;
            return;
        } else {
            overruns++;
        }
    }

    vswd_stat->tx++;
}
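/*
 * A minimal sketch (assumed, not from the original sources) of how the
 * daemon side could consume what send_packet_to_vswitchd() produces: pop an
 * mbuf from the shared ring, read the prepended struct dpdk_upcall, then
 * strip it with rte_pktmbuf_adj() so the mbuf again starts at the original
 * frame. vswitchd_drain_upcalls() is a hypothetical name; the mbuf and ring
 * calls are standard DPDK.
 */
static void
vswitchd_drain_upcalls(struct rte_ring *packet_ring)
{
    struct rte_mbuf *mbuf;
    struct dpdk_upcall upcall;

    while (rte_ring_sc_dequeue(packet_ring, (void **)&mbuf) == 0) {
        /* Copy out the upcall metadata placed in front of the packet. */
        rte_memcpy(&upcall, rte_pktmbuf_mtod(mbuf, void *), sizeof(upcall));

        /* Remove the metadata so only the packet data remains. */
        if (rte_pktmbuf_adj(mbuf, sizeof(upcall)) == NULL) {
            rte_pktmbuf_free(mbuf);
            continue;
        }

        /* ... hand upcall + packet to the slow path here ... */
        rte_pktmbuf_free(mbuf); /* freed here only for the sketch */
    }
}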
/*
 * Enqueue a single packet to a client rx ring
 */
static void
send_to_client(uint8_t client, struct rte_mbuf *buf)
{
    struct client *cl = NULL;
    int rslt = 0;
    struct statistics *s = NULL;

    cl = &clients[client];
    s = &vport_stats[client];

    rslt = rte_ring_sp_enqueue(cl->rx_q, (void *)buf);
    if (rslt < 0) {
        if (rslt == -ENOBUFS) {
            rte_pktmbuf_free(buf);
            switch_tx_drop++;
            s->rx_drop++;
        } else {
            overruns++;
            s->rx++;
        }
    } else {
        s->rx++;
    }
}
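/*
 * Sketch (assumed, not from the original sources) of how a client rx ring
 * like cl->rx_q is typically created so that the single-producer enqueue
 * above is legal: the switching core is the only producer and the client the
 * only consumer, so the ring is created with RING_F_SP_ENQ | RING_F_SC_DEQ.
 * create_client_rx_ring() and the naming scheme are hypothetical.
 */
static struct rte_ring *
create_client_rx_ring(uint8_t client, unsigned ring_size, int socket_id)
{
    char name[RTE_RING_NAMESIZE];

    rte_snprintf(name, sizeof(name), "client_%u_rx", (unsigned)client);

    /* Size must be a power of two; one slot is reserved by the ring. */
    return rte_ring_create(name, ring_size, socket_id,
                           RING_F_SP_ENQ | RING_F_SC_DEQ);
}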
/* This function will perform re-ordering of packets, and injecting into
 * the appropriate QID IQ. As LB and DIR QIDs are in the same array, but *NOT*
 * contiguous in that array, this function accepts a "range" of QIDs to scan.
 */
static uint16_t
sw_schedule_reorder(struct sw_evdev *sw, int qid_start, int qid_end)
{
    /* Perform egress reordering */
    struct rte_event *qe;
    uint32_t pkts_iter = 0;

    for (; qid_start < qid_end; qid_start++) {
        struct sw_qid *qid = &sw->qids[qid_start];
        int i, num_entries_in_use;

        if (qid->type != RTE_SCHED_TYPE_ORDERED)
            continue;

        num_entries_in_use = rte_ring_free_count(
                qid->reorder_buffer_freelist);

        for (i = 0; i < num_entries_in_use; i++) {
            struct reorder_buffer_entry *entry;
            int j;

            entry = &qid->reorder_buffer[qid->reorder_buffer_index];

            if (!entry->ready)
                break;

            for (j = 0; j < entry->num_fragments; j++) {
                uint16_t dest_qid;
                uint16_t dest_iq;

                int idx = entry->fragment_index + j;
                qe = &entry->fragments[idx];

                dest_qid = qe->queue_id;
                dest_iq = PRIO_TO_IQ(qe->priority);

                if (dest_qid >= sw->qid_count) {
                    sw->stats.rx_dropped++;
                    continue;
                }

                struct sw_qid *dest_qid_ptr = &sw->qids[dest_qid];
                const struct iq_ring *dest_iq_ptr = dest_qid_ptr->iq[dest_iq];

                if (iq_ring_free_count(dest_iq_ptr) == 0)
                    break;

                pkts_iter++;

                struct sw_qid *q = &sw->qids[dest_qid];
                struct iq_ring *r = q->iq[dest_iq];

                /* we checked for space above, so enqueue must
                 * succeed
                 */
                iq_ring_enqueue(r, qe);
                q->iq_pkt_mask |= (1 << (dest_iq));
                q->iq_pkt_count[dest_iq]++;
                q->stats.rx_pkts++;
            }

            entry->ready = (j != entry->num_fragments);
            entry->num_fragments -= j;
            entry->fragment_index += j;

            if (!entry->ready) {
                entry->fragment_index = 0;
                rte_ring_sp_enqueue(
                        qid->reorder_buffer_freelist,
                        entry);

                qid->reorder_buffer_index++;
                qid->reorder_buffer_index %= qid->window_size;
            }
        }
    }
    return pkts_iter;
}
/* create the mempool */
struct rte_mempool *
rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
                   unsigned cache_size, unsigned private_data_size,
                   rte_mempool_ctor_t *mp_init, void *mp_init_arg,
                   rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
                   int socket_id, unsigned flags)
{
    char mz_name[RTE_MEMZONE_NAMESIZE];
    char rg_name[RTE_RING_NAMESIZE];
    struct rte_mempool *mp = NULL;
    struct rte_ring *r;
    const struct rte_memzone *mz;
    size_t mempool_size, total_elt_size;
    int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
    int rg_flags = 0;
    uint32_t header_size, trailer_size;
    unsigned i;
    void *obj;

    /* compilation-time checks */
    RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
                      CACHE_LINE_MASK) != 0);
#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
    RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
                      CACHE_LINE_MASK) != 0);
    RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
                      CACHE_LINE_MASK) != 0);
#endif
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
    RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
                      CACHE_LINE_MASK) != 0);
    RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) &
                      CACHE_LINE_MASK) != 0);
#endif

    /* check that we have an initialised tail queue */
    if (RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_MEMPOOL,
                                rte_mempool_list) == NULL) {
        rte_errno = E_RTE_NO_TAILQ;
        return NULL;
    }

    /* requested cache is too big */
    if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
        rte_errno = EINVAL;
        return NULL;
    }

    /* "no cache align" implies "no spread" */
    if (flags & MEMPOOL_F_NO_CACHE_ALIGN)
        flags |= MEMPOOL_F_NO_SPREAD;

    /* ring flags */
    if (flags & MEMPOOL_F_SP_PUT)
        rg_flags |= RING_F_SP_ENQ;
    if (flags & MEMPOOL_F_SC_GET)
        rg_flags |= RING_F_SC_DEQ;

    rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);

    /* allocate the ring that will be used to store objects */
    /* Ring functions will return appropriate errors if we are
     * running as a secondary process etc., so no checks made
     * in this function for that condition */
    rte_snprintf(rg_name, sizeof(rg_name), "MP_%s", name);
    r = rte_ring_create(rg_name, rte_align32pow2(n+1), socket_id,
                        rg_flags);
    if (r == NULL)
        goto exit;

    /*
     * In the header, we have at least the pointer to the pool, and
     * optionally a 64-bit cookie.
     */
    header_size = 0;
    header_size += sizeof(struct rte_mempool *); /* ptr to pool */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
    header_size += sizeof(uint64_t); /* cookie */
#endif
    if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0)
        header_size = (header_size + CACHE_LINE_MASK) &
            (~CACHE_LINE_MASK);

    /* trailer contains the cookie in debug mode */
    trailer_size = 0;
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
    trailer_size += sizeof(uint64_t); /* cookie */
#endif

    /* element size is 8-byte aligned at least */
    elt_size = (elt_size + 7) & (~7);

    /* expand trailer to next cache line */
    if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) {
        total_elt_size = header_size + elt_size + trailer_size;
        trailer_size += ((CACHE_LINE_SIZE -
                          (total_elt_size & CACHE_LINE_MASK)) &
                         CACHE_LINE_MASK);
    }

    /*
     * increase trailer to add padding between objects in order to
     * spread them across memory channels/ranks
     */
    if ((flags & MEMPOOL_F_NO_SPREAD) == 0) {
        unsigned new_size;
        new_size = optimize_object_size(header_size + elt_size +
                                        trailer_size);
        trailer_size = new_size - header_size - elt_size;
    }

    /* this is the size of an object, including header and trailer */
    total_elt_size = header_size + elt_size + trailer_size;

    /* reserve a memory zone for this mempool: private data is
     * cache-aligned */
    private_data_size = (private_data_size + CACHE_LINE_MASK) &
        (~CACHE_LINE_MASK);
    mempool_size = total_elt_size * n +
        sizeof(struct rte_mempool) + private_data_size;
    rte_snprintf(mz_name, sizeof(mz_name), "MP_%s", name);

    mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags);

    /*
     * no more memory: in this case we lose the previously reserved
     * ring space as we cannot free it
     */
    if (mz == NULL)
        goto exit;

    /* init the mempool structure */
    mp = mz->addr;
    memset(mp, 0, sizeof(*mp));
    rte_snprintf(mp->name, sizeof(mp->name), "%s", name);
    mp->phys_addr = mz->phys_addr;
    mp->ring = r;
    mp->size = n;
    mp->flags = flags;
    mp->elt_size = elt_size;
    mp->header_size = header_size;
    mp->trailer_size = trailer_size;
    mp->cache_size = cache_size;
    mp->cache_flushthresh =
        (uint32_t)(cache_size * CACHE_FLUSHTHRESH_MULTIPLIER);
    mp->private_data_size = private_data_size;

    /* call the initializer */
    if (mp_init)
        mp_init(mp, mp_init_arg);

    /* fill the headers and trailers, and add objects in ring */
    obj = (char *)mp + sizeof(struct rte_mempool) + private_data_size;
    for (i = 0; i < n; i++) {
        struct rte_mempool **mpp;
        obj = (char *)obj + header_size;

        /* set mempool ptr in header */
        mpp = __mempool_from_obj(obj);
        *mpp = mp;

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        __mempool_write_header_cookie(obj, 1);
        __mempool_write_trailer_cookie(obj);
#endif

        /* call the initializer */
        if (obj_init)
            obj_init(mp, obj_init_arg, obj, i);

        /* enqueue in ring */
        rte_ring_sp_enqueue(mp->ring, obj);
        obj = (char *)obj + elt_size + trailer_size;
    }

    RTE_EAL_TAILQ_INSERT_TAIL(RTE_TAILQ_MEMPOOL, rte_mempool_list, mp);

exit:
    rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);

    return mp;
}
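/*
 * Hedged usage sketch (not part of the library source above): creating a
 * small pool with rte_mempool_create() and cycling one object through
 * get/put. The pool name, element size, and counts are arbitrary choices for
 * illustration; the calls are the public mempool API that the function above
 * implements.
 */
static void
mempool_usage_example(void)
{
    struct rte_mempool *mp;
    void *obj;

    /* A pool of 1023 elements of 64 bytes, no per-lcore cache, defaults. */
    mp = rte_mempool_create("example_pool", 1023, 64, 0, 0,
                            NULL, NULL, NULL, NULL,
                            SOCKET_ID_ANY, 0);
    if (mp == NULL)
        rte_panic("cannot create mempool\n");

    /* Take an object out of the pool (dequeued from mp->ring) ... */
    if (rte_mempool_get(mp, &obj) == 0) {
        /* ... use the object ... */

        /* ... and return it so it can be handed out again. */
        rte_mempool_put(mp, obj);
    }
}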