/**
 * Discard `n_bytes' from the head of the pmsg_t buffer slist, freeing
 * every buffer that ends up completely consumed.
 *
 * @param slist		the list of message blocks (must be non-NULL)
 * @param n_bytes	amount of leading bytes to discard
 */
void
pmsg_slist_discard(slist_t *slist, size_t n_bytes)
{
	slist_iter_t *iter;
	size_t remain = n_bytes;

	g_assert(slist);

	iter = slist_iter_removable_on_head(slist);

	while (remain > 0) {
		pmsg_t *mb;
		size_t avail;

		g_assert(slist_iter_has_item(iter));	/* Must have enough data */

		mb = slist_iter_current(iter);
		pmsg_check(mb);

		avail = pmsg_size(mb);
		if (avail > remain) {
			pmsg_discard(mb, remain);	/* Partial discard, keep buffer */
			break;
		}

		/* Whole buffer consumed: free it and drop it from the list */
		pmsg_free(mb);
		remain -= avail;
		slist_iter_remove(iter);
	}

	slist_iter_free(&iter);
}
/**
 * Reset message block: drop all the buffered data and restore the state
 * the block had right after creation, so it can be used again as if it
 * were a brand new message block.
 *
 * @param mb	the message block to reset
 */
void
pmsg_reset(pmsg_t *mb)
{
	pmsg_check(mb);

	/* Both pointers back to the arena start: buffer is now empty */
	mb->m_rptr = mb->m_wptr = mb->m_data->d_arena;

	/* Keep only the "extended" flag, as dictated by the magic tag */
	mb->m_flags = (PMSG_EXT_MAGIC == mb->magic) ? PMSG_PF_EXT : 0;

	mb->m_u.m_check = NULL;		/* Clear "pre-send" checks */
}
/**
 * Increase the reference count on the message block.
 *
 * This must be used in TX stacks when there is a free routine installed
 * on messages and we want to keep another reference to the message, yet
 * allow upper layers to pmsg_free() the message block as if it had been
 * sent from their point of view.
 *
 * It also allows correct pmsg_was_sent() checks in free routines, whereas
 * a pmsg_clone() would create a new message.
 *
 * @param mb	the message block to reference
 *
 * @return its argument, for convenience.
 */
pmsg_t *
pmsg_ref(pmsg_t *mb)
{
	pmsg_check(mb);
	g_assert(mb->m_refcnt != 0);	/* Not an already-released block */

	mb->m_refcnt += 1;

	g_assert(mb->m_refcnt != 0);	/* Safeguard against overflows */

	return mb;
}
/**
 * Copy ``len'' bytes from the source message block to the destination by
 * reading the source bytes and writing them to the recipient.
 *
 * @param dest	destination message block (must be writable)
 * @param src	source message block
 * @param len	maximum amount of bytes to transfer (must be >= 0)
 *
 * @returns amount of bytes written, which may be lower than the requested
 * amount if the source buffer was shorter or there is not enough room in
 * the destination.
 */
int
pmsg_copy(pmsg_t *dest, pmsg_t *src, int len)
{
	int n;

	pmsg_check(dest);
	pmsg_check(src);
	g_assert_log(len >= 0, "%s(): len=%d", G_STRFUNC, len);
	g_assert(pmsg_is_writable(dest));	/* Not shared, or would corrupt data */

	/*
	 * The transferred amount is bound by the requested length, the unread
	 * data available in the source and the room left in the destination.
	 */

	n = src->m_wptr - src->m_rptr;		/* Available data in source */
	n = MIN(n, len);
	n = MIN(n, pmsg_available(dest));	/* Room in destination buffer */

	if (n > 0) {
		dest->m_wptr = mempcpy(dest->m_wptr, src->m_rptr, n);
		src->m_rptr += n;
	}

	return n;
}
/**
 * Set the pre-transmit hook routine for the buffer.
 *
 * This routine, if it exists (non-NULL) is called just before sending
 * the message at the lowest level.  If it returns FALSE, the message is
 * immediately dropped.
 *
 * The callback routine must not modify the message.
 *
 * The difference with a "can-send callback" is that a hook is only invoked
 * on the standalone buffer, the layer perusing this information being
 * able to gather all its context from the message, using its protocol header
 * relevant to the layer.
 *
 * @param mb	the message block
 * @param hook	the pre-transmit hook to install
 */
void
pmsg_set_transmit_hook(pmsg_t *mb, pmsg_hook_t hook)
{
	pmsg_check(mb);

	/*
	 * Installing over an already-present hook or callback is probably a
	 * mistake, so warn loudly before overwriting it.
	 */

	pmsg_no_presend_check(mb, G_STRFUNC);

	mb->m_u.m_hook = hook;
	mb->m_flags |= PMSG_PF_HOOK;		/* Is a hook */
}
/**
 * Set the pre-send checking routine for the buffer.
 *
 * This routine, if it exists (non-NULL) is called just before enqueueing
 * the message for sending.  If it returns FALSE, the message is immediately
 * dropped.
 *
 * The callback routine must not modify the message, as the buffer can
 * be shared among multiple messages, unless its refcount is 1.
 *
 * @param mb	the message block
 * @param check	the pre-send checking callback to install
 */
void
pmsg_set_send_callback(pmsg_t *mb, pmsg_check_t check)
{
	pmsg_check(mb);

	/*
	 * Installing over an already-present hook or callback is probably a
	 * mistake, so warn loudly before overwriting it.
	 */

	pmsg_no_presend_check(mb, G_STRFUNC);

	mb->m_u.m_check = check;
	mb->m_flags &= ~PMSG_PF_HOOK;		/* Is not a hook */
}
/** * Shallow cloning of message, making sure we have a plain clone even if * the original was extended. */ pmsg_t * pmsg_clone_plain(const pmsg_t *mb) { pmsg_t *nmb; pmsg_check(mb); WALLOC(nmb); memcpy(nmb, mb, sizeof *nmb); nmb->magic = PMSG_MAGIC; /* Force plain message */ nmb->m_flags &= ~PMSG_PF_EXT; /* In case original was extended */ nmb->m_refcnt = 1; pdata_addref(nmb->m_data); return nmb; }
/**
 * @param mb	the message block
 *
 * @return amount of data that can be written at the end of the message,
 * which is zero when the buffer is shared and therefore not writable.
 */
int
pmsg_writable_length(const pmsg_t *mb)
{
	int room;

	pmsg_check(mb);

	/*
	 * If buffer is not writable (shared among several readers), it is
	 * forbidden to write any new data to it: report no room at all.
	 */

	room = mb->m_data->d_end - mb->m_wptr;

	return pmsg_is_writable(mb) ? room : 0;
}
/** * Shallow cloning of message, result is referencing the same data. * * This is not the same thing as pmsg_ref() because here a new message block * is created (albeit the data are shared with the original message). */ pmsg_t * pmsg_clone(const pmsg_t *mb) { if (pmsg_is_extended(mb)) { return pmsg_clone_ext(cast_to_pmsg_ext(mb)); } else { pmsg_t *nmb; pmsg_check(mb); WALLOC(nmb); *nmb = *mb; /* Struct copy */ nmb->m_refcnt = 1; pdata_addref(nmb->m_data); return nmb; } }
/**
 * Shift back unread data to the beginning of the buffer.
 *
 * @param mb	the message block to compact (must be writable)
 */
void
pmsg_compact(pmsg_t *mb)
{
	int offset;		/* Distance of read pointer from arena start */

	pmsg_check(mb);
	g_assert(pmsg_is_writable(mb));	/* Not shared, or would corrupt data */
	g_assert(mb->m_rptr <= mb->m_wptr);

	offset = mb->m_rptr - mb->m_data->d_arena;
	g_assert(offset >= 0);

	if (0 != offset) {
		/* Source and target regions may overlap: memmove() required */
		memmove(mb->m_data->d_arena, mb->m_rptr, pmsg_size(mb));
		mb->m_rptr -= offset;
		mb->m_wptr -= offset;
	}
}
/**
 * Read data from the message, returning the amount of bytes transferred.
 *
 * @param mb	the message block to read from
 * @param data	destination buffer
 * @param len	maximum amount of bytes to read (must be >= 0)
 *
 * @return the amount of bytes actually copied into `data'.
 */
int
pmsg_read(pmsg_t *mb, void *data, int len)
{
	int n;

	pmsg_check(mb);
	g_assert_log(len >= 0, "%s(): len=%d", G_STRFUNC, len);

	n = mb->m_wptr - mb->m_rptr;	/* Unread data held in buffer */
	g_assert(n >= 0);		/* Data cannot go beyond end of arena */

	n = MIN(n, len);
	if (n != 0) {
		memcpy(data, mb->m_rptr, n);
		mb->m_rptr += n;	/* Consume the bytes we just read */
	}

	return n;
}
/**
 * Split a buffer at given offset: the data before that offset are left in
 * the original buffer whilst the data starting at the offset (included)
 * are moved to a new buffer.  The original buffer no longer holds the data
 * starting at the offset.
 *
 * @param mb		the message block to split
 * @param offset	the offset (within the unread data) where to split
 *
 * @return new message block containing the data starting at the offset.
 */
pmsg_t *
pmsg_split(pmsg_t *mb, int offset)
{
	int slen;			/* Split length */
	const char *start;

	/*
	 * FIXED: validate the message block before dereferencing it.  The
	 * original asserted `offset < pmsg_size(mb)', which reads through
	 * `mb', before pmsg_check(mb) had verified the block's magic tag.
	 */
	pmsg_check(mb);
	g_assert(offset >= 0);
	g_assert(offset < pmsg_size(mb));

	start = mb->m_rptr + offset;
	slen = mb->m_wptr - start;

	g_assert(slen > 0);

	mb->m_wptr -= slen;				/* Logically removed */

	return pmsg_new(mb->m_prio, start, slen);	/* Copies data */
}
/**
 * Creates an iovec from a singly-linked list of pmsg_t buffers.
 * It should be freed via hfree().
 *
 * NOTE: The iovec will hold no more than MAX_IOV_COUNT items. That means
 *       the iovec might not cover the whole buffered data. This limit
 *       is applied because writev() could fail with EINVAL otherwise
 *       which would simply add more unnecessary complexity.
 *
 * @param slist			the list of message blocks (must be non-NULL)
 * @param iovcnt_ptr	if non-NULL, written with the iovec entry count
 * @param size_ptr		if non-NULL, written with the total bytes covered
 *
 * @return a newly allocated iovec array, or NULL if the list is empty.
 */
iovec_t *
pmsg_slist_to_iovec(slist_t *slist, int *iovcnt_ptr, size_t *size_ptr)
{
	iovec_t *iov;
	size_t held = 0;	/* Running total of bytes covered by the iovec */
	int n;

	g_assert(slist);

	n = slist_length(slist);

	if (n > 0) {
		slist_iter_t *iter;
		int i;

		/* Cap entry count so a later writev() cannot fail with EINVAL */
		n = MIN(n, MAX_IOV_COUNT);
		HALLOC_ARRAY(iov, n);

		iter = slist_iter_before_head(slist);
		for (i = 0; i < n; i++) {
			pmsg_t *mb;
			size_t size;

			mb = slist_iter_next(iter);
			pmsg_check(mb);

			size = pmsg_size(mb);
			g_assert(size > 0);
			held += size;

			/* Entry points directly at the buffer's unread data */
			iovec_set(&iov[i], deconstify_pointer(pmsg_start(mb)), size);
		}
		slist_iter_free(&iter);
	} else {
		iov = NULL;
	}

	if (iovcnt_ptr) {
		*iovcnt_ptr = MAX(0, n);
	}
	if (size_ptr) {
		*size_ptr = held;
	}

	return iov;
}
/**
 * Discard trailing data from the message, returning the amount of
 * bytes discarded.
 *
 * @param mb	the message block
 * @param len	maximum amount of trailing bytes to discard (must be >= 0)
 *
 * @return the amount of bytes actually discarded.
 */
int
pmsg_discard_trailing(pmsg_t *mb, int len)
{
	int n;

	pmsg_check(mb);
	g_assert_log(len >= 0, "%s(): len=%d", G_STRFUNC, len);

	n = mb->m_wptr - mb->m_rptr;	/* Unread data held in buffer */
	g_assert(n >= 0);		/* Data cannot go beyond end of arena */

	/*
	 * The write pointer moves backward to point before the discarded bytes.
	 */

	n = MIN(n, len);
	mb->m_wptr -= n;

	return n;
}
/**
 * Extended cloning of message, adds a free routine callback.
 *
 * @param mb		the message block to clone
 * @param free_cb	the free routine to invoke when the clone is freed
 * @param arg		additional argument passed to the free routine
 *
 * @return the new extended message block, cast to a plain pmsg_t.
 */
pmsg_t *
pmsg_clone_extend(const pmsg_t *mb, pmsg_free_t free_cb, void *arg)
{
	pmsg_ext_t *nmb;

	pmsg_check(mb);

	WALLOC(nmb);
	nmb->pmsg = *mb;			/* Struct copy of the plain part */
	nmb->pmsg.magic = PMSG_EXT_MAGIC;	/* Now an extended block */
	nmb->pmsg.m_flags |= PMSG_PF_EXT;
	nmb->pmsg.m_refcnt = 1;
	nmb->m_free = free_cb;			/* Free routine to invoke */
	nmb->m_arg = arg;			/* Context for the free routine */

	pdata_addref(nmb->pmsg.m_data);		/* Data shared with original */

	return cast_to_pmsg(nmb);
}
/**
 * Write data at the end of the message.
 * The message must be the only reference to the underlying data.
 *
 * @param mb	the message block (must be writable, i.e. not shared)
 * @param data	the bytes to append
 * @param len	amount of bytes to write (must be >= 0)
 *
 * @returns amount of written data, possibly less than `len' if the
 * arena is too small to hold everything.
 */
int
pmsg_write(pmsg_t *mb, const void *data, int len)
{
	int room, n;

	pmsg_check(mb);
	g_assert_log(len >= 0, "%s(): len=%d", G_STRFUNC, len);
	g_assert(pmsg_is_writable(mb));	/* Not shared, or would corrupt data */

	room = mb->m_data->d_end - mb->m_wptr;
	g_assert(room >= 0);		/* Data cannot go beyond end of arena */

	n = MIN(len, room);
	if (n != 0)
		mb->m_wptr = mempcpy(mb->m_wptr, data, n);

	return n;
}
/**
 * Returns the size of the data held in the buffer list.
 *
 * @param slist		the list of message blocks (must be non-NULL)
 *
 * @return the total amount of buffered bytes across all blocks.
 */
size_t
pmsg_slist_size(const slist_t *slist)
{
	slist_iter_t *iter;
	size_t total = 0;

	g_assert(slist != NULL);

	iter = slist_iter_before_head(slist);
	while (slist_iter_has_next(iter)) {
		const pmsg_t *mb = slist_iter_next(iter);

		pmsg_check(mb);
		total += pmsg_size(mb);
	}
	slist_iter_free(&iter);

	return total;
}
/**
 * Shift back unread data to the beginning of the buffer if that can make
 * at least 1/nth of the total arena size available for writing.
 *
 * @param mb	the message block to compact (must be writable)
 * @param n		the fraction denominator (must be strictly positive)
 */
void
pmsg_fractional_compact(pmsg_t *mb, int n)
{
	int offset;		/* Distance of read pointer from arena start */

	g_assert(n > 0);
	pmsg_check(mb);
	g_assert(pmsg_is_writable(mb));	/* Not shared, or would corrupt data */
	g_assert(mb->m_rptr <= mb->m_wptr);

	offset = mb->m_rptr - mb->m_data->d_arena;
	g_assert(offset >= 0);

	if (offset != 0) {
		/* Room we would gain by shifting, plus what is already free */
		unsigned room = pmsg_available(mb) + offset;

		if (room >= pmsg_phys_len(mb) / n) {
			/* Regions may overlap: memmove() is required */
			memmove(mb->m_data->d_arena, mb->m_rptr, pmsg_size(mb));
			mb->m_rptr -= offset;
			mb->m_wptr -= offset;
		}
	}
}
/**
 * Free all message blocks, and decrease ref count on all data buffers.
 *
 * If the message block is referenced by more than one place, simply
 * decrease its reference count.  No freeing occurs and the free routine
 * is therefore not invoked.
 *
 * @param mb	the message block to release
 */
void
pmsg_free(pmsg_t *mb)
{
	pdata_t *db;

	pmsg_check(mb);
	g_assert(mb->m_refcnt != 0);

	/*
	 * FIXED: `mb' was dereferenced to grab `m_data' in the declaration,
	 * i.e. before pmsg_check() had validated the block's magic tag.
	 * Fetch the data pointer only after validation.
	 */
	db = mb->m_data;

	/*
	 * Don't free anything if refcnt != 1.
	 */

	if (mb->m_refcnt > 1U) {
		mb->m_refcnt--;
		return;
	}

	/*
	 * Invoke free routine on extended message block.
	 */

	if (pmsg_is_extended(mb)) {
		pmsg_ext_t *emb = cast_to_pmsg_ext(mb);
		if (emb->m_free)
			(*emb->m_free)(mb, emb->m_arg);
		WFREE0(emb);
	} else {
		WFREE0(mb);
	}

	/*
	 * Unref buffer data only after possible free routine was
	 * invoked, since it may cause a free, preventing access to
	 * memory from within the free routine.
	 */

	pdata_unref(db);
}
/**
 * Fill newly created message block.
 *
 * @param mb	the freshly allocated message block to initialize
 * @param db	the data block backing the message
 * @param prio	message priority
 * @param ext	whether this is an extended message block
 * @param buf	optional initial data to copy into the arena (may be NULL)
 * @param len	length of `buf' when non-NULL
 *
 * @return the message block given as argument.
 */
static pmsg_t *
pmsg_fill(pmsg_t *mb, pdata_t *db, int prio, bool ext, const void *buf, int len)
{
	mb->magic = ext ? PMSG_EXT_MAGIC : PMSG_MAGIC;
	mb->m_data = db;
	mb->m_prio = prio;
	mb->m_flags = ext ? PMSG_PF_EXT : 0;
	mb->m_u.m_check = NULL;
	mb->m_refcnt = 1;

	db->d_refcnt++;		/* Message now references the data block */

	/* Start empty, then append the initial payload if one was given */
	mb->m_rptr = mb->m_wptr = db->d_arena;
	if (buf != NULL) {
		memcpy(db->d_arena, buf, len);
		mb->m_wptr += len;
	}

	g_assert(implies(buf, len == pmsg_size(mb)));

	pmsg_check(mb);

	return mb;
}
/**
 * Enqueue message, which becomes owned by the queue.
 *
 * The data held in `to' is copied, so the structure can be reclaimed
 * immediately by the caller.
 *
 * @param q		the UDP message queue
 * @param mb	the message block to enqueue (ownership transferred)
 * @param to	the destination host (copied, caller keeps ownership)
 */
void
mq_udp_putq(mqueue_t *q, pmsg_t *mb, const gnet_host_t *to)
{
	size_t size;
	char *mbs;
	uint8 function;
	pmsg_t *mbe = NULL;		/* Extended message with destination info */
	bool error = FALSE;

	mq_check_consistency(q);

	dump_tx_udp_packet(to, mb);

again:
	mq_check_consistency(q);
	g_assert(mb);
	g_assert(!pmsg_was_sent(mb));
	g_assert(pmsg_is_unread(mb));
	g_assert(q->ops == &mq_udp_ops);	/* Is a UDP queue */

	/*
	 * Trap messages enqueued whilst in the middle of an mq_clear() operation
	 * by marking them as sent and dropping them.  Idem if queue was
	 * put in "discard" mode.
	 */

	if (q->flags & (MQ_CLEAR | MQ_DISCARD)) {
		pmsg_mark_sent(mb);	/* Let them think it was sent */
		pmsg_free(mb);		/* Drop message */
		return;
	}

	mq_check(q, 0);

	size = pmsg_size(mb);

	if (size == 0) {
		g_carp("%s: called with empty message", G_STRFUNC);
		goto cleanup;
	}

	/*
	 * Protect against recursion: we must not invoke puthere() whilst in
	 * the middle of another putq() or we would corrupt the qlink array:
	 * Messages received during recursion are inserted into the qwait list
	 * and will be stuffed back into the queue when the initial putq() ends.
	 *		--RAM, 2006-12-29
	 */

	if (q->putq_entered > 0) {
		pmsg_t *extended;

		if (debugging(20))
			g_warning("%s: %s recursion detected (%u already pending)",
				G_STRFUNC, mq_info(q), slist_length(q->qwait));

		/*
		 * We insert extended messages into the waiting queue since we need
		 * the destination information as well.
		 */

		extended = mq_udp_attach_metadata(mb, to);
		slist_append(q->qwait, extended);
		return;
	}

	q->putq_entered++;

	mbs = pmsg_start(mb);
	function = gmsg_function(mbs);

	gnet_stats_count_queued(q->node, function, mbs, size);

	/*
	 * If queue is empty, attempt a write immediately.
	 */

	if (q->qhead == NULL) {
		ssize_t written;

		if (pmsg_check(mb, q)) {
			written = tx_sendto(q->tx_drv, mb, to);
		} else {
			gnet_stats_count_flowc(mbs, FALSE);
			node_inc_txdrop(q->node);	/* Dropped during TX */
			written = (ssize_t) -1;
		}

		if ((ssize_t) -1 == written)
			goto cleanup;

		node_add_tx_given(q->node, written);

		if ((size_t) written == size) {
			if (GNET_PROPERTY(mq_udp_debug) > 5)
				g_debug("MQ UDP sent %s",
					gmsg_infostr_full(pmsg_start(mb), pmsg_written_size(mb)));
			goto cleanup;
		}

		/*
		 * Since UDP respects write boundaries, the following can never
		 * happen in practice: either we write the whole datagram, or none
		 * of it.
		 */

		if (written > 0) {
			/*
			 * FIXED: `written' is a signed ssize_t, yet it was formatted
			 * with "%zu" which expects an unsigned size_t -- a format /
			 * argument type mismatch (undefined behaviour).  Cast it to
			 * size_t, which is safe here since written > 0.
			 */
			g_warning(
				"partial UDP write (%zu bytes) to %s for %zu-byte datagram",
				(size_t) written, gnet_host_to_string(to), size);
			goto cleanup;
		}

		/* FALL THROUGH */
	}

	if (GNET_PROPERTY(mq_udp_debug) > 5)
		g_debug("MQ UDP queued %s",
			gmsg_infostr_full(pmsg_start(mb), pmsg_written_size(mb)));

	/*
	 * Attach the destination information as metadata to the message, unless
	 * it is already known (possible only during unfolding of the queued data
	 * during re-entrant calls).
	 *
	 * This is later extracted via pmsg_get_metadata() on the extended
	 * message by the message queue to get the destination information.
	 *
	 * Then enqueue the extended message.
	 */

	if (NULL == mbe)
		mbe = mq_udp_attach_metadata(mb, to);

	q->cops->puthere(q, mbe, size);
	mb = NULL;

	/* FALL THROUGH */

cleanup:

	if (mb) {
		pmsg_free(mb);
		mb = NULL;
	}

	/*
	 * When reaching that point with a zero putq_entered counter, it means
	 * we triggered an early error condition.  Bail out.
	 */

	g_assert(q->putq_entered >= 0);

	if (q->putq_entered == 0)
		error = TRUE;
	else
		q->putq_entered--;

	mq_check(q, 0);

	/*
	 * If we're exiting here with no other putq() registered, then we must
	 * pop an item off the head of the list and iterate again.
	 */

	if (0 == q->putq_entered && !error) {
		mbe = slist_shift(q->qwait);
		if (mbe) {
			struct mq_udp_info *mi = pmsg_get_metadata(mbe);

			mb = mbe;		/* An extended message "is-a" message */
			to = &mi->to;

			if (debugging(20))
				g_warning(
					"%s: %s flushing waiting to %s (%u still pending)",
					G_STRFUNC, mq_info(q), gnet_host_to_string(to),
					slist_length(q->qwait));

			goto again;
		}
	}

	return;
}
/**
 * Service routine for UDP message queue.
 *
 * Invoked when bandwidth becomes available: flushes as many queued
 * datagrams as possible (dropping those failing the pre-send check),
 * updates flow-control state, and disables servicing when the queue
 * becomes empty.
 *
 * @param data		the message queue (an mqueue_t *, passed opaquely)
 */
static void
mq_udp_service(void *data)
{
	mqueue_t *q = data;
	int r;
	GList *l;
	unsigned dropped = 0;	/* Messages dropped by failed pre-send checks */

	mq_check(q, 0);
	g_assert(q->count);	/* Queue is serviced, we must have something */

	/*
	 * Write as much as possible.
	 */

	for (l = q->qtail; l; /* empty */) {
		pmsg_t *mb = l->data;
		int mb_size = pmsg_size(mb);
		struct mq_udp_info *mi = pmsg_get_metadata(mb);

		/* NOTE(review): this pmsg_check() takes (mb, q) — it appears to be
		 * a queue-level pre-send check, distinct from the one-argument
		 * pmsg_check() assertion macro used elsewhere; confirm. */
		if (!pmsg_check(mb, q)) {
			dropped++;
			goto skip;
		}

		r = tx_sendto(q->tx_drv, mb, &mi->to);

		if (r < 0)	/* Error, drop packet and continue */
			goto skip;

		if (r == 0)	/* No more bandwidth */
			break;

		g_assert(r == mb_size);	/* Datagram written fully, or not at all */

		node_add_tx_given(q->node, r);

		if (q->flags & MQ_FLOWC)
			q->flowc_written += r;

		/*
		 * The UDP layer is non-reliable so the message could be dropped
		 * later on by lower layers.
		 *
		 * Therefore, message statistics will be updated by a specific
		 * accounting callback that is known to the datagram layer, such
		 * as node_msg_accounting().
		 */

	skip:
		if (q->qlink)
			q->cops->qlink_remove(q, l);

		/* drop the message from queue, will be freed by mq_rmlink_prev() */
		l = q->cops->rmlink_prev(q, l, mb_size);
	}

	mq_check(q, 0);
	g_assert(q->size >= 0 && q->count >= 0);

	if (dropped)
		node_add_txdrop(q->node, dropped);	/* Dropped during TX */

	/*
	 * Update flow-control information.
	 */

	q->cops->update_flowc(q);

	/*
	 * If queue is empty, disable servicing.
	 */

	if (q->size == 0) {
		g_assert(q->count == 0);
		tx_srv_disable(q->tx_drv);
		node_tx_service(q->node, FALSE);
	}

	mq_check(q, 0);
}