/**
 * Log TX error if unusual.
 *
 * @return TRUE if the error was fatal, FALSE if it's a temporary error and
 * the message needs to be enqueued.
 */
static bool
udp_sched_write_error(const udp_sched_t *us, const gnet_host_t *to,
	const pmsg_t *mb, const char *func)
{
	(void) us;		/* FIXME -- no longer used */

	if (is_temporary_error(errno) || ENOBUFS == errno)
		return FALSE;

	switch (errno) {
	/*
	 * The following are probably due to bugs in the libc, but this is in
	 * the same vein as write() failing with -1 whereas errno == 0!  Be more
	 * robust against bugs in the components we rely on. --RAM, 09/10/2003
	 */
	case EINPROGRESS:		/* Weird, but seen it -- RAM, 07/10/2003 */
		{
			g_warning("%s(to=%s, len=%d) failed with weird errno = %m -- "
				"assuming EAGAIN", func, gnet_host_to_string(to),
				pmsg_size(mb));
		}
		break;
	case EPIPE:
	case ENOSPC:
	case ENOMEM:
	case EINVAL:			/* Seen this with "reserved" IP addresses */
#ifdef EDQUOT
	case EDQUOT:
#endif	/* EDQUOT */
	case EMSGSIZE:			/* Message too large */
	case EFBIG:
	case EIO:
	case EADDRNOTAVAIL:
	case ECONNABORTED:
	case ECONNRESET:
	case ECONNREFUSED:
	case ENETRESET:
	case ENETDOWN:
	case ENETUNREACH:
	case EHOSTDOWN:
	case EHOSTUNREACH:
	case ENOPROTOOPT:
	case EPROTONOSUPPORT:
	case ETIMEDOUT:
	case EACCES:
	case EPERM:
		/*
		 * We don't care about lost packets.
		 */
		g_warning("%s(): UDP write of %d bytes to %s failed: %m",
			func, pmsg_size(mb), gnet_host_to_string(to));
		break;
	default:
		g_critical("%s(): UDP write of %d bytes to %s failed "
			"with unexpected errno %d: %m",
			func, pmsg_size(mb), gnet_host_to_string(to), errno);
		break;
	}

	return TRUE;	/* Fatal error */
}
/**
 * Read data from the pmsg list into supplied buffer.  Copied data is
 * removed from the list.
 *
 * @param slist		the pmsg list
 * @param buf		start of buffer where data must be copied
 * @param len		length of buffer
 *
 * @return amount of copied bytes.
 */
size_t
pmsg_slist_read(slist_t *slist, void *buf, size_t len)
{
	slist_iter_t *iter;
	size_t remain = len;
	void *p;

	g_assert(slist != NULL);

	iter = slist_iter_removable_on_head(slist);
	p = buf;

	while (remain != 0 && slist_iter_has_item(iter)) {
		pmsg_t *mb = slist_iter_current(iter);
		int n;

		n = pmsg_read(mb, p, remain);
		remain -= n;
		p = ptr_add_offset(p, n);
		if (0 == pmsg_size(mb)) {			/* Fully copied message */
			pmsg_free(mb);
			slist_iter_remove(iter);		/* Warning: moves to next */
		} else {
			break;		/* No need to continue on partial copy */
		}
	}

	slist_iter_free(&iter);

	return len - remain;
}
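/*
 * Illustrative usage sketch (not part of the library): drain a buffered
 * pmsg list into a fixed-size stack buffer, chunk by chunk.  The helper
 * name example_drain_slist() and the elided per-chunk processing are
 * hypothetical; only pmsg_slist_read() and pmsg_slist_size() and their
 * semantics come from the routines in this file.
 */
static void
example_drain_slist(slist_t *slist)
{
	char buf[512];

	while (pmsg_slist_size(slist) > 0) {
		size_t n = pmsg_slist_read(slist, buf, sizeof buf);

		/* Process the ``n'' bytes now held in buf[] ... */
		(void) n;
	}
}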
/**
 * Convenience routine: format tree to memory buffer.
 *
 * @param root		tree to dump
 * @param buf		buffer where formatting is done
 * @param len		buffer length
 * @param options	formatting options
 *
 * @return length of generated string, -1 on failure.
 */
size_t
xfmt_tree_to_buffer(const xnode_t *root, void *buf, size_t len, uint32 options)
{
	ostream_t *os;
	pdata_t *pd;
	pmsg_t *mb;
	bool ok;
	size_t written = (size_t) -1;

	g_assert(root != NULL);
	g_assert(buf != NULL);
	g_assert(size_is_non_negative(len));

	pd = pdata_allocb_ext(buf, len, pdata_free_nop, NULL);
	mb = pmsg_alloc(PMSG_P_DATA, pd, 0, 0);
	os = ostream_open_pmsg(mb);

	ok = xfmt_tree(root, os, options);
	ok = ostream_close(os) && ok;

	if (ok)
		written = pmsg_size(mb);

	pmsg_free(mb);

	g_assert((size_t) -1 == written || written <= len);

	return written;
}
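/*
 * Illustrative usage sketch (hypothetical helper, not part of the library):
 * serialize an XML tree into a stack buffer and report the resulting
 * length.  Only xfmt_tree_to_buffer() and its return convention come from
 * the routine above; the buffer size and the zero ``options'' value are
 * assumptions, and the output is not necessarily NUL-terminated.
 */
static void
example_log_tree_size(const xnode_t *root)
{
	char buf[1024];
	size_t len;

	len = xfmt_tree_to_buffer(root, buf, sizeof buf, 0);

	if ((size_t) -1 == len)
		g_warning("%s(): could not format tree (buffer too small?)",
			G_STRFUNC);
	else
		g_debug("%s(): formatted tree is %zu bytes", G_STRFUNC, len);
}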
/**
 * Discard `n_bytes' from the pmsg_t buffer slist and free all completely
 * discarded buffers.
 */
void
pmsg_slist_discard(slist_t *slist, size_t n_bytes)
{
	slist_iter_t *iter;

	g_assert(slist);

	iter = slist_iter_removable_on_head(slist);
	while (n_bytes > 0) {
		pmsg_t *mb;
		size_t size;

		g_assert(slist_iter_has_item(iter));
		mb = slist_iter_current(iter);
		pmsg_check_consistency(mb);

		size = pmsg_size(mb);
		if (size > n_bytes) {
			pmsg_discard(mb, n_bytes);
			break;
		} else {
			pmsg_free(mb);
			n_bytes -= size;
			slist_iter_remove(iter);
		}
	}
	slist_iter_free(&iter);
}
/**
 * Log a dropped message.
 */
void
g2_msg_log_dropped_pmsg(const pmsg_t *mb, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	g2_msg_log_dropped(pmsg_start(mb), pmsg_size(mb), fmt, args);
	va_end(args);
}
/**
 * Send message (eslist iterator callback).
 *
 * @return TRUE if message was sent and freed up.
 */
static bool
udp_tx_desc_send(void *data, void *udata)
{
	struct udp_tx_desc *txd = data;
	udp_sched_t *us = udata;
	unsigned prio;

	udp_sched_check(us);
	udp_tx_desc_check(txd);

	if (us->used_all)
		return FALSE;

	/*
	 * Avoid flushing consecutive queued messages to the same destination,
	 * for regular (non-priority) messages.
	 *
	 * This serves two purposes:
	 *
	 * 1- It makes sure one single host does not capture all the available
	 *    outgoing bandwidth.
	 *
	 * 2- It somewhat delays consecutive packets to a given host, thereby
	 *    reducing flooding and hopefully avoiding saturation of its RX flow.
	 */

	prio = pmsg_prio(txd->mb);

	if (PMSG_P_DATA == prio && hset_contains(us->seen, txd->to)) {
		udp_sched_log(2, "%p: skipping mb=%p (%d bytes) to %s",
			us, txd->mb, pmsg_size(txd->mb), gnet_host_to_string(txd->to));
		return FALSE;
	}

	if (udp_sched_mb_sendto(us, txd->mb, txd->to, txd->tx, txd->cb)) {
		if (PMSG_P_DATA == prio && pmsg_was_sent(txd->mb))
			hset_insert(us->seen, atom_host_get(txd->to));
	} else {
		return FALSE;		/* Unsent, leave it in the queue */
	}

	us->buffered = size_saturate_sub(us->buffered, pmsg_size(txd->mb));
	udp_tx_desc_flag_release(txd, us);

	return TRUE;
}
/**
 * Delayed RPC start.
 */
static void
soap_rpc_launch(cqueue_t *unused_cq, gpointer obj)
{
	soap_rpc_t *sr = obj;
	http_post_data_t post;

	(void) unused_cq;
	soap_rpc_check(sr);

	sr->delay_ev = NULL;

	if (GNET_PROPERTY(soap_debug) > 4) {
		g_debug("SOAP \"%s\" at \"%s\": launching (%s)",
			sr->action, sr->url, sr->retry ? "retry" : "initial");
	}

	sr->reply_len = 0;		/* In case we retry, clear out older data */

	/*
	 * Launch the asynchronous POST request.
	 */

	post.content_type = SOAP_CONTENT_TYPE;
	post.data = pmsg_start(sr->mb);
	post.datalen = pmsg_size(sr->mb);
	post.data_free = NULL;
	post.data_free_arg = NULL;

	sr->ha = http_async_post(sr->url, &post, soap_header_ind,
		soap_data_ind, soap_error_ind);

	/*
	 * If we cannot create the HTTP request, it may be that the URL is wrong,
	 * or that no connection can be established to the host.  Hence it's a
	 * contacting error, not an I/O error at this stage.
	 */

	if (sr->ha == NULL) {
		if (GNET_PROPERTY(soap_debug)) {
			g_warning("SOAP cannot contact \"%s\": %s",
				sr->url, http_async_strerror(http_async_errno));
		}
		soap_error(sr, SOAP_E_CONTACT);
		return;
	}

	/*
	 * Customize the HTTP layer.
	 */

	http_async_set_opaque(sr->ha, sr, NULL);
	http_async_set_op_post_request(sr->ha, soap_build_request);
	http_async_set_op_headsent(sr->ha, soap_sent_head);
	http_async_set_op_datasent(sr->ha, soap_sent_data);
	http_async_set_op_gotreply(sr->ha, soap_got_reply);
	http_async_option_ctl(sr->ha, HTTP_O_READ_REPLY, HTTP_CTL_ADD);
}
/*
 * Ensure we do not have a check/hook installed already, otherwise loudly
 * warn once, as this is probably not intended!
 */
static void
pmsg_no_presend_check(const pmsg_t * const mb, const char *caller)
{
	/*
	 * Because m_check and m_hook are in the same union and have the same
	 * memory size, it is sufficient to check for one field being non-NULL.
	 */

	if G_LIKELY(NULL == mb->m_u.m_check)
		return;

	s_carp_once("%s(): mb=%p (%d byte%s, prio=%u, refcnt=%u, flags=0x%x)"
		" already has %s %s()",
		caller, mb, pmsg_size(mb), plural(pmsg_size(mb)),
		mb->m_prio, mb->m_refcnt, mb->m_flags,
		(mb->m_flags & PMSG_PF_HOOK) ? "transmit hook" : "can-send callback",
		stacktrace_function_name(mb->m_u.m_check));
}
/**
 * Flush current /QH2.
 *
 * Depending on how the QH2 builder is configured, this either sends the
 * message to the target node or invokes a processing callback.
 */
static void
g2_build_qh2_flush(struct g2_qh2_builder *ctx)
{
	pmsg_t *mb;

	g_assert(ctx != NULL);
	g_assert(ctx->t != NULL);
	g_assert((ctx->n != NULL) ^ (ctx->cb != NULL));

	/*
	 * Restore the order of children in the root packet to be the order we
	 * used when we added the nodes, since we prepend new children.
	 */

	g2_tree_reverse_children(ctx->t);

	/*
	 * If sending over UDP, ask for reliable delivery of the query hit.
	 * To be able to monitor the fate of the message, we associate a free
	 * routine to it.
	 */

	if (ctx->to_udp) {
		struct g2_qh2_pmsg_info *pmi;

		WALLOC0(pmi);
		pmi->magic = G2_QH2_PMI_MAGIC;
		pmi->hub_id = nid_ref(NODE_ID(ctx->hub));

		mb = g2_build_pmsg_extended(ctx->t, g2_qh2_pmsg_free, pmi);
		pmsg_mark_reliable(mb);
	} else {
		mb = g2_build_pmsg(ctx->t);
	}

	if (GNET_PROPERTY(g2_debug) > 3) {
		g_debug("%s(): flushing the following hit for "
			"Q2 #%s to %s%s (%d bytes):",
			G_STRFUNC, guid_hex_str(ctx->muid),
			NULL == ctx->n ?
				stacktrace_function_name(ctx->cb) : node_infostr(ctx->n),
			NULL == ctx->n ? "()" : "", pmsg_size(mb));
		g2_tfmt_tree_dump(ctx->t, stderr, G2FMT_O_PAYLOAD | G2FMT_O_PAYLEN);
	}

	if (ctx->n != NULL)
		g2_node_send(ctx->n, mb);
	else
		(*ctx->cb)(mb, ctx->arg);

	ctx->messages++;
	ctx->current_size = 0;
	g2_tree_free_null(&ctx->t);
}
/**
 * Drop message (eslist iterator).
 *
 * @return TRUE to force message to be removed from list.
 */
static bool
udp_tx_desc_drop(void *data, void *udata)
{
	struct udp_tx_desc *txd = data;
	udp_sched_t *us = udata;

	udp_sched_check(us);
	udp_tx_desc_check(txd);
	g_assert(1 == pmsg_refcnt(txd->mb));

	us->buffered = size_saturate_sub(us->buffered, pmsg_size(txd->mb));
	udp_tx_desc_flag_release(txd, us);

	return TRUE;
}
/**
 * Dump relayed or locally-emitted packet.
 * If ``from'' is NULL, packet was emitted locally.
 */
static void
dump_packet_from_to(struct dump *dump,
	const struct gnutella_node *from, const struct gnutella_node *to,
	const pmsg_t *mb)
{
	struct dump_header dh_to;
	struct dump_header dh_from;

	g_assert(to != NULL);
	g_assert(mb != NULL);
	g_assert(pmsg_read_base(mb) == pmsg_start(mb));

	if (!dump_initialize(dump))
		return;

	/*
	 * This is only for Gnutella packets, leave DHT messages out.
	 */

	if (GTA_MSG_DHT == gnutella_header_get_function(pmsg_start(mb)))
		return;

	if (!ipset_contains_addr(&dump_tx_to_addrs, to->addr, TRUE))
		return;

	if (NULL == from) {
		struct gnutella_node local;

		local.peermode = NODE_IS_UDP(to) ? NODE_P_UDP : NODE_P_NORMAL;
		local.addr = listen_addr();
		local.port = GNET_PROPERTY(listen_port);

		if (!ipset_contains_addr(&dump_tx_from_addrs, local.addr, TRUE))
			return;

		dump_header_set(&dh_from, &local);
	} else {
		if (!ipset_contains_addr(&dump_tx_from_addrs, from->addr, TRUE))
			return;

		dump_header_set(&dh_from, from);
	}

	dump_header_set(&dh_to, to);
	dh_to.data[0] |= DH_F_TO;
	if (pmsg_prio(mb) != PMSG_P_DATA)
		dh_to.data[0] |= DH_F_CTRL;

	dump_append(dump, dh_to.data, sizeof dh_to.data);
	dump_append(dump, dh_from.data, sizeof dh_from.data);
	dump_append(dump, pmsg_read_base(mb), pmsg_size(mb));
	dump_flush(dump);
}
/**
 * Create new message holding serialized tree.
 *
 * @param t			the tree to serialize
 * @param prio		priority of the message
 * @param freecb	if non-NULL, the free routine to attach to message
 * @param arg		additional argument for the free routine
 *
 * @return a message containing the serialized tree.
 */
static pmsg_t *
g2_build_pmsg_prio(const g2_tree_t *t, int prio, pmsg_free_t freecb, void *arg)
{
	size_t len;
	pmsg_t *mb;

	len = g2_frame_serialize(t, NULL, 0);

	if (NULL == freecb)
		mb = pmsg_new(prio, NULL, len);
	else
		mb = pmsg_new_extend(prio, NULL, len, freecb, arg);

	g2_frame_serialize(t, pmsg_start(mb), len);
	pmsg_seek(mb, len);

	g_assert(UNSIGNED(pmsg_size(mb)) == len);

	return mb;
}
/**
 * Split a buffer at given offset: the data before that offset are left in
 * the original buffer whilst the data starting at the offset (included)
 * are moved to a new buffer.  The original buffer no longer holds the data
 * starting at the offset.
 *
 * @return new message block containing the data starting at the offset.
 */
pmsg_t *
pmsg_split(pmsg_t *mb, int offset)
{
	int slen;			/* Split length */
	const char *start;

	g_assert(offset >= 0);
	g_assert(offset < pmsg_size(mb));
	pmsg_check_consistency(mb);

	start = mb->m_rptr + offset;
	slen = mb->m_wptr - start;

	g_assert(slen > 0);

	mb->m_wptr -= slen;							/* Logically removed */

	return pmsg_new(mb->m_prio, start, slen);	/* Copies data */
}
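/*
 * Illustrative sketch (hypothetical helper, not part of the library):
 * keep the first ``header_len'' bytes in the original message block and
 * obtain the remaining payload as a separate block, e.g. to route header
 * and payload through different processing paths.  Only pmsg_split() and
 * its preconditions come from the routine above.
 */
static pmsg_t *
example_detach_payload(pmsg_t *mb, int header_len)
{
	g_assert(header_len > 0);
	g_assert(header_len < pmsg_size(mb));	/* Required by pmsg_split() */

	/* After this call, ``mb'' holds only the header bytes */
	return pmsg_split(mb, header_len);
}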
/**
 * Shift back unread data to the beginning of the buffer.
 */
void
pmsg_compact(pmsg_t *mb)
{
	int shifting;

	pmsg_check_consistency(mb);
	g_assert(pmsg_is_writable(mb));	/* Not shared, or would corrupt data */
	g_assert(mb->m_rptr <= mb->m_wptr);

	shifting = mb->m_rptr - mb->m_data->d_arena;
	g_assert(shifting >= 0);

	if (shifting != 0) {
		memmove(mb->m_data->d_arena, mb->m_rptr, pmsg_size(mb));
		mb->m_rptr -= shifting;
		mb->m_wptr -= shifting;
	}
}
/**
 * Creates an iovec from a singly-linked list of pmsg_t buffers.
 * It should be freed via hfree().
 *
 * NOTE: The iovec will hold no more than MAX_IOV_COUNT items.  That means
 *       the iovec might not cover the whole buffered data.  This limit
 *       is applied because writev() could fail with EINVAL otherwise
 *       which would simply add more unnecessary complexity.
 */
iovec_t *
pmsg_slist_to_iovec(slist_t *slist, int *iovcnt_ptr, size_t *size_ptr)
{
	iovec_t *iov;
	size_t held = 0;
	int n;

	g_assert(slist);

	n = slist_length(slist);

	if (n > 0) {
		slist_iter_t *iter;
		int i;

		n = MIN(n, MAX_IOV_COUNT);
		iov = halloc(n * sizeof *iov);

		iter = slist_iter_before_head(slist);
		for (i = 0; i < n; i++) {
			pmsg_t *mb;
			size_t size;

			mb = slist_iter_next(iter);
			pmsg_check_consistency(mb);

			size = pmsg_size(mb);
			g_assert(size > 0);
			held += size;

			iovec_set(&iov[i], deconstify_pointer(pmsg_read_base(mb)), size);
		}
		slist_iter_free(&iter);
	} else {
		iov = NULL;
	}

	if (iovcnt_ptr) {
		*iovcnt_ptr = MAX(0, n);
	}
	if (size_ptr) {
		*size_ptr = held;
	}
	return iov;
}
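/*
 * Illustrative sketch (hypothetical helper, not part of the library):
 * gather-write the buffered data of a pmsg list to a file descriptor,
 * then discard whatever was actually written.  This assumes iovec_t is
 * layout-compatible with the struct iovec expected by writev() (declared
 * in <sys/uio.h>), which is how the routine above is meant to be used;
 * the helper name and the minimal error handling are illustrative only.
 */
static ssize_t
example_slist_writev(int fd, slist_t *slist)
{
	iovec_t *iov;
	int iovcnt;
	size_t held;
	ssize_t written;

	iov = pmsg_slist_to_iovec(slist, &iovcnt, &held);
	if (NULL == iov)
		return 0;				/* Nothing buffered */

	written = writev(fd, (const struct iovec *) iov, iovcnt);
	hfree(iov);					/* The iovec must be freed via hfree() */

	if (written > 0)
		pmsg_slist_discard(slist, written);	/* Remove sent data from list */

	return written;
}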
/**
 * Read data from the message buffer we just received.
 *
 * @return TRUE if there was an error.
 */
static bool
thex_download_data_read(struct thex_download *ctx, pmsg_t *mb)
{
	size_t size;

	g_assert(ctx);
	g_assert((NULL != ctx->data) ^ (0 == ctx->data_size));
	g_assert(ctx->pos <= ctx->data_size);

	while ((size = pmsg_size(mb)) > 0) {
		if (ctx->pos + size > ctx->max_size)
			return TRUE;

		if (size > ctx->data_size - ctx->pos) {
			ctx->data_size += MAX(size, ctx->data_size);
			ctx->data = hrealloc(ctx->data, ctx->data_size);
		}
		ctx->pos += pmsg_read(mb, &ctx->data[ctx->pos], size);
	}
	return FALSE;
}
/**
 * Returns the size of the data held in the buffer list.
 */
size_t
pmsg_slist_size(const slist_t *slist)
{
	slist_iter_t *iter;
	size_t size = 0;

	g_assert(slist != NULL);

	iter = slist_iter_before_head(slist);

	while (slist_iter_has_next(iter)) {
		const pmsg_t *mb;

		mb = slist_iter_next(iter);
		pmsg_check_consistency(mb);

		size += pmsg_size(mb);
	}

	slist_iter_free(&iter);

	return size;
}
/**
 * Remove expired messages (eslist iterator).
 *
 * @return TRUE if message has expired and was freed up.
 */
static bool
udp_tx_desc_expired(void *data, void *udata)
{
	struct udp_tx_desc *txd = data;
	udp_sched_t *us = udata;

	udp_sched_check(us);
	udp_tx_desc_check(txd);

	if (delta_time(tm_time(), txd->expire) > 0) {
		udp_sched_log(1, "%p: expiring mb=%p (%d bytes) prio=%u",
			us, txd->mb, pmsg_size(txd->mb), pmsg_prio(txd->mb));

		if (txd->cb->add_tx_dropped != NULL)
			(*txd->cb->add_tx_dropped)(txd->tx->owner, 1);	/* Dropped in TX */

		return udp_tx_desc_drop(data, udata);	/* Returns TRUE */
	}

	return FALSE;
}
/**
 * Shift back unread data to the beginning of the buffer if that can make
 * at least 1/nth of the total arena size available for writing.
 */
void
pmsg_fractional_compact(pmsg_t *mb, int n)
{
	int shifting;

	g_assert(n > 0);
	pmsg_check_consistency(mb);
	g_assert(pmsg_is_writable(mb));	/* Not shared, or would corrupt data */
	g_assert(mb->m_rptr <= mb->m_wptr);

	shifting = mb->m_rptr - mb->m_data->d_arena;
	g_assert(shifting >= 0);

	if (shifting != 0) {
		unsigned available = pmsg_available(mb) + shifting;

		if (available >= pmsg_phys_len(mb) / n) {
			memmove(mb->m_data->d_arena, mb->m_rptr, pmsg_size(mb));
			mb->m_rptr -= shifting;
			mb->m_wptr -= shifting;
		}
	}
}
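/*
 * Worked example (illustrative figures only, not part of the library):
 * with a 1024-byte arena from which 300 bytes have already been read
 * (shifting == 300) and 100 bytes still free at the tail
 * (pmsg_available() == 100), compacting would make 300 + 100 = 400 bytes
 * available for writing.  Hence pmsg_fractional_compact(mb, 4) moves the
 * data, since 400 >= 1024 / 4, whereas pmsg_fractional_compact(mb, 2)
 * leaves it in place, since 400 < 1024 / 2.
 */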
/**
 * Main RPC iteration loop.
 */
static void
natpmp_rpc_iterate(cqueue_t *unused_cq, void *obj)
{
	struct natpmp_rpc *rd = obj;
	int ret;

	natpmp_rpc_check(rd);
	(void) unused_cq;

	if (rd->count++ > rd->retries)
		goto finished;

	ret = urpc_send("NAT-PMP", rd->gateway, NATPMP_SRV_PORT,
		pmsg_start(rd->mb), pmsg_size(rd->mb), rd->timeout,
		natpmp_rpc_reply, rd);

	if (0 != ret) {
		if (GNET_PROPERTY(natpmp_debug)) {
			g_warning("NATPMP could not send \"%s\" #%u to %s: %m",
				natpmp_op_to_string(rd->op), rd->count,
				host_addr_port_to_string(rd->gateway, NATPMP_SRV_PORT));
		}
		goto finished;
	} else {
		if (GNET_PROPERTY(natpmp_debug) > 4) {
			g_debug("NATPMP sent \"%s\" #%u to %s, with %u ms timeout",
				natpmp_op_to_string(rd->op), rd->count,
				host_addr_port_to_string(rd->gateway, NATPMP_SRV_PORT),
				rd->timeout);
		}
	}

	rd->timeout = uint_saturate_mult(rd->timeout, 2);	/* For next time */

	return;

finished:
	natpmp_rpc_error(rd);
}
/**
 * Fill newly created message block.
 *
 * @return the message block given as argument.
 */
static pmsg_t *
pmsg_fill(pmsg_t *mb, pdata_t *db, int prio, bool ext, const void *buf, int len)
{
	mb->magic = ext ? PMSG_EXT_MAGIC : PMSG_MAGIC;
	mb->m_data = db;
	mb->m_prio = prio;
	mb->m_flags = ext ? PMSG_PF_EXT : 0;
	mb->m_u.m_check = NULL;
	mb->m_refcnt = 1;
	db->d_refcnt++;

	if (buf) {
		mb->m_rptr = db->d_arena;
		mb->m_wptr = db->d_arena + len;
		memcpy(db->d_arena, buf, len);
	} else
		mb->m_rptr = mb->m_wptr = db->d_arena;

	g_assert(implies(buf, len == pmsg_size(mb)));

	pmsg_check_consistency(mb);

	return mb;
}
/**
 * Enqueue message, which becomes owned by the queue.
 *
 * The data held in `to' is copied, so the structure can be reclaimed
 * immediately by the caller.
 */
void
mq_udp_putq(mqueue_t *q, pmsg_t *mb, const gnet_host_t *to)
{
	size_t size;
	char *mbs;
	uint8 function;
	pmsg_t *mbe = NULL;		/* Extended message with destination info */
	bool error = FALSE;

	mq_check_consistency(q);

	dump_tx_udp_packet(to, mb);

again:
	mq_check_consistency(q);
	g_assert(mb);
	g_assert(!pmsg_was_sent(mb));
	g_assert(pmsg_is_unread(mb));
	g_assert(q->ops == &mq_udp_ops);	/* Is a UDP queue */

	/*
	 * Trap messages enqueued whilst in the middle of an mq_clear() operation
	 * by marking them as sent and dropping them.  Idem if queue was
	 * put in "discard" mode.
	 */

	if (q->flags & (MQ_CLEAR | MQ_DISCARD)) {
		pmsg_mark_sent(mb);	/* Let them think it was sent */
		pmsg_free(mb);		/* Drop message */
		return;
	}

	mq_check(q, 0);

	size = pmsg_size(mb);

	if (size == 0) {
		g_carp("%s: called with empty message", G_STRFUNC);
		goto cleanup;
	}

	/*
	 * Protect against recursion: we must not invoke puthere() whilst in
	 * the middle of another putq() or we would corrupt the qlink array:
	 * Messages received during recursion are inserted into the qwait list
	 * and will be stuffed back into the queue when the initial putq() ends.
	 *		--RAM, 2006-12-29
	 */

	if (q->putq_entered > 0) {
		pmsg_t *extended;

		if (debugging(20))
			g_warning("%s: %s recursion detected (%u already pending)",
				G_STRFUNC, mq_info(q), slist_length(q->qwait));

		/*
		 * We insert extended messages into the waiting queue since we need
		 * the destination information as well.
		 */

		extended = mq_udp_attach_metadata(mb, to);
		slist_append(q->qwait, extended);
		return;
	}

	q->putq_entered++;

	mbs = pmsg_start(mb);
	function = gmsg_function(mbs);

	gnet_stats_count_queued(q->node, function, mbs, size);

	/*
	 * If queue is empty, attempt a write immediately.
	 */

	if (q->qhead == NULL) {
		ssize_t written;

		if (pmsg_check(mb, q)) {
			written = tx_sendto(q->tx_drv, mb, to);
		} else {
			gnet_stats_count_flowc(mbs, FALSE);
			node_inc_txdrop(q->node);		/* Dropped during TX */
			written = (ssize_t) -1;
		}

		if ((ssize_t) -1 == written)
			goto cleanup;

		node_add_tx_given(q->node, written);

		if ((size_t) written == size) {
			if (GNET_PROPERTY(mq_udp_debug) > 5)
				g_debug("MQ UDP sent %s",
					gmsg_infostr_full(pmsg_start(mb), pmsg_written_size(mb)));
			goto cleanup;
		}

		/*
		 * Since UDP respects write boundaries, the following can never
		 * happen in practice: either we write the whole datagram, or none
		 * of it.
		 */

		if (written > 0) {
			g_warning(
				"partial UDP write (%zu bytes) to %s for %zu-byte datagram",
				written, gnet_host_to_string(to), size);
			goto cleanup;
		}

		/* FALL THROUGH */
	}

	if (GNET_PROPERTY(mq_udp_debug) > 5)
		g_debug("MQ UDP queued %s",
			gmsg_infostr_full(pmsg_start(mb), pmsg_written_size(mb)));

	/*
	 * Attach the destination information as metadata to the message, unless
	 * it is already known (possible only during unfolding of the queued data
	 * during re-entrant calls).
	 *
	 * This is later extracted via pmsg_get_metadata() on the extended
	 * message by the message queue to get the destination information.
	 *
	 * Then enqueue the extended message.
	 */

	if (NULL == mbe)
		mbe = mq_udp_attach_metadata(mb, to);

	q->cops->puthere(q, mbe, size);
	mb = NULL;

	/* FALL THROUGH */

cleanup:

	if (mb) {
		pmsg_free(mb);
		mb = NULL;
	}

	/*
	 * When reaching that point with a zero putq_entered counter, it means
	 * we triggered an early error condition.  Bail out.
	 */

	g_assert(q->putq_entered >= 0);

	if (q->putq_entered == 0)
		error = TRUE;
	else
		q->putq_entered--;

	mq_check(q, 0);

	/*
	 * If we're exiting here with no other putq() registered, then we must
	 * pop an item off the head of the list and iterate again.
	 */

	if (0 == q->putq_entered && !error) {
		mbe = slist_shift(q->qwait);
		if (mbe) {
			struct mq_udp_info *mi = pmsg_get_metadata(mbe);

			mb = mbe;				/* An extended message "is-a" message */
			to = &mi->to;

			if (debugging(20))
				g_warning(
					"%s: %s flushing waiting to %s (%u still pending)",
					G_STRFUNC, mq_info(q), gnet_host_to_string(to),
					slist_length(q->qwait));

			goto again;
		}
	}

	return;
}
/**
 * Writes the browse host data of the context ``ctx'' to the buffer
 * ``dest''.  This must be called multiple times to retrieve the complete
 * data until zero is returned i.e., the end of file is reached.
 *
 * This routine deals with query hit data generation.
 *
 * @param ctx	an initialized browse host context.
 * @param dest	the destination buffer.
 * @param size	the amount of bytes ``dest'' can hold.
 *
 * @return -1 on failure, zero at the end-of-file condition or if size
 *         was zero.  On success, the amount of bytes copied to ``dest''
 *         is returned.
 */
static ssize_t
browse_host_read_qhits(struct special_upload *ctx,
	void *const dest, size_t size)
{
	struct browse_host_upload *bh = cast_to_browse_host_upload(ctx);
	size_t remain = size;
	char *p = dest;

	/*
	 * If we have no hit pending that we can send, build some more.
	 */

	if (NULL == bh->hits) {
		GSList *files = NULL;
		int i;

		for (i = 0; i < BH_SCAN_AHEAD; i++) {
			const shared_file_t *sf;

			do {
				/* Skip holes in indices */
				bh->file_index++;
				sf = shared_file_sorted(bh->file_index);
			} while (NULL == sf && bh->file_index <= shared_files_scanned());

			if (SHARE_REBUILDING == sf || NULL == sf)
				break;

			files = g_slist_prepend(files, deconstify_pointer(sf));
		}

		if (NULL == files)		/* Did not find any more file to include */
			return 0;			/* We're done */

		/*
		 * Now build the query hits containing the files we selected.
		 */

		files = g_slist_reverse(files);			/* Preserve order */

		qhit_build_results(files, i, BH_MAX_QHIT_SIZE,
			browse_host_record_hit, bh, &blank_guid, FALSE, &zero_array);

		g_assert(bh->hits != NULL);		/* At least 1 hit enqueued */

		bh->hits = g_slist_reverse(bh->hits);	/* Preserve order */
		gm_slist_free_null(&files);
	}

	/*
	 * Read each query hit in turn.
	 */

	while (remain > 0 && NULL != bh->hits) {
		pmsg_t *mb = bh->hits->data;
		int r;

		r = pmsg_read(mb, p, remain);
		p += r;
		remain -= r;

		if (r == 0 || 0 == pmsg_size(mb)) {
			bh->hits = g_slist_remove(bh->hits, mb);
			pmsg_free(mb);
		}
	}

	return size - remain;
}
/**
 * Send datagram.
 *
 * @param us	the UDP scheduler responsible for sending the datagram
 * @param mb	the message to send
 * @param to	the IP:port destination of the message
 * @param tx	the TX stack sending the message
 * @param cb	callback actions on the datagram
 *
 * @return 0 if message was unsent, length of message if sent, queued or
 * dropped.
 */
size_t
udp_sched_send(udp_sched_t *us, pmsg_t *mb, const gnet_host_t *to,
	const txdrv_t *tx, const struct tx_dgram_cb *cb)
{
	int len;
	struct udp_tx_desc *txd;
	uint prio;

	len = pmsg_size(mb);

	/*
	 * Try to send immediately if we have bandwidth.
	 */

	if (!us->used_all && udp_sched_mb_sendto(us, mb, to, tx, cb))
		return len;		/* Message "sent" */

	/*
	 * If we already have enough data enqueued, flow-control the upper
	 * layer by acting as if we do not have enough bandwidth.
	 *
	 * However, we now always accept traffic sent with the highest priority
	 * since it is important to send those as soon as possible, i.e. ahead
	 * of any other pending data we would otherwise flush locally before
	 * servicing upper queues.
	 *		--RAM, 2012-10-12
	 */

	prio = pmsg_prio(mb);

	if (
		PMSG_P_HIGHEST != prio &&
		us->buffered >= UDP_SCHED_FACTOR * udp_sched_bw_per_second(us)
	) {
		udp_sched_log(1, "%p: flow-controlled", us);
		us->flow_controlled = TRUE;
		return 0;		/* Flow control upper layers */
	}

	/*
	 * Message is going to be enqueued.
	 *
	 * However, from the upper layers (the message queue in particular),
	 * the message is considered as being sent, and therefore these layers
	 * are going to call pmsg_free() on the message.
	 *
	 * We do not want to pmsg_clone() the message because that would render
	 * uses of pmsg_was_sent() useless in free routines, and upper layers
	 * would think the message was dropped if they installed a free routine
	 * on the message.
	 *
	 * Hence we use pmsg_ref().
	 */

	txd = palloc(us->txpool);
	txd->magic = UDP_TX_DESC_MAGIC;
	txd->mb = pmsg_ref(mb);			/* Take ownership of message */
	txd->to = atom_host_get(to);
	txd->tx = tx;
	txd->cb = cb;
	txd->expire = time_advance(tm_time(), UDP_SCHED_EXPIRE);

	udp_sched_log(4, "%p: queuing mb=%p (%d bytes) prio=%u",
		us, mb, pmsg_size(mb), pmsg_prio(mb));

	/*
	 * The queue used is a LIFO to avoid buffering delaying all the messages.
	 * Since UDP traffic is unordered, it's better to send the most recent
	 * datagrams first, to reduce the perceived average latency.
	 */

	g_assert(prio < N_ITEMS(us->lifo));

	eslist_prepend(&us->lifo[prio], txd);
	us->buffered = size_saturate_add(us->buffered, len);

	return len;		/* Message queued, but tell upper layers it's sent */
}
/**
 * Write back cached value to disk.
 * @return TRUE on success
 */
static bool
write_back(dbmw_t *dw, const void *key, struct cached *value)
{
	dbmap_datum_t dval;
	bool ok;

	g_assert(value->dirty);

	if (value->absent) {
		/* Key not present, value is null item */
		dval.data = NULL;
		dval.len = 0;
	} else {
		/*
		 * Serialize value into our reused message block if a
		 * serialization routine was provided.
		 */

		if (dw->pack) {
			pmsg_reset(dw->mb);
			(*dw->pack)(dw->mb, value->data);

			dval.data = pmsg_start(dw->mb);
			dval.len = pmsg_size(dw->mb);

			/*
			 * We allocated the message block one byte larger than the
			 * maximum size, in order to detect unexpected serialization
			 * overflows.
			 */

			if (dval.len > dw->value_data_size) {
				/* Don't s_carp() as this is asynchronous wrt data change */
				s_critical("DBMW \"%s\" serialization overflow in %s() "
					"whilst flushing dirty entry",
					dw->name, stacktrace_function_name(dw->pack));
				return FALSE;
			}
		} else {
			dval.data = value->data;
			dval.len = value->len;
		}
	}

	/*
	 * If cached entry is absent, delete the key.
	 * Otherwise store the serialized value.
	 *
	 * Dirty bit is cleared on success.
	 */

	if (
		dbg_ds_debugging(dw->dbg, 1,
			DBG_DSF_CACHING | DBG_DSF_UPDATE | DBG_DSF_INSERT | DBG_DSF_DELETE)
	) {
		dbg_ds_log(dw->dbg, dw, "%s: %s dirty value (%zu byte%s) key=%s",
			G_STRFUNC, value->absent ? "deleting" : "flushing",
			dval.len, plural(dval.len),
			dbg_ds_keystr(dw->dbg, key, (size_t) -1));
	}

	dw->ioerr = FALSE;
	ok = value->absent ?
		dbmap_remove(dw->dm, key) : dbmap_insert(dw->dm, key, dval);

	if (ok) {
		value->dirty = FALSE;
	} else if (dbmap_has_ioerr(dw->dm)) {
		dw->ioerr = TRUE;
		dw->error = errno;
		s_warning("DBMW \"%s\" I/O error whilst %s dirty entry: %s",
			dw->name, value->absent ? "deleting" : "flushing",
			dbmap_strerror(dw->dm));
	} else {
		s_warning("DBMW \"%s\" error whilst %s dirty entry: %s",
			dw->name, value->absent ? "deleting" : "flushing",
			dbmap_strerror(dw->dm));
	}

	return ok;
}
/**
 * Send message block to IP:port.
 *
 * @param us	the UDP scheduler
 * @param mb	the message to send
 * @param to	the IP:port destination of the message
 * @param tx	the TX stack sending the message
 * @param cb	callback actions on the datagram
 *
 * @return TRUE if message was sent or dropped, FALSE if there is no more
 * bandwidth to send anything.
 */
static bool
udp_sched_mb_sendto(udp_sched_t *us, pmsg_t *mb, const gnet_host_t *to,
	const txdrv_t *tx, const struct tx_dgram_cb *cb)
{
	ssize_t r;
	int len = pmsg_size(mb);
	bio_source_t *bio = NULL;

	if (0 == gnet_host_get_port(to))
		return TRUE;

	/*
	 * Check whether message still needs to be sent.
	 */

	if (!pmsg_hook_check(mb))
		return TRUE;			/* Dropped */

	/*
	 * Select the proper I/O source depending on the network address type.
	 */

	switch (gnet_host_get_net(to)) {
	case NET_TYPE_IPV4:
		bio = us->bio[UDP_SCHED_IPv4];
		break;
	case NET_TYPE_IPV6:
		bio = us->bio[UDP_SCHED_IPv6];
		break;
	case NET_TYPE_NONE:
	case NET_TYPE_LOCAL:
		g_assert_not_reached();
	}

	/*
	 * If there is no I/O source, then the socket to send that type of traffic
	 * was cleared, hence we simply need to discard the message.
	 */

	if (NULL == bio) {
		udp_sched_log(4, "%p: discarding mb=%p (%d bytes) to %s",
			us, mb, pmsg_size(mb), gnet_host_to_string(to));
		return udp_tx_drop(tx, cb);		/* TRUE, for "sent" */
	}

	/*
	 * OK, proceed if we have bandwidth.
	 */

	r = bio_sendto(bio, to, pmsg_start(mb), len);

	if (r < 0) {		/* Error, or no bandwidth */
		if (udp_sched_write_error(us, to, mb, G_STRFUNC)) {
			udp_sched_log(4, "%p: dropped mb=%p (%d bytes): %m",
				us, mb, pmsg_size(mb));
			return udp_tx_drop(tx, cb);	/* TRUE, for "sent" */
		}
		udp_sched_log(3, "%p: no bandwidth for mb=%p (%d bytes)",
			us, mb, pmsg_size(mb));
		us->used_all = TRUE;
		return FALSE;
	}

	if (r != len) {
		g_warning("%s: partial UDP write (%zd bytes) to %s "
			"for %d-byte datagram",
			G_STRFUNC, r, gnet_host_to_string(to), len);
	} else {
		udp_sched_log(5, "%p: sent mb=%p (%d bytes) prio=%u",
			us, mb, pmsg_size(mb), pmsg_prio(mb));
		pmsg_mark_sent(mb);
		if (cb->msg_account != NULL)
			(*cb->msg_account)(tx->owner, mb);

		inet_udp_record_sent(gnet_host_get_addr(to));
	}

	return TRUE;		/* Message sent */
}
/**
 * Service routine for UDP message queue.
 */
static void
mq_udp_service(void *data)
{
	mqueue_t *q = data;
	int r;
	GList *l;
	unsigned dropped = 0;

	mq_check(q, 0);
	g_assert(q->count);		/* Queue is serviced, we must have something */

	/*
	 * Write as much as possible.
	 */

	for (l = q->qtail; l; /* empty */) {
		pmsg_t *mb = l->data;
		int mb_size = pmsg_size(mb);
		struct mq_udp_info *mi = pmsg_get_metadata(mb);

		if (!pmsg_check(mb, q)) {
			dropped++;
			goto skip;
		}

		r = tx_sendto(q->tx_drv, mb, &mi->to);

		if (r < 0)		/* Error, drop packet and continue */
			goto skip;

		if (r == 0)		/* No more bandwidth */
			break;

		g_assert(r == mb_size);

		node_add_tx_given(q->node, r);

		if (q->flags & MQ_FLOWC)
			q->flowc_written += r;

		/*
		 * The UDP layer is non-reliable so the message could be dropped
		 * later on by lower layers.
		 *
		 * Therefore, message statistics will be updated by a specific
		 * accounting callback that is known to the datagram layer, such
		 * as node_msg_accounting().
		 */

	skip:
		if (q->qlink)
			q->cops->qlink_remove(q, l);

		/* drop the message from queue, will be freed by mq_rmlink_prev() */
		l = q->cops->rmlink_prev(q, l, mb_size);
	}

	mq_check(q, 0);
	g_assert(q->size >= 0 && q->count >= 0);

	if (dropped)
		node_add_txdrop(q->node, dropped);	/* Dropped during TX */

	/*
	 * Update flow-control information.
	 */

	q->cops->update_flowc(q);

	/*
	 * If queue is empty, disable servicing.
	 */

	if (q->size == 0) {
		g_assert(q->count == 0);
		tx_srv_disable(q->tx_drv);
		node_tx_service(q->node, FALSE);
	}

	mq_check(q, 0);
}
/**
 * Write back cached value to disk.
 * @return TRUE on success
 */
static gboolean
write_back(dbmw_t *dw, gconstpointer key, struct cached *value)
{
	dbmap_datum_t dval;
	gboolean ok;

	g_assert(value->dirty);

	if (value->absent) {
		/* Key not present, value is null item */
		dval.data = NULL;
		dval.len = 0;
	} else {
		/*
		 * Serialize value into our reused message block if a
		 * serialization routine was provided.
		 */

		if (dw->pack) {
			pmsg_reset(dw->mb);
			(*dw->pack)(dw->mb, value->data);

			dval.data = pmsg_start(dw->mb);
			dval.len = pmsg_size(dw->mb);

			/*
			 * We allocated the message block one byte larger than the
			 * maximum size, in order to detect unexpected serialization
			 * overflows.
			 */

			if (dval.len > dw->value_data_size) {
				/* Don't g_carp() as this is asynchronous wrt data change */
				g_warning("DBMW \"%s\" serialization overflow in %s() "
					"whilst %s dirty entry",
					dw->name,
					stacktrace_routine_name(func_to_pointer(dw->pack), FALSE),
					value->absent ? "deleting" : "flushing");
				return FALSE;
			}
		} else {
			dval.data = value->data;
			dval.len = value->len;
		}
	}

	/*
	 * If cached entry is absent, delete the key.
	 * Otherwise store the serialized value.
	 *
	 * Dirty bit is cleared on success.
	 */

	if (common_dbg > 4)
		g_debug("DBMW \"%s\" %s dirty value (%lu byte%s)",
			dw->name, value->absent ? "deleting" : "flushing",
			(unsigned long) dval.len, 1 == dval.len ? "" : "s");

	dw->ioerr = FALSE;
	ok = value->absent ?
		dbmap_remove(dw->dm, key) : dbmap_insert(dw->dm, key, dval);

	if (ok) {
		value->dirty = FALSE;
	} else if (dbmap_has_ioerr(dw->dm)) {
		dw->ioerr = TRUE;
		dw->error = errno;
		g_warning("DBMW \"%s\" I/O error whilst %s dirty entry: %s",
			dw->name, value->absent ? "deleting" : "flushing",
			dbmap_strerror(dw->dm));
	} else {
		g_warning("DBMW \"%s\" error whilst %s dirty entry: %s",
			dw->name, value->absent ? "deleting" : "flushing",
			dbmap_strerror(dw->dm));
	}

	return ok;
}
/**
 * Dechunk more data from the input buffer `mb'.
 * @returns dechunked data in a new buffer, or NULL if no more data.
 */
static pmsg_t *
dechunk_data(rxdrv_t *rx, pmsg_t *mb)
{
	struct attr *attr = rx->opaque;
	const char *error_str, *src;
	size_t size;

	/*
	 * Prepare call to parse_chunk().
	 */

	size = pmsg_size(mb);
	src = pmsg_read_base(mb);

	while (size > 0) {
		size_t ret;

		g_assert(CHUNK_STATE_ERROR != attr->state);

		/*
		 * Copy avoidance: if the data we got fits into the current chunk
		 * size, then we don't have to parse anything: all the data belong
		 * to the current chunk, so we can simply pass them to the upper
		 * layer.
		 */

		if (CHUNK_STATE_DATA == attr->state) {
			pmsg_t *nmb;

			nmb = pmsg_clone(mb);
			if (size < attr->data_remain) {
				/* The complete chunk data is forwarded to the upper layer */
				mb->m_rptr += size;
				attr->data_remain -= size;
			} else {
				/* Only the first ``data_remain'' bytes are forwarded */
				mb->m_rptr += attr->data_remain;
				nmb->m_wptr =
					deconstify_pointer(&nmb->m_rptr[attr->data_remain]);
				attr->data_remain = 0;
				attr->state = CHUNK_STATE_DATA_CRLF;
			}
			if (GNET_PROPERTY(rx_debug) > 9)
				g_debug("dechunk_data: returning chunk of %u bytes",
					pmsg_size(nmb));
			return nmb;
		}

		g_assert(size > 0);
		g_assert(CHUNK_STATE_DATA != attr->state);

		/*
		 * Parse chunk headers.
		 */

		ret = parse_chunk(rx, src, size, &error_str);
		if (0 == ret) {
			/*
			 * We can't continue if we meet a dechunking error.  Signal
			 * our user so that the connection is terminated.
			 */
			errno = EIO;
			attr->cb->chunk_error(rx->owner,
				"dechunk() failed: %s", error_str);
			g_warning("dechunk_data(): %s", error_str);
			break;
		}

		g_assert(ret <= size);

		size -= ret;
		mb->m_rptr += ret;		/* Read that far */
	}

	return NULL;		/* No more data */
}