/**
 * Callout queue callback invoked when the Nagle timer expires.
 *
 * When no send buffer is pending, the Nagle flag is cleared and the filling
 * buffer is flushed and sent.  When a send buffer is still pending, the
 * timer is re-armed for another BUFFER_NAGLE period.
 *
 * @param cq	the callout queue that fired the event
 * @param arg	the TX driver (txdrv_t *) registered with the timer
 */
static void
deflate_nagle_timeout(cqueue_t *cq, void *arg)
{
	txdrv_t *tx = arg;
	struct attr *attr = tx->opaque;

	cq_zero(cq, &attr->tm_ev);

	if (-1 == attr->send_idx) {
		/* No pending send buffer: stop nagling and flush */

		attr->flags &= ~DF_NAGLE;

		if (tx_deflate_debugging(9)) {
			struct buffer *b = &attr->buf[attr->fill_idx];	/* Buffer to send */

			g_debug("TX %s: (%s) flushing %zu bytes (buffer #%d) [%c%c]",
				G_STRFUNC, gnet_host_to_string(&tx->host),
				b->wptr - b->rptr, attr->fill_idx,
				(attr->flags & DF_FLOWC) ? 'C' : '-',
				(attr->flags & DF_FLUSH) ? 'f' : '-');
		}

		deflate_flush_send(tx);
		return;
	}

	/*
	 * Send buffer still incompletely sent: try again later.
	 */

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: (%s) buffer #%d unsent, exiting [%c%c]",
			G_STRFUNC, gnet_host_to_string(&tx->host), attr->send_idx,
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-');
	}

	attr->tm_ev = cq_insert(attr->cq, BUFFER_NAGLE, deflate_nagle_timeout, tx);
}
/** * Log TX error if unusual. * * @return TRUE if the error was fatal, FALSE if it's a temporary error and * the message needs to be enqueued. */ static bool udp_sched_write_error(const udp_sched_t *us, const gnet_host_t *to, const pmsg_t *mb, const char *func) { (void) us; /* FIXME -- no longer used */ if (is_temporary_error(errno) || ENOBUFS == errno) return FALSE; switch (errno) { /* * The following are probably due to bugs in the libc, but this is in * the same vein as write() failing with -1 whereas errno == 0! Be more * robust against bugs in the components we rely on. --RAM, 09/10/2003 */ case EINPROGRESS: /* Weird, but seen it -- RAM, 07/10/2003 */ { g_warning("%s(to=%s, len=%d) failed with weird errno = %m -- " "assuming EAGAIN", func, gnet_host_to_string(to), pmsg_size(mb)); } break; case EPIPE: case ENOSPC: case ENOMEM: case EINVAL: /* Seen this with "reserved" IP addresses */ #ifdef EDQUOT case EDQUOT: #endif /* EDQUOT */ case EMSGSIZE: /* Message too large */ case EFBIG: case EIO: case EADDRNOTAVAIL: case ECONNABORTED: case ECONNRESET: case ECONNREFUSED: case ENETRESET: case ENETDOWN: case ENETUNREACH: case EHOSTDOWN: case EHOSTUNREACH: case ENOPROTOOPT: case EPROTONOSUPPORT: case ETIMEDOUT: case EACCES: case EPERM: /* * We don't care about lost packets. */ g_warning("%s(): UDP write of %d bytes to %s failed: %m", func, pmsg_size(mb), gnet_host_to_string(to)); break; default: g_critical("%s(): UDP write of %d bytes to %s failed " "with unexpected errno %d: %m", func, pmsg_size(mb), gnet_host_to_string(to), errno); break; } return TRUE; /* Fatal error */ }
/**
 * Promote the current "filling buffer" to "send buffer", move on to the
 * next filling buffer (wrapping around at BUFFER_COUNT) and immediately
 * attempt to write the new send buffer.
 *
 * Must only be called when there is no pending send buffer.
 */
static void
deflate_rotate_and_send(txdrv_t *tx)
{
	struct attr *attr = tx->opaque;
	int next;

	g_assert(-1 == attr->send_idx);		/* No pending send */

	/*
	 * A pending Nagle timer is now useless: we are sending right away.
	 */

	if (attr->flags & DF_NAGLE)
		deflate_nagle_stop(tx);

	/*
	 * What we filled so far becomes the buffer to send, and we continue
	 * filling in the next buffer of the ring.
	 */

	next = attr->fill_idx + 1;
	attr->send_idx = attr->fill_idx;
	attr->fill_idx = (next >= BUFFER_COUNT) ? 0 : next;

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: (%s) fill buffer now #%d [%c%c]",
			G_STRFUNC, gnet_host_to_string(&tx->host), attr->fill_idx,
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-');
	}

	deflate_send(tx);
}
/**
 * Release the driver's private data: the buffer arenas, the zlib
 * compression stream, the pending timer event and the attributes
 * themselves.
 */
static void
tx_deflate_destroy(txdrv_t *tx)
{
	struct attr *attr = tx->opaque;
	int zret;
	int n;

	g_assert(attr->outz);

	for (n = 0; n < BUFFER_COUNT; n++)
		wfree(attr->buf[n].arena, attr->buffer_size);

	zret = deflateEnd(attr->outz);

	switch (zret) {
	case Z_OK:
	case Z_DATA_ERROR:		/* Ignored (discarded data, probably) */
		break;
	default:
		g_warning("while freeing compressor for peer %s: %s",
			gnet_host_to_string(&tx->host), zlib_strerror(zret));
		break;
	}

	WFREE(attr->outz);
	cq_cancel(&attr->tm_ev);
	WFREE(attr);
}
/**
 * Write I/O vector.
 *
 * Each vector entry is handed to deflate_add() in turn.  We stop at the
 * first entry that could not be fully consumed, or as soon as the driver
 * is flow-controlled or shut down.
 *
 * @param tx		the TX driver
 * @param iov		the I/O vector to write
 * @param iovcnt	amount of entries in the I/O vector
 *
 * @return amount of bytes written, or -1 on error.
 */
static ssize_t
tx_deflate_writev(txdrv_t *tx, iovec_t *iov, int iovcnt)
{
	struct attr *attr = tx->opaque;
	int total = 0;
	int i;

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: (%s) (buffer #%d, nagle %s, unflushed %zu) [%c%c]",
			G_STRFUNC, gnet_host_to_string(&tx->host), attr->fill_idx,
			(attr->flags & DF_NAGLE) ? "on" : "off", attr->unflushed,
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-');
	}

	for (i = 0; i < iovcnt; i++, iov++) {
		int n;

		/* Flow-controlled or shut down: do not accept anything more */

		if (attr->flags & (DF_FLOWC|DF_SHUTDOWN))
			break;

		n = deflate_add(tx, iovec_base(iov), iovec_len(iov));

		if (-1 == n)
			return -1;

		total += n;

		/* Partial write means we got flow-controlled */

		if (UNSIGNED(n) < iovec_len(iov))
			break;
	}

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: (%s) sent %lu bytes (buffer #%d, nagle %s, "
			"unflushed %zu) [%c%c]", G_STRFUNC,
			gnet_host_to_string(&tx->host), (ulong) total, attr->fill_idx,
			(attr->flags & DF_NAGLE) ? "on" : "off", attr->unflushed,
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-');
	}

	return total;
}
/** * Send message (eslist iterator callback). * * @return TRUE if message was sent and freed up. */ static bool udp_tx_desc_send(void *data, void *udata) { struct udp_tx_desc *txd = data; udp_sched_t *us = udata; unsigned prio; udp_sched_check(us); udp_tx_desc_check(txd); if (us->used_all) return FALSE; /* * Avoid flushing consecutive queued messages to the same destination, * for regular (non-prioritary) messages. * * This serves two purposes: * * 1- It makes sure one single host does not capture all the available * outgoing bandwidth. * * 2- It somehow delays consecutive packets to a given host thereby reducing * flooding and hopefully avoiding saturation of its RX flow. */ prio = pmsg_prio(txd->mb); if (PMSG_P_DATA == prio && hset_contains(us->seen, txd->to)) { udp_sched_log(2, "%p: skipping mb=%p (%d bytes) to %s", us, txd->mb, pmsg_size(txd->mb), gnet_host_to_string(txd->to)); return FALSE; } if (udp_sched_mb_sendto(us, txd->mb, txd->to, txd->tx, txd->cb)) { if (PMSG_P_DATA == prio && pmsg_was_sent(txd->mb)) hset_insert(us->seen, atom_host_get(txd->to)); } else { return FALSE; /* Unsent, leave it in the queue */ } us->buffered = size_saturate_sub(us->buffered, pmsg_size(txd->mb)); udp_tx_desc_flag_release(txd, us); return TRUE; }
/**
 * Format the hosts held in the sequence as ":host" items appended one after
 * the other, in sequence order.  Every host, including the first one, is
 * therefore preceded by a ':' character.
 *
 * @return A halloc()ed string.
 */
static char *
proxy_sequence_to_string(const sequence_t *s)
{
	str_t *out = str_new(0);

	if (!sequence_is_empty(s)) {
		sequence_iter_t *it;

		for (
			it = sequence_forward_iterator(s);
			sequence_iter_has_next(it);
			/* empty */
		) {
			gnet_host_t *h = sequence_iter_next(it);

			str_putc(out, ':');
			str_cat(out, gnet_host_to_string(h));
		}

		sequence_iterator_release(&it);
	}

	return str_s2c_null(&out);
}
/**
 * Write data buffer.
 *
 * Nothing is accepted (0 is returned) when the driver is flow-controlled
 * or shut down; otherwise data are handed over to deflate_add().
 *
 * @param tx	the TX driver
 * @param data	start of the data to write
 * @param len	amount of bytes to write
 *
 * @return amount of bytes written, or -1 on error.
 */
static ssize_t
tx_deflate_write(txdrv_t *tx, const void *data, size_t len)
{
	struct attr *attr = tx->opaque;

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: (%s) (buffer #%d, nagle %s, unflushed %zu) [%c%c]",
			G_STRFUNC, gnet_host_to_string(&tx->host), attr->fill_idx,
			(attr->flags & DF_NAGLE) ? "on" : "off", attr->unflushed,
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-');
	}

	return (attr->flags & (DF_FLOWC|DF_SHUTDOWN)) ?
		0 : deflate_add(tx, data, len);
}
/**
 * Enter or leave flow-control.
 *
 * The DF_FLOWC flag is updated and, if the owner registered a flow_control
 * callback, it is invoked with the amount of buffered data when entering
 * flow-control, and with 0 when leaving it.
 *
 * @param tx	the TX driver
 * @param on	TRUE to enter flow-control, FALSE to leave it
 */
static void
deflate_set_flowc(txdrv_t *tx, bool on)
{
	struct attr *attr = tx->opaque;

	attr->flags = on ?
		(attr->flags | DF_FLOWC) :		/* Enter flow control */
		(attr->flags & ~DF_FLOWC);		/* Leave flow control state */

	if (tx_deflate_debugging(4)) {
		g_debug("TX %s: (%s) %s flow-control [%c%c]",
			G_STRFUNC, gnet_host_to_string(&tx->host),
			on ? "entering" : "leaving",
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-');
	}

	if (NULL == attr->cb->flow_control)
		return;

	attr->cb->flow_control(tx->owner, on ? deflate_buffered(tx) : 0);
}
/**
 * Pending data were all flushed.
 *
 * Accounts the flushed input / output byte counts into the running totals,
 * updates the overall compression ratio and its fast EMA, then resets the
 * per-flush counters and clears the DF_FLUSH flag.
 *
 * The per-flush state is always reset, even when no input was ever
 * processed (total input is zero): in that case only the ratio computation
 * is skipped, to avoid a division by zero.  Returning early there would
 * leave attr->flushed set, causing it to be counted again in the total
 * output at the next flush, and would leave DF_FLUSH set.
 */
static void
deflate_flushed(txdrv_t *tx)
{
	struct attr *attr = tx->opaque;
	double flush = 0.0;

	g_assert(size_is_non_negative(attr->unflushed));

	attr->total_input += attr->unflushed;
	attr->total_output += attr->flushed;

	/*
	 * The overall ratio is only meaningful once we have processed input.
	 */

	if (0 != attr->total_input) {
		attr->ratio = 1.0 - ((double) attr->total_output / attr->total_input);
	}

	if (0 != attr->unflushed) {
		/*
		 * Fast EMA for compression ratio is computed for the last n=3 flushes,
		 * so the smoothing factor sm=2/(n+1) is 1/2.
		 */

		flush = 1.0 - ((double) attr->flushed / attr->unflushed);
		attr->ratio_ema += (flush / 2.0) - (attr->ratio_ema / 2.0);
	}

	if (tx_deflate_debugging(4)) {
		g_debug("TX %s: (%s) deflated %zu bytes into %zu "
			"(%.2f%%, EMA=%.2f%%, overall %.2f%%)",
			G_STRFUNC, gnet_host_to_string(&tx->host),
			attr->unflushed, attr->flushed,
			100 * flush, 100 * attr->ratio_ema, 100 * attr->ratio);
	}

	attr->unflushed = attr->flushed = 0;
	attr->flags &= ~DF_FLUSH;
}
/**
 * Compress as much data as possible to the output buffer, sending data
 * as we go along.
 *
 * Input is fed to deflate() until it is all consumed.  Whenever the filling
 * buffer gets full, it is rotated and sent, unless a send buffer is already
 * pending, in which case we enter flow-control and return the amount of
 * input consumed so far.
 *
 * When all the input was consumed, the Nagle timer is started (or delayed
 * if already running) and a flush is requested when more than
 * attr->buffer_flush bytes were written since the last flush.
 *
 * @param tx	the TX driver
 * @param data	start of the data to compress
 * @param len	amount of bytes held in `data'
 *
 * @return the amount of input bytes that were consumed ("added"), -1 on error.
 */
static int
deflate_add(txdrv_t *tx, const void *data, int len)
{
	struct attr *attr = tx->opaque;
	z_streamp outz = attr->outz;
	int added = 0;

	/* NOTE(review): `len' is an int but is printed with %u below */

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: (%s) given %u bytes (buffer #%d, nagle %s, "
			"unflushed %zu) [%c%c]%s", G_STRFUNC,
			gnet_host_to_string(&tx->host), len, attr->fill_idx,
			(attr->flags & DF_NAGLE) ? "on" : "off", attr->unflushed,
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-',
			(tx->flags & TX_ERROR) ? " ERROR" : "");
	}

	/*
	 * If an error was already reported, the whole deflate stream is dead
	 * and we cannot accept any more data.
	 */

	if G_UNLIKELY(tx->flags & TX_ERROR)
		return -1;

	while (added < len) {
		struct buffer *b = &attr->buf[attr->fill_idx];	/* Buffer we fill */
		int ret;
		int old_added = added;
		bool flush_started = (attr->flags & DF_FLUSH) ? TRUE : FALSE;
		int old_avail;
		const char *in, *old_in;

		/*
		 * Prepare call to deflate().
		 *
		 * Output goes after what is already held in the filling buffer,
		 * and input resumes where the previous iteration left off.
		 */

		outz->next_out = cast_to_pointer(b->wptr);
		outz->avail_out = old_avail = b->end - b->wptr;

		in = data;
		old_in = &in[added];
		outz->next_in = deconstify_pointer(old_in);
		outz->avail_in = len - added;

		g_assert(outz->avail_out > 0);
		g_assert(outz->avail_in > 0);

		/*
		 * Compress data.
		 *
		 * If we previously started to flush, continue the operation, now
		 * that we have more room available for the output.
		 *
		 * The 0 flush value is zlib's Z_NO_FLUSH.
		 */

		ret = deflate(outz, flush_started ? Z_SYNC_FLUSH : 0);

		if (Z_OK != ret) {
			attr->flags |= DF_SHUTDOWN;
			(*attr->cb->shutdown)(tx->owner, "Compression failed: %s",
				zlib_strerror(ret));
			return -1;
		}

		/*
		 * Update the parameters.
		 *
		 * `added' is recomputed as the total amount of input consumed since
		 * the start of `data', not as an increment.
		 */

		b->wptr = cast_to_pointer(outz->next_out);
		added = ptr_diff(outz->next_in, in);

		g_assert(added >= old_added);

		attr->unflushed += added - old_added;
		attr->flushed += old_avail - outz->avail_out;

		if (NULL != attr->cb->add_tx_deflated)
			attr->cb->add_tx_deflated(tx->owner, old_avail - outz->avail_out);

		/*
		 * In gzip mode, account the input consumed by this iteration in
		 * the running size and CRC32.
		 */

		if (attr->gzip.enabled) {
			size_t r;

			r = ptr_diff(outz->next_in, old_in);
			attr->gzip.size += r;
			attr->gzip.crc = crc32(attr->gzip.crc,
				cast_to_constpointer(old_in), r);
		}

		if (tx_deflate_debugging(9)) {
			g_debug("TX %s: (%s) deflated %d bytes into %d "
				"(buffer #%d, nagle %s, flushed %zu, unflushed %zu) [%c%c]",
				G_STRFUNC, gnet_host_to_string(&tx->host),
				added, old_avail - outz->avail_out, attr->fill_idx,
				(attr->flags & DF_NAGLE) ? "on" : "off",
				attr->flushed, attr->unflushed,
				(attr->flags & DF_FLOWC) ? 'C' : '-',
				(attr->flags & DF_FLUSH) ? 'f' : '-');
		}

		/*
		 * If we filled the output buffer, check whether we have a pending
		 * send buffer.  If we do, we cannot process more data.  Otherwise
		 * send it now and continue.
		 */

		if (0 == outz->avail_out) {
			if (attr->send_idx >= 0) {
				deflate_set_flowc(tx, TRUE);	/* Enter flow control */
				return added;
			}

			deflate_rotate_and_send(tx);		/* Can set TX_ERROR */

			if (tx->flags & TX_ERROR)
				return -1;
		}

		/*
		 * If we were flushing and we consumed all the input, then
		 * the flush is done and we're starting normal compression again.
		 *
		 * This must be done after we made sure that we had enough output
		 * space available.
		 */

		if (flush_started && 0 == outz->avail_in)
			deflate_flushed(tx);
	}

	g_assert(0 == outz->avail_in);

	/*
	 * Start Nagle if not already on, otherwise delay the running timer.
	 */

	if (attr->flags & DF_NAGLE)
		deflate_nagle_delay(tx);
	else
		deflate_nagle_start(tx);

	/*
	 * We're going to ask for a flush if not already started yet and the
	 * amount of bytes we have written since the last flush is greater
	 * than attr->buffer_flush.
	 */

	if (attr->unflushed > attr->buffer_flush) {
		if (!deflate_flush(tx))
			return -1;
	}

	return added;
}
/**
 * Enqueue message, which becomes owned by the queue.
 *
 * The data held in `to' is copied, so the structure can be reclaimed
 * immediately by the caller.
 *
 * When the queue is empty, an immediate write is attempted and the message
 * is only enqueued if nothing could be written.  Messages given whilst a
 * putq() is already in progress on the queue (recursion) are parked in the
 * waiting list and processed when the initial putq() ends.
 *
 * An empty message is freed and dropped without touching the recursion
 * counter: it is rejected before that counter is incremented, so it must
 * not go through the common cleanup path which decrements it.
 *
 * @param q		the UDP message queue
 * @param mb	the message to enqueue
 * @param to	the destination of the message
 */
void
mq_udp_putq(mqueue_t *q, pmsg_t *mb, const gnet_host_t *to)
{
	size_t size;
	char *mbs;
	uint8 function;
	pmsg_t *mbe = NULL;		/* Extended message with destination info */
	bool error = FALSE;

	mq_check_consistency(q);

	dump_tx_udp_packet(to, mb);

again:
	mq_check_consistency(q);
	g_assert(mb);
	g_assert(!pmsg_was_sent(mb));
	g_assert(pmsg_is_unread(mb));
	g_assert(q->ops == &mq_udp_ops);	/* Is an UDP queue */

	/*
	 * Trap messages enqueued whilst in the middle of an mq_clear() operation
	 * by marking them as sent and dropping them.  Idem if queue was
	 * put in "discard" mode.
	 */

	if (q->flags & (MQ_CLEAR | MQ_DISCARD)) {
		pmsg_mark_sent(mb);	/* Let them think it was sent */
		pmsg_free(mb);		/* Drop message */
		return;
	}

	mq_check(q, 0);

	size = pmsg_size(mb);

	if (size == 0) {
		g_carp("%s: called with empty message", G_STRFUNC);

		/*
		 * We have not incremented q->putq_entered yet, so we must not go
		 * through the cleanup code: during a recursive call, it would
		 * decrement the counter of the outer putq() and defeat the
		 * recursion protection.
		 */

		pmsg_free(mb);
		return;
	}

	/*
	 * Protect against recursion: we must not invoke puthere() whilst in
	 * the middle of another putq() or we would corrupt the qlink array:
	 * Messages received during recursion are inserted into the qwait list
	 * and will be stuffed back into the queue when the initial putq() ends.
	 *		--RAM, 2006-12-29
	 */

	if (q->putq_entered > 0) {
		pmsg_t *extended;

		if (debugging(20))
			g_warning("%s: %s recursion detected (%u already pending)",
				G_STRFUNC, mq_info(q), slist_length(q->qwait));

		/*
		 * We insert extended messages into the waiting queue since we need
		 * the destination information as well.
		 */

		extended = mq_udp_attach_metadata(mb, to);
		slist_append(q->qwait, extended);
		return;
	}

	q->putq_entered++;

	mbs = pmsg_start(mb);
	function = gmsg_function(mbs);

	gnet_stats_count_queued(q->node, function, mbs, size);

	/*
	 * If queue is empty, attempt a write immediately.
	 */

	if (q->qhead == NULL) {
		ssize_t written;

		if (pmsg_check(mb, q)) {
			written = tx_sendto(q->tx_drv, mb, to);
		} else {
			gnet_stats_count_flowc(mbs, FALSE);
			node_inc_txdrop(q->node);		/* Dropped during TX */
			written = (ssize_t) -1;
		}

		if ((ssize_t) -1 == written)
			goto cleanup;

		node_add_tx_given(q->node, written);

		if ((size_t) written == size) {
			if (GNET_PROPERTY(mq_udp_debug) > 5)
				g_debug("MQ UDP sent %s",
					gmsg_infostr_full(pmsg_start(mb), pmsg_written_size(mb)));

			goto cleanup;
		}

		/*
		 * Since UDP respects write boundaries, the following can never
		 * happen in practice: either we write the whole datagram, or none
		 * of it.
		 */

		if (written > 0) {
			g_warning(
				"partial UDP write (%zd bytes) to %s for %zu-byte datagram",
				written, gnet_host_to_string(to), size);
			goto cleanup;
		}

		/* FALL THROUGH */
	}

	if (GNET_PROPERTY(mq_udp_debug) > 5)
		g_debug("MQ UDP queued %s",
			gmsg_infostr_full(pmsg_start(mb), pmsg_written_size(mb)));

	/*
	 * Attach the destination information as metadata to the message, unless
	 * it is already known (possible only during unfolding of the queued data
	 * during re-entrant calls).
	 *
	 * This is later extracted via pmsg_get_metadata() on the extended
	 * message by the message queue to get the destination information.
	 *
	 * Then enqueue the extended message.
	 */

	if (NULL == mbe)
		mbe = mq_udp_attach_metadata(mb, to);

	q->cops->puthere(q, mbe, size);
	mb = NULL;

	/* FALL THROUGH */

cleanup:

	if (mb) {
		pmsg_free(mb);
		mb = NULL;
	}

	/*
	 * When reaching that point with a zero putq_entered counter, it means
	 * we triggered an early error condition.  Bail out.
	 */

	g_assert(q->putq_entered >= 0);

	if (q->putq_entered == 0)
		error = TRUE;
	else
		q->putq_entered--;

	mq_check(q, 0);

	/*
	 * If we're exiting here with no other putq() registered, then we must
	 * pop an item off the head of the list and iterate again.
	 */

	if (0 == q->putq_entered && !error) {
		mbe = slist_shift(q->qwait);
		if (mbe) {
			struct mq_udp_info *mi = pmsg_get_metadata(mbe);

			mb = mbe;		/* An extended message "is-a" message */
			to = &mi->to;

			if (debugging(20))
				g_warning(
					"%s: %s flushing waiting to %s (%u still pending)",
					G_STRFUNC, mq_info(q), gnet_host_to_string(to),
					slist_length(q->qwait));

			goto again;
		}
	}

	return;
}
/** * Start a G2 RPC with the specified host. * * @param host the host to which message is sent * @param mb the message to send * @param cb if non-NULL, callback to invoke on reply or timeout * @param arg additional callback argument * @param timeout amount of seconds before timeout * * @return TRUE if we initiated the RPC, FALSE if another of the same * kind was already in progress with the host. */ bool g2_rpc_launch(const gnet_host_t *host, pmsg_t *mb, g2_rpc_cb_t cb, void *arg, unsigned timeout) { struct g2_rpc *gr; struct g2_rpc_key key; gnutella_node_t *n; key.type = g2_msg_type_mb(mb); key.addr = gnet_host_get_addr(host); /* * Because there is no MUID in /PI and /QKR messages, we cannot use that * as a key to detect the RPC reply. Therefore, we use the message type * and the IP address of the host. When a /PO or /QKA comes back, we'll * be able to see whether we had a pending RPC from that host for that * type of transaction. * * The downside is that we can only have one pending RPC at a time of * a given kind towards a given IP address. We don't use the port in * the key because we cannot assume the reply will come from the same port * we sent the message to, if the remote host is behind NAT or does not * use its listening UDP socket to reply. */ if (hevset_contains(g2_rpc_pending, &key)) { if (GNET_PROPERTY(g2_rpc_debug)) { g_debug("%s(): cannot issue /%s RPC to %s: concurrent request", G_STRFUNC, g2_msg_type_name(key.type), gnet_host_to_string(host)); } return FALSE; } /* * Make sure the node is valid. */ n = node_udp_g2_get_addr_port(key.addr, gnet_host_get_port(host)); if (NULL == n) { if (GNET_PROPERTY(g2_rpc_debug)) { g_debug("%s(): cannot issue /%s RPC to %s: cannot get G2 node", G_STRFUNC, g2_msg_type_name(key.type), gnet_host_to_string(host)); } return FALSE; /* Invalid node, or G2 disabled */ } /* * Good, we can issue the RPC. 
*/ WALLOC(gr); gr->magic = G2_RPC_MAGIC; gr->key = key; /* struct copy */ gr->cb = cb; gr->arg = arg; gr->timeout_ev = cq_main_insert(timeout * 1000, g2_rpc_timeout, gr); hevset_insert(g2_rpc_pending, gr); if (GNET_PROPERTY(g2_rpc_debug) > 1) { g_debug("%s(): issuing /%s RPC to %s, timeout %u sec%s", G_STRFUNC, g2_msg_type_name(key.type), gnet_host_to_string(host), timeout, plural(timeout)); } /* * Do not send RPCs reliably: this can cause problems if we don't receive * the ACK backm yet the message was received and processed remotely: the * remote host will send a reply back and the message will still appear to * be "unsent" locally. * * Furthermore, this alleviates the need for the remote side to actually * acknowledge the request: targeted hosts can be busy so it's best to * make the RPC "unreliable" to limit processing and bandwidth requirements. */ g2_node_send(n, mb); return TRUE; }
/**
 * Classify the error left in `errno' by a failed UDP write, logging it.
 *
 * An unexpected errno flags the driver with TX_ERROR and is reported
 * through g_error().
 *
 * @param tx	the TX driver
 * @param to	the destination of the datagram that failed
 * @param func	name of the calling routine, for logging
 *
 * @return 0 if the error is temporary, -1 if the datagram was lost.
 */
static inline int
tx_dgram_write_error(txdrv_t *tx, const gnet_host_t *to, const char *func)
{
	if (is_temporary_error(errno) || ENOBUFS == errno)
		return 0;

	switch (errno) {
	/*
	 * The following are probably due to bugs in the libc, but this is in
	 * the same vein as write() failing with -1 whereas errno == 0!  Be more
	 * robust against bugs in the components we rely on. --RAM, 09/10/2003
	 */
	case EINPROGRESS:		/* Weird, but seen it -- RAM, 07/10/2003 */
	{
		const struct attr *attr = tx->opaque;
		g_warning("%s(fd=%d) failed with weird errno = %d (%s), "
			"assuming EAGAIN", func, attr->wio->fd(attr->wio), errno,
			g_strerror(errno));
	}
		return 0;
	case EPIPE:
	case ENOSPC:
	case ENOMEM:
	case EINVAL:			/* Seen this with "reserved" IP addresses */
#ifdef EDQUOT
	case EDQUOT:
#endif /* EDQUOT */
	case EMSGSIZE:			/* Message too large: lost, but not unexpected */
	case EFBIG:
	case EIO:
	case EADDRNOTAVAIL:
	case ECONNABORTED:
	case ECONNRESET:
	case ECONNREFUSED:
	case ENETRESET:
	case ENETDOWN:
	case ENETUNREACH:
	case EHOSTDOWN:
	case EHOSTUNREACH:
	case ENOPROTOOPT:
	case EPROTONOSUPPORT:
	case ETIMEDOUT:
	case EACCES:
	case EPERM:
		/*
		 * Don't set TX_ERROR here, we don't care about lost packets.
		 */
		g_warning("UDP write to %s failed: %s",
			gnet_host_to_string(to), g_strerror(errno));
		return -1;
	default:
		{
			int terr = errno;	/* Saved: following calls could change errno */
			tx->flags |= TX_ERROR;		/* This should be fatal! */
			g_error("%s: UDP write to %s failed with unexpected errno: %d (%s)",
				func, gnet_host_to_string(to), terr, g_strerror(terr));
		}
	}

	return 0;		/* Just in case */
}
/**
 * Close the layer, flushing all the data there is.
 * Once this is done, invoke the supplied callback.
 *
 * In gzip mode, once nothing is pending any more, the gzip trailer (CRC32
 * and size) is appended and sent, only once.
 *
 * When everything could not be flushed immediately, the callback and its
 * argument are recorded in the driver attributes.
 *
 * @param tx	the TX driver, which must be flagged TX_CLOSING
 * @param cb	the callback to invoke once everything was flushed
 * @param arg	additional callback argument
 */
static void
tx_deflate_close(txdrv_t *tx, tx_closed_t cb, void *arg)
{
	struct attr *attr = tx->opaque;

	g_assert(tx->flags & TX_CLOSING);

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: (%s) send=%d buffer #%d, nagle %s, "
			"unflushed %zu) [%c%c]",
			G_STRFUNC, gnet_host_to_string(&tx->host),
			attr->send_idx, attr->fill_idx,
			(attr->flags & DF_NAGLE) ? "on" : "off", attr->unflushed,
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-');
	}

	/*
	 * Flush whatever we can.
	 */

	tx_deflate_flush(tx);

	if (attr->gzip.enabled && 0 == tx_deflate_pending(tx)) {
		/* See RFC 1952 - GZIP file format specification version 4.3 */
		uint32 trailer[2];	/* 0: CRC32, 1: SIZE % (1 << 32) */
		struct buffer *out;

		/* We don't want to send the trailer more than once */
		attr->gzip.enabled = FALSE;

		/* The trailer is sent from the first buffer */
		attr->send_idx = 0;
		out = &attr->buf[attr->send_idx];

		poke_le32(&trailer[0], (uint32) attr->gzip.crc);
		poke_le32(&trailer[1], attr->gzip.size);

		g_assert(sizeof trailer <= (size_t) (out->end - out->wptr));
		out->wptr = mempcpy(out->wptr, trailer, sizeof trailer);

		deflate_send(tx);
	}

	if (0 != tx_deflate_pending(tx)) {
		/*
		 * We were unable to flush everything: remember the callback.
		 */

		attr->closed = cb;
		attr->closed_arg = arg;

		if (tx_deflate_debugging(9)) {
			g_debug("TX %s: (%s) delayed! send=%d buffer #%d, nagle %s, "
				"flushed %zu, unflushed %zu) [%c%c]",
				G_STRFUNC, gnet_host_to_string(&tx->host),
				attr->send_idx, attr->fill_idx,
				(attr->flags & DF_NAGLE) ? "on" : "off",
				attr->flushed, attr->unflushed,
				(attr->flags & DF_FLOWC) ? 'C' : '-',
				(attr->flags & DF_FLUSH) ? 'f' : '-');
		}
		return;
	}

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: flushed everything immediately", G_STRFUNC);
	}

	(*cb)(tx, arg);
}
/**
 * Initialize the driver.
 *
 * Sets up the zlib compression stream, allocates the driver attributes and
 * its buffers, emits the gzip header in the first filling buffer when gzip
 * mode is requested, and registers our service routine with the lower layer.
 *
 * @param tx	the TX driver to initialize
 * @param args	the driver arguments (struct tx_deflate_args *)
 *
 * @return NULL if there is an initialization problem.
 */
static void *
tx_deflate_init(txdrv_t *tx, void *args)
{
	struct tx_deflate_args *targs = args;
	struct attr *attr;
	z_streamp outz;
	int zret;
	int n;

	g_assert(tx);
	g_assert(NULL != targs->cb);

	WALLOC(outz);
	outz->zalloc = zlib_alloc_func;
	outz->zfree = zlib_free_func;
	outz->opaque = NULL;

	/*
	 * Reduce memory requirements for deflation when running as an ultrapeer.
	 *
	 * Memory used for deflation is:
	 *
	 *	(1 << (window_bits +2)) +  (1 << (mem_level + 9))
	 *
	 * For leaves, we use window_bits = 15 and mem_level = 9, which makes
	 * for 128 KiB + 256 KiB = 384 KiB per connection (TX side).
	 *
	 * For ultra peers, we use window_bits = 14 and mem_level = 6, so this
	 * uses 64 KiB + 32 KiB = 96 KiB only.
	 *
	 * Since ultra peers have many more connections than leaves, the memory
	 * savings are drastic, yet compression levels remain around 50% (varies
	 * depending on the nature of the traffic, of course).
	 *
	 *		--RAM, 2009-04-09
	 *
	 * For Ultra <-> Ultra connections we use window_bits = 15 and mem_level = 9
	 * and request a best compression because the amount of ultra connections
	 * is far less than the number of leaf connections and modern machines
	 * can cope with a "best" compression overhead.
	 *
	 * This is now controlled with the "reduced" argument, so this layer does
	 * not need to know whether we're an ultra node or even what an ultra
	 * node is...  It just knows whether we have to setup a fully compressed
	 * connection or a reduced one (both in terms of memory usage and level
	 * of compression).
	 *
	 *		--RAM, 2011-11-29
	 */

	{
		/* Reduced settings are for Ultra -> Leaf connections */

		int window_bits = targs->reduced ? 14 : MAX_WBITS;
		int mem_level = targs->reduced ? 6 : MAX_MEM_LEVEL;
		int level = targs->reduced ?
			Z_DEFAULT_COMPRESSION : Z_BEST_COMPRESSION;

		/* window_bits must be 8 .. MAX_WBITS */
		g_assert(window_bits >= 8 && window_bits <= MAX_WBITS);

		/* mem_level must be 1 .. MAX_MEM_LEVEL */
		g_assert(mem_level >= 1 && mem_level <= MAX_MEM_LEVEL);

		g_assert(level == Z_DEFAULT_COMPRESSION ||
			(level >= Z_BEST_SPEED && level <= Z_BEST_COMPRESSION));

		zret = deflateInit2(outz, level, Z_DEFLATED,
				targs->gzip ? (-window_bits) : window_bits, mem_level,
				Z_DEFAULT_STRATEGY);
	}

	if (Z_OK != zret) {
		g_warning("unable to initialize compressor for peer %s: %s",
			gnet_host_to_string(&tx->host), zlib_strerror(zret));
		WFREE(outz);
		return NULL;
	}

	WALLOC0(attr);
	attr->cq = targs->cq;
	attr->cb = targs->cb;
	attr->buffer_size = targs->buffer_size;
	attr->buffer_flush = targs->buffer_flush;
	attr->nagle = booleanize(targs->nagle);
	attr->gzip.enabled = targs->gzip;

	attr->outz = outz;
	attr->tm_ev = NULL;

	/*
	 * Allocate the buffers, initially empty.
	 */

	for (n = 0; n < BUFFER_COUNT; n++) {
		struct buffer *buf = &attr->buf[n];

		buf->arena = walloc(attr->buffer_size);
		buf->rptr = buf->arena;
		buf->wptr = buf->arena;
		buf->end = &buf->arena[attr->buffer_size];
	}

	attr->fill_idx = 0;
	attr->send_idx = -1;		/* Signals: none ready */

	if (attr->gzip.enabled) {
		/* See RFC 1952 - GZIP file format specification version 4.3 */
		static const unsigned char header[] = {
			0x1f, 0x8b, /* gzip magic */
			0x08,		/* compression method: deflate */
			0,			/* flags: none */
			0, 0, 0, 0, /* modification time: unavailable */
			0,			/* extra flags: none */
			0xff,		/* filesystem: unknown */
		};
		struct buffer *fill = &attr->buf[attr->fill_idx];	/* Buffer we fill */

		g_assert(sizeof header <= (size_t) (fill->end - fill->wptr));
		fill->wptr = mempcpy(fill->wptr, header, sizeof header);

		attr->gzip.crc = crc32(0, NULL, 0);
		attr->gzip.size = 0;
	}

	tx->opaque = attr;

	/*
	 * Register our service routine to the lower layer.
	 */

	tx_srv_register(tx->lower, deflate_service, tx);

	return tx;		/* OK */
}
/**
 * Write ready-to-be-sent buffer to the lower layer.
 *
 * When the whole buffer could be written, it is marked as free (send_idx
 * is reset to -1) and emptied.  On partial write, the read pointer is
 * advanced and servicing is enabled on the lower layer, to be called back
 * when it can accept more data.  On write error, tx_error() is called.
 */
static void
deflate_send(txdrv_t *tx)
{
	struct attr *attr = tx->opaque;
	struct buffer *b;
	size_t len;					/**< Amount of bytes to send */
	ssize_t r;

	g_assert(attr->send_idx >= 0);	/* We have something to send */
	g_assert(attr->send_idx < BUFFER_COUNT);

	/*
	 * Compute data to be sent.
	 */

	b = &attr->buf[attr->send_idx];		/* Buffer to send */
	len = b->wptr - b->rptr;

	g_assert(len > 0 && len <= INT_MAX);

	/*
	 * Write as much as possible.
	 */

	r = tx_write(tx->lower, b->rptr, len);

	/*
	 * The amount written is signed and can be -1 here, since we trace
	 * before checking for errors: it must be printed with %zd.
	 */

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: (%s) wrote %zd/%zu bytes (buffer #%d) [%c%c]",
			G_STRFUNC, gnet_host_to_string(&tx->host), r, len, attr->send_idx,
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-');
	}

	if ((ssize_t) -1 == r) {
		tx_error(tx);
		return;
	}

	/*
	 * If we wrote everything, we're done.
	 */

	if ((size_t) r == len) {
		if (tx_deflate_debugging(9)) {
			g_debug("TX %s: (%s) buffer #%d is empty",
				G_STRFUNC, gnet_host_to_string(&tx->host), attr->send_idx);
		}

		attr->send_idx = -1;				/* Signals: is now free */
		b->wptr = b->rptr = b->arena;		/* Buffer is now empty */
		return;
	}

	/*
	 * We were unable to send the whole buffer.  Enable servicing when
	 * the lower layer will be ready for more input.
	 */

	b->rptr += r;

	g_assert(b->rptr < b->wptr);		/* We haven't written everything */

	tx_srv_enable(tx->lower);
}
/**
 * Send message block to IP:port.
 *
 * @param us	the UDP scheduler
 * @param mb	the message to send
 * @param to	the IP:port destination of the message
 * @param tx	the TX stack sending the message
 * @param cb	callback actions on the datagram
 *
 * @return TRUE if message was sent or dropped, FALSE if there is no more
 * bandwidth to send anything.
 */
static bool
udp_sched_mb_sendto(udp_sched_t *us, pmsg_t *mb, const gnet_host_t *to,
	const txdrv_t *tx, const struct tx_dgram_cb *cb)
{
	bio_source_t *bio = NULL;
	int len = pmsg_size(mb);
	ssize_t r;

	/* Nothing can be sent to port 0 */

	if (0 == gnet_host_get_port(to))
		return TRUE;

	/*
	 * Check whether message still needs to be sent.
	 */

	if (!pmsg_hook_check(mb))
		return TRUE;			/* Dropped */

	/*
	 * Select the proper I/O source depending on the network address type.
	 */

	switch (gnet_host_get_net(to)) {
	case NET_TYPE_IPV4:
		bio = us->bio[UDP_SCHED_IPv4];
		break;
	case NET_TYPE_IPV6:
		bio = us->bio[UDP_SCHED_IPv6];
		break;
	case NET_TYPE_NONE:
	case NET_TYPE_LOCAL:
		g_assert_not_reached();
	}

	/*
	 * If there is no I/O source, then the socket to send that type of traffic
	 * was cleared, hence we simply need to discard the message.
	 */

	if (NULL == bio) {
		udp_sched_log(4, "%p: discarding mb=%p (%d bytes) to %s",
			us, mb, pmsg_size(mb), gnet_host_to_string(to));
		return udp_tx_drop(tx, cb);		/* TRUE, for "sent" */
	}

	/*
	 * OK, proceed if we have bandwidth.
	 */

	r = bio_sendto(bio, to, pmsg_start(mb), len);

	if (r < 0) {		/* Error, or no bandwidth */
		if (!udp_sched_write_error(us, to, mb, G_STRFUNC)) {
			udp_sched_log(3, "%p: no bandwidth for mb=%p (%d bytes)",
				us, mb, pmsg_size(mb));
			us->used_all = TRUE;
			return FALSE;
		}

		udp_sched_log(4, "%p: dropped mb=%p (%d bytes): %m",
			us, mb, pmsg_size(mb));
		return udp_tx_drop(tx, cb);	/* TRUE, for "sent" */
	}

	if (r == len) {
		udp_sched_log(5, "%p: sent mb=%p (%d bytes) prio=%u",
			us, mb, pmsg_size(mb), pmsg_prio(mb));
		pmsg_mark_sent(mb);
		if (cb->msg_account != NULL)
			(*cb->msg_account)(tx->owner, mb);

		inet_udp_record_sent(gnet_host_get_addr(to));
	} else {
		g_warning("%s: partial UDP write (%zd bytes) to %s "
			"for %d-byte datagram",
			G_STRFUNC, r, gnet_host_to_string(to), len);
	}

	return TRUE;		/* Message sent */
}
/**
 * Flush compression within filling buffer.
 *
 * The stream is flushed with Z_FINISH when the driver is closing, with
 * Z_SYNC_FLUSH otherwise.  Each time the filling buffer gets full, it is
 * rotated and sent and the flush is attempted again.  If a send buffer is
 * already pending at that time, we enter flow-control with the DF_FLUSH
 * flag set, leaving the flush incomplete.
 *
 * @return success status, failure meaning we shutdown.
 */
static bool
deflate_flush(txdrv_t *tx)
{
	struct attr *attr = tx->opaque;
	z_streamp outz = attr->outz;

	for (;;) {
		struct buffer *b = &attr->buf[attr->fill_idx];	/* Buffer we fill */
		size_t written;
		int old_avail;
		int ret;

		if (tx_deflate_debugging(9)) {
			g_debug("TX %s: (%s) flushing %zu bytes "
				"(buffer #%d, flushed %zu, unflushed %zu) [%c%c]",
				G_STRFUNC, gnet_host_to_string(&tx->host),
				b->wptr - b->rptr, attr->fill_idx,
				attr->flushed, attr->unflushed,
				(attr->flags & DF_FLOWC) ? 'C' : '-',
				(attr->flags & DF_FLUSH) ? 'f' : '-');
		}

		/*
		 * Prepare call to deflate().
		 *
		 * We force avail_in to 0, and don't touch next_in: no input should
		 * be consumed.
		 */

		outz->next_out = cast_to_pointer(b->wptr);
		outz->avail_out = old_avail = b->end - b->wptr;

		outz->avail_in = 0;

		g_assert(outz->avail_out > 0);

		ret = deflate(outz, (tx->flags & TX_CLOSING) ? Z_FINISH : Z_SYNC_FLUSH);

		if (Z_BUF_ERROR == ret)
			break;					/* Nothing to flush */

		if (Z_OK != ret && Z_STREAM_END != ret) {
			attr->flags |= DF_SHUTDOWN;
			tx_error(tx);

			/* XXX: The callback must not destroy the tx! */
			(*attr->cb->shutdown)(tx->owner, "Compression flush failed: %s",
					zlib_strerror(ret));
			return FALSE;
		}

		/*
		 * Account for what was written to the filling buffer.
		 */

		written = old_avail - outz->avail_out;
		b->wptr += written;
		attr->flushed += written;

		if (NULL != attr->cb->add_tx_deflated)
			attr->cb->add_tx_deflated(tx->owner, written);

		/*
		 * With output room left, the flush is complete.
		 */

		if (0 != outz->avail_out)
			break;

		/*
		 * We lacked room to complete the flush.  Try to send the buffer
		 * and continue.
		 */

		if (attr->send_idx >= 0) {			/* Send buffer not sent yet */
			attr->flags |= DF_FLUSH;		/* In flush mode */
			deflate_set_flowc(tx, TRUE);	/* Starting flow-control */
			return TRUE;
		}

		deflate_rotate_and_send(tx);		/* Can set TX_ERROR */

		if (tx->flags & TX_ERROR)
			return FALSE;
	}

	deflate_flushed(tx);

	return TRUE;		/* Fully flushed */
}
/**
 * Service routine for the compressing stage.
 *
 * Called by lower layer when it is ready to process more data.
 *
 * The pending send buffer, if any, is written first.  Once it is fully
 * sent, a full filling buffer is rotated and sent, servicing is disabled
 * when nothing remains to be sent, and flow-control is left.  When closing,
 * remaining data are flushed and the recorded "closed" callback is invoked
 * once nothing is pending.  Finally, the upper layer is serviced if it
 * requested it.
 *
 * @param data	the TX driver (txdrv_t *) registered with the lower layer
 */
static void
deflate_service(void *data)
{
	txdrv_t *tx = data;
	struct attr *attr = tx->opaque;
	struct buffer *b;

	g_assert(attr->send_idx < BUFFER_COUNT);

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: (%s) %s(buffer #%d, %zu bytes held) [%c%c]",
			G_STRFUNC, gnet_host_to_string(&tx->host),
			(tx->flags & TX_ERROR) ? "ERROR " : "",
			attr->send_idx,
			attr->send_idx >= 0 ?
				(attr->buf[attr->send_idx].wptr -
					attr->buf[attr->send_idx].rptr) : 0,
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-');
	}

	/*
	 * First, attempt to transmit the whole send buffer, if any pending.
	 */

	if (attr->send_idx >= 0)
		deflate_send(tx);			/* Send buffer `send_idx' */

	if (attr->send_idx >= 0)		/* Could not send it entirely */
		return;						/* Done, servicing still enabled */

	/*
	 * NB: In the following operations, order matters.  In particular, we
	 * must disable the servicing before attempting to service the upper
	 * layer, since the data it will send us can cause us to flow control
	 * and re-enable the servicing.
	 *
	 * If the `fill' buffer is full, try to send it now.
	 */

	b = &attr->buf[attr->fill_idx];	/* Buffer we fill */

	if (b->wptr >= b->end) {
		if (tx_deflate_debugging(9)) {
			g_debug("TX %s: (%s) sending fill buffer #%d, %zu bytes",
				G_STRFUNC, gnet_host_to_string(&tx->host), attr->fill_idx,
				b->wptr - b->rptr);
		}

		deflate_rotate_and_send(tx);	/* Can set TX_ERROR */

		if (tx->flags & TX_ERROR)
			return;
	}

	/*
	 * If we were able to send the whole send buffer, disable servicing.
	 */

	if (-1 == attr->send_idx)
		tx_srv_disable(tx->lower);

	/*
	 * If we entered flow control, we can now safely leave it, since we
	 * have at least a free `fill' buffer.
	 */

	if (attr->flags & DF_FLOWC)
		deflate_set_flowc(tx, FALSE);	/* Leave flow control state */

	/*
	 * If closing, we're done once we have flushed everything we could.
	 * There's no need to even bother with the upper layer: if we're
	 * closing, we won't accept any further data to write anyway.
	 *
	 * When data remain pending after the flush, we fall through to the
	 * code below.
	 */

	if (tx->flags & TX_CLOSING) {
		deflate_flush_send(tx);

		if (tx->flags & TX_ERROR)
			return;

		if (0 == tx_deflate_pending(tx)) {
			(*attr->closed)(tx, attr->closed_arg);
			return;
		}
	}

	if (tx_deflate_debugging(9)) {
		g_debug("TX %s: (%s) %sdone locally [%c%c]",
			G_STRFUNC, gnet_host_to_string(&tx->host),
			(tx->flags & TX_ERROR) ? "ERROR " : "",
			(attr->flags & DF_FLOWC) ? 'C' : '-',
			(attr->flags & DF_FLUSH) ? 'f' : '-');
	}

	/*
	 * If upper layer wants servicing, do it now.
	 * Note that this can put us back into flow control.
	 */

	if (tx->flags & TX_SERVICE) {
		g_assert(tx->srv_routine);
		tx->srv_routine(tx->srv_arg);
	}
}
/**
 * Decodes "chunked" data.
 *
 * The function returns as soon as it needs more data to proceed, on
 * error, if the state CHUNK_STATE_END was reached, or if the state
 * CHUNK_STATE_DATA was reached. In the latter case the chunk payload
 * itself must be consumed and this function must not be called again
 * until the state CHUNK_STATE_DATA_CRLF is reached.
 *
 * The parsing state (current state, collected hexadecimal digits of the
 * chunk-size, flags) is kept in the driver's attributes, so a chunk header
 * or trailer may be split at any position across successive calls.
 *
 * @param rx			the current RX driver.
 * @param src			the chunk data.
 * @param size			the amount of bytes available at ``src''; must be
 *						greater than zero.
 * @param p_error_str	if not NULL and parse_chunk() fails, it will point
 *						to an informational error message; it is set to
 *						NULL on success.
 *
 * @return 0 on failure (the state is then set to CHUNK_STATE_ERROR);
 * non-zero amount of consumed bytes on success.
 */
static size_t
parse_chunk(rxdrv_t *rx, const char *src, size_t size,
	const char **p_error_str)
{
	struct attr *attr = rx->opaque;
	const char *error_str;
	size_t len;

	g_assert(attr);
	g_assert(src);
	g_assert(size > 0);
	g_assert(attr->state < NUM_CHUNK_STATES);
	g_assert(0 == attr->data_remain);

	len = size;		/* Amount of bytes from ``src'' not consumed yet */

	do {
		switch (attr->state) {
		case CHUNK_STATE_DATA_CRLF:
			/* The chunk-data must be followed by a CRLF */
			while (len > 0) {
				uchar c;

				len--;
				c = *src++;
				if ('\r' == c) {
					/*
					 * This allows more than one CR but we must consume
					 * some data or keep state over this otherwise.
					 */
					continue;
				} else if ('\n' == c) {
					attr->state = CHUNK_STATE_SIZE;
					break;
				} else {
					/*
					 * Normally it is an error, there should be CRLF after
					 * the chunk data.  However, they might have forgotten
					 * to send the '\n' or the whole sequence.
					 *
					 * If what follows looks like a valid chunk size, then
					 * we should be able to resync properly: Unread the
					 * character and move on to the chunk size decoding.
					 */

					if (!(attr->flags & IF_NO_CRLF)) {
						/* Warn only once */
						attr->flags |= IF_NO_CRLF;
						g_warning("Host %s forgot CRLF after data",
							gnet_host_to_string(&rx->host));
					}
					len++;
					src--;
					attr->state = CHUNK_STATE_SIZE;
					break;
				}
			}
			break;

		case CHUNK_STATE_SIZE:
			/*
			 * The buffer may legitimately be full already: when the
			 * previous input ended right after the digit which filled
			 * ``hex_buf'', we come back here with hex_pos equal to the
			 * buffer size.  Asserting a strict inequality would let a
			 * remote peer abort the process by splitting its chunk-size
			 * at that exact position.  Any further digit is rejected by
			 * the overflow check below.
			 */
			g_assert(attr->hex_pos <= sizeof attr->hex_buf);
			while (len > 0) {
				uchar c;

				len--;
				c = *src++;
				if (is_ascii_xdigit(c)) {
					if (attr->hex_pos >= sizeof attr->hex_buf) {
						error_str = "Overflow in chunk-size";
						goto error;
					}
					/* Collect up to 16 hex characters */
					attr->hex_buf[attr->hex_pos++] = c;
				} else {
					/*
					 * There might be a chunk-extension after the
					 * hexadecimal chunk-size but there shouldn't
					 * be anything else.
					 */

					if (
						0 == attr->hex_pos ||
						(!is_ascii_space(c) && ';' != c)
					) {
						error_str = "Bad chunk-size";
						goto error;
					}
					attr->state = CHUNK_STATE_EXT;
					break;
				}
			}
			break;

		case CHUNK_STATE_EXT:
			/* Just skip over the chunk-extension */
			while (len > 0) {
				len--;
				if ('\n' == *src++) {
					/*
					 * Pick up the collected hex digits and
					 * calculate the chunk-size.
					 */

					g_assert(attr->hex_pos > 0);
					g_assert(attr->hex_pos <= sizeof attr->hex_buf);

					{
						uint64 v = 0;
						uint i;

						for (i = 0; i < attr->hex_pos; i++)
							v = (v << 4) | hex2int_inline(attr->hex_buf[i]);

						attr->data_remain = v;
						attr->hex_pos = 0;	/* Ready for next chunk-size */
					}

					/* A chunk-size of zero announces the trailers */
					attr->state = 0 != attr->data_remain
						? CHUNK_STATE_DATA
						: CHUNK_STATE_TRAILER_START;
					break;
				}
			}
			break;

		case CHUNK_STATE_TRAILER_START:
			/* We've reached another trailer line */
			if (len < 1)
				break;
			if ('\r' == src[0]) {
				/*
				 * This allows more than one CR but we must consume
				 * some data or keep state over this otherwise.
				 */
				src++;
				len--;
			}
			if (len < 1)
				break;
			if ('\n' == src[0]) {
				/* An empty line means the end of all trailers was reached */
				src++;
				len--;
				attr->state = CHUNK_STATE_END;
				break;
			}
			attr->state = CHUNK_STATE_TRAILER;
			/* FALL THROUGH */

		case CHUNK_STATE_TRAILER:
			/* Just skip over the trailer line */
			while (len > 0) {
				len--;
				if ('\n' == *src++) {
					/*
					 * Now check whether there's another trailer
					 * line or whether we've reached the end
					 */

					attr->state = CHUNK_STATE_TRAILER_START;
					break;
				}
			}
			break;

		case CHUNK_STATE_END:
			/*
			 * We're not supposed to receive data after the chunk stream
			 * has been ended.  But if we do, it means either we
			 * misinterpreted the chunk end stream or the other end is just
			 * going berserk.
			 */

			error_str = "Remaining data after chunk end";
			goto error;

		case CHUNK_STATE_DATA:
		case CHUNK_STATE_ERROR:
		case NUM_CHUNK_STATES:
			g_assert_not_reached();
			break;
		}

		/*
		 * NB: Some data from ``src'' must have been consumed or an
		 * infinite loop may occur.
		 */

		if (CHUNK_STATE_DATA == attr->state) {
			/* Chunk payload follows: the caller must consume it */
			if (GNET_PROPERTY(rx_debug) > 9)
				g_debug("parse_chunk: chunk size %s bytes",
					uint64_to_string(attr->data_remain));
			break;
		}

	} while (len > 0 && CHUNK_STATE_END != attr->state);

	if (p_error_str)
		*p_error_str = NULL;

	return size - len;

error:

	if (p_error_str)
		*p_error_str = error_str;

	attr->state = CHUNK_STATE_ERROR;
	return 0;
}