static ssize_t
_mongoc_stream_gridfs_writev (mongoc_stream_t *stream,
                              mongoc_iovec_t *iov,
                              size_t iovcnt,
                              int32_t timeout_msec)
{
   mongoc_stream_gridfs_t *file = (mongoc_stream_gridfs_t *) stream;
   ssize_t ret = 0;

   ENTRY;

   BSON_ASSERT (stream);
   BSON_ASSERT (iov);
   BSON_ASSERT (iovcnt);

   /* timeout_msec is unused by mongoc_gridfs_file_writev */
   ret = mongoc_gridfs_file_writev (file->file, iov, iovcnt, 0);

   if (!ret) {
      RETURN (ret);
   }

   mongoc_counter_streams_egress_add (ret);

   RETURN (ret);
}
static ssize_t
_mongoc_stream_gridfs_writev (mongoc_stream_t *stream,
                              struct iovec    *iov,
                              size_t           iovcnt,
                              bson_int32_t     timeout_msec)
{
   mongoc_stream_gridfs_t *file = (mongoc_stream_gridfs_t *)stream;
   ssize_t ret = 0;

   ENTRY;

   BSON_ASSERT (stream);
   BSON_ASSERT (iov);
   BSON_ASSERT (iovcnt);

   ret = mongoc_gridfs_file_writev (file->file, iov, iovcnt, timeout_msec);

   if (!ret) {
      RETURN (ret);
   }

   mongoc_counter_streams_egress_add (ret);

   RETURN (ret);
}
static ssize_t
_mongoc_stream_tls_writev (mongoc_stream_t *stream,
                           mongoc_iovec_t  *iov,
                           size_t           iovcnt,
                           int32_t          timeout_msec)
{
   mongoc_stream_tls_t *tls = (mongoc_stream_tls_t *)stream;
   char buf[MONGOC_STREAM_TLS_BUFFER_SIZE];

   ssize_t ret = 0;
   ssize_t child_ret;
   size_t i;
   size_t iov_pos = 0;

   /* There's a bit of a dance to coalesce vectorized writes into
    * MONGOC_STREAM_TLS_BUFFER_SIZE'd writes to avoid lots of small tls
    * packets.
    *
    * The basic idea is that we want to combine writes in the buffer if they're
    * smaller than the buffer, flushing as it gets full.  For larger writes, or
    * the last write in the iovec array, we want to ignore the buffer and just
    * write immediately.  We take care of doing buffer writes by re-invoking
    * ourself with a single iovec_t, pointing at our stack buffer.
    */
   char *buf_head = buf;
   char *buf_tail = buf;
   char *buf_end = buf + MONGOC_STREAM_TLS_BUFFER_SIZE;
   size_t bytes;

   char *to_write = NULL;
   size_t to_write_len;

   BSON_ASSERT (tls);
   BSON_ASSERT (iov);
   BSON_ASSERT (iovcnt);

   tls->timeout_msec = timeout_msec;

   for (i = 0; i < iovcnt; i++) {
      iov_pos = 0;

      while (iov_pos < iov[i].iov_len) {
         if (buf_head != buf_tail ||
             ((i + 1 < iovcnt) &&
              ((buf_end - buf_tail) > (iov[i].iov_len - iov_pos)))) {
            /* If we have either of:
             *   - buffered bytes already
             *   - another iovec to send after this one and we don't have more
             *     bytes to send than the size of the buffer.
             *
             * copy into the buffer */

            bytes = BSON_MIN (iov[i].iov_len - iov_pos, buf_end - buf_tail);

            memcpy (buf_tail, iov[i].iov_base + iov_pos, bytes);
            buf_tail += bytes;
            iov_pos += bytes;

            if (buf_tail == buf_end) {
               /* If we're full, request send */

               to_write = buf_head;
               to_write_len = buf_tail - buf_head;

               buf_tail = buf_head = buf;
            }
         } else {
            /* Didn't buffer, so just write it through */

            to_write = (char *)iov[i].iov_base + iov_pos;
            to_write_len = iov[i].iov_len - iov_pos;

            iov_pos += to_write_len;
         }

         if (to_write) {
            /* We get here if we buffered some bytes and filled the buffer, or
             * if we didn't buffer and have to send out of the iovec */

            child_ret = _mongoc_stream_tls_write (tls, to_write, to_write_len);

            if (child_ret < 0) {
               /* Buffer write failed, just return the error */
               return child_ret;
            }

            ret += child_ret;

            if (child_ret < to_write_len) {
               /* we timed out, so send back what we could send */

               return ret;
            }

            to_write = NULL;
         }
      }
   }

   if (buf_head != buf_tail) {
      /* If we have any bytes buffered, send */

      child_ret = _mongoc_stream_tls_write (tls, buf_head, buf_tail - buf_head);

      if (child_ret < 0) {
         return child_ret;
      }

      ret += child_ret;
   }

   if (ret >= 0) {
      mongoc_counter_streams_egress_add (ret);
   }

   return ret;
}
static ssize_t
_mongoc_stream_tls_writev (mongoc_stream_t *stream,
                           mongoc_iovec_t  *iov,
                           size_t           iovcnt,
                           int32_t          timeout_msec)
{
   mongoc_stream_tls_t *tls = (mongoc_stream_tls_t *)stream;
   ssize_t ret = 0;
   size_t i;
   size_t iov_pos = 0;
   int write_ret;

   int64_t now;
   int64_t expire = 0;

   BSON_ASSERT (tls);
   BSON_ASSERT (iov);
   BSON_ASSERT (iovcnt);

   tls->timeout = timeout_msec;

   if (timeout_msec >= 0) {
      expire = bson_get_monotonic_time () + (timeout_msec * 1000UL);
   }

   for (i = 0; i < iovcnt; i++) {
      iov_pos = 0;

      while (iov_pos < iov[i].iov_len) {
         write_ret = BIO_write (tls->bio, (char *)iov[i].iov_base + iov_pos,
                              (int)(iov[i].iov_len - iov_pos));

         if (write_ret < 0) {
            return write_ret;
         }

         if (expire) {
            now = bson_get_monotonic_time ();

            if ((expire - now) < 0) {
               if (write_ret == 0) {
                  mongoc_counter_streams_timeout_inc();
#ifdef _WIN32
                  errno = WSAETIMEDOUT;
#else
                  errno = ETIMEDOUT;
#endif
                  return -1;
               }

               tls->timeout = 0;
            } else {
               tls->timeout = expire - now;
            }
         }

         ret += write_ret;
         iov_pos += write_ret;
      }
   }

   if (ret >= 0) {
      mongoc_counter_streams_egress_add(ret);
   }

   return ret;
}
static ssize_t
_mongoc_socket_try_sendv_slow (mongoc_socket_t *sock, /* IN */
                               mongoc_iovec_t *iov,   /* IN */
                               size_t iovcnt)         /* IN */
{
   ssize_t ret = 0;
   size_t i;
   ssize_t wrote;

   ENTRY;

   BSON_ASSERT (sock);
   BSON_ASSERT (iov);
   BSON_ASSERT (iovcnt);

   for (i = 0; i < iovcnt; i++) {
      wrote = send (sock->sd, iov[i].iov_base, iov[i].iov_len, 0);
#ifdef _WIN32
      if (wrote == SOCKET_ERROR) {
#else
      if (wrote == -1) {
#endif
         _mongoc_socket_capture_errno (sock);

         if (!_mongoc_socket_errno_is_again (sock)) {
            RETURN (-1);
         }
         RETURN (ret ? ret : -1);
      }

      ret += wrote;

      if (wrote != iov[i].iov_len) {
         RETURN (ret);
      }
   }

   RETURN (ret);
}


/*
 *--------------------------------------------------------------------------
 *
 * _mongoc_socket_try_sendv --
 *
 *       Helper used by mongoc_socket_sendv() to try to write as many
 *       bytes to the underlying socket until the socket buffer is full.
 *
 *       This is performed in a non-blocking fashion.
 *
 * Returns:
 *       -1 on failure. the number of bytes written on success.
 *
 * Side effects:
 *       None.
 *
 *--------------------------------------------------------------------------
 */

static ssize_t
_mongoc_socket_try_sendv (mongoc_socket_t *sock, /* IN */
                          mongoc_iovec_t *iov,   /* IN */
                          size_t iovcnt)         /* IN */
{
#ifdef _WIN32
   DWORD dwNumberofBytesSent = 0;
   int ret;
#else
   struct msghdr msg;
   ssize_t ret;
#endif

   ENTRY;

   BSON_ASSERT (sock);
   BSON_ASSERT (iov);
   BSON_ASSERT (iovcnt);

   DUMP_IOVEC (sendbuf, iov, iovcnt);

#ifdef _WIN32
   ret = WSASend (
      sock->sd, (LPWSABUF) iov, iovcnt, &dwNumberofBytesSent, 0, NULL, NULL);
   TRACE ("WSASend sent: %ld (out of: %ld), ret: %d",
          dwNumberofBytesSent,
          iov->iov_len,
          ret);
#else
   memset (&msg, 0, sizeof msg);
   msg.msg_iov = iov;
   msg.msg_iovlen = (int) iovcnt;
   ret = sendmsg (sock->sd,
                  &msg,
#ifdef MSG_NOSIGNAL
                  MSG_NOSIGNAL);
#else
                  0);
#endif
   TRACE ("Send %ld out of %ld bytes", ret, iov->iov_len);
#endif


#ifdef _WIN32
   if (ret == SOCKET_ERROR) {
#else
   if (ret == -1) {
#endif
      _mongoc_socket_capture_errno (sock);

/*
 * Check to see if we have sent an iovec too large for sendmsg to
 * complete. If so, we need to fallback to the slow path of multiple
 * send() commands.
 */
#ifdef _WIN32
      if (mongoc_socket_errno (sock) == WSAEMSGSIZE) {
#else
      if (mongoc_socket_errno (sock) == EMSGSIZE) {
#endif
         RETURN (_mongoc_socket_try_sendv_slow (sock, iov, iovcnt));
      }

      RETURN (-1);
   }

#ifdef _WIN32
   RETURN (dwNumberofBytesSent);
#else
   RETURN (ret);
#endif
}


/*
 *--------------------------------------------------------------------------
 *
 * mongoc_socket_sendv --
 *
 *       A wrapper around using sendmsg() to send an iovec.
 *       This also deals with the structure differences between
 *       WSABUF and struct iovec.
 *
 *       @expire_at is 0 for no blocking, -1 for infinite blocking,
 *       or a time using the monotonic clock to expire. Calculate this
 *       using bson_get_monotonic_time() + N_MICROSECONDS.
 *
 * Returns:
 *       -1 on failure.
 *       the number of bytes written on success.
 *
 * Side effects:
 *       None.
 *
 *--------------------------------------------------------------------------
 */

ssize_t
mongoc_socket_sendv (mongoc_socket_t *sock,  /* IN */
                     mongoc_iovec_t *in_iov, /* IN */
                     size_t iovcnt,          /* IN */
                     int64_t expire_at)      /* IN */
{
   ssize_t ret = 0;
   ssize_t sent;
   size_t cur = 0;
   mongoc_iovec_t *iov;

   ENTRY;

   BSON_ASSERT (sock);
   BSON_ASSERT (in_iov);
   BSON_ASSERT (iovcnt);

   iov = bson_malloc (sizeof (*iov) * iovcnt);
   memcpy (iov, in_iov, sizeof (*iov) * iovcnt);

   for (;;) {
      sent = _mongoc_socket_try_sendv (sock, &iov[cur], iovcnt - cur);
      TRACE (
         "Sent %ld (of %ld) out of iovcnt=%ld", sent, iov[cur].iov_len, iovcnt);

      /*
       * If we failed with anything other than EAGAIN or EWOULDBLOCK,
       * we should fail immediately as there is another issue with the
       * underlying socket.
       */
      if (sent == -1) {
         if (!_mongoc_socket_errno_is_again (sock)) {
            ret = -1;
            GOTO (CLEANUP);
         }
      }

      /*
       * Update internal stream counters.
       */
      if (sent > 0) {
         ret += sent;
         mongoc_counter_streams_egress_add (sent);

         /*
          * Subtract the sent amount from what we still need to send.
          */
         while ((cur < iovcnt) && (sent >= (ssize_t) iov[cur].iov_len)) {
            TRACE ("still got bytes left: sent -= iov_len: %ld -= %ld",
                   sent,
                   iov[cur].iov_len);
            sent -= iov[cur++].iov_len;
         }

         /*
          * Check if that made us finish all of the iovecs. If so, we are done
          * sending data over the socket.
          */
         if (cur == iovcnt) {
            TRACE ("%s", "Finished the iovecs");
            break;
         }

         /*
          * Increment the current iovec buffer to its proper offset and adjust
          * the number of bytes to write.
          */
         TRACE ("Seeked io_base+%ld", sent);
         TRACE (
            "Subtracting iov_len -= sent; %ld -= %ld", iov[cur].iov_len, sent);
         iov[cur].iov_base = ((char *) iov[cur].iov_base) + sent;
         iov[cur].iov_len -= sent;
         TRACE ("iov_len remaining %ld", iov[cur].iov_len);

         BSON_ASSERT (iovcnt - cur);
         BSON_ASSERT (iov[cur].iov_len);
      } else if (OPERATION_EXPIRED (expire_at)) {
         if (expire_at > 0) {
            mongoc_counter_streams_timeout_inc ();
         }
         GOTO (CLEANUP);
      }

      /*
       * Block on poll() until our desired condition is met.
       */
      if (!_mongoc_socket_wait (sock, POLLOUT, expire_at)) {
         GOTO (CLEANUP);
      }
   }

CLEANUP:
   bson_free (iov);

   RETURN (ret);
}


int
mongoc_socket_getsockname (mongoc_socket_t *sock,     /* IN */
                           struct sockaddr *addr,     /* OUT */
                           mongoc_socklen_t *addrlen) /* INOUT */
{
   int ret;

   ENTRY;

   BSON_ASSERT (sock);

   ret = getsockname (sock->sd, addr, addrlen);

   _mongoc_socket_capture_errno (sock);

   RETURN (ret);
}