/*
 *===========================================================================
 *                    ipnet_create_reassembled_packet
 *===========================================================================
 * Description: Reassembles a list of fragments to the orginal packet and
 *              creates a new packet of it.
 * Parameters:  fragment_list - The first fragment in the list.
 *              ip_hdr_size - The size of the IP header.
 *              frag_hdr_size - The size of the header containing the fragmentation id.
 *              update_ip_header - A function that can finish the IP header
 *              of the reassembled packet.
 *              reassemble_err_func - Handler for reassembly error.
 * Returns:     The reassembled packet or IP_NULL if a packet big enough
 *              to hold the whole packet.
 */
IP_GLOBAL Ipcom_pkt *
ipnet_create_reassembled_packet(Ipcom_pkt *fragment_list,
                                int ip_hdr_size,
                                int frag_hdr_size,
                                Ipnet_update_ip_header_func update_ip_header,
                                Ipnet_reassemble_err_func report_reassemble_err)
{
    Ipcom_pkt *pkt = IP_NULL;
    Ipcom_pkt *pkt_it;
    int        total_size = 0;
    int        mtu;

    for (pkt_it = fragment_list; pkt_it != IP_NULL; pkt_it = pkt_it->next_fragment)
    {
        pkt_it->start += frag_hdr_size;
        total_size += pkt_it->end - pkt_it->start;
        ip_assert(pkt_it->ipstart <= pkt_it->start);
        ip_assert(pkt_it->start <= pkt_it->end);
    }

    if (total_size > 65535)
    {
        IPCOM_WV_EVENT_2 (IPCOM_WV_NETD_IP4_DATAPATH_EVENT, IPCOM_WV_NETD_WARNING,
                          1, 4, IPCOM_WV_NETDEVENT_WARNING, IPCOM_WV_NETD_RECV,
                          ipnet_create_reassembled_packet, IPCOM_WV_NETD_BADLEN,
                          IPCOM_WV_IPNET_FRAG_MODULE, IPCOM_WV_NETD_IP_REASSEMBLE);
        IPNET_STATS(pkt_reasm_too_big++);
        if (report_reassemble_err)
            report_reassemble_err(fragment_list);
        goto done;
    }

    total_size += ip_hdr_size;
    mtu = total_size;

    pkt = ipcom_pkt_malloc(mtu,
                           IP_FLAG_FC_STACKCONTEXT);
    if (pkt == IP_NULL || pkt->maxlen < total_size)
    {
        IPCOM_WV_EVENT_2 (IPCOM_WV_NETD_IP4_DATAPATH_EVENT, IPCOM_WV_NETD_CRITICAL,
                          1, 5, IPCOM_WV_NETDEVENT_CRITICAL, IPCOM_WV_NETD_RECV,
                          ipnet_create_reassembled_packet, IPCOM_WV_NETD_NOBUFS,
                          IPCOM_WV_IPNET_FRAG_MODULE, IPCOM_WV_NETD_IP_REASSEMBLE);
        IPNET_STATS(pkt_reasm_nomem++);
        if (pkt != IP_NULL)
        {
            ipcom_pkt_free(pkt);
            pkt = IP_NULL;
        }
        goto done;
    }

    pkt->start = fragment_list->ipstart;
    if (pkt->start + total_size > pkt->maxlen)
        pkt->start  = (pkt->maxlen - total_size) & ~0x3;
    pkt->ipstart    = pkt->start;
    pkt->end        = pkt->start + ip_hdr_size;
    pkt->ifindex    = fragment_list->ifindex;
    pkt->vr_index   = fragment_list->vr_index;
    pkt->flags      = fragment_list->flags;
    pkt->flags &= ~(IPCOM_PKT_FLAG_FRAGMENT
                    | IPCOM_PKT_FLAG_MORE_FRAG
                    | IPCOM_PKT_FLAG_HW_CHECKSUM
                    | IPCOM_PKT_FLAG_TL_CHECKSUM);

    /* Copy the IP header */
    ipcom_memcpy(&pkt->data[pkt->start],
                 &fragment_list->data[fragment_list->ipstart],
                 (Ip_size_t)ip_hdr_size);

    /* Copy the rest of the packet */
    for (pkt_it = fragment_list; pkt_it != IP_NULL; pkt_it = pkt_it->next_fragment)
    {
        ipcom_memcpy(&pkt->data[pkt->end],
                     &pkt_it->data[pkt_it->start],
                     (Ip_size_t)(pkt_it->end - pkt_it->start));
        pkt->end += pkt_it->end - pkt_it->start;
        ip_assert(pkt->end <= pkt->maxlen);
    }

    update_ip_header(pkt, fragment_list);

done:
    ipcom_pkt_free(fragment_list);
    return pkt;
}
示例#2
0
/*
 *===========================================================================
 *                   ipnet_usr_sock_tcp_pkts_from_iov
 *===========================================================================
 * Description: Copies the user data into MSS sized packets.
 * Parameters:  sock - The socket that will be use to send.
 *              msg - 'msg_iov' contains the buffer(s) to send.
 *              flags - IP_MSG_xxx flags.
 *              offset - Number of bytes into the msg->msg_iov buffers
 *                      where the copy into packets should start.
 *              total_buf_len - the sum of the length of all
 *                      msg->msg_iov buffers in 'msg'.
 *              ppkt -  Will point to the created packet(s) if the
 *                      operation is successfil. The (*ppkt)->next
 *                      points to the next packet in case of more than
 *                      one packet was allocated.
 * Returns:     <0 = error code (-IPNET_ERRNO_xxx)
 *               0 = this operation released the socket lock, the
 *                      sendmsg operation must be restarted
 *              >0 = number of bytes that has been allocated in the
 *                      socket send buffer and copied into the
 *                      packet(s) pointed to by *ppkt.
 */
IP_STATIC int
ipnet_usr_sock_tcp_pkts_from_iov(Ipnet_socket *sock,
                                 IP_CONST struct Ip_msghdr *msg,
                                 int flags,
                                 int offset,
                                 int total_buf_len,
                                 Ipcom_pkt **ppkt)
{
    Ipcom_pkt *pkt_head;
    Ipcom_pkt *pkt_tail;
    Iptcp_tcb *tcb = sock->tcb;
    int        mss = tcb->mss;
    Ip_u8     *buf;
    int        buf_len;
    int        i = 0;
    int        len = 0;
    int        bytes_to_copy = total_buf_len - offset;
    Ip_bool    non_blocking;
    Ip_bool    urgent = IP_BIT_ISSET(flags, IP_MSG_OOB);

    *ppkt = IP_NULL;

    /*
     * If offset == total_buf_len, which can in particular happen
     * if both are zero, then the loop immediately below can
     * access beyond the end of the iovec array. Avoid this.
     */
    if (offset == total_buf_len)
        return 0;

    for (buf_len = 0; buf_len <= 0; offset = - buf_len)
    {
        struct Ip_iovec *iov = &msg->msg_iov[i++];

        buf = (Ip_u8 *) iov->iov_base + offset;
        buf_len = (int) iov->iov_len - offset;
    }

    if (IP_LIKELY(urgent == IP_FALSE))
    {
        iptcp_partial_lock(tcb->send.partial_lock);
        if (tcb->send.partial != IP_NULL)
        {
            len = ipnet_sock_tcp_append_send_data(tcb->send.partial,
                                                  &buf,
                                                  &buf_len,
                                                  mss);
            if (len > 0)
                ipcom_atomic_add(&sock->snd_bytes, len);
        }
        iptcp_partial_unlock(tcb->send.partial_lock);

        if (len == bytes_to_copy)
            /* All data that should be sent was queued to the end of the
               partial finished TCP segment */
            return len;
    }
    /* else: urgent packet must be sent in a separate signal since the
       flag field must have IP_MSG_OOB set for correct processing. */

    non_blocking = IP_BIT_ISSET(sock->flags, IPNET_SOCKET_FLAG_NONBLOCKING)
        || IP_BIT_ISSET(flags, IP_MSG_DONTWAIT);

    bytes_to_copy = 0;
    do
    {
        int bytes_to_alloc = IP_MIN(mss, buf_len - bytes_to_copy);

        if (ipcom_atomic_add_and_return(&sock->snd_bytes, bytes_to_alloc)
            < sock->send_max_bytes + bytes_to_alloc)
        {
            bytes_to_copy += bytes_to_alloc;
        }
        else
        {
            int lowat;

            ipcom_atomic_sub(&sock->snd_bytes, bytes_to_alloc);

            if (bytes_to_copy > 0 || non_blocking)
                break;

            lowat = IP_MAX(mss, (int) sock->tcb->send.lowat);
            lowat = IP_MIN(lowat, sock->send_max_bytes);
            (void)ipnet_usr_sock_wait_until_writable(sock,
                                                     lowat,
                                                     IP_NULL);
            if (sock->ipcom.so_errno)
            {
                ipcom_atomic_sub(&sock->snd_bytes, bytes_to_copy);
                return len > 0 && sock->ipcom.so_errno != IP_ERRNO_EINTR ? len : -sock->ipcom.so_errno;
            }
        }
    } while (bytes_to_copy < buf_len);

    pkt_head = IP_NULL;
    pkt_tail = IP_NULL;
    while (bytes_to_copy > 0)
    {
        Ipcom_pkt *pkt;
        int        seg_len = IP_MIN(bytes_to_copy, mss);

        pkt = ipcom_pkt_malloc(mss + sock->max_hdrspace,
                               IP_FLAG_FC_STACKCONTEXT | (non_blocking ? 0 : IP_FLAG_FC_BLOCKOK));
        if (IP_UNLIKELY(pkt == IP_NULL))
            break;

        pkt->start = (pkt->maxlen - mss) & ~0x3;
        pkt->end   = pkt->start + seg_len;

        pkt->chk = ipnet_in_checksum_memcpy(&pkt->data[pkt->start],
                                            buf,
                                            seg_len);

        /* Add this packet to the list of packets that will be sent
           to ipnetd */
        if (pkt_head == IP_NULL)
            pkt_head = pkt;

        else
            pkt_tail->next = pkt;
        pkt_tail = pkt;

        len += seg_len;
        buf += seg_len;
        bytes_to_copy -= seg_len;
    }

    if (IP_LIKELY(pkt_tail != IP_NULL))
    {
        if (IP_UNLIKELY(urgent))
            IPNET_MARK_PKT_AS_URGENT(pkt_tail);

        iptcp_partial_lock(tcb->send.partial_lock);
        tcb->send.partial = pkt_tail;
        iptcp_partial_unlock(tcb->send.partial_lock);
    }

    *ppkt = pkt_head;
    return (len == 0 ? -IP_ERRNO_EWOULDBLOCK : len);
}
/*
 *===========================================================================
 *                    ipnet_fragment_packet
 *===========================================================================
 * Description: Splits a packet that is to big to fit into the MTU of the
 *              network interface.
 * Parameters:  pkt - The packet to split. pkt->start is the offset to the
 *              extension header (or transport layer protocol) with the
 *              id of 'next_hdr'. This packet is released by this function.
 *              ip_hdr_size - The size of the IP header.
 *              other_hdr_size - The size of other headers that will be
 *              added due to fragmentation.
 *              mtu - The MTU of the network interface.
 *              has_more_fragments - function that returns IP_TRUE if the
 *              packet passed as an argument has more fragments. This
 *              parameter might be IP_NULL.
 * Returns:     A list of fragments or IP_NULL if not enough memory could
 *              be allocated to create all fragments.
 */
IP_GLOBAL Ipcom_pkt *
ipnet_fragment_packet(Ipcom_pkt *pkt,
                      int ip_hdr_size,
                      int other_hdr_size,
                      int mtu,
                      Ipnet_more_fragments_func more_fragments)
{
    Ipcom_pkt *frag_pkt_head;
    Ipcom_pkt *frag_pkt_tail;
    Ipcom_pkt *frag_pkt;
    int        frag_size;
    int        total_size;
    int        size_of_this_fragment;
    int        frag_offset;
    Ipnet_pkt_info_sock_snd_buf *ssb;

    total_size = pkt->end - pkt->start;
    frag_size = (mtu - ip_hdr_size - other_hdr_size) & 0xfff8;
    frag_pkt_head = IP_NULL;
    frag_pkt_tail = IP_NULL;
    frag_offset = 0;

    while (frag_offset < total_size)
    {
        if (total_size - frag_offset > frag_size)
            size_of_this_fragment = frag_size;
        else
            size_of_this_fragment = total_size - frag_offset;

        frag_pkt = ipcom_pkt_malloc(mtu, IP_FLAG_FC_STACKCONTEXT);
        if (frag_pkt == IP_NULL)
        {
            IPCOM_WV_MARKER_1 ( IPCOM_WV_NETD_IP4_DATAPATH_EVENT, IPCOM_WV_NETD_CRITICAL, 1, 1, IPCOM_WV_NETD_NOBUFS,
                                ipnet_fragment_packet, IPCOM_WV_IPNET_FRAG_MODULE, IPCOM_WV_NETD_IP_REASSEMBLE);
            IPNET_STATS(pkt_frag_nomem++);
            ipcom_pkt_free(pkt);
            if (frag_pkt_head != IP_NULL)
                ipcom_pkt_free(frag_pkt_head);
            return IP_NULL;
        }

#ifdef IPCOM_USE_MIB2
        if(other_hdr_size == 0)
        {
#ifdef IPCOM_USE_INET
            Ipnet_netif *netif = ipnet_if_indextonetif(pkt->vr_index, pkt->ifindex);

            IPCOM_MIB2(ipFragCreates++);
            if (netif != IP_NULL)
            {
                IPCOM_MIB2_SYSWI_U32_ADD(v4, ipSystemStatsOutFragCreates, 1);
                IPCOM_MIB2_PERIF_U32_ADD(v4, ipIfStatsOutFragCreates, 1, netif);
            }
#endif
        }
        else
        {
#ifdef IPCOM_USE_INET6
            Ipnet_netif *netif = ipnet_if_indextonetif(pkt->vr_index, pkt->ifindex);

            IPCOM_MIB2_NETIF(ipv6IfStatsOutFragCreates++);
            if (netif != IP_NULL)
            {
                IPCOM_MIB2_SYSWI_U32_ADD(v6, ipSystemStatsOutFragCreates, 1);
                IPCOM_MIB2_PERIF_U32_ADD(v6, ipIfStatsOutFragCreates, 1, netif);
            }
#endif
        }
#endif /* IPCOM_USE_MIB2 */

        if (frag_pkt_head == IP_NULL)
            frag_pkt_head = frag_pkt;
        else
            frag_pkt_tail->next_fragment = frag_pkt;
        frag_pkt_tail = frag_pkt;

        IPCOM_PKT_TRACE(frag_pkt, IPCOM_PKT_ID_FRAG);
        frag_pkt->fd       = pkt->fd;
        frag_pkt->ifindex  = pkt->ifindex;
        frag_pkt->flags    = pkt->flags;
        frag_pkt->vr_index = pkt->vr_index;
        /* Copy IP packet payload */
        frag_pkt->start      = (frag_pkt->maxlen - size_of_this_fragment) & ~0x3;
        frag_pkt->end        = frag_pkt->start + size_of_this_fragment;
        ip_assert(frag_pkt->start < frag_pkt->maxlen);
        ipcom_memcpy(&frag_pkt->data[frag_pkt->start],
                     &pkt->data[pkt->start + frag_offset],
                     (Ip_size_t)size_of_this_fragment);

        frag_pkt->offset     = (Ip_u16) (frag_offset + pkt->offset);
        frag_offset += size_of_this_fragment;
        IP_BIT_SET(frag_pkt->flags, IPCOM_PKT_FLAG_MORE_FRAG);
    }

    if (IP_BIT_ISFALSE(pkt->flags, IPCOM_PKT_FLAG_FRAGMENT)
        || (more_fragments && IP_FALSE == more_fragments(pkt)))
        /* This is either an IP datagram sourced from this node or the
           last fragment of an IP datagram that has been forwarded by
           this node. */
        IP_BIT_CLR(frag_pkt_tail->flags, IPCOM_PKT_FLAG_MORE_FRAG);

    ip_assert(pkt->start + frag_offset == pkt->end);
    ip_assert(frag_pkt_head != IP_NULL);
    if (IP_NULL != (ssb = ipcom_pkt_get_info(pkt, IPNET_PKT_INFO_SOCK_SND_BUF)))
    {
        /* Transfer the socket send buffer allocation from the
           original packet to the last fragment to make sure that the
           send flow control is still driven by TX done events. */
        ipcom_pkt_set_info(frag_pkt_tail,
                           IPNET_PKT_INFO_SOCK_SND_BUF,
                           sizeof(Ipnet_pkt_info_sock_snd_buf),
                           ssb);
        frag_pkt_tail->dtor = pkt->dtor;
        pkt->dtor = IP_NULL;
    }

    /* Do not free original yet, since it might hold the
       callers signal buffer. Put it in the fragment tails
       next original instead and it will be freed when the
       last segment has been transmitted */
    if (frag_pkt_tail != IP_NULL)
        frag_pkt_tail->next_original = pkt;
    else
        ipcom_pkt_free(pkt);

    return frag_pkt_head;
}