Пример #1
0
/*
 *---------------------------------------------------------------------------
 *
 * vmxnet3_tx_prepare_offload --
 *
 *    Build the offload context of a msg.
 *
 * Results:
 *    0 if everything went well.
 *    +n if n bytes need to be pulled up.
 *    -1 in case of error (not used).
 *
 * Side effects:
 *    None.
 *
 *---------------------------------------------------------------------------
 */
static int
vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp,
                           vmxnet3_offload_t *ol,
                           mblk_t *mp)
{
   int ret = 0;
   uint32_t start, stuff, value, flags, lso_flag, mss;

   ol->om = VMXNET3_OM_NONE;
   ol->hlen = 0;
   ol->msscof = 0;

   hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags);
   mac_lso_get(mp, &mss, &lso_flag);

   if (flags || lso_flag) {
      struct ether_vlan_header *eth = (void *) mp->b_rptr;
      uint8_t ethLen;

      if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
         ethLen = sizeof(struct ether_vlan_header);
      } else {
         ethLen = sizeof(struct ether_header);
      }

      VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, stuff=%u, value=%u\n",
                            flags,      ethLen,    start,    stuff,    value);

      if (lso_flag & HW_LSO) {
         mblk_t *mblk = mp;
         uint8_t *ip, *tcp;
         uint8_t ipLen, tcpLen;

         /*
          * Copy e1000g's behavior:
          * - Do not assume all the headers are in the same mblk.
          * - Assume each header is always within one mblk.
          * - Assume the ethernet header is in the first mblk.
          */
         ip = mblk->b_rptr + ethLen;
         if (ip >= mblk->b_wptr) {
            mblk = mblk->b_cont;
            ip = mblk->b_rptr;
         }
         ipLen = IPH_HDR_LENGTH((ipha_t *) ip);
         tcp = ip + ipLen;
         if (tcp >= mblk->b_wptr) {
            mblk = mblk->b_cont;
            tcp = mblk->b_rptr;
         }
         tcpLen = TCP_HDR_LENGTH((tcph_t *) tcp);
         if (tcp + tcpLen > mblk->b_wptr) { // careful, '>' instead of '>=' here
            mblk = mblk->b_cont;
         }

         ol->om = VMXNET3_OM_TSO;
         ol->hlen = ethLen + ipLen + tcpLen;
         ol->msscof = mss;

         if (mblk != mp) {
            ret = ol->hlen;
         }
      } else if (flags & HCK_PARTIALCKSUM) {
         ol->om = VMXNET3_OM_CSUM;
         ol->hlen = start + ethLen;
         ol->msscof = stuff + ethLen;
      }

   }

   return ret;
}
Пример #2
0
static enum proto_parse_status tcp_parse(struct parser *parser, struct proto_info *parent, unsigned way, uint8_t const *packet, size_t cap_len, size_t wire_len, struct timeval const *now, size_t tot_cap_len, uint8_t const *tot_packet)
{
    struct mux_parser *mux_parser = DOWNCAST(parser, parser, mux_parser);
    struct tcp_hdr const *tcphdr = (struct tcp_hdr *)packet;

    // Sanity checks
    if (wire_len < sizeof(*tcphdr)) {
        SLOG(LOG_DEBUG, "Bogus TCP packet: too short (%zu < %zu)", wire_len, sizeof(*tcphdr));
        return PROTO_PARSE_ERR;
    }

    if (cap_len < sizeof(*tcphdr)) return PROTO_TOO_SHORT;

    size_t tcphdr_len = TCP_HDR_LENGTH(tcphdr);

    if (tcphdr_len < sizeof(*tcphdr)) {
        SLOG(LOG_DEBUG, "Bogus TCP packet: header size too smal (%zu < %zu)", tcphdr_len, sizeof(*tcphdr));
        return PROTO_PARSE_ERR;
    }

    if (tcphdr_len > wire_len) {
        SLOG(LOG_DEBUG, "Bogus TCP packet: wrong length %zu > %zu", tcphdr_len, wire_len);
        return PROTO_PARSE_ERR;
    }

    if (tcphdr_len > cap_len) return PROTO_TOO_SHORT;

    // TODO: move this below call to tcp_proto_info_ctor() and use info instead of reading tcphdr directly
    uint16_t const sport = READ_U16N(&tcphdr->src);
    uint16_t const dport = READ_U16N(&tcphdr->dst);
    bool const syn = !!(READ_U8(&tcphdr->flags) & TCP_SYN_MASK);
    bool const fin = !!(READ_U8(&tcphdr->flags) & TCP_FIN_MASK);
    bool const ack = !!(READ_U8(&tcphdr->flags) & TCP_ACK_MASK);
    bool const rst = !!(READ_U8(&tcphdr->flags) & TCP_RST_MASK);
    bool const urg = !!(READ_U8(&tcphdr->flags) & TCP_URG_MASK);
    bool const psh = !!(READ_U8(&tcphdr->flags) & TCP_PSH_MASK);
    SLOG(LOG_DEBUG, "New TCP packet of %zu bytes (%zu captured), %zu payload, ports %"PRIu16" -> %"PRIu16" Flags: %s%s%s%s%s%s, Seq:%"PRIu32", Ack:%"PRIu32,
        wire_len, cap_len, wire_len - tcphdr_len, sport, dport,
        syn ? "Syn":"", fin ? "Fin":"", ack ? "Ack":"", rst ? "Rst":"", urg ? "Urg":"", psh ? "Psh":"",
        READ_U32N(&tcphdr->seq_num), READ_U32N(&tcphdr->ack_seq));

    // Parse

    struct tcp_proto_info info;
    tcp_proto_info_ctor(&info, parser, parent, tcphdr_len, wire_len - tcphdr_len, sport, dport, tcphdr);

    // Parse TCP options
    uint8_t const *options = (uint8_t *)(tcphdr+1);
    assert(tcphdr_len >= sizeof(*tcphdr));
    for (size_t rem_len = tcphdr_len - sizeof(*tcphdr); rem_len > 0; ) {
        ssize_t const len = parse_next_option(&info, options, rem_len);
        if (len < 0) return PROTO_PARSE_ERR;
        rem_len -= len;
        options += len;
    }

    // Search an already spawned subparser
    struct port_key key;
    port_key_init(&key, sport, dport, way);
    struct mux_subparser *subparser = mux_subparser_lookup(mux_parser, NULL, NULL, &key, now);
    if (subparser) SLOG(LOG_DEBUG, "Found subparser@%p for this cnx, for proto %s", subparser->parser, subparser->parser->proto->name);

    if (! subparser) {
        struct proto *requestor = NULL;
        struct proto *sub_proto = NULL;
        // Use connection tracking first
        ASSIGN_INFO_OPT2(ip, ip6, parent);
        if (! ip) ip = ip6;
        if (ip) sub_proto = cnxtrack_ip_lookup(IPPROTO_TCP, ip->key.addr+0, sport, ip->key.addr+1, dport, now, &requestor);
        if (! sub_proto) { // Then try predefined ports
            sub_proto = port_muxer_find(&tcp_port_muxers, info.key.port[0], info.key.port[1]);
        }
        if (sub_proto) {
            subparser = mux_subparser_and_parser_new(mux_parser, sub_proto, requestor, &key, now);
        } else {
            // Even if we have no child parser to send payload to, we want to submit payload in stream order to our plugins
            subparser = tcp_subparser_new(mux_parser, NULL, NULL, &key, now);
        }
    }

    if (! subparser) goto fallback;

    // Keep track of TCP flags & ISN
    struct tcp_subparser *tcp_sub = DOWNCAST(subparser, mux_subparser, tcp_subparser);
    mutex_lock(tcp_sub->mutex);
    if (
        info.ack &&
        (!IS_SET_FOR_WAY(way, tcp_sub->ack) || seqnum_gt(info.ack_num, tcp_sub->max_acknum[way]))
    ) {
        SET_FOR_WAY(way, tcp_sub->ack);
        tcp_sub->max_acknum[way] = info.ack_num;
    }
    if (info.fin) {
        SET_FOR_WAY(way, tcp_sub->fin);
        tcp_sub->fin_seqnum[way] = info.seq_num + info.info.payload;    // The FIN is acked after the payload
    }
    if (info.syn && !IS_SET_FOR_WAY(way, tcp_sub->syn)) {
        SET_FOR_WAY(way, tcp_sub->syn);
        tcp_sub->isn[way] = info.seq_num;
    }
    if (!IS_SET_FOR_WAY(way, tcp_sub->origin)) {
        SET_FOR_WAY(way, tcp_sub->origin);
        tcp_sub->wl_origin[way] = info.seq_num;
        if (! IS_SET_FOR_WAY(way, tcp_sub->syn)) SLOG(LOG_DEBUG, "Starting a WL while SYN is yet to be received!");
    }

    // Set relative sequence number if we know it
    if (IS_SET_FOR_WAY(way, tcp_sub->syn)) info.rel_seq_num = info.seq_num - tcp_sub->isn[way];

    // Set srv_way
    assert(tcp_sub->srv_set < 3);
    if (tcp_sub->srv_set == 0 || (tcp_sub->srv_set == 1 && info.syn)) {
        if (comes_from_client(info.key.port, info.syn, info.ack)) {
            // this packet comes from the client
            tcp_sub->srv_way = !way;
        } else {
            tcp_sub->srv_way = way;
        }
        tcp_sub->srv_set = info.syn ? 2:1;
    }
    // Now patch it into tcp info
    info.to_srv = tcp_sub->srv_way != way;

    SLOG(LOG_DEBUG, "Subparser@%p state: >ISN:%"PRIu32"%s Fin:%"PRIu32" Ack:%"PRIu32" <ISN:%"PRIu32"%s Fin:%"PRIu32" Ack:%"PRIu32", SrvWay=%u%s",
        subparser->parser,
        IS_SET_FOR_WAY(0, tcp_sub->syn) ? tcp_sub->isn[0] : IS_SET_FOR_WAY(0, tcp_sub->origin) ? tcp_sub->wl_origin[0] : 0,
        IS_SET_FOR_WAY(0, tcp_sub->syn) ? "" : " (approx)",
        IS_SET_FOR_WAY(0, tcp_sub->fin) ? tcp_sub->fin_seqnum[0] : 0,
        IS_SET_FOR_WAY(0, tcp_sub->ack) ? tcp_sub->max_acknum[0] : 0,
        IS_SET_FOR_WAY(1, tcp_sub->syn) ? tcp_sub->isn[1] : IS_SET_FOR_WAY(1, tcp_sub->origin) ? tcp_sub->wl_origin[1] : 0,
        IS_SET_FOR_WAY(1, tcp_sub->syn) ? "" : " (approx)",
        IS_SET_FOR_WAY(1, tcp_sub->fin) ? tcp_sub->fin_seqnum[1] : 0,
        IS_SET_FOR_WAY(1, tcp_sub->ack) ? tcp_sub->max_acknum[1] : 0,
        tcp_sub->srv_way,
        tcp_sub->srv_set == 0 ? " (unset)":
            tcp_sub->srv_set == 1 ? " (unsure)":"(certain)");

    enum proto_parse_status err;
    /* Use the wait_list to parse this packet.
       Notice that we do queue empty packets because subparser (or subscriber) want to receive all packets in order, including empty ones. */

    size_t const packet_len = wire_len - tcphdr_len;
    assert(IS_SET_FOR_WAY(way, tcp_sub->origin));
    unsigned const offset = info.seq_num - tcp_sub->wl_origin[way];
    unsigned const next_offset = offset + packet_len + info.syn + info.fin;
    unsigned const sync_offset = info.ack_num - tcp_sub->wl_origin[!way];  // we must not parse this one before we parsed (or timeouted) this one from wl[!way]
    // FIXME: Here the parser is chosen before we actually parse anything. If later the parser fails we cannot try another one.
    //        Choice of parser should be delayed until we start actual parse.
    bool const do_sync = info.ack && IS_SET_FOR_WAY(!way, tcp_sub->origin);
    err = pkt_wait_list_add(tcp_sub->wl+way, offset, next_offset, do_sync, sync_offset, true, &info.info, way, packet + tcphdr_len, cap_len - tcphdr_len, packet_len, now, tot_cap_len, tot_packet);
    SLOG(LOG_DEBUG, "Waiting list returned %s", proto_parse_status_2_str(err));

    if (err == PROTO_OK) {
        // Try advancing each WL until we are stuck or met an error
        pkt_wait_list_try_both(tcp_sub->wl+!way, &err, now, false);
    }

    bool const term = tcp_subparser_term(tcp_sub);
    mutex_unlock(tcp_sub->mutex);

    if (term || err == PROTO_PARSE_ERR) {
        if (term) {
            SLOG(LOG_DEBUG, "TCP cnx terminated (was %s)", parser_name(subparser->parser));
        } else {
            SLOG(LOG_DEBUG, "No suitable subparser for this payload");
        }
        mux_subparser_deindex(subparser);
    }
    mux_subparser_unref(&subparser);

    if (err == PROTO_OK) return PROTO_OK;

fallback:
    (void)proto_parse(NULL, &info.info, way, packet + tcphdr_len, cap_len - tcphdr_len, wire_len - tcphdr_len, now, tot_cap_len, tot_packet);
    return PROTO_OK;
}
Пример #3
0
/*
 *---------------------------------------------------------------------------
 *
 * vmxnet3_tx_prepare_offload --
 *
 *    Build the offload context of a msg.
 *
 * Results:
 *    0 if everything went well.
 *    +n if n bytes need to be pulled up.
 *    -1 in case of error (not used).
 *
 * Side effects:
 *    None.
 *
 *---------------------------------------------------------------------------
 */
static int
vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp,
                           vmxnet3_offload_t *ol,
                           mblk_t *mp)
{
   int ret = 0;
   uint32_t start, stuff, value, flags;
#if defined(OPEN_SOLARIS) || defined(SOL11)
   uint32_t lso_flag, mss;
#endif

   ol->om = VMXNET3_OM_NONE;
   ol->hlen = 0;
   ol->msscof = 0;

   hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags);
#if defined(OPEN_SOLARIS) || defined(SOL11)
   mac_lso_get(mp, &mss, &lso_flag);

   if (flags || lso_flag) {
#else
   if (flags) {
#endif
      struct ether_vlan_header *eth = (void *) mp->b_rptr;
      uint8_t ethLen;

      if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
         ethLen = sizeof(struct ether_vlan_header);
      } else {
         ethLen = sizeof(struct ether_header);
      }

      VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, stuff=%u, value=%u\n",
                            flags,      ethLen,    start,    stuff,    value);

#if defined(OPEN_SOLARIS) || defined(SOL11)
      if (lso_flag & HW_LSO) {
#else
      if (flags & HCK_PARTIALCKSUM) {
         ol->om = VMXNET3_OM_CSUM;
         ol->hlen = start + ethLen;
         ol->msscof = stuff + ethLen;
      }
      if (flags & HW_LSO) {
#endif
         mblk_t *mblk = mp;
         uint8_t *ip, *tcp;
         uint8_t ipLen, tcpLen;

         /*
          * Copy e1000g's behavior:
          * - Do not assume all the headers are in the same mblk.
          * - Assume each header is always within one mblk.
          * - Assume the ethernet header is in the first mblk.
          */
         ip = mblk->b_rptr + ethLen;
         if (ip >= mblk->b_wptr) {
            mblk = mblk->b_cont;
            ip = mblk->b_rptr;
         }
         ipLen = IPH_HDR_LENGTH((ipha_t *) ip);
         tcp = ip + ipLen;
         if (tcp >= mblk->b_wptr) {
            mblk = mblk->b_cont;
            tcp = mblk->b_rptr;
         }
         tcpLen = TCP_HDR_LENGTH((tcph_t *) tcp);
         if (tcp + tcpLen > mblk->b_wptr) { // careful, '>' instead of '>=' here
            mblk = mblk->b_cont;
         }

         ol->om = VMXNET3_OM_TSO;
         ol->hlen = ethLen + ipLen + tcpLen;
#if defined(OPEN_SOLARIS) || defined(SOL11)
         ol->msscof = mss;
#else
         /* OpenSolaris fills 'value' with the MSS but Solaris doesn't. */
         ol->msscof = DB_LSOMSS(mp);
#endif
         if (mblk != mp) {
            ret = ol->hlen;
         }
      }
#if defined(OPEN_SOLARIS) || defined(SOL11)
      else if (flags & HCK_PARTIALCKSUM) {
         ol->om = VMXNET3_OM_CSUM;
         ol->hlen = start + ethLen;
         ol->msscof = stuff + ethLen;
      }
#endif
   }

   return ret;
}

/*
 *---------------------------------------------------------------------------
 *
 * vmxnet3_tx_one --
 *
 *    Map a msg into the Tx command ring of a vmxnet3 device.
 *
 * Results:
 *    VMXNET3_TX_OK if everything went well.
 *    VMXNET3_TX_RINGFULL if the ring is nearly full.
 *    VMXNET3_TX_PULLUP if the msg is overfragmented.
 *    VMXNET3_TX_FAILURE if there was a DMA or offload error.
 *
 * Side effects:
 *    The ring is filled if VMXNET3_TX_OK is returned.
 *
 *---------------------------------------------------------------------------
 */
static vmxnet3_txstatus
vmxnet3_tx_one(vmxnet3_softc_t *dp,
               vmxnet3_txqueue_t *txq,
               vmxnet3_offload_t *ol,
               mblk_t *mp,
               boolean_t retry)
{
   int ret = VMXNET3_TX_OK;
   unsigned int frags = 0, totLen = 0;
   vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
   Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
   Vmxnet3_GenericDesc *txDesc;
   uint16_t sopIdx, eopIdx;
   uint8_t sopGen, curGen;
   mblk_t *mblk;

   mutex_enter(&dp->txLock);

   sopIdx = eopIdx = cmdRing->next2fill;
   sopGen = cmdRing->gen;
   curGen = !cmdRing->gen;

   for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
      unsigned int len = MBLKL(mblk);
      ddi_dma_cookie_t cookie;
      uint_t cookieCount;

      if (len) {
         totLen += len;
      } else {
         continue;
      }

      if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
                                   (caddr_t) mblk->b_rptr, len,
                                   DDI_DMA_RDWR | DDI_DMA_STREAMING,
                                   DDI_DMA_DONTWAIT, NULL,
                                   &cookie, &cookieCount) != DDI_DMA_MAPPED) {
         VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
         ret = VMXNET3_TX_FAILURE;
         goto error;
      }

      ASSERT(cookieCount);

      do {
         uint64_t addr = cookie.dmac_laddress;
         size_t len = cookie.dmac_size;

         do {
            uint32_t dw2, dw3;
            size_t chunkLen;

            ASSERT(!txq->metaRing[eopIdx].mp);
            ASSERT(cmdRing->avail - frags);

            if (frags >= cmdRing->size - 1 ||
                (ol->om != VMXNET3_OM_TSO && frags >= VMXNET3_MAX_TXD_PER_PKT)) {

               if (retry) {
                  VMXNET3_DEBUG(dp, 2, "overfragmented, frags=%u ring=%hu om=%hu\n",
                                frags, cmdRing->size, ol->om);
               }
               ddi_dma_unbind_handle(dp->txDmaHandle);
               ret = VMXNET3_TX_PULLUP;
               goto error;
            }
            if (cmdRing->avail - frags <= 1) {
               dp->txMustResched = B_TRUE;
               ddi_dma_unbind_handle(dp->txDmaHandle);
               ret = VMXNET3_TX_RINGFULL;
               goto error;
            }

            if (len > VMXNET3_MAX_TX_BUF_SIZE) {
               chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
            } else {
               chunkLen = len;
            }

            frags++;
            eopIdx = cmdRing->next2fill;

            txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
            ASSERT(txDesc->txd.gen != cmdRing->gen);

            // txd.addr
            txDesc->txd.addr = addr;
            // txd.dw2
            dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ? 0 : chunkLen;
            dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
            txDesc->dword[2] = dw2;
            ASSERT(txDesc->txd.len == len || txDesc->txd.len == 0);
            // txd.dw3
            dw3 = 0;
            txDesc->dword[3] = dw3;

            VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill);
            curGen = cmdRing->gen;

            addr += chunkLen;
            len -= chunkLen;
         } while (len);

         if (--cookieCount) {
            ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
         }
      } while (cookieCount);

      ddi_dma_unbind_handle(dp->txDmaHandle);
   }

   /* Update the EOP descriptor */
   txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
   txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;

   /* Update the SOP descriptor. Must be done last */
   txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
   if (ol->om == VMXNET3_OM_TSO &&
       txDesc->txd.len != 0 &&
       txDesc->txd.len < ol->hlen) {
      ret = VMXNET3_TX_FAILURE;
      goto error;
   }
   txDesc->txd.om = ol->om;
   txDesc->txd.hlen = ol->hlen;
   txDesc->txd.msscof = ol->msscof;
   membar_producer();
   txDesc->txd.gen = sopGen;

   /* Update the meta ring & metadata */
   txq->metaRing[sopIdx].mp = mp;
   txq->metaRing[eopIdx].sopIdx = sopIdx;
   txq->metaRing[eopIdx].frags = frags;
   cmdRing->avail -= frags;
   if (ol->om == VMXNET3_OM_TSO) {
      txqCtrl->txNumDeferred +=
         (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
   } else {
      txqCtrl->txNumDeferred++;
   }

   VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", mp, sopIdx, eopIdx);

   goto done;

error:
   /* Reverse the generation bits */
   while (sopIdx != cmdRing->next2fill) {
      VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
      txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
      txDesc->txd.gen = !cmdRing->gen;
   }

done:
   mutex_exit(&dp->txLock);

   return ret;
}

/*
 *---------------------------------------------------------------------------
 *
 * vmxnet3_tx --
 *
 *    Send packets on a vmxnet3 device.
 *
 * Results:
 *    NULL in case of success or failure.
 *    The mps to be retransmitted later if the ring is full.
 *
 * Side effects:
 *    None.
 *
 *---------------------------------------------------------------------------
 */
mblk_t *
vmxnet3_tx(void *data, mblk_t *mps)
{
   vmxnet3_softc_t *dp = data;
   vmxnet3_txqueue_t *txq = &dp->txQueue;
   vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
   Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
   vmxnet3_txstatus status = VMXNET3_TX_OK;
   mblk_t *mp;

   ASSERT(mps != NULL);

   do {
      vmxnet3_offload_t ol;
      int pullup;

      mp = mps;
      mps = mp->b_next;
      mp->b_next = NULL;

      if (DB_TYPE(mp) != M_DATA) {
         /*
          * PR #315560: Solaris might pass M_PROTO mblks for some reason.
          * Drop them because we don't understand them and because their
          * contents are not Ethernet frames anyway.
          */
         ASSERT(B_FALSE);
         freemsg(mp);
         continue;
      }

      /*
       * Prepare the offload while we're still handling the original
       * message -- msgpullup() discards the metadata afterwards.
       */
      pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
      if (pullup) {
         mblk_t *new_mp = msgpullup(mp, pullup);
         freemsg(mp);
         if (new_mp) {
            mp = new_mp;
         } else {
            continue;
         }
      }

      /*
       * Try to map the message in the Tx ring.
       * This call might fail for non-fatal reasons.
       */
      status = vmxnet3_tx_one(dp, txq, &ol, mp, B_FALSE);
      if (status == VMXNET3_TX_PULLUP) {
         /*
          * Try one more time after flattening
          * the message with msgpullup().
          */
         if (mp->b_cont != NULL) {
            mblk_t *new_mp = msgpullup(mp, -1);
            freemsg(mp);
            if (new_mp) {
               mp = new_mp;
               status = vmxnet3_tx_one(dp, txq, &ol, mp, B_TRUE);
            } else {
               continue;
            }
         }
      }
      if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
         /* Fatal failure, drop it */
         freemsg(mp);
      }
   } while (mps && status != VMXNET3_TX_RINGFULL);

   if (status == VMXNET3_TX_RINGFULL) {
      mp->b_next = mps;
      mps = mp;
   } else {
      ASSERT(!mps);
   }

   /* Notify the device */
   mutex_enter(&dp->txLock);
   if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
      txqCtrl->txNumDeferred = 0;
      VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
   }
   mutex_exit(&dp->txLock);

   return mps;
}
Пример #4
0
/* ARGSUSED */
static int
vmxnet3s_tx_prepare_offload(vmxnet3s_softc_t *dp, vmxnet3s_offload_t *ol,
    mblk_t *mp, int *to_copy)
{
	int		ret = 0;
	uint32_t	start;
	uint32_t	stuff;
	uint32_t	value;
	uint32_t	flags;
	uint32_t	lsoflags;
	uint32_t	mss;

	ol->om = VMXNET3_OM_NONE;
	ol->hlen = 0;
	ol->msscof = 0;

	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags);

	mac_lso_get(mp, &mss, &lsoflags);
	if (lsoflags & HW_LSO)
		flags |= HW_LSO;

	if (flags) {
		struct ether_vlan_header *eth = (void *) mp->b_rptr;
		uint8_t		ethlen;

		if (eth->ether_tpid == htons(ETHERTYPE_VLAN))
			ethlen = sizeof (struct ether_vlan_header);
		else
			ethlen = sizeof (struct ether_header);

		if (flags & HCK_PARTIALCKSUM) {
			ol->om = VMXNET3_OM_CSUM;
			ol->hlen = start + ethlen;
			ol->msscof = stuff + ethlen;
		}
		if (flags & HW_LSO) {
			mblk_t		*mblk = mp;
			uint8_t		*ip;
			uint8_t		*tcp;
			uint8_t		iplen;
			uint8_t		tcplen;

			/*
			 * Copy e1000g's behavior:
			 * - Do not assume all the headers are in the same mblk.
			 * - Assume each header is always within one mblk.
			 * - Assume the ethernet header is in the first mblk.
			 */
			ip = mblk->b_rptr + ethlen;
			if (ip >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				ip = mblk->b_rptr;
			}
			iplen = IPH_HDR_LENGTH((ipha_t *)ip);
			tcp = ip + iplen;
			if (tcp >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				tcp = mblk->b_rptr;
			}
			tcplen = TCP_HDR_LENGTH((tcph_t *)tcp);
			/* Careful, '>' instead of '>=' here */
			if (tcp + tcplen > mblk->b_wptr)
				mblk = mblk->b_cont;

			ol->om = VMXNET3_OM_TSO;
			ol->hlen = ethlen + iplen + tcplen;
			ol->msscof = DB_LSOMSS(mp);

			if (mblk != mp)
				ret = ol->hlen;
			else
				*to_copy = ol->hlen;
		}
	}

	return (ret);
}