/** Uninitialize a fragmentation context and release the work queue entry
    it holds. */
void
octeon_se_fastpath_fragc_uninit(SeFastpathCoreContext core,
                                SeFastpath fastpath,
                                SeFastpathFragmentContext fragc)
{
  if (cvmx_likely(fragc->pc))
    {
      /* Free the chained packet data buffers first, then return the work
         queue entry itself to the WQE pool and clear the pointer. */
      cvmx_helper_free_packet_data(fragc->pc->wqe);
      cvmx_fpa_free(fragc->pc->wqe, CVMX_FPA_WQE_POOL, 0);
      fragc->pc->wqe = NULL;
    }
}
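/* A minimal sketch of the discard idiom used above (and by the data loop's
   discard_swp path below): free the chained packet data buffers first, then
   return the work queue entry itself to its FPA pool. The helper name is
   hypothetical and this assumes the cvmx headers are already included;
   cvmx_helper_free_packet_data(), cvmx_fpa_free() and CVMX_FPA_WQE_POOL are
   the SDK's own. */
static inline void discard_wqe_sketch(cvmx_wqe_t *work)
{
  if (work == NULL)
    return;

  /* Free the packet buffers chained off the work queue entry. */
  cvmx_helper_free_packet_data(work);

  /* Return the WQE buffer itself to the WQE pool; the third argument of
     0 cache lines matches the call above. */
  cvmx_fpa_free(work, CVMX_FPA_WQE_POOL, 0);
}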
/**
 * Process incoming packets.
 */
int inic_data_loop(void)
{
    cvm_common_wqe_t *swp = NULL;
    cvm_tcp_in_endpoints_t conn;
    cvm_tcp_tcphdr_t *th = NULL;
    cvm_ip_ip_t *ih = NULL;
    cvmx_sysinfo_t *sys_info_ptr = cvmx_sysinfo_get();
    uint64_t cpu_clock_hz = sys_info_ptr->cpu_clock_hz;
    uint64_t tick_cycle = cvmx_get_cycle();
    uint64_t tick_step;
    uint32_t idle_processing_interval_ticks =
        (CVM_COMMON_IDLE_PROCESSING_INTERVAL) * (1000 * 1000) /
        (CVM_COMMON_TICK_LEN_US);
    uint32_t idle_processing_last_ticks = 0;

#ifdef INET6
    struct cvm_ip6_ip6_hdr *ip6 = NULL;
#ifdef CVM_ENET_TUNNEL
    struct cvm_ip6_ip6_hdr *i6h = NULL;
#endif
#endif

#ifdef CVM_CLI_APP
    uint64_t idle_cycle_start_value;
#endif

    /* for the simulator */
    if (cpu_clock_hz == 0)
    {
        cpu_clock_hz = 333000000;
    }

    tick_step = (CVM_COMMON_TICK_LEN_US * cpu_clock_hz) / 1000000;
    cvm_debug_print_interval = cpu_clock_hz;

#ifndef REAL_HW
    /* for the simulator, set the debug interval to be 3M cycles */
    cvm_debug_print_interval = 3000000;
#endif

#ifdef DUTY_CYCLE
    start_cycle = cvmx_get_cycle();
    process_count = 0;
#endif

    if (cvmx_coremask_first_core(coremask_data))
    {
        /* Initiate a timer transaction for arp entry timeouts */
        //if (cvm_enet_arp_timeout_init() != CVMX_TIM_STATUS_SUCCESS)
        //{
        //    printf("Failed init of cvm_ip_arp_timeout_init\n");
        //}
    }

#if defined(CVM_COMBINED_APP_STACK)
    /* Flush the packets sent by main_global and main_local */
    /*
    printf("before cvm_send_packet () \n ");
    if (out_swp)
    {
        cvm_send_packet ();
    }
    printf("after cvm_send_packet () \n ");
    */
    uint64_t app_timeout = cvmx_get_cycle();
#endif

    /* start the main loop */
    while (1)
    {
#ifdef DUTY_CYCLE
        end_cycle = cvmx_get_cycle();

        /* check the wrap around case */
        if (end_cycle < start_cycle)
            end_cycle += cpu_clock_hz;

        if ((end_cycle - start_cycle) > cvm_debug_print_interval)
        {
            inic_do_per_second_duty_cycle_processing();
        }
#endif /* DUTY_CYCLE */

        cvmx_pow_work_request_async_nocheck(CVMX_SCR_WORK, 1);

        /* update the ticks variable */
        while (cvmx_get_cycle() - tick_cycle > tick_step)
        {
            tick_cycle += tick_step;
            cvm_tcp_ticks++;
            if (!(cvm_tcp_ticks & 0x1f))
                CVM_COMMON_HISTORY_SET_CYCLE();
        }

        /* do common idle processing */
        if ((cvm_tcp_ticks - idle_processing_last_ticks) >
            idle_processing_interval_ticks)
        {
            if (cvmx_coremask_first_core(coremask_data))
            {
                cvm_common_do_idle_processing();
            }

            idle_processing_last_ticks = cvm_tcp_ticks;
        }

#ifdef CVM_CLI_APP
        idle_cycle_start_value = cvmx_get_cycle();
#endif

        /* get work entry */
        swp = (cvm_common_wqe_t *)cvmx_pow_work_response_async(CVMX_SCR_WORK);
        if (swp == NULL)
        {
            idle_counter++;

            if (core_id == highest_core_id)
            {
                cvm_enet_check_link_status();
            }

#ifdef CVM_CLI_APP
            cvmx_fau_atomic_add64(core_idle_cycles[core_id],
                                  (cvmx_get_cycle() - idle_cycle_start_value));
#endif
            continue;
        }

        CVM_COMMON_EXTRA_STATS_ADD64(CVM_FAU_REG_WQE_RCVD, 1);

#ifdef WORK_QUEUE_ENTRY_SIZE_128 // {
        CVMX_PREFETCH0(swp);
#else
        /* Prefetch work-queue entry */
        CVMX_PREFETCH0(swp);
        CVMX_PREFETCH128(swp);
#endif // WORK_QUEUE_ENTRY_SIZE_128 }

        out_swp = 0;
        out_swp_tail = 0;

#ifdef DUTY_CYCLE
        /* we are about to start processing the packet - remember the
           cycle count */
        process_start_cycle = cvmx_get_cycle();
#endif

        /* Short cut the common case */
        if (cvmx_likely(swp->hw_wqe.unused == 0))
        {
            goto packet_from_the_wire;
        }

        printf("Get work with unused is %X\n", swp->hw_wqe.unused);

        {
            {
packet_from_the_wire:
#if CVM_PKO_DONTFREE
                swp->hw_wqe.packet_ptr.s.i = 0;
#endif

#ifdef SANITY_CHECKS
                /* we have a work queue entry - do input sanity checks */
                ret = cvm_common_input_sanity_and_buffer_count_update(swp);
#endif
                if (cvmx_unlikely(swp->hw_wqe.word2.s.rcv_error))
                {
                    goto discard_swp; /* Receive error */
                }

#ifndef WORK_QUEUE_ENTRY_SIZE_128 // {
                {
                    /* Make sure pre-fetch completed */
                    uint64_t dp = *(volatile uint64_t *)&swp->next;
                    (void)dp; /* suppress unused-variable warning */
                }
#endif // WORK_QUEUE_ENTRY_SIZE_128 }

                {
                    /* Initialize SW portion of the work-queue entry */
                    uint64_t *dptr = (uint64_t *)(&swp->next);

                    dptr[0] = 0;
                    dptr[1] = 0;
                    dptr[2] = 0;
                    dptr[3] = 0;
                }

                if (cvmx_unlikely(swp->hw_wqe.word2.s.not_IP))
                {
                    goto output;
                }

                /* Shortcut classification to avoid multiple lookups */
                if (
#ifndef INET6
                    swp->hw_wqe.word2.s.is_v6 ||
#endif
                    swp->hw_wqe.word2.s.is_bcast
#ifndef INET6
                    || swp->hw_wqe.word2.s.is_mcast
#endif
                   )
                {
                    goto discard_swp; /* Receive error */
                }

                /* Packet is unicast IPv4, without L2 errors.
                   (All IP exceptions are dropped. This currently includes
                   IPv4 options and IPv6 extension headers.) */
                if (cvmx_unlikely(swp->hw_wqe.word2.s.IP_exc))
                {
                    goto discard_swp;
                }

                /* Packet is IPv4 (and no IP exceptions) */
                if (cvmx_unlikely(swp->hw_wqe.word2.s.is_frag ||
                                  !swp->hw_wqe.word2.s.tcp_or_udp))
                {
                    goto output;
                }

#ifdef ANVL_RFC_793_COMPLIANCE
                /* RFC 793 says that:
                   - We should send a RST out when we get a packet with FIN
                     set but without the ACK bit set in the flags field.
                   - We should send a RST out when we get a packet with no
                     flag set.
                   Hence, let the TCP stack handle these conditions. */
                if (cvmx_unlikely(swp->hw_wqe.word2.s.L4_error &&
                                  ((cvmx_pip_l4_err_t)swp->hw_wqe.word2.s.err_code !=
                                   CVMX_PIP_TCP_FLG8_ERR) &&
                                  ((cvmx_pip_l4_err_t)swp->hw_wqe.word2.s.err_code !=
                                   CVMX_PIP_TCP_FLG9_ERR)))
#else
                if (cvmx_unlikely(swp->hw_wqe.word2.s.L4_error))
#endif
                {
                    cvm_tcp_handle_error(swp);
                    goto discard_swp;
                }

                /* Packet is not fragmented, is TCP/UDP, and has no IP
                   exceptions or L4 errors. We can try an L4 lookup now,
                   but we need all the information first. */
                ih = ((cvm_ip_ip_t *)&(swp->hw_wqe.packet_data[CVM_COMMON_PD_ALIGN]));

                if (!swp->hw_wqe.word2.s.is_v6)
                {
                    /* for IPv4, we must subtract CVM_COMMON_PD_ALIGN from
                       tcp_offset to get the offset in the mbuf */
                    swp->l4_offset = ((uint16_t)(ih->ip_hl) << 2) + CVM_COMMON_PD_ALIGN;
                    swp->l4_prot = ih->ip_p;
                }
#ifdef INET6
                else
                {
                    ip6 = (struct cvm_ip6_ip6_hdr *)
                        &swp->hw_wqe.packet_data[CVM_COMMON_IP6_PD_ALIGN];

                    /* for IPv6, the L4 header follows the fixed IPv6 header */
                    swp->l4_offset = CVM_IP6_IP6_HDRLEN;
                    swp->l4_prot = ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt;

                    CVM_COMMON_DBG_MSG(CVM_COMMON_DBG_LVL_5,
                        "%s: %d Packet trace Src: %s Dest: %s prot: %d len: %d\n",
                        __FUNCTION__, __LINE__,
                        cvm_ip6_ip6_sprintf(&ip6->ip6_src),
                        cvm_ip6_ip6_sprintf(&ip6->ip6_dst),
                        swp->l4_prot, swp->hw_wqe.len);
                }
#endif

                th = ((cvm_tcp_tcphdr_t *)&(swp->hw_wqe.packet_data[swp->l4_offset]));

                /* check if it is a TCP packet */
                if (swp->l4_prot == CVM_IP_IPPROTO_TCP)
                {
                    process_handle(swp);

#ifdef INET6
                    if (!swp->hw_wqe.word2.s.is_v6)
#endif
                    {
                        CVM_TCP_TCP_DUMP((void *)ih);

                        /* IPv4 */
                        conn.ie_laddr = ih->ip_dst.s_addr;
                        conn.ie_faddr = ih->ip_src.s_addr;
                        conn.ie_lport = th->th_dport;
                        conn.ie_fport = th->th_sport;
                    }
#ifdef INET6
                    else
                    {
                        /* IPv6 */
                        memcpy(&conn.ie6_laddr, &ip6->ip6_dst,
                               sizeof(struct cvm_ip6_in6_addr));
                        memcpy(&conn.ie6_faddr, &ip6->ip6_src,
                               sizeof(struct cvm_ip6_in6_addr));
                        conn.ie_lport = th->th_dport;
                        conn.ie_fport = th->th_sport;

                        /* do a TCP lookup */
                        swp->tcb = cvm_tcp6_lookup(swp);

                        CVM_COMMON_DBG_MSG(CVM_COMMON_DBG_LVL_5,
                            "%s: %d TCPv6 lookup Src: %s/%d Dest: %s/%d ret_tcb: 0x%llx\n",
                            __FUNCTION__, __LINE__,
                            cvm_ip6_ip6_sprintf((cvm_ip6_in6_addr_t *)
                                                &conn.ie6_faddr),
                            conn.ie_fport,
                            cvm_ip6_ip6_sprintf((cvm_ip6_in6_addr_t *)
                                                &conn.ie6_laddr),
                            conn.ie_lport, CAST64(swp->tcb));
                    }
#endif // INET6
                }

                goto output;
            } /* packet from wire */
        } /* switch */

output:
        CVMX_SYNCWS;

        /* Send packet out */
        if (out_swp)
        {
            cvm_send_packet();
        }

        if (swp != NULL)
        {
            S3_send_packet((cvmx_wqe_t *)swp);
            swp = NULL;
        }

#ifdef DUTY_CYCLE
        process_end_cycle = cvmx_get_cycle();
        process_count += (process_end_cycle - process_start_cycle);
#endif
    }

    return (0);

discard_swp:
    /* Free the chained buffers */
    cvm_common_packet_free(swp);

    /* Free the work queue entry */
    cvm_common_free_fpa_buffer(swp, CVMX_FPA_WQE_POOL,
                               CVMX_FPA_WQE_POOL_SIZE / CVMX_CACHE_LINE_SIZE);
    swp = NULL;
    goto output;
} /* inic_data_loop */
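/* A minimal standalone sketch (hypothetical names) of the tick accounting
   used by inic_data_loop() above: a tick length given in microseconds is
   converted once into a step in CPU cycles, and the free-running cycle
   counter is then folded into ticks without drift. TICK_LEN_US stands in
   for CVM_COMMON_TICK_LEN_US; in the real loop cvmx_get_cycle() supplies
   'now'. */
#include <stdint.h>

#define TICK_LEN_US 1000 /* assumed 1 ms tick; the stack uses CVM_COMMON_TICK_LEN_US */

static inline uint64_t tick_step_cycles(uint64_t cpu_clock_hz)
{
    /* cycles per tick = (tick length in seconds) * clock rate
                       = (TICK_LEN_US / 1e6) * cpu_clock_hz.
       E.g. with the 333 MHz simulator fallback above:
       (1000 * 333000000) / 1000000 = 333000 cycles per tick. */
    return (TICK_LEN_US * cpu_clock_hz) / 1000000ULL;
}

/* Advance *ticks by however many whole tick_steps have elapsed since
   *tick_cycle; mirrors the inner while loop of the data loop. */
static inline void advance_ticks(uint64_t now, uint64_t *tick_cycle,
                                 uint64_t tick_step, uint64_t *ticks)
{
    while (now - *tick_cycle > tick_step)
    {
        *tick_cycle += tick_step;
        (*ticks)++;
    }
}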
/** Execute outbound transforms */
SeFastpathRet
octeon_se_fastpath_transform_out(SeFastpathCoreContext core,
                                 SeFastpath fastpath,
                                 SeFastpathPacketContext pc)
{
  cvmx_buf_ptr_t packet_out;
  uint64_t packet_out_num_segs;
  size_t packet_out_len;
  SeFastpathTransformData se_trd;
  SeFastpathCombinedTransform combined;
  SeFastpathPacketBufferStruct src, dst;
  SeFastpathEspExtraInfoStruct extra_info[1];
  SeFastpathMacExtraInfoStruct mac_info[1];
  SeFastpathRet ret;
  uint8_t *header;
  uint32_t trd_i, tos, flow_label;
  uint64_t ipsec_seq;
  uint16_t csum, prefix_ofs;
  uint16_t esp_ah_ofs, prefix_len = 0, trailer_len = 0, pad_len = 0;
  uint8_t esp_ah_nh;
  uint64_t icv[OCTEON_SE_FASTPATH_MAX_HASH_WORDS] = { 0 };
  size_t i;
#ifdef OCTEON_SE_FASTPATH_TRANSFORM_AH
  size_t icv_pad_len = 0;
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_AH */
  uint32_t run_time;
  size_t alignment = 0;
#ifdef OCTEON_SE_FASTPATH_STATISTICS
  size_t out_octets;
#endif /* OCTEON_SE_FASTPATH_STATISTICS */

  OCTEON_SE_DEBUG(9, "Execute transform out\n");

  packet_out.u64 = 0;

  OCTEON_SE_ASSERT(pc->transform_index != OCTEON_SE_FASTPATH_INVALID_INDEX);

  trd_i = pc->transform_index & 0x00ffffff;
  OCTEON_SE_ASSERT(trd_i < OCTEON_SE_FASTPATH_TRD_TABLE_SIZE);

  se_trd = OCTEON_SE_FASTPATH_TRD(fastpath, trd_i);
  OCTEON_SE_FASTPATH_TRD_READ_LOCK(fastpath, trd_i, se_trd);
  OCTEON_SE_FASTPATH_PREFETCH_TRD(se_trd);

  /* If transform is complex, pass packet to slowpath. */
  if (cvmx_unlikely(se_trd->is_special))
    {
      OCTEON_SE_DEBUG(9, "Special transform %08x, passing to slowpath\n",
                      se_trd->transform);
      goto slowpath;
    }

  combined = octeon_se_fastpath_get_combined_transform(se_trd->transform,
                                                       se_trd->mac_key_size);
  if (cvmx_unlikely(combined == NULL))
    {
      OCTEON_SE_DEBUG(9, "Unsupported transform %08x, passing to slowpath\n",
                      se_trd->transform);
      goto slowpath;
    }

  /* Update trd output timestamp. */
  run_time = cvmx_fau_fetch_and_add32(OCTEON_SE_FASTPATH_FAU_RUNTIME, 0);
  cvmx_atomic_set32((int32_t *) &se_trd->last_out_packet_time,
                    (int32_t) run_time);

  (*combined->init)(core->transform_context,
                    se_trd->keymat + OCTEON_MAX_KEYMAT_LEN / 2,
                    se_trd->cipher_key_size,
                    se_trd->keymat + OCTEON_MAX_KEYMAT_LEN / 2
                    + OCTEON_MAX_ESP_KEY_BITS / 8,
                    se_trd->mac_key_size);

  prefix_ofs = pc->s->ip_offset;

  /* Check ttl. */
  if (cvmx_unlikely(pc->s->ttl == 0))
    {
      OCTEON_SE_DEBUG(3, "Zero TTL, dropping\n");
      goto corrupt;
    }

#ifdef OCTEON_SE_FASTPATH_TRANSFORM_TRANSPORT_MODE
  if (cvmx_unlikely(!se_trd->tunnel_mode))
    {
      /* In transport mode insert the ESP/AH header between IP and
         transport headers. */
      prefix_ofs += pc->s->tr_offset;
      esp_ah_nh = pc->s->ipproto;
      prefix_len = 0;
    }
  else
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_TRANSPORT_MODE */
    {
      /* In tunnel mode insert IP and ESP/AH headers before IP header. */
      if (se_trd->ip_version_6)
        prefix_len = OCTEON_SE_FASTPATH_IP6_HDRLEN;
      else
        prefix_len = OCTEON_SE_FASTPATH_IP4_HDRLEN;

      if (pc->s->ip_version_6)
        esp_ah_nh = OCTEON_SE_FASTPATH_IPPROTO_IPV6;
      else
        esp_ah_nh = OCTEON_SE_FASTPATH_IPPROTO_IPIP;
    }

  /* Calculate IPsec overhead. */
#ifdef OCTEON_SE_FASTPATH_TRANSFORM_NATT
  /* Reserve space for UDP NAT-T. */
  if (se_trd->transform & OCTEON_SE_FASTPATH_IPSEC_NATT)
    prefix_len += OCTEON_SE_FASTPATH_UDP_HDRLEN;
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_NATT */

#ifdef OCTEON_SE_FASTPATH_TRANSFORM_AH
  if (cvmx_unlikely(se_trd->transform & OCTEON_SE_FASTPATH_IPSEC_AH))
    {
      prefix_len += OCTEON_SE_FASTPATH_AH_HDRLEN + combined->icv_len;

#ifdef OCTEON_SE_FASTPATH_TRANSFORM_SHA2
      if (cvmx_unlikely((se_trd->ip_version_6 == 1)
                        && (se_trd->transform
                            & OCTEON_SE_FASTPATH_MAC_HMAC_SHA2)))
        {
          icv_pad_len = 4;
          prefix_len += 4; /* Align AH header to 64 bit boundary */
        }
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_SHA2 */

      trailer_len = 0;
      pad_len = 0;
    }
  else if (se_trd->transform & OCTEON_SE_FASTPATH_IPSEC_ESP)
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_AH */
    {
      prefix_len += (OCTEON_SE_FASTPATH_ESP_HDRLEN + combined->cipher_iv_len);
      trailer_len = 2 + combined->icv_len;

      pad_len = (pc->s->ip_len + pc->s->ip_offset - prefix_ofs + 2)
        % combined->pad_boundary;
      if (pad_len != 0)
        pad_len = combined->pad_boundary - pad_len;
    }

  /* The actual length of the packet */
  packet_out_len = pc->s->ip_len + prefix_len + pad_len + trailer_len;
  OCTEON_SE_DEBUG(9, "Resultant packet len is %d\n", (int) packet_out_len);

  /* Check result packet length. */
  if (cvmx_unlikely(se_trd->pmtu_received
                    && pc->mtu > se_trd->pmtu_received))
    pc->mtu = se_trd->pmtu_received;

  ret = octeon_se_fastpath_transform_check_pmtu(pc, packet_out_len);
  if (cvmx_unlikely(ret == OCTEON_SE_FASTPATH_RET_DROP))
    goto drop;
  else if (cvmx_unlikely(ret == OCTEON_SE_FASTPATH_RET_SLOWPATH))
    goto slowpath;

  /* In tunnel mode decrement ttl of inner header. */
#ifdef OCTEON_SE_FASTPATH_TRANSFORM_TRANSPORT_MODE
  if (cvmx_likely(se_trd->tunnel_mode))
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_TRANSPORT_MODE */
    {
      header = cvmx_phys_to_ptr(pc->wqe->packet_ptr.s.addr)
        + pc->s->ip_offset;

      if (pc->s->ip_version_6)
        {
          /* Assert that header is in the first packet segment */
          OCTEON_SE_ASSERT(pc->wqe->packet_ptr.s.size
                           >= OCTEON_SE_FASTPATH_IP6_HDRLEN);
          OCTEON_SE_FASTPATH_IPH6_SET_HL(header, pc->s->ttl - 1);
        }
      else
        {
          /* Assert that header is in the first packet segment */
          OCTEON_SE_ASSERT(pc->wqe->packet_ptr.s.size
                           >= OCTEON_SE_FASTPATH_IP4_HDRLEN);
          OCTEON_SE_FASTPATH_IPH4_SET_TTL(header, pc->s->ttl - 1);

          /* Update the header checksum incrementally over the changed
             TTL byte instead of recomputing it over the whole header. */
          OCTEON_SE_FASTPATH_IPH4_CHECKSUM(header, csum);
          csum = octeon_se_fastpath_csum_update_byte(csum,
                                                     OCTEON_SE_FASTPATH_IPH4_OFS_TTL,
                                                     pc->s->ttl,
                                                     pc->s->ttl - 1);
          OCTEON_SE_FASTPATH_IPH4_SET_CHECKSUM(header, csum);
        }
    }

  /* Save df bit processing state */
  pc->s->df_bit_processing = se_trd->df_bit_processing;

  /* Allocate packet buffer chain for result packet. Request that crypto
     result offset is 8 byte aligned. */
  alignment = OCTEON_SE_ALIGN_64(prefix_ofs + prefix_len)
    - (prefix_ofs + prefix_len);

  packet_out.u64 =
    octeon_se_fastpath_alloc_packet_chain(packet_out_len + pc->s->ip_offset,
                                          alignment,
                                          &packet_out_num_segs);
  if (cvmx_unlikely(packet_out.u64 == 0))
    {
      OCTEON_SE_DEBUG(3, "Result packet allocation failed\n");
      goto drop;
    }

#ifdef OCTEON_SE_FASTPATH_TRANSFORM_TRANSPORT_MODE
  /* In case of transport mode copy the l3 header. */
  if (cvmx_unlikely(prefix_ofs > pc->s->ip_offset))
    {
      OCTEON_SE_DEBUG(9, "Copying headers to %p\n",
                      cvmx_phys_to_ptr(packet_out.s.addr)
                      + pc->s->ip_offset);

      /* Assert that l3 headers are in the first packet segment. */
      OCTEON_SE_ASSERT(packet_out.s.size > prefix_ofs);
      memcpy(cvmx_phys_to_ptr(packet_out.s.addr) + pc->s->ip_offset,
             cvmx_phys_to_ptr(pc->wqe->packet_ptr.s.addr) + pc->s->ip_offset,
             prefix_ofs - pc->s->ip_offset);
    }
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_TRANSPORT_MODE */

  /* Prepare source buffer */
  octeon_se_fastpath_packet_buffer_create(&src, pc->wqe->packet_ptr,
                                          prefix_ofs,
                                          pc->s->ip_len + pc->s->ip_offset
                                          - prefix_ofs,
                                          pc->wqe->word2.s.bufs);

  /* Count the number of bytes input to crypto processing. */
  OCTEON_SE_FASTPATH_STATS(out_octets = pc->s->ip_len + pc->s->ip_offset
                           - prefix_ofs);

  /* Build headers */
  header = ((uint8_t *) cvmx_phys_to_ptr(packet_out.s.addr)) + prefix_ofs;

  /* Build outer header for tunnel mode and modify IP header for
     transport mode. */
#ifdef OCTEON_SE_FASTPATH_TRANSFORM_TRANSPORT_MODE
  if (cvmx_unlikely(!se_trd->tunnel_mode && pc->s->ip_version_6 == 0))
    {
      /* IPv4 transport mode. */
      OCTEON_SE_DEBUG(9, "Modifying IPv4 header at %p\n", header);

      /* Modify original IPv4 header and change IP protocol and len. */
      OCTEON_SE_FASTPATH_IPH4_SET_LEN(header, packet_out_len);
      OCTEON_SE_FASTPATH_IPH4_SET_PROTO(header, se_trd->nh);

      OCTEON_SE_FASTPATH_IPH4_CHECKSUM(header, csum);
      csum = octeon_se_fastpath_csum_update_byte(csum,
                                                 OCTEON_SE_FASTPATH_IPH4_OFS_PROTO,
                                                 pc->s->ipproto, se_trd->nh);
      csum = octeon_se_fastpath_csum_update_short(csum,
                                                  OCTEON_SE_FASTPATH_IPH4_OFS_LEN,
                                                  pc->s->ip_len,
                                                  packet_out_len);
      OCTEON_SE_FASTPATH_IPH4_SET_CHECKSUM(header, csum);
    }
  else if (cvmx_unlikely(!se_trd->tunnel_mode && pc->s->ip_version_6 == 1))
    {
      /* IPv6 transport mode. */
      OCTEON_SE_DEBUG(9, "Modifying IPv6 header at %p\n", header);

      OCTEON_SE_FASTPATH_IPH6_SET_LEN(header,
                                      packet_out_len
                                      - OCTEON_SE_FASTPATH_IP6_HDRLEN);
      OCTEON_SE_FASTPATH_IPH6_SET_NH(header, se_trd->nh);
    }
  else
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_TRANSPORT_MODE */
  if (se_trd->ip_version_6 == 0)
    {
      OCTEON_SE_ASSERT(se_trd->tunnel_mode);

      /* IPv4 tunnel mode. */
      OCTEON_SE_DEBUG(9, "Building outer IPv4 header at %p\n", header);

      OCTEON_SE_ASSERT(packet_out.s.size
                       > prefix_ofs + OCTEON_SE_FASTPATH_IP4_HDRLEN);

      OCTEON_SE_FASTPATH_IPH4_SET_VERSION(header, 4);
      OCTEON_SE_FASTPATH_IPH4_SET_HLEN(header, 5);
      tos = 0;
      OCTEON_SE_FASTPATH_IPH4_SET_TOS(header, tos);
      OCTEON_SE_FASTPATH_IPH4_SET_LEN(header, packet_out_len);

      if (pc->s->df_bit_processing == OCTEON_SE_FASTPATH_DF_CLEAR
          || (pc->s->df_bit_processing == OCTEON_SE_FASTPATH_DF_KEEP
              && pc->s->ipv4_df == 0))
        {
          uint32_t id;
          OCTEON_SE_FASTPATH_GET_NEXT_IPV4_PACKET_ID(core, id);
          OCTEON_SE_FASTPATH_IPH4_SET_ID(header, id);
          OCTEON_SE_FASTPATH_IPH4_SET_FRAG(header, 0);
          pc->s->ipv4_df = 0;
        }
      else
        {
          OCTEON_SE_FASTPATH_IPH4_SET_ID(header, 0);
          OCTEON_SE_FASTPATH_IPH4_SET_FRAG(header,
                                           OCTEON_SE_FASTPATH_IPH4_FRAGOFF_DF);
          pc->s->ipv4_df = 1;
        }

      OCTEON_SE_FASTPATH_IPH4_SET_TTL(header,
                                      OCTEON_SE_FASTPATH_IP4_TUNNEL_MODE_TTL);
      OCTEON_SE_FASTPATH_IPH4_SET_PROTO(header, se_trd->nh);
      OCTEON_SE_FASTPATH_IPH4_SET_CHECKSUM(header, 0);
      OCTEON_SE_FASTPATH_IPH4_SET_SRC(header, se_trd->own_addr_low);
      OCTEON_SE_FASTPATH_IPH4_SET_DST(header, se_trd->gw_addr_low);

      csum = octeon_se_fastpath_ip_cksum(header,
                                         OCTEON_SE_FASTPATH_IP4_HDRLEN);
      OCTEON_SE_FASTPATH_IPH4_SET_CHECKSUM(header, csum);

      prefix_ofs += OCTEON_SE_FASTPATH_IP4_HDRLEN;
    }
  else if (se_trd->ip_version_6 == 1)
    {
      OCTEON_SE_ASSERT(se_trd->tunnel_mode);

      /* IPv6 tunnel mode. */
*/ OCTEON_SE_DEBUG(9, "Building outer IPv6 header at %p\n", header); OCTEON_SE_FASTPATH_IPH6_SET_VERSION(header, 6); tos = 0; OCTEON_SE_FASTPATH_IPH6_SET_CLASS(header, tos); flow_label = 0; OCTEON_SE_FASTPATH_IPH6_SET_FLOW(header, flow_label); OCTEON_SE_FASTPATH_IPH6_SET_LEN(header, packet_out_len - OCTEON_SE_FASTPATH_IP6_HDRLEN); OCTEON_SE_FASTPATH_IPH6_SET_NH(header, se_trd->nh); OCTEON_SE_FASTPATH_IPH6_SET_HL(header, OCTEON_SE_FASTPATH_IP6_TUNNEL_MODE_HL); OCTEON_SE_FASTPATH_IPH6_SET_SRC_LOW(header, se_trd->own_addr_low); OCTEON_SE_FASTPATH_IPH6_SET_SRC_HIGH(header, se_trd->own_addr_high); OCTEON_SE_FASTPATH_IPH6_SET_DST_LOW(header, se_trd->gw_addr_low); OCTEON_SE_FASTPATH_IPH6_SET_DST_HIGH(header, se_trd->gw_addr_high); prefix_ofs += OCTEON_SE_FASTPATH_IP6_HDRLEN; } #ifdef OCTEON_SE_FASTPATH_TRANSFORM_NATT /* Should we add NATT header as well ? */ if (cvmx_unlikely(se_trd->transform & OCTEON_SE_FASTPATH_IPSEC_NATT)) { header = ((uint8_t *) cvmx_phys_to_ptr(packet_out.s.addr)) + prefix_ofs; OCTEON_SE_DEBUG(9, "Building UDP NAT-T header at %p\n", header); OCTEON_SE_ASSERT(packet_out.s.size > prefix_ofs + OCTEON_SE_FASTPATH_UDP_HDRLEN); OCTEON_SE_ASSERT((se_trd->transform & OCTEON_SE_FASTPATH_IPSEC_AH) == 0); OCTEON_SE_ASSERT(se_trd->nh == OCTEON_SE_FASTPATH_IPPROTO_UDP); OCTEON_SE_FASTPATH_UDPH_SET_SRCPORT(header, se_trd->natt_local_port); OCTEON_SE_FASTPATH_UDPH_SET_DSTPORT(header, se_trd->natt_remote_port); OCTEON_SE_FASTPATH_UDPH_SET_LEN(header, packet_out_len - (prefix_ofs - pc->s->ip_offset)); OCTEON_SE_FASTPATH_UDPH_SET_CHECKSUM(header, 0); prefix_ofs += OCTEON_SE_FASTPATH_UDP_HDRLEN; } #endif /* OCTEON_SE_FASTPATH_TRANSFORM_NATT */ /* Build ESP/AH */ esp_ah_ofs = prefix_ofs; header = ((uint8_t *) cvmx_phys_to_ptr(packet_out.s.addr)) + prefix_ofs; #ifdef OCTEON_SE_FASTPATH_TRANSFORM_AH if (se_trd->transform & OCTEON_SE_FASTPATH_IPSEC_AH) { uint32_t low_seq; OCTEON_SE_DEBUG(9, "Building AH header at %p\n", header); OCTEON_SE_ASSERT(packet_out.s.size > prefix_ofs + OCTEON_SE_FASTPATH_AH_HDRLEN + combined->icv_len + icv_pad_len); /* Get and increment next sequence atomically. Note that se_trd contains the last sequence number transmitted, thus sequence is incremented by one here. */ ipsec_seq = (uint64_t) cvmx_atomic_fetch_and_add64((int64_t *)&se_trd->seq, 1); ipsec_seq++; OCTEON_SE_FASTPATH_AHH_SET_NH(header, esp_ah_nh); OCTEON_SE_FASTPATH_AHH_SET_LEN(header, (combined->icv_len + icv_pad_len + 12) / 4 - 2); OCTEON_SE_FASTPATH_AHH_SET_RESERVED(header, 0); OCTEON_SE_FASTPATH_AHH_SET_SPI(header, se_trd->spi_out); CVMX_DEXT(low_seq, ipsec_seq, 0, 32); OCTEON_SE_FASTPATH_AHH_SET_SEQ(header, low_seq); prefix_ofs += OCTEON_SE_FASTPATH_AH_HDRLEN + combined->icv_len; /* ICV computation also needs ICV field initialized to zero. */ memcpy(mac_info->prefix.u8, header, OCTEON_SE_FASTPATH_AH_HDRLEN); memset(mac_info->prefix.u8 + OCTEON_SE_FASTPATH_AH_HDRLEN, 0, combined->icv_len); mac_info->prefix_len = OCTEON_SE_FASTPATH_AH_HDRLEN + combined->icv_len; #ifdef OCTEON_SE_FASTPATH_TRANSFORM_SHA2 if (cvmx_unlikely((se_trd->ip_version_6 == 1) && (se_trd->transform & OCTEON_SE_FASTPATH_MAC_HMAC_SHA2)) ) { prefix_ofs += 4; mac_info->prefix_len += 4; /* Use IPsec seq as AH padding for making 64 bit aligned. 
          OCTEON_SE_PUT_32BIT_ALIGNED(mac_info->prefix.u8
                                      + OCTEON_SE_FASTPATH_AH_HDRLEN
                                      + combined->icv_len,
                                      low_seq);
        }
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_SHA2 */

#ifdef OCTEON_SE_FASTPATH_TRANSFORM_LONGSEQ
      if (cvmx_unlikely(se_trd->transform & OCTEON_SE_FASTPATH_IPSEC_LONGSEQ))
        {
          CVMX_DEXT(mac_info->suffix, ipsec_seq, 32, 32);
          mac_info->suffix_available = 1;
        }
      else
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_LONGSEQ */
        mac_info->suffix_available = 0;

      /* Assert that crypto offset is 8 byte aligned */
      OCTEON_SE_ASSERT(((uint64_t) (cvmx_phys_to_ptr(packet_out.s.addr)
                                    + prefix_ofs)) % 8 == 0);
      octeon_se_fastpath_packet_buffer_create(&dst, packet_out,
                                              prefix_ofs,
                                              packet_out_len
                                              + pc->s->ip_offset,
                                              packet_out_num_segs);

      if (se_trd->ip_version_6 == 1)
        octeon_se_fastpath_mac_add_ah_header6(packet_out, pc->s->ip_offset,
                                              combined->update,
                                              core->transform_context, 0);
      else
        octeon_se_fastpath_mac_add_ah_header4(packet_out, pc->s->ip_offset,
                                              combined->update,
                                              core->transform_context, 0);

      OCTEON_SE_DEBUG(9, "MAC prefix, len %d\n", mac_info->prefix_len);
      OCTEON_SE_HEXDUMP(9, mac_info->prefix.u8, mac_info->prefix_len);

      /* Do the actual transform */
      (*combined->encrypt)(core->transform_context, &dst, &src,
                           mac_info, NULL, icv);

      /* Copy ICV to packet. */
      if (cvmx_likely(combined->icv_len % 4 == 0))
        {
          for (i = 0; i < combined->icv_len; i += 4)
            {
              OCTEON_SE_PUT_32BIT_ALIGNED(cvmx_phys_to_ptr(packet_out.s.addr)
                                          + esp_ah_ofs
                                          + OCTEON_SE_FASTPATH_AH_HDRLEN + i,
                                          *(uint32_t *)(((uint8_t *) icv) + i));
            }
        }
      else
        {
          memcpy(cvmx_phys_to_ptr(packet_out.s.addr) + esp_ah_ofs
                 + OCTEON_SE_FASTPATH_AH_HDRLEN,
                 icv, combined->icv_len);
        }

#ifdef OCTEON_SE_FASTPATH_TRANSFORM_SHA2
      if (cvmx_unlikely((se_trd->ip_version_6 == 1)
                        && (se_trd->transform
                            & OCTEON_SE_FASTPATH_MAC_HMAC_SHA2)))
        {
          /* Use IPsec seq as AH padding for making 64 bit aligned. */
          OCTEON_SE_PUT_32BIT(cvmx_phys_to_ptr(packet_out.s.addr)
                              + esp_ah_ofs + OCTEON_SE_FASTPATH_AH_HDRLEN
                              + combined->icv_len,
                              low_seq);
        }
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_SHA2 */
    }
  else if (cvmx_likely(se_trd->transform & OCTEON_SE_FASTPATH_IPSEC_ESP))
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_AH */
    {
      uint32_t low_seq;

      OCTEON_SE_DEBUG(9, "Building ESP header at %p\n", header);

      /* Assert that there is enough space for ESP */
      OCTEON_SE_ASSERT(packet_out.s.size
                       > prefix_ofs + OCTEON_SE_FASTPATH_ESP_HDRLEN);

      /* Get and increment next sequence atomically. Note that se_trd
         contains the last sequence number transmitted, thus sequence is
         incremented by one here. */
      ipsec_seq =
        (uint64_t) cvmx_atomic_fetch_and_add64((int64_t *) &se_trd->seq, 1);
      ipsec_seq++;

      /* Build ESP header. */
      OCTEON_SE_FASTPATH_ESPH_SET_SPI(header, se_trd->spi_out);
      CVMX_DEXT(low_seq, ipsec_seq, 0, 32);
      OCTEON_SE_FASTPATH_ESPH_SET_SEQ(header, low_seq);

      prefix_ofs += OCTEON_SE_FASTPATH_ESP_HDRLEN;

      /* Fill in extra info for transform. */
      extra_info->pad_len = pad_len;
      extra_info->nh = esp_ah_nh;

      /* Fill in extra data for MAC. */
      OCTEON_SE_PUT_32BIT_ALIGNED(mac_info->prefix.u8, se_trd->spi_out);

#ifdef OCTEON_SE_FASTPATH_TRANSFORM_AES_GCM
      if (cvmx_likely(combined->is_auth_cipher))
        {
          /* Extract cipher nonce. */
          OCTEON_SE_ASSERT(se_trd->cipher_nonce_size == 4);
          OCTEON_SE_GET_32BIT_ALIGNED(se_trd->keymat
                                      + OCTEON_MAX_KEYMAT_LEN / 2
                                      + se_trd->cipher_key_size,
                                      extra_info->cipher_nonce);

          /* Use IPsec seq# as counter. */
          extra_info->iv[0] = ipsec_seq;

#ifdef OCTEON_SE_FASTPATH_TRANSFORM_LONGSEQ
          if (cvmx_unlikely(se_trd->transform
                            & OCTEON_SE_FASTPATH_IPSEC_LONGSEQ))
            {
              OCTEON_SE_PUT_64BIT(&mac_info->prefix.u8[4], ipsec_seq);
              mac_info->prefix_len = 12;
            }
          else
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_LONGSEQ */
            {
              OCTEON_SE_PUT_32BIT_ALIGNED(&mac_info->prefix.u8[4], low_seq);
              mac_info->prefix_len = 8;
            }
        }
      else
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_AES_GCM */
        {
          for (i = 0; i < combined->cipher_iv_len / 8; i++)
            extra_info->iv[i] = cvmx_rng_get_random64();

          /* Prepare extra mac information */
          OCTEON_SE_PUT_32BIT_ALIGNED(&mac_info->prefix.u8[4], low_seq);
          mac_info->prefix_len = 8;

#ifdef OCTEON_SE_FASTPATH_TRANSFORM_LONGSEQ
          if (cvmx_unlikely(se_trd->transform
                            & OCTEON_SE_FASTPATH_IPSEC_LONGSEQ))
            {
              CVMX_DEXT(mac_info->suffix, ipsec_seq, 32, 32);
              mac_info->suffix_available = 1;
            }
          else
#endif /* OCTEON_SE_FASTPATH_TRANSFORM_LONGSEQ */
            mac_info->suffix_available = 0;
        }

      /* Assert that crypto offset is 8 byte aligned */
      OCTEON_SE_ASSERT(((uint64_t) (cvmx_phys_to_ptr(packet_out.s.addr)
                                    + prefix_ofs)) % 8 == 0);
      octeon_se_fastpath_packet_buffer_create(&dst, packet_out,
                                              prefix_ofs,
                                              packet_out_len
                                              + pc->s->ip_offset - prefix_ofs,
                                              packet_out_num_segs);

      OCTEON_SE_DEBUG(9, "Performing crypto transform\n");

      /* Do the actual transform. */
      (*combined->encrypt)(core->transform_context, &dst, &src,
                           mac_info, extra_info, icv);

      /* The trailer is appended at the end of the encrypted data. The
         write pointer already points at the correct location, which may
         be unaligned if AES-GCM is used. */
      OCTEON_SE_ASSERT(dst.total_bytes == combined->icv_len);
      OCTEON_SE_DEBUG(9, "Inserting ICV, len %d:\n", (int) combined->icv_len);
      OCTEON_SE_HEXDUMP(9, icv, combined->icv_len);

      octeon_se_fastpath_buffer_copy_in(&dst, icv, combined->icv_len);
    }

  /* Update trd statistics only after successful encryption. */
  OCTEON_SE_FASTPATH_STATS({
      cvmx_atomic_add64((int64_t *) &se_trd->out_octets, out_octets);
      cvmx_atomic_add64((int64_t *) &se_trd->out_packets, 1);
    });
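  /* Worked example of the ESP padding arithmetic computed earlier in this
     function: the encrypted part of an ESP packet is
     payload | padding | pad-length byte | next-header byte, and its total
     length must be a multiple of the cipher's pad boundary, hence the
     "+ 2" in the pad_len computation. E.g. a 1404-byte payload with a
     16-byte AES block: (1404 + 2) % 16 = 14, so pad_len = 16 - 14 = 2,
     and 1404 + 2 + 2 = 1408 = 88 * 16. As a sketch, with hypothetical
     variable names:

       uint16_t rem = (payload_len + 2) % pad_boundary;
       uint16_t pad = rem ? (uint16_t) (pad_boundary - rem) : 0;
  */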