/* see shabal_small.h */
void mshabal256_init(mshabal256_context *sc, unsigned out_size)
{
	unsigned char *lane_buf[8];
	unsigned word, lane, v;

	/* Clear the whole interleaved state: (12 A + 16 B + 16 C) words,
	 * 4 bytes each, times the SIMD lane factor. */
	for (word = 0; word < (12 + 16 + 16) * 4 * MSHABAL256_FACTOR; word++)
		sc->state[word] = 0;

	memset(sc->buf0, 0, sizeof sc->buf0);
	memset(sc->buf1, 0, sizeof sc->buf1);
	memset(sc->buf2, 0, sizeof sc->buf2);
	memset(sc->buf3, 0, sizeof sc->buf3);
	memset(sc->buf4, 0, sizeof sc->buf4);
	memset(sc->buf5, 0, sizeof sc->buf5);
	memset(sc->buf6, 0, sizeof sc->buf6);
	memset(sc->buf7, 0, sizeof sc->buf7);

	lane_buf[0] = sc->buf0;
	lane_buf[1] = sc->buf1;
	lane_buf[2] = sc->buf2;
	lane_buf[3] = sc->buf3;
	lane_buf[4] = sc->buf4;
	lane_buf[5] = sc->buf5;
	lane_buf[6] = sc->buf6;
	lane_buf[7] = sc->buf7;

	/* First IV block: 32-bit little-endian words out_size+0 .. out_size+15
	 * in every lane.  Only the low 16 bits are nonzero; the upper bytes
	 * stay zero from the memset above. */
	for (word = 0; word < 16; word++) {
		v = out_size + word;
		for (lane = 0; lane < 8; lane++) {
			lane_buf[lane][4 * word + 0] = (unsigned char)v;
			lane_buf[lane][4 * word + 1] = (unsigned char)(v >> 8);
		}
	}

	sc->Whigh = sc->Wlow = C32(0xFFFFFFFF);
	mshabal256_compress(sc, sc->buf0, sc->buf1, sc->buf2, sc->buf3,
		sc->buf4, sc->buf5, sc->buf6, sc->buf7, 1);

	/* Second IV block: words out_size+16 .. out_size+31. */
	for (word = 0; word < 16; word++) {
		v = out_size + word + 16;
		for (lane = 0; lane < 8; lane++) {
			lane_buf[lane][4 * word + 0] = (unsigned char)v;
			lane_buf[lane][4 * word + 1] = (unsigned char)(v >> 8);
		}
	}
	mshabal256_compress(sc, sc->buf0, sc->buf1, sc->buf2, sc->buf3,
		sc->buf4, sc->buf5, sc->buf6, sc->buf7, 1);

	sc->ptr = 0;
	sc->out_size = out_size;
}
/* Some transmit errors cause the transmitter to shut * down. We now issue a restart transmit. Since the * errors close the BD and update the pointers, the restart * _should_ pick up without having to reset any of our * pointers either. Also, To workaround 8260 device erratum * CPM37, we must disable and then re-enable the transmitter * following a Late Collision, Underrun, or Retry Limit error. */ static void tx_restart(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; C32(fccp, fcc_gfmr, FCC_GFMR_ENT); udelay(10); S32(fccp, fcc_gfmr, FCC_GFMR_ENT); fcc_cr_cmd(fep, CPM_CR_RESTART_TX); }
static void stop(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; /* stop ethernet */ C32(fccp, fcc_gfmr, FCC_GFMR_ENR | FCC_GFMR_ENT); /* clear events */ W16(fccp, fcc_fcce, 0xffff); /* clear interrupt mask */ W16(fccp, fcc_fccm, 0); fs_cleanup_bds(dev); }
/* Some transmit errors cause the transmitter to shut * down. We now issue a restart transmit. * Also, to workaround 8260 device erratum CPM37, we must * disable and then re-enable the transmitterfollowing a * Late Collision, Underrun, or Retry Limit error. * In addition, tbptr may point beyond BDs beyond still marked * as ready due to internal pipelining, so we need to look back * through the BDs and adjust tbptr to point to the last BD * marked as ready. This may result in some buffers being * retransmitted. */ static void tx_restart(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; const struct fs_platform_info *fpi = fep->fpi; fcc_enet_t __iomem *ep = fep->fcc.ep; cbd_t __iomem *curr_tbptr; cbd_t __iomem *recheck_bd; cbd_t __iomem *prev_bd; cbd_t __iomem *last_tx_bd; last_tx_bd = fep->tx_bd_base + (fpi->tx_ring * sizeof(cbd_t)); /* get the current bd held in TBPTR and scan back from this point */ recheck_bd = curr_tbptr = (cbd_t __iomem *) ((R32(ep, fen_genfcc.fcc_tbptr) - fep->ring_mem_addr) + fep->ring_base); prev_bd = (recheck_bd == fep->tx_bd_base) ? last_tx_bd : recheck_bd - 1; /* Move through the bds in reverse, look for the earliest buffer * that is not ready. Adjust TBPTR to the following buffer */ while ((CBDR_SC(prev_bd) & BD_ENET_TX_READY) != 0) { /* Go back one buffer */ recheck_bd = prev_bd; /* update the previous buffer */ prev_bd = (prev_bd == fep->tx_bd_base) ? last_tx_bd : prev_bd - 1; /* We should never see all bds marked as ready, check anyway */ if (recheck_bd == curr_tbptr) break; } /* Now update the TBPTR and dirty flag to the current buffer */ W32(ep, fen_genfcc.fcc_tbptr, (uint) (((void *)recheck_bd - fep->ring_base) + fep->ring_mem_addr)); fep->dirty_tx = recheck_bd; C32(fccp, fcc_gfmr, FCC_GFMR_ENT); udelay(10); S32(fccp, fcc_gfmr, FCC_GFMR_ENT); fcc_cr_cmd(fep, CPM_CR_RESTART_TX); }
static void stop(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; int i; for (i = 0; (R16(sccp, scc_sccm) == 0) && i < SCC_RESET_DELAY; i++) udelay(1); if (i == SCC_RESET_DELAY) printk(KERN_WARNING DRV_MODULE_NAME ": %s SCC timeout on graceful transmit stop\n", dev->name); W16(sccp, scc_sccm, 0); C32(sccp, scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); fs_cleanup_bds(dev); }
static void set_multicast_finish(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; fcc_enet_t __iomem *ep = fep->fcc.ep; /* clear promiscuous always */ C32(fccp, fcc_fpsmr, FCC_PSMR_PRO); /* if all multi or too many multicasts; just enable all */ if ((dev->flags & IFF_ALLMULTI) != 0 || dev->mc_count > FCC_MAX_MULTICAST_ADDRS) { W32(ep, fen_gaddrh, 0xffffffff); W32(ep, fen_gaddrl, 0xffffffff); } /* read back */ fep->fcc.gaddrh = R32(ep, fen_gaddrh); fep->fcc.gaddrl = R32(ep, fen_gaddrl); }
/* see shabal_small.h */
void avx2_mshabal_init(mshabal_context *sc, unsigned out_size)
{
	unsigned char *lane_buf[4];
	unsigned word, lane, v;

	/* 176 = (12 A + 16 B + 16 C) state words * 4 lanes. */
	for (word = 0; word < 176; word++)
		sc->state[word] = 0;

	memset(sc->buf0, 0, sizeof sc->buf0);
	memset(sc->buf1, 0, sizeof sc->buf1);
	memset(sc->buf2, 0, sizeof sc->buf2);
	memset(sc->buf3, 0, sizeof sc->buf3);

	lane_buf[0] = sc->buf0;
	lane_buf[1] = sc->buf1;
	lane_buf[2] = sc->buf2;
	lane_buf[3] = sc->buf3;

	/* First IV block: 32-bit little-endian words out_size+0 ..
	 * out_size+15 in every lane; the upper bytes stay zero from the
	 * memset above. */
	for (word = 0; word < 16; word++) {
		v = out_size + word;
		for (lane = 0; lane < 4; lane++) {
			lane_buf[lane][4 * word + 0] = (unsigned char)v;
			lane_buf[lane][4 * word + 1] = (unsigned char)(v >> 8);
		}
	}

	sc->Whigh = sc->Wlow = C32(0xFFFFFFFF);
	avx2_mshabal_compress(sc, sc->buf0, sc->buf1, sc->buf2, sc->buf3, 1);

	/* Second IV block: words out_size+16 .. out_size+31. */
	for (word = 0; word < 16; word++) {
		v = out_size + word + 16;
		for (lane = 0; lane < 4; lane++) {
			lane_buf[lane][4 * word + 0] = (unsigned char)v;
			lane_buf[lane][4 * word + 1] = (unsigned char)(v >> 8);
		}
	}
	avx2_mshabal_compress(sc, sc->buf0, sc->buf1, sc->buf2, sc->buf3, 1);

	sc->ptr = 0;
	sc->out_size = out_size;
}
static void restart(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); const struct fs_platform_info *fpi = fep->fpi; fcc_t __iomem *fccp = fep->fcc.fccp; fcc_c_t __iomem *fcccp = fep->fcc.fcccp; fcc_enet_t __iomem *ep = fep->fcc.ep; dma_addr_t rx_bd_base_phys, tx_bd_base_phys; u16 paddrh, paddrm, paddrl; const unsigned char *mac; int i; C32(fccp, fcc_gfmr, FCC_GFMR_ENR | FCC_GFMR_ENT); /* clear everything (slow & steady does it) */ for (i = 0; i < sizeof(*ep); i++) out_8((u8 __iomem *)ep + i, 0); /* get physical address */ rx_bd_base_phys = fep->ring_mem_addr; tx_bd_base_phys = rx_bd_base_phys + sizeof(cbd_t) * fpi->rx_ring; /* point to bds */ W32(ep, fen_genfcc.fcc_rbase, rx_bd_base_phys); W32(ep, fen_genfcc.fcc_tbase, tx_bd_base_phys); /* Set maximum bytes per receive buffer. * It must be a multiple of 32. */ W16(ep, fen_genfcc.fcc_mrblr, PKT_MAXBLR_SIZE); W32(ep, fen_genfcc.fcc_rstate, (CPMFCR_GBL | CPMFCR_EB) << 24); W32(ep, fen_genfcc.fcc_tstate, (CPMFCR_GBL | CPMFCR_EB) << 24); /* Allocate space in the reserved FCC area of DPRAM for the * internal buffers. No one uses this space (yet), so we * can do this. Later, we will add resource management for * this area. */ W16(ep, fen_genfcc.fcc_riptr, fpi->dpram_offset); W16(ep, fen_genfcc.fcc_tiptr, fpi->dpram_offset + 32); W16(ep, fen_padptr, fpi->dpram_offset + 64); /* fill with special symbol... 
*/ memset_io(fep->fcc.mem + fpi->dpram_offset + 64, 0x88, 32); W32(ep, fen_genfcc.fcc_rbptr, 0); W32(ep, fen_genfcc.fcc_tbptr, 0); W32(ep, fen_genfcc.fcc_rcrc, 0); W32(ep, fen_genfcc.fcc_tcrc, 0); W16(ep, fen_genfcc.fcc_res1, 0); W32(ep, fen_genfcc.fcc_res2, 0); /* no CAM */ W32(ep, fen_camptr, 0); /* Set CRC preset and mask */ W32(ep, fen_cmask, 0xdebb20e3); W32(ep, fen_cpres, 0xffffffff); W32(ep, fen_crcec, 0); /* CRC Error counter */ W32(ep, fen_alec, 0); /* alignment error counter */ W32(ep, fen_disfc, 0); /* discard frame counter */ W16(ep, fen_retlim, 15); /* Retry limit threshold */ W16(ep, fen_pper, 0); /* Normal persistence */ /* set group address */ W32(ep, fen_gaddrh, fep->fcc.gaddrh); W32(ep, fen_gaddrl, fep->fcc.gaddrh); /* Clear hash filter tables */ W32(ep, fen_iaddrh, 0); W32(ep, fen_iaddrl, 0); /* Clear the Out-of-sequence TxBD */ W16(ep, fen_tfcstat, 0); W16(ep, fen_tfclen, 0); W32(ep, fen_tfcptr, 0); W16(ep, fen_mflr, PKT_MAXBUF_SIZE); /* maximum frame length register */ W16(ep, fen_minflr, PKT_MINBUF_SIZE); /* minimum frame length register */ /* set address */ mac = dev->dev_addr; paddrh = ((u16)mac[5] << 8) | mac[4]; paddrm = ((u16)mac[3] << 8) | mac[2]; paddrl = ((u16)mac[1] << 8) | mac[0]; W16(ep, fen_paddrh, paddrh); W16(ep, fen_paddrm, paddrm); W16(ep, fen_paddrl, paddrl); W16(ep, fen_taddrh, 0); W16(ep, fen_taddrm, 0); W16(ep, fen_taddrl, 0); W16(ep, fen_maxd1, 1520); /* maximum DMA1 length */ W16(ep, fen_maxd2, 1520); /* maximum DMA2 length */ /* Clear stat counters, in case we ever enable RMON */ W32(ep, fen_octc, 0); W32(ep, fen_colc, 0); W32(ep, fen_broc, 0); W32(ep, fen_mulc, 0); W32(ep, fen_uspc, 0); W32(ep, fen_frgc, 0); W32(ep, fen_ospc, 0); W32(ep, fen_jbrc, 0); W32(ep, fen_p64c, 0); W32(ep, fen_p65c, 0); W32(ep, fen_p128c, 0); W32(ep, fen_p256c, 0); W32(ep, fen_p512c, 0); W32(ep, fen_p1024c, 0); W16(ep, fen_rfthr, 0); /* Suggested by manual */ W16(ep, fen_rfcnt, 0); W16(ep, fen_cftype, 0); fs_init_bds(dev); /* adjust to speed 
(for RMII mode) */ if (fpi->use_rmii) { if (fep->phydev->speed == 100) C8(fcccp, fcc_gfemr, 0x20); else S8(fcccp, fcc_gfemr, 0x20); } fcc_cr_cmd(fep, CPM_CR_INIT_TRX); /* clear events */ W16(fccp, fcc_fcce, 0xffff); /* Enable interrupts we wish to service */ W16(fccp, fcc_fccm, FCC_ENET_TXE | FCC_ENET_RXF | FCC_ENET_TXB); /* Set GFMR to enable Ethernet operating mode */ W32(fccp, fcc_gfmr, FCC_GFMR_TCI | FCC_GFMR_MODE_ENET); /* set sync/delimiters */ W16(fccp, fcc_fdsr, 0xd555); W32(fccp, fcc_fpsmr, FCC_PSMR_ENCRC); if (fpi->use_rmii) S32(fccp, fcc_fpsmr, FCC_PSMR_RMII); /* adjust to duplex mode */ if (fep->phydev->duplex) S32(fccp, fcc_fpsmr, FCC_PSMR_FDE | FCC_PSMR_LPB); else C32(fccp, fcc_fpsmr, FCC_PSMR_FDE | FCC_PSMR_LPB); /* Restore multicast and promiscuous settings */ set_multicast_list(dev); S32(fccp, fcc_gfmr, FCC_GFMR_ENR | FCC_GFMR_ENT); }
* As of round 2 of the SHA-3 competition, the published reference
 * implementation and test vectors are wrong, because they use
 * big-endian AES tables while the internal decoding uses little-endian.
 * The code below follows the specification. To turn it into a code
 * which follows the reference implementation (the one called "BugFix"
 * on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out
 * the code below (from the '#define AES_BIG_ENDIAN...' to the definition
 * of the AES_ROUND_NOKEY macro) and replace it with the version which
 * is commented out afterwards.
 */

#define AES_BIG_ENDIAN 0
#include "aes_helper.h"

/* Initial state for the 224-bit output variant (eight 32-bit words). */
static const sph_u32 IV224[] = {
	C32(0x6774F31C), C32(0x990AE210), C32(0xC87D4274), C32(0xC9546371),
	C32(0x62B2AEA8), C32(0x4B5801D8), C32(0x1B702860), C32(0x842F3017)
};

/* Initial state for the 256-bit output variant (eight 32-bit words). */
static const sph_u32 IV256[] = {
	C32(0x49BB3E47), C32(0x2674860D), C32(0xA8B392AC), C32(0x021AC4E6),
	C32(0x409283CF), C32(0x620E5D86), C32(0x6D929DCB), C32(0x96CC2A8B)
};

/* Initial state for the 384-bit output variant (sixteen 32-bit words). */
static const sph_u32 IV384[] = {
	C32(0x83DF1545), C32(0xF9AAEC13), C32(0xF4803CB0), C32(0x11FE1F47),
	C32(0xDA6CD269), C32(0x4F53FCD7), C32(0x950529A2), C32(0x97908147),
	C32(0xB0A4D7AF), C32(0x2B9132BF), C32(0x226E607D), C32(0x3C0F8D7C),
	C32(0x487B3F0F), C32(0x04363E22), C32(0x0155C99C), C32(0xEC2E20D3)
};
/*
 * 4-way Shabal compression: consumes "num" 64-byte message blocks from
 * each of buf0..buf3 (one independent hash lane per buffer), updating
 * the interleaved state in sc->state (12 A, 16 B, 16 C vectors).
 * NOTE(review): despite the avx2_ prefix, only 128-bit SSE2 __m128i
 * intrinsics are used here — confirm the naming against the build.
 */
static void avx2_mshabal_compress(mshabal_context *sc,
	const unsigned char *buf0, const unsigned char *buf1,
	const unsigned char *buf2, const unsigned char *buf3,
	size_t num)
{
	/* Lane-interleaved message block: word i of lane k lives at
	 * u.words[4 * i + k], i.e. one __m128i per message word. */
	union {
		u32 words[64];
		__m128i data[16];
	} u;
	size_t j;
	__m128i A[12], B[16], C[16];
	__m128i one;

	/* Load the interleaved state: A at offset 0, B at 12, C at 28. */
	for (j = 0; j < 12; j++)
		A[j] = _mm_loadu_si128((__m128i *)sc->state + j);
	for (j = 0; j < 16; j++) {
		B[j] = _mm_loadu_si128((__m128i *)sc->state + j + 12);
		C[j] = _mm_loadu_si128((__m128i *)sc->state + j + 28);
	}
	one = _mm_set1_epi32(C32(0xFFFFFFFF));

	/* M(i) = message word i, one 32-bit value per lane. */
#define M(i)   _mm_load_si128(u.data + (i))

	while (num-- > 0) {
		/* Interleave the four 64-byte input blocks lane-wise. */
		for (j = 0; j < 64; j += 4) {
			u.words[j + 0] = *(u32 *)(buf0 + j);
			u.words[j + 1] = *(u32 *)(buf1 + j);
			u.words[j + 2] = *(u32 *)(buf2 + j);
			u.words[j + 3] = *(u32 *)(buf3 + j);
		}

		/* B <- B + M, then XOR the block counter W into A[0..1]. */
		for (j = 0; j < 16; j++)
			B[j] = _mm_add_epi32(B[j], M(j));
		A[0] = _mm_xor_si128(A[0], _mm_set1_epi32(sc->Wlow));
		A[1] = _mm_xor_si128(A[1], _mm_set1_epi32(sc->Whigh));

		/* Rotate every B word left by 17 bits. */
		for (j = 0; j < 16; j++)
			B[j] = _mm_or_si128(_mm_slli_epi32(B[j], 17),
				_mm_srli_epi32(B[j], 15));

		/* One Shabal permutation step on all four lanes:
		 * xa0 <- U(xa0 ^ V(xa1<<<15) ^ xc) ^ xb1 ^ (xb2 &~ xb3) ^ xm
		 * xb0 <- ~((xb0 <<< 1) ^ xa0)
		 * (U(x) = 3x, V(x) = 5x, computed as shift-and-add). */
#define PP(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm)   do { \
		__m128i tt; \
		tt = _mm_or_si128(_mm_slli_epi32(xa1, 15), \
			_mm_srli_epi32(xa1, 17)); \
		tt = _mm_add_epi32(_mm_slli_epi32(tt, 2), tt); \
		tt = _mm_xor_si128(_mm_xor_si128(xa0, tt), xc); \
		tt = _mm_add_epi32(_mm_slli_epi32(tt, 1), tt); \
		tt = _mm_xor_si128( \
			_mm_xor_si128(tt, xb1), \
			_mm_xor_si128(_mm_andnot_si128(xb3, xb2), xm)); \
		xa0 = tt; \
		tt = xb0; \
		tt = _mm_or_si128(_mm_slli_epi32(tt, 1), \
			_mm_srli_epi32(tt, 31)); \
		xb0 = _mm_xor_si128(tt, _mm_xor_si128(xa0, one)); \
	} while (0)

		/* Permutation pass 1 of 3 (16 steps). */
		PP(A[0x0], A[0xB], B[0x0], B[0xD], B[0x9], B[0x6], C[0x8], M(0x0));
		PP(A[0x1], A[0x0], B[0x1], B[0xE], B[0xA], B[0x7], C[0x7], M(0x1));
		PP(A[0x2], A[0x1], B[0x2], B[0xF], B[0xB], B[0x8], C[0x6], M(0x2));
		PP(A[0x3], A[0x2], B[0x3], B[0x0], B[0xC], B[0x9], C[0x5], M(0x3));
		PP(A[0x4], A[0x3], B[0x4], B[0x1], B[0xD], B[0xA], C[0x4], M(0x4));
		PP(A[0x5], A[0x4], B[0x5], B[0x2], B[0xE], B[0xB], C[0x3], M(0x5));
		PP(A[0x6], A[0x5], B[0x6], B[0x3], B[0xF], B[0xC], C[0x2], M(0x6));
		PP(A[0x7], A[0x6], B[0x7], B[0x4], B[0x0], B[0xD], C[0x1], M(0x7));
		PP(A[0x8], A[0x7], B[0x8], B[0x5], B[0x1], B[0xE], C[0x0], M(0x8));
		PP(A[0x9], A[0x8], B[0x9], B[0x6], B[0x2], B[0xF], C[0xF], M(0x9));
		PP(A[0xA], A[0x9], B[0xA], B[0x7], B[0x3], B[0x0], C[0xE], M(0xA));
		PP(A[0xB], A[0xA], B[0xB], B[0x8], B[0x4], B[0x1], C[0xD], M(0xB));
		PP(A[0x0], A[0xB], B[0xC], B[0x9], B[0x5], B[0x2], C[0xC], M(0xC));
		PP(A[0x1], A[0x0], B[0xD], B[0xA], B[0x6], B[0x3], C[0xB], M(0xD));
		PP(A[0x2], A[0x1], B[0xE], B[0xB], B[0x7], B[0x4], C[0xA], M(0xE));
		PP(A[0x3], A[0x2], B[0xF], B[0xC], B[0x8], B[0x5], C[0x9], M(0xF));

		/* Permutation pass 2 of 3. */
		PP(A[0x4], A[0x3], B[0x0], B[0xD], B[0x9], B[0x6], C[0x8], M(0x0));
		PP(A[0x5], A[0x4], B[0x1], B[0xE], B[0xA], B[0x7], C[0x7], M(0x1));
		PP(A[0x6], A[0x5], B[0x2], B[0xF], B[0xB], B[0x8], C[0x6], M(0x2));
		PP(A[0x7], A[0x6], B[0x3], B[0x0], B[0xC], B[0x9], C[0x5], M(0x3));
		PP(A[0x8], A[0x7], B[0x4], B[0x1], B[0xD], B[0xA], C[0x4], M(0x4));
		PP(A[0x9], A[0x8], B[0x5], B[0x2], B[0xE], B[0xB], C[0x3], M(0x5));
		PP(A[0xA], A[0x9], B[0x6], B[0x3], B[0xF], B[0xC], C[0x2], M(0x6));
		PP(A[0xB], A[0xA], B[0x7], B[0x4], B[0x0], B[0xD], C[0x1], M(0x7));
		PP(A[0x0], A[0xB], B[0x8], B[0x5], B[0x1], B[0xE], C[0x0], M(0x8));
		PP(A[0x1], A[0x0], B[0x9], B[0x6], B[0x2], B[0xF], C[0xF], M(0x9));
		PP(A[0x2], A[0x1], B[0xA], B[0x7], B[0x3], B[0x0], C[0xE], M(0xA));
		PP(A[0x3], A[0x2], B[0xB], B[0x8], B[0x4], B[0x1], C[0xD], M(0xB));
		PP(A[0x4], A[0x3], B[0xC], B[0x9], B[0x5], B[0x2], C[0xC], M(0xC));
		PP(A[0x5], A[0x4], B[0xD], B[0xA], B[0x6], B[0x3], C[0xB], M(0xD));
		PP(A[0x6], A[0x5], B[0xE], B[0xB], B[0x7], B[0x4], C[0xA], M(0xE));
		PP(A[0x7], A[0x6], B[0xF], B[0xC], B[0x8], B[0x5], C[0x9], M(0xF));

		/* Permutation pass 3 of 3. */
		PP(A[0x8], A[0x7], B[0x0], B[0xD], B[0x9], B[0x6], C[0x8], M(0x0));
		PP(A[0x9], A[0x8], B[0x1], B[0xE], B[0xA], B[0x7], C[0x7], M(0x1));
		PP(A[0xA], A[0x9], B[0x2], B[0xF], B[0xB], B[0x8], C[0x6], M(0x2));
		PP(A[0xB], A[0xA], B[0x3], B[0x0], B[0xC], B[0x9], C[0x5], M(0x3));
		PP(A[0x0], A[0xB], B[0x4], B[0x1], B[0xD], B[0xA], C[0x4], M(0x4));
		PP(A[0x1], A[0x0], B[0x5], B[0x2], B[0xE], B[0xB], C[0x3], M(0x5));
		PP(A[0x2], A[0x1], B[0x6], B[0x3], B[0xF], B[0xC], C[0x2], M(0x6));
		PP(A[0x3], A[0x2], B[0x7], B[0x4], B[0x0], B[0xD], C[0x1], M(0x7));
		PP(A[0x4], A[0x3], B[0x8], B[0x5], B[0x1], B[0xE], C[0x0], M(0x8));
		PP(A[0x5], A[0x4], B[0x9], B[0x6], B[0x2], B[0xF], C[0xF], M(0x9));
		PP(A[0x6], A[0x5], B[0xA], B[0x7], B[0x3], B[0x0], C[0xE], M(0xA));
		PP(A[0x7], A[0x6], B[0xB], B[0x8], B[0x4], B[0x1], C[0xD], M(0xB));
		PP(A[0x8], A[0x7], B[0xC], B[0x9], B[0x5], B[0x2], C[0xC], M(0xC));
		PP(A[0x9], A[0x8], B[0xD], B[0xA], B[0x6], B[0x3], C[0xB], M(0xD));
		PP(A[0xA], A[0x9], B[0xE], B[0xB], B[0x7], B[0x4], C[0xA], M(0xE));
		PP(A[0xB], A[0xA], B[0xF], B[0xC], B[0x8], B[0x5], C[0x9], M(0xF));

		/* Final mixing: fold C into A, 36 additions in 3 sweeps. */
		A[0xB] = _mm_add_epi32(A[0xB], C[0x6]);
		A[0xA] = _mm_add_epi32(A[0xA], C[0x5]);
		A[0x9] = _mm_add_epi32(A[0x9], C[0x4]);
		A[0x8] = _mm_add_epi32(A[0x8], C[0x3]);
		A[0x7] = _mm_add_epi32(A[0x7], C[0x2]);
		A[0x6] = _mm_add_epi32(A[0x6], C[0x1]);
		A[0x5] = _mm_add_epi32(A[0x5], C[0x0]);
		A[0x4] = _mm_add_epi32(A[0x4], C[0xF]);
		A[0x3] = _mm_add_epi32(A[0x3], C[0xE]);
		A[0x2] = _mm_add_epi32(A[0x2], C[0xD]);
		A[0x1] = _mm_add_epi32(A[0x1], C[0xC]);
		A[0x0] = _mm_add_epi32(A[0x0], C[0xB]);
		A[0xB] = _mm_add_epi32(A[0xB], C[0xA]);
		A[0xA] = _mm_add_epi32(A[0xA], C[0x9]);
		A[0x9] = _mm_add_epi32(A[0x9], C[0x8]);
		A[0x8] = _mm_add_epi32(A[0x8], C[0x7]);
		A[0x7] = _mm_add_epi32(A[0x7], C[0x6]);
		A[0x6] = _mm_add_epi32(A[0x6], C[0x5]);
		A[0x5] = _mm_add_epi32(A[0x5], C[0x4]);
		A[0x4] = _mm_add_epi32(A[0x4], C[0x3]);
		A[0x3] = _mm_add_epi32(A[0x3], C[0x2]);
		A[0x2] = _mm_add_epi32(A[0x2], C[0x1]);
		A[0x1] = _mm_add_epi32(A[0x1], C[0x0]);
		A[0x0] = _mm_add_epi32(A[0x0], C[0xF]);
		A[0xB] = _mm_add_epi32(A[0xB], C[0xE]);
		A[0xA] = _mm_add_epi32(A[0xA], C[0xD]);
		A[0x9] = _mm_add_epi32(A[0x9], C[0xC]);
		A[0x8] = _mm_add_epi32(A[0x8], C[0xB]);
		A[0x7] = _mm_add_epi32(A[0x7], C[0xA]);
		A[0x6] = _mm_add_epi32(A[0x6], C[0x9]);
		A[0x5] = _mm_add_epi32(A[0x5], C[0x8]);
		A[0x4] = _mm_add_epi32(A[0x4], C[0x7]);
		A[0x3] = _mm_add_epi32(A[0x3], C[0x6]);
		A[0x2] = _mm_add_epi32(A[0x2], C[0x5]);
		A[0x1] = _mm_add_epi32(A[0x1], C[0x4]);
		A[0x0] = _mm_add_epi32(A[0x0], C[0x3]);

		/* Swap B and C, subtracting the message block: the old B
		 * becomes the new C, and new B = old C - M. */
#define SWAP_AND_SUB(xb, xc, xm)   do { \
		__m128i tmp; \
		tmp = xb; \
		xb = _mm_sub_epi32(xc, xm); \
		xc = tmp; \
	} while (0)

		SWAP_AND_SUB(B[0x0], C[0x0], M(0x0));
		SWAP_AND_SUB(B[0x1], C[0x1], M(0x1));
		SWAP_AND_SUB(B[0x2], C[0x2], M(0x2));
		SWAP_AND_SUB(B[0x3], C[0x3], M(0x3));
		SWAP_AND_SUB(B[0x4], C[0x4], M(0x4));
		SWAP_AND_SUB(B[0x5], C[0x5], M(0x5));
		SWAP_AND_SUB(B[0x6], C[0x6], M(0x6));
		SWAP_AND_SUB(B[0x7], C[0x7], M(0x7));
		SWAP_AND_SUB(B[0x8], C[0x8], M(0x8));
		SWAP_AND_SUB(B[0x9], C[0x9], M(0x9));
		SWAP_AND_SUB(B[0xA], C[0xA], M(0xA));
		SWAP_AND_SUB(B[0xB], C[0xB], M(0xB));
		SWAP_AND_SUB(B[0xC], C[0xC], M(0xC));
		SWAP_AND_SUB(B[0xD], C[0xD], M(0xD));
		SWAP_AND_SUB(B[0xE], C[0xE], M(0xE));
		SWAP_AND_SUB(B[0xF], C[0xF], M(0xF));

		/* Advance all four lanes and the 64-bit block counter. */
		buf0 += 64;
		buf1 += 64;
		buf2 += 64;
		buf3 += 64;
		if (++sc->Wlow == 0)
			sc->Whigh++;
	}

	/* Write the interleaved state back. */
	for (j = 0; j < 12; j++)
		_mm_storeu_si128((__m128i *)sc->state + j, A[j]);
	for (j = 0; j < 16; j++) {
		_mm_storeu_si128((__m128i *)sc->state + j + 12, B[j]);
		_mm_storeu_si128((__m128i *)sc->state + j + 28, C[j]);
	}
#undef M
}
A06 = T32(A06 + C9); \
	A05 = T32(A05 + C8); \
	A04 = T32(A04 + C7); \
	A03 = T32(A03 + C6); \
	A02 = T32(A02 + C5); \
	A01 = T32(A01 + C4); \
	A00 = T32(A00 + C3); \
	} while (0)

/* Increment the 64-bit block counter (Wlow is the low word, Whigh the
 * high word), propagating the carry on wrap-around. */
#define INCR_W   do { \
		if ((Wlow = T32(Wlow + 1)) == 0) \
			Whigh = T32(Whigh + 1); \
	} while (0)

/* Precomputed initial A state for the 192-bit output variant. */
static const sph_u32 A_init_192[] = {
	C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
	C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
	C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
};

/* Precomputed initial B state for the 192-bit output variant. */
static const sph_u32 B_init_192[] = {
	C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
	C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
	C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
	C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
};

/* Precomputed initial C state for the 192-bit output variant. */
static const sph_u32 C_init_192[] = {
	C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
	C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
	C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
/*
 * This function is called to start or restart the Ethernet controller
 * during a link change.  This only happens when switching between half
 * and full duplex.
 * NOTE(review): the original comment said "FEC", but this path programs
 * an SCC (scc_t / scc_enet_t registers) — confirm the wording.
 */
static void restart(struct net_device *dev)
{
	struct fs_enet_private *fep = netdev_priv(dev);
	scc_t __iomem *sccp = fep->scc.sccp;
	scc_enet_t __iomem *ep = fep->scc.ep;
	const struct fs_platform_info *fpi = fep->fpi;
	u16 paddrh, paddrm, paddrl;
	const unsigned char *mac;
	int i;

	/* Disable receiver and transmitter while reprogramming. */
	C32(sccp, scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);

	/* clear everything (slow & steady does it) */
	for (i = 0; i < sizeof(*ep); i++)
		__fs_out8((u8 __iomem *)ep + i, 0);

	/* point to bds: RX ring first, TX ring immediately after it. */
	W16(ep, sen_genscc.scc_rbase, fep->ring_mem_addr);
	W16(ep, sen_genscc.scc_tbase,
	    fep->ring_mem_addr + sizeof(cbd_t) * fpi->rx_ring);

	/* Initialize function code registers for big-endian.
	 */
#ifndef CONFIG_NOT_COHERENT_CACHE
	W8(ep, sen_genscc.scc_rfcr, SCC_EB | SCC_GBL);
	W8(ep, sen_genscc.scc_tfcr, SCC_EB | SCC_GBL);
#else
	W8(ep, sen_genscc.scc_rfcr, SCC_EB);
	W8(ep, sen_genscc.scc_tfcr, SCC_EB);
#endif

	/* Set maximum bytes per receive buffer.
	 * This appears to be an Ethernet frame size, not the buffer
	 * fragment size.  It must be a multiple of four.
	 */
	W16(ep, sen_genscc.scc_mrblr, 0x5f0);

	/* Set CRC preset and mask. */
	W32(ep, sen_cpres, 0xffffffff);
	W32(ep, sen_cmask, 0xdebb20e3);

	W32(ep, sen_crcec, 0);	/* CRC Error counter */
	W32(ep, sen_alec, 0);	/* alignment error counter */
	W32(ep, sen_disfc, 0);	/* discard frame counter */

	W16(ep, sen_pads, 0x8888);	/* Tx short frame pad character */
	W16(ep, sen_retlim, 15);	/* Retry limit threshold */

	W16(ep, sen_maxflr, 0x5ee);	/* maximum frame length register */
	W16(ep, sen_minflr, PKT_MINBUF_SIZE);	/* minimum frame length register */

	W16(ep, sen_maxd1, 0x000005f0);	/* maximum DMA1 length */
	W16(ep, sen_maxd2, 0x000005f0);	/* maximum DMA2 length */

	/* Clear hash tables. */
	W16(ep, sen_gaddr1, 0);
	W16(ep, sen_gaddr2, 0);
	W16(ep, sen_gaddr3, 0);
	W16(ep, sen_gaddr4, 0);
	W16(ep, sen_iaddr1, 0);
	W16(ep, sen_iaddr2, 0);
	W16(ep, sen_iaddr3, 0);
	W16(ep, sen_iaddr4, 0);

	/* set address: the station address registers hold the MAC
	 * byte-swapped, low byte of each pair in the high half. */
	mac = dev->dev_addr;
	paddrh = ((u16) mac[5] << 8) | mac[4];
	paddrm = ((u16) mac[3] << 8) | mac[2];
	paddrl = ((u16) mac[1] << 8) | mac[0];

	W16(ep, sen_paddrh, paddrh);
	W16(ep, sen_paddrm, paddrm);
	W16(ep, sen_paddrl, paddrl);

	W16(ep, sen_pper, 0);
	W16(ep, sen_taddrl, 0);
	W16(ep, sen_taddrm, 0);
	W16(ep, sen_taddrh, 0);

	fs_init_bds(dev);

	scc_cr_cmd(fep, CPM_CR_INIT_TRX);

	/* Clear any pending events, then unmask the ones we service. */
	W16(sccp, scc_scce, 0xffff);

	/* Enable interrupts we wish to service. */
	W16(sccp, scc_sccm, SCCE_ENET_TXE | SCCE_ENET_RXF | SCCE_ENET_TXB);

	/* Set GSMR_H to enable all normal operating modes.
	 * Set GSMR_L to enable Ethernet to MC68160.
	 */
	W32(sccp, scc_gsmrh, 0);
	W32(sccp, scc_gsmrl,
	    SCC_GSMRL_TCI | SCC_GSMRL_TPL_48 | SCC_GSMRL_TPP_10 |
	    SCC_GSMRL_MODE_ENET);

	/* Set sync/delimiters. */
	W16(sccp, scc_dsr, 0xd555);

	/* Set processing mode.  Use Ethernet CRC, catch broadcast, and
	 * start frame search 22 bit times after RENA.
	 */
	W16(sccp, scc_psmr, SCC_PSMR_ENCRC | SCC_PSMR_NIB22);

	/* Set full duplex mode if needed */
	if (fep->phydev->duplex)
		S16(sccp, scc_psmr, SCC_PSMR_LPB | SCC_PSMR_FDE);

	/* Re-enable receiver and transmitter. */
	S32(sccp, scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
}
static void restart(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); const struct fs_platform_info *fpi = fep->fpi; fcc_t __iomem *fccp = fep->fcc.fccp; fcc_c_t __iomem *fcccp = fep->fcc.fcccp; fcc_enet_t __iomem *ep = fep->fcc.ep; dma_addr_t rx_bd_base_phys, tx_bd_base_phys; u16 paddrh, paddrm, paddrl; const unsigned char *mac; int i; C32(fccp, fcc_gfmr, FCC_GFMR_ENR | FCC_GFMR_ENT); /* clear everything (slow & steady does it) */ for (i = 0; i < sizeof(*ep); i++) out_8((u8 __iomem *)ep + i, 0); /* get physical address */ rx_bd_base_phys = fep->ring_mem_addr; tx_bd_base_phys = rx_bd_base_phys + sizeof(cbd_t) * fpi->rx_ring; /* point to bds */ W32(ep, fen_genfcc.fcc_rbase, rx_bd_base_phys); W32(ep, fen_genfcc.fcc_tbase, tx_bd_base_phys); /* Set maximum bytes per receive buffer. * It must be a multiple of 32. */ W16(ep, fen_genfcc.fcc_mrblr, PKT_MAXBLR_SIZE); W32(ep, fen_genfcc.fcc_rstate, (CPMFCR_GBL | CPMFCR_EB) << 24); W32(ep, fen_genfcc.fcc_tstate, (CPMFCR_GBL | CPMFCR_EB) << 24); /* Allocate space in the reserved FCC area of DPRAM for the * internal buffers. No one uses this space (yet), so we * can do this. Later, we will add resource management for * this area. */ W16(ep, fen_genfcc.fcc_riptr, fpi->dpram_offset); W16(ep, fen_genfcc.fcc_tiptr, fpi->dpram_offset + 32); W16(ep, fen_padptr, fpi->dpram_offset + 64); /* fill with special symbol... */ memset_io(fep->fcc.mem + fpi->dpram_offset + 64, 0x88, 32); W32(ep, fen_genfcc.fcc_rbptr, 0); W32(ep, fen_genfcc.fcc_tbptr, 0); W32(ep, fen_genfcc.fcc_rcrc, 0); W32(ep, fen_genfcc.fcc_tcrc, 0); W16(ep, fen_genfcc.fcc_res1, 0); W32(ep, fen_genfcc.fcc_res2, 0); /* no CAM */ W32(ep, fen_camptr, 0); /* Set CRC preset and mask */ W32(ep, fen_cmask, 0xdebb20e3); W32(ep, fen_cpres, 0xffffffff); W32(ep, fen_crcec, 0); /* CRC Error counter */ W32(ep, fen_alec, 0); /* alignment error counte