static char *url_buff_gets(URL url, char *buff, int maxsiz)
{
    URL_buff *urlp = (URL_buff *)url;
    int c, r, w;
    long len, maxlen;
    int newline = url_newline_code;
    unsigned char *bp;

    if(urlp->eof)
        return NULL;

    maxlen = maxsiz - 1;
    if(maxlen == 0)
        *buff = '\0';
    if(maxlen <= 0)
        return buff;
    len = 0;

    r = urlp->rp;
    w = urlp->wp;
    bp = urlp->buffer;

    do
    {
        if(r == w)
        {
            urlp->wp = w;
            prefetch(urlp);
            w = urlp->wp;
            if(r == w)
            {
                urlp->eof = 1;
                if(len == 0)
                    return NULL;
                buff[len] = '\0';
                urlp->pos += len;
                urlp->rp = r;
                return buff;
            }
        }
        c = bp[r];
        buff[len++] = c;
        r = ((r + 1) & BASEMASK);
    } while(c != newline && len < maxlen);

    buff[len] = '\0';
    urlp->pos += len;
    urlp->rp = r;
    return buff;
}
/*
 * Invoke the RCU callbacks on the specified rcu_ctrlblk structure
 * whose grace period has elapsed.
 */
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
{
    const char *rn = NULL;
    struct rcu_head *next, *list;
    unsigned long flags;
    RCU_TRACE(int cb_count = 0);

    /* If no RCU callbacks ready to invoke, just return. */
    if (&rcp->rcucblist == rcp->donetail) {
        RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
        RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
                                      !!ACCESS_ONCE(rcp->rcucblist),
                                      need_resched(), is_idle_task(current),
                                      false));
        return;
    }

    /* Move the ready-to-invoke callbacks to a local list. */
    local_irq_save(flags);
    RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
    list = rcp->rcucblist;
    rcp->rcucblist = *rcp->donetail;
    *rcp->donetail = NULL;
    if (rcp->curtail == rcp->donetail)
        rcp->curtail = &rcp->rcucblist;
    rcp->donetail = &rcp->rcucblist;
    local_irq_restore(flags);

    /* Invoke the callbacks on the local list. */
    RCU_TRACE(rn = rcp->name);
    while (list) {
        next = list->next;
        prefetch(next);
        debug_rcu_head_unqueue(list);
        local_bh_disable();
        __rcu_reclaim(rn, list);
        local_bh_enable();
        list = next;
        RCU_TRACE(cb_count++);
    }
    RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
    RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(),
                                  is_idle_task(current), false));
}
static long url_buff_read(URL url, void *buff, long n)
{
    URL_buff *urlp = (URL_buff *)url;
    char *s = (char *)buff;
    int r, i, j;

    if(urlp->eof)
        return 0;

    r = urlp->rp;
    if(r == urlp->wp)
    {
        prefetch(urlp);
        if(r == urlp->wp)
        {
            urlp->eof = 1;
            return EOF;
        }
    }

    /* first fragment */
    i = urlp->wp - r;
    if(i < 0)
        i = BASESIZE - r;
    if(i > n)
        i = n;
    memcpy(s, urlp->buffer + r, i);
    r = ((r + i) & BASEMASK);

    if(i == n || r == urlp->wp || r != 0)
    {
        urlp->rp = r;
        urlp->pos += i;
        return i;
    }

    /* second fragment */
    j = urlp->wp;
    n -= i;
    s += i;
    if(j > n)
        j = n;
    memcpy(s, urlp->buffer, j);
    urlp->rp = j;
    urlp->pos += i + j;
    return i + j;
}
static int write_packet(struct imx_ep_struct *imx_ep, struct imx_request *req)
{
    u8 *buf;
    int length, count, temp;

    if (unlikely(__raw_readl(imx_ep->imx_usb->base +
                             USB_EP_STAT(EP_NO(imx_ep))) & EPSTAT_ZLPS)) {
        D_TRX(imx_ep->imx_usb->dev, "<%s> zlp still queued in EP %s\n",
              __func__, imx_ep->ep.name);
        return -1;
    }

    buf = req->req.buf + req->req.actual;
    prefetch(buf);

    length = min(req->req.length - req->req.actual, (u32)imx_ep->fifosize);

    if (imx_fifo_bcount(imx_ep) + length > imx_ep->fifosize) {
        D_TRX(imx_ep->imx_usb->dev, "<%s> packet overfill %s fifo\n",
              __func__, imx_ep->ep.name);
        return -1;
    }

    req->req.actual += length;
    count = length;

    if (!count && req->req.zero) {    /* zlp */
        temp = __raw_readl(imx_ep->imx_usb->base +
                           USB_EP_STAT(EP_NO(imx_ep)));
        __raw_writel(temp | EPSTAT_ZLPS,
                     imx_ep->imx_usb->base + USB_EP_STAT(EP_NO(imx_ep)));
        D_TRX(imx_ep->imx_usb->dev, "<%s> zero packet\n", __func__);
        return 0;
    }

    while (count--) {
        if (count == 0) {    /* last byte */
            temp = __raw_readl(imx_ep->imx_usb->base +
                               USB_EP_FCTRL(EP_NO(imx_ep)));
            __raw_writel(temp | FCTRL_WFR,
                         imx_ep->imx_usb->base + USB_EP_FCTRL(EP_NO(imx_ep)));
        }
        __raw_writeb(*buf++,
                     imx_ep->imx_usb->base + USB_EP_FDAT0(EP_NO(imx_ep)));
    }

    return length;
}
void AudioPrefetch::seek(unsigned seekTo)
{
    // printf("seek %d\n", seekTo);

#ifdef AUDIOPREFETCH_DEBUG
    printf("AudioPrefetch::seek to:%u seekCount:%d\n", seekTo, seekCount);
#endif

    // Speedup: More than one seek message pending?
    // Eat up seek messages until we get to the very LATEST one,
    // because all the rest which came before it are irrelevant now,
    // and processing them all was taking extreme time, especially with
    // resampling enabled.
    // In particular, when the user 'slides' the play cursor back and forth
    // there are MANY seek messages in the pipe, and with resampling enabled
    // it was taking minutes to finish seeking. If the user hit play during that time,
    // things were messed up (FIFO underruns, choppy intermittent sound etc).
    // Added by Tim. p3.3.20
    if (seekCount > 1) {
        --seekCount;
        return;
    }

    writePos = seekTo;
    bool isFirstPrefetch = true;
    for (unsigned int i = 0; i < (fifoLength) - 1; ++i) // prevent compiler warning: comparison of signed/unsigned
    {
        // Indicate do a seek command before read, but only on the first pass.
        // Changed by Tim. p3.3.17
        //prefetch();
        prefetch(isFirstPrefetch);
        isFirstPrefetch = false;

        // To help speed things up even more, check the count again. Return if more seek messages are pending.
        // Added by Tim. p3.3.20
        if (seekCount > 1) {
            --seekCount;
            return;
        }
    }

    seekPos = seekTo;
    //seekDone = true;
    --seekCount;
}
static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
{
    struct bnxt_qplib_rcfw *rcfw = dev_instance;
    struct bnxt_qplib_hwq *creq = &rcfw->creq;
    struct creq_base **creq_ptr;
    u32 sw_cons;

    /* Prefetch the CREQ element */
    sw_cons = HWQ_CMP(creq->cons, creq);
    creq_ptr = (struct creq_base **)rcfw->creq.pbl_ptr;
    prefetch(&creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]);

    tasklet_schedule(&rcfw->worker);

    return IRQ_HANDLED;
}
void GenomeSequence::setup(const char *referenceFilename)
{
    setReferenceName(referenceFilename);

    if (_progressStream)
        *_progressStream << "open and prefetch reference genome "
                         << referenceFilename << ": " << std::flush;

    if (open(false))
    {
        std::cerr << "Failed to open reference genome "
                  << referenceFilename << std::endl;
        std::cerr << errorStr << std::endl;
        exit(1);
    }

    prefetch();

    if (_progressStream)
        *_progressStream << "done." << std::endl << std::flush;
}
/* Handle a received packet.  Second half: Touches packet payload. */
void __efx_rx_packet(struct efx_channel *channel,
                     struct efx_rx_buffer *rx_buf, bool checksummed)
{
    struct efx_nic *efx = channel->efx;
    struct sk_buff *skb;

    /* If we're in loopback test, then pass the packet directly to the
     * loopback layer, and free the rx_buf here */
    if (unlikely(efx->loopback_selftest)) {
        efx_loopback_rx_packet(efx, rx_buf->data, rx_buf->len);
        efx_free_rx_buffer(efx, rx_buf);
        return;
    }

    if (rx_buf->skb) {
        prefetch(skb_shinfo(rx_buf->skb));

        skb_put(rx_buf->skb, rx_buf->len);

        /* Move past the ethernet header. rx_buf->data still points
         * at the ethernet header */
        rx_buf->skb->protocol = eth_type_trans(rx_buf->skb, efx->net_dev);

        skb_record_rx_queue(rx_buf->skb, channel->channel);
    }

    if (likely(checksummed || rx_buf->page)) {
        efx_rx_packet_lro(channel, rx_buf, checksummed);
        return;
    }

    /* We now own the SKB */
    skb = rx_buf->skb;
    rx_buf->skb = NULL;
    EFX_BUG_ON_PARANOID(!skb);

    /* Set the SKB flags */
    skb->ip_summed = CHECKSUM_NONE;

    /* Pass the packet up */
    netif_receive_skb(skb);

    /* Update allocation strategy method */
    channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
}
static int setdma_tx(struct s3c_ep *ep, struct s3c_request *req)
{
    u32 *buf, ctrl = 0;
    u32 length, pktcnt;
    u32 ep_num = ep_index(ep);
    struct device *dev = &the_controller->dev->dev;

    buf = req->req.buf + req->req.actual;
    prefetch(buf);
    length = req->req.length - req->req.actual;

    if (ep_num == EP0_CON)
        length = min(length, (u32)ep_maxpacket(ep));

    req->req.actual += length;
    req->req.dma = dma_map_single(dev, buf, length, DMA_TO_DEVICE);
    req->mapped = 1;

    if (length == 0)
        pktcnt = 1;
    else
        pktcnt = (length - 1) / (ep->ep.maxpacket) + 1;

#ifdef DED_TX_FIFO
    /* Write the FIFO number to be used for this endpoint */
    ctrl = readl(S3C_UDC_OTG_DIEPCTL(ep_num));
    ctrl &= ~DEPCTL_TXFNUM_MASK;
    ctrl |= (ep_num << DEPCTL_TXFNUM_BIT);
    writel(ctrl, S3C_UDC_OTG_DIEPCTL(ep_num));
#endif

    writel(virt_to_phys(buf), S3C_UDC_OTG_DIEPDMA(ep_num));
    writel((pktcnt << 19) | (length << 0), S3C_UDC_OTG_DIEPTSIZ(ep_num));

    ctrl = readl(S3C_UDC_OTG_DIEPCTL(ep_num));
    writel(DEPCTL_EPENA | DEPCTL_CNAK | ctrl, S3C_UDC_OTG_DIEPCTL(ep_num));

    DEBUG_IN_EP("%s:EP%d TX DMA start : DIEPDMA0 = 0x%x, DIEPTSIZ0 = 0x%x, DIEPCTL0 = 0x%x\n"
                "\tbuf = 0x%p, pktcnt = %d, xfersize = %d\n",
                __func__, ep_num,
                readl(S3C_UDC_OTG_DIEPDMA(ep_num)),
                readl(S3C_UDC_OTG_DIEPTSIZ(ep_num)),
                readl(S3C_UDC_OTG_DIEPCTL(ep_num)),
                buf, pktcnt, length);

    return length;
}
/*
 * sfe_cm_recv()
 *	Handle packet receives.
 *
 * Returns 1 if the packet is forwarded or 0 if it isn't.
 */
int sfe_cm_recv(struct sk_buff *skb)
{
    struct net_device *dev;
#if (SFE_HOOK_ABOVE_BRIDGE)
    struct in_device *in_dev;
#endif

    /*
     * We know that for the vast majority of packets we need the transport
     * layer header so we may as well start to fetch it now!
     */
    prefetch(skb->data + 32);
    barrier();

    dev = skb->dev;

#if (SFE_HOOK_ABOVE_BRIDGE)
    /*
     * Does our input device support IP processing?
     */
    in_dev = (struct in_device *)dev->ip_ptr;
    if (unlikely(!in_dev)) {
        DEBUG_TRACE("no IP processing for device: %s\n", dev->name);
        return 0;
    }

    /*
     * Does it have an IP address?  If it doesn't then we can't do anything
     * interesting here!
     */
    if (unlikely(!in_dev->ifa_list)) {
        DEBUG_TRACE("no IP address for device: %s\n", dev->name);
        return 0;
    }
#endif

    /*
     * We're only interested in IP packets.
     */
    if (likely(htons(ETH_P_IP) == skb->protocol)) {
        return sfe_ipv4_recv(dev, skb);
    }

    DEBUG_TRACE("not IP packet\n");
    return 0;
}
static void isp1362_write_ptd(struct isp1362_hcd *isp1362_hcd, struct isp1362_ep *ep,
                              struct isp1362_ep_queue *epq)
{
    struct ptd *ptd = &ep->ptd;
    int len = PTD_GET_DIR(ptd) == PTD_DIR_IN ? 0 : ep->length;

    _BUG_ON(ep->ptd_offset < 0);

    prefetch(ptd);
    isp1362_write_buffer(isp1362_hcd, ptd, ep->ptd_offset, PTD_HEADER_SIZE);
    if (len)
        isp1362_write_buffer(isp1362_hcd, ep->data,
                             ep->ptd_offset + PTD_HEADER_SIZE, len);

    dump_ptd(ptd);
    dump_ptd_out_data(ptd, ep->data);
}
int wait_packet_function_ptr(void *data, int mode)
{
    struct e1000_adapter *adapter = (struct e1000_adapter *)data;

    if(unlikely(enable_debug))
        printk("[wait_packet_function_ptr] called [mode=%d]\n", mode);

    if(mode == 1) {
        struct e1000_ring *rx_ring = adapter->rx_ring;
        union e1000_rx_desc_extended *rx_desc;
        u16 i = E1000_READ_REG(&adapter->hw, E1000_RDT(0));

        /* Very important: update the value from the register set from userland.
         * Here i is the last I've read (zero-copy implementation) */
        if(++i == rx_ring->count)
            i = 0;
        /* Here i is the next I have to read */

        rx_ring->next_to_clean = i;

        rx_desc = E1000_RX_DESC_EXT(*rx_ring, rx_ring->next_to_clean);

        if(unlikely(enable_debug))
            printk("[wait_packet_function_ptr] Check if a packet is arrived\n");

        prefetch(rx_desc);

        if(!(le32_to_cpu(rx_desc->wb.upper.status_error) & E1000_RXD_STAT_DD)) {
            adapter->dna.interrupt_received = 0;

#if 0
            if(!adapter->dna.interrupt_enabled) {
                e1000_irq_enable(adapter), adapter->dna.interrupt_enabled = 1;

                if(unlikely(enable_debug))
                    printk("[wait_packet_function_ptr] Packet not arrived yet: enabling interrupts\n");
            }
#endif
        } else
            adapter->dna.interrupt_received = 1;

        return(le32_to_cpu(rx_desc->wb.upper.status_error) & E1000_RXD_STAT_DD);
    } else {
        if(adapter->dna.interrupt_enabled) {
            e1000_irq_disable(adapter);
            adapter->dna.interrupt_enabled = 0;

            if(unlikely(enable_debug))
                printk("[wait_packet_function_ptr] Disabled interrupts\n");
        }

        return(0);
    }
}
/*
 * s3c2410_udc_write_packet
 */
static inline int s3c2410_udc_write_packet(int fifo,
                                           struct s3c2410_request *req,
                                           unsigned max)
{
    unsigned len = min(req->req.length - req->req.actual, max);
    u8 *buf = req->req.buf + req->req.actual;

    prefetch(buf);

    dprintk(DEBUG_VERBOSE, "%s %d %d %d %d\n", __func__,
            req->req.actual, req->req.length, len, req->req.actual + len);

    req->req.actual += len;

    udelay(5);
    writesb(base_addr + fifo, buf, len);

    return len;
}
void siftDownSingleStep(ssize_t const end, ssize_t const root) {
    ssize_t const left = root * 2;
    ssize_t const right = left + 1;

    if (right <= end) {
        ssize_t const maxChild = root * 2 + compOp(a[left], Below, a[right]);
        if (compOp(a[root], Below, a[maxChild])) {
            std::swap(a[root], a[maxChild]);
            queue[queueStoreIndex] = maxChild;
            queueStoreIndex++;
            prefetch(a + std::min(maxChild * 2, end));
        }
    } else {
        if (left == end && compOp(a[root], Below, a[left])) {
            std::swap(a[root], a[left]);
        }
    }
}
int pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
{
    struct pcap_ring *pme = p;
    struct my_ring *me = &pme->me;
    int got = 0;
    u_int si;

    ND("cnt %d", cnt);
    if (cnt == 0)
        cnt = -1;
    /* scan all rings */
    for (si = me->begin; si < me->end; si++) {
        struct netmap_ring *ring = NETMAP_RXRING(me->nifp, si);
        ND("ring has %d pkts", ring->avail);
        if (ring->avail == 0)
            continue;
        pme->hdr.ts = ring->ts;
        /*
         * XXX a proper prefetch should be done as
         *    prefetch(i); callback(i-1); ...
         */
        while ((cnt == -1 || cnt != got) && ring->avail > 0) {
            u_int i = ring->cur;
            u_int idx = ring->slot[i].buf_idx;
            if (idx < 2) {
                D("%s bogus RX index %d at offset %d",
                  me->nifp->ni_name, idx, i);
                sleep(2);
            }
            u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
            prefetch(buf);
            pme->hdr.len = pme->hdr.caplen = ring->slot[i].len;
            // D("call %p len %d", p, me->hdr.len);
            callback(user, &pme->hdr, buf);
            ring->cur = NETMAP_RING_NEXT(ring, i);
            ring->avail--;
            got++;
        }
    }
    pme->st.ps_recv += got;
    return got;
}
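The XXX note in pcap_dispatch() above describes a software-pipelined pattern: issue the prefetch for the next buffer before running the callback on the current one, so the cache fill overlaps with useful work. Below is a minimal, self-contained sketch of that idea in plain C; handle_packet(), process_ring(), the flat buffer array, and the use of __builtin_prefetch() are illustrative assumptions, not part of the netmap/pcap code shown here.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical per-packet handler, standing in for the pcap callback. */
static void handle_packet(const uint8_t *buf, size_t len)
{
    (void)buf;
    (void)len;
}

/* Process n packet buffers, prefetching buffer i+1 while buffer i is
 * still being handled ("prefetch(i+1); callback(i); ..."). */
static void process_ring(uint8_t *const *bufs, const size_t *lens, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        if (i + 1 < n)
            __builtin_prefetch(bufs[i + 1]);   /* overlap the fetch with work */
        handle_packet(bufs[i], lens[i]);
    }
}

The lookahead distance of one slot is arbitrary; in practice it is tuned so the prefetched line arrives just before the data is actually touched.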
/**
 * this function polls the CQ, and extracts the needed fields
 * upon CQE error state it will return -1
 * if a bad checksum packet or a filler bit it will return VMA_MP_RQ_BAD_PACKET
 */
int cq_mgr_mp::poll_mp_cq(uint16_t &size, uint32_t &strides_used,
                          uint32_t &flags, struct mlx5_cqe64 *&out_cqe64)
{
    struct mlx5_cqe64 *cqe = check_cqe();
    if (likely(cqe)) {
        if (unlikely(MLX5_CQE_OPCODE(cqe->op_own) != MLX5_CQE_RESP_SEND)) {
            cq_logdbg("Warning op_own is %x", MLX5_CQE_OPCODE(cqe->op_own));
            // optimize checks in ring by setting size non zero
            if (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) {
                cq_logdbg("poll_length, CQE response error, "
                          "syndrome=0x%x, vendor syndrome error=0x%x, "
                          "HW syndrome 0x%x, HW syndrome type 0x%x\n",
                          ((struct mlx5_err_cqe *)cqe)->syndrome,
                          ((struct mlx5_err_cqe *)cqe)->vendor_err_synd,
                          ((struct mlx5_err_cqe *)cqe)->hw_err_synd,
                          ((struct mlx5_err_cqe *)cqe)->hw_synd_type);
            }
            size = 1;
            m_p_cq_stat->n_rx_pkt_drop++;
            return -1;
        }
        m_p_cq_stat->n_rx_pkt_drop += cqe->sop_qpn.sop;
        out_cqe64 = cqe;
        uint32_t stride_byte_cnt = ntohl(cqe->byte_cnt);
        strides_used = (stride_byte_cnt & MP_RQ_NUM_STRIDES_FIELD_MASK) >>
                       MP_RQ_NUM_STRIDES_FIELD_SHIFT;
        flags = (!!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) * IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK) |
                (!!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) * IBV_EXP_CQ_RX_IP_CSUM_OK);
        if (likely(flags == UDP_OK_FLAGS)) {
            size = stride_byte_cnt & MP_RQ_BYTE_CNT_FIELD_MASK;
        } else {
            // if CSUM is bad it can be either filler or bad packet
            flags = VMA_MP_RQ_BAD_PACKET;
            size = 1;
            if (stride_byte_cnt & MP_RQ_FILLER_FIELD_MASK) {
                m_p_cq_stat->n_rx_pkt_drop++;
            }
        }
        ++m_mlx5_cq.cq_ci;
        prefetch((uint8_t *)m_mlx5_cq.cq_buf +
                 ((m_mlx5_cq.cq_ci & (m_mlx5_cq.cqe_count - 1)) << m_mlx5_cq.cqe_size_log));
    } else {
static int be_get_frag_header(struct skb_frag_struct *frag, void **mac_hdr,
                              void **ip_hdr, void **tcpudp_hdr,
                              u64 *hdr_flags, void *priv)
{
    struct ethhdr *eh;
    struct vlan_ethhdr *veh;
    struct iphdr *iph;
    u8 *va = page_address(frag->page) + frag->page_offset;
    unsigned long ll_hlen;

    /* find the mac header, abort if not IPv4 */
    prefetch(va);
    eh = (struct ethhdr *)va;
    *mac_hdr = eh;
    ll_hlen = ETH_HLEN;
    if (eh->h_proto != htons(ETH_P_IP)) {
        if (eh->h_proto == htons(ETH_P_8021Q)) {
            veh = (struct vlan_ethhdr *)va;
            if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP))
                return -1;

            ll_hlen += VLAN_HLEN;
        } else {
            return -1;
        }
    }
    *hdr_flags = LRO_IPV4;
    iph = (struct iphdr *)(va + ll_hlen);
    *ip_hdr = iph;
    if (iph->protocol != IPPROTO_TCP)
        return -1;

    *hdr_flags |= LRO_TCP;
    *tcpudp_hdr = (u8 *)(*ip_hdr) + (iph->ihl << 2);

    return 0;
}
size_t tstrlen(const tchar_t *s)
{
    const char *p;
    unsigned long long r, t, u;
    unsigned shift;

    prefetch(s);

    /*
     * Sometimes you need a new perspective, like the altivec
     * way of handling things.
     * Lower address bits? Totally overestimated.
     *
     * We don't precheck for alignment.
     * Instead we "align hard", do one load "under the address",
     * mask the excess info out and afterwards we are fine to go.
     */
    p = (const char *)ALIGN_DOWN(s, SOULL);
    shift = ALIGN_DOWN_DIFF(s, SOULL);
    r = *(const unsigned long long *)p;
    if(!HOST_IS_BIGENDIAN)
        r |= (~0ULL) >> ((SOULL - shift) * BITS_PER_CHAR);
    else
std::shared_ptr<const QImage> DirectoryMangaVolume::getImage(uint page_num, QPointF)
{
    if (page_num >= m_page_names.size()) {
        return std::shared_ptr<const QImage>();
    } else {
        //if(m_active_pages.find(page_num) != m_active_pages.end())
        {
            qWarning() << "Pushing into active: " << page_num;
            m_active_pages.insert(page_num);
        }
        prefetch();
        qWarning() << "Active: " << m_active_pages.size();
        //qWarning() << "Prefetch: " << m_prefetched_pages.size();
        for (auto && ind : m_active_pages) { qDebug("%d ", ind); }
        qWarning() << "Max: " << *m_active_pages.crbegin();
        qWarning() << "Prefetch: ";
        for (auto && ind : m_prefetched_pages) { qDebug("%d ", ind.first); }
        //MangaPage & page = m_prefetched_pages[page_num];
        return m_prefetched_pages[page_num].getData();
    }
}
/* OUT packets can be used with any type of endpoint.
 * urb->iso_frame_desc is currently ignored here...
 */
static void out_packet(
    struct sl811 *sl811,
    struct sl811h_ep *ep,
    struct urb *urb,
    u8 bank,
    u8 control
)
{
    void *buf;
    u8 addr;
    u8 len;
    void __iomem *data_reg;

    buf = urb->transfer_buffer + urb->actual_length;
    prefetch(buf);

    len = min((int)ep->maxpacket,
              urb->transfer_buffer_length - urb->actual_length);

    if (!(control & SL11H_HCTLMASK_ISOCH)
            && usb_gettoggle(urb->dev, ep->epnum, 1))
        control |= SL11H_HCTLMASK_TOGGLE;

    addr = SL811HS_PACKET_BUF(bank == 0);
    data_reg = sl811->data_reg;

    sl811_write_buf(sl811, addr, buf, len);

    /* autoincrementing */
    sl811_write(sl811, bank + SL11H_BUFADDRREG, addr);
    writeb(len, data_reg);
    writeb(SL_OUT | ep->epnum, data_reg);
    writeb(usb_pipedevice(urb->pipe), data_reg);
    sl811_write(sl811, bank + SL11H_HOSTCTLREG,
                control | SL11H_HCTLMASK_OUT);
    ep->length = len;
    PACKET("OUT%s/%d qh%p len%d\n", ep->nak_count ? "/retry" : "",
           !!usb_gettoggle(urb->dev, ep->epnum, 1), ep, len);
}
static int cnmips_cu2_call(struct notifier_block *nfb, unsigned long action,
                           void *data)
{
    unsigned long flags;
    unsigned int status;

    switch (action) {
    case CU2_EXCEPTION:
        prefetch(&current->thread.cp2);
        local_irq_save(flags);
        KSTK_STATUS(current) |= ST0_CU2;
        status = read_c0_status();
        write_c0_status(status | ST0_CU2);
        octeon_cop2_restore(&(current->thread.cp2));
        write_c0_status(status & ~ST0_CU2);
        local_irq_restore(flags);

        return NOTIFY_BAD;    /* Don't call default notifier */
    }

    return NOTIFY_OK;        /* Let default notifier send signals */
}
static __inline__ int write_packet(struct lh7a40x_ep *ep,
                                   struct lh7a40x_request *req, int max)
{
    u8 *buf;
    int length, count;
    volatile u32 *fifo = (volatile u32 *)ep->fifo;

    buf = req->req.buf + req->req.actual;
    prefetch(buf);

    length = req->req.length - req->req.actual;
    length = min(length, max);
    req->req.actual += length;

    DEBUG("Write %d (max %d), fifo %p\n", length, max, fifo);

    count = length;
    while (count--) {
        *fifo = *buf++;
    }

    return length;
}
/*
 * Load an endpoint's FIFO
 */
void musbfsh_write_fifo(struct musbfsh_hw_ep *hw_ep, u16 len, const u8 *src)
{
    void __iomem *fifo = hw_ep->fifo;

    prefetch((u8 *)src);

    INFO("%cX ep%d fifo %p count %d buf %p\n",
         'T', hw_ep->epnum, fifo, len, src);

    /* we can't assume unaligned reads work */
    if (likely((0x01 & (unsigned long) src) == 0)) {
        u16 index = 0;

        /* best case is 32bit-aligned source address */
        if ((0x02 & (unsigned long) src) == 0) {
            if (len >= 4) {
                writesl(fifo, src + index, len >> 2);
                index += len & ~0x03;
            }
            if (len & 0x02) {
                musbfsh_writew(fifo, 0, *(u16 *)&src[index]);
                index += 2;
            }
        } else {
static __inline__ int write_packet(struct elfin_ep *ep,
                                   struct elfin_request *req, int max)
{
    u8 *buf;
    int length, count;
    void *fifo = ep->fifo;

    buf = req->req.buf + req->req.actual;
    prefetch(buf);

    length = req->req.length - req->req.actual;
    length = min(length, max);
    req->req.actual += length;

    DPRINTK("Write %d (max %d), fifo %p\n", length, max, fifo);

    count = length;
    while (count--)
        __raw_writel(*buf++, fifo);

    return length;
}
static int url_buff_fgetc(URL url)
{
    URL_buff *urlp = (URL_buff *)url;
    int c, r;

    if(urlp->eof)
        return EOF;

    r = urlp->rp;
    if(r == urlp->wp)
    {
        prefetch(urlp);
        if(r == urlp->wp)
        {
            urlp->eof = 1;
            return EOF;
        }
    }
    c = urlp->buffer[r];
    urlp->rp = ((r + 1) & BASEMASK);
    urlp->pos++;

    return c;
}
void *mem_searchrn(void *s, size_t len)
{
    char *p;
    unsigned long long rr, rn, last_rr = 0;
    ssize_t f, k;
    prefetch(s);

    if(unlikely(!s || !len))
        return NULL;

    /*
     * Sometimes you need a new perspective, like the altivec
     * way of handling things.
     * Lower address bits? Totally overestimated.
     *
     * We don't precheck for alignment, 8 or 4 is very unlikely.
     * Instead we "align hard", do one load "under the address",
     * mask the excess info out and afterwards we are fine to go.
     *
     * Even this being a mem* function, the len can be seen as a
     * "hint". We can overread and underread, but should cut the
     * result (and not pass a page boundary, but we cannot because
     * we are aligned).
     */
    f = ALIGN_DOWN_DIFF(s, SOULL);
    k = SOULL - f - (ssize_t) len;
    k = k > 0 ? k : 0;

    p  = (char *)ALIGN_DOWN(s, SOULL);
    rn = (*(unsigned long long *)p);
    rr = rn ^ 0x0D0D0D0D0D0D0D0DULL; /* \r\r\r\r */
    rr = pcmp1eq(rr, 0);
    if(!HOST_IS_BIGENDIAN) {
        rr <<= k * BITS_PER_CHAR;
        rr >>= k * BITS_PER_CHAR;
        rr >>= f * BITS_PER_CHAR;
        rr <<= f * BITS_PER_CHAR;
    } else {
void musb_write_fifo(struct musb_hw_ep *hw_ep, u16 len, const u8 *src)
{
    void __iomem *fifo = hw_ep->fifo;

    prefetch((u8 *)src);

    DBG(4, "%cX ep%d fifo %p count %d buf %p\n",
        'T', hw_ep->epnum, fifo, len, src);

    if (likely((0x01 & (unsigned long) src) == 0)) {
        u16 index = 0;

        if ((0x02 & (unsigned long) src) == 0) {
            if (len >= 4) {
                writesl(fifo, src + index, len >> 2);
                index += len & ~0x03;
            }
            if (len & 0x02) {
                musb_writew(fifo, 0, *(u16 *)&src[index]);
                index += 2;
            }
        } else {
void AudioPrefetch::processMsg1(const void* m)
{
    const PrefetchMsg* msg = (PrefetchMsg*)m;
    switch(msg->id) {
    case PREFETCH_TICK:
        if(msg->_isRecTick)   // Was the tick generated when audio record was on?
        {
#ifdef AUDIOPREFETCH_DEBUG
            fprintf(stderr, "AudioPrefetch::processMsg1: PREFETCH_TICK: isRecTick\n");
#endif
            MusEGlobal::audio->writeTick();
        }

        // Indicate do not seek file before each read.
        if(msg->_isPlayTick)  // Was the tick generated when audio playback was on?
        {
#ifdef AUDIOPREFETCH_DEBUG
            fprintf(stderr, "AudioPrefetch::processMsg1: PREFETCH_TICK: isPlayTick\n");
#endif
            prefetch(false);
        }

        seekPos = ~0;   // invalidate cached last seek position
        break;

    case PREFETCH_SEEK:
#ifdef AUDIOPREFETCH_DEBUG
        printf("AudioPrefetch::processMsg1 PREFETCH_SEEK msg->pos:%d\n", msg->pos);
#endif
        // process seek in background
        seek(msg->pos);
        break;

    default:
        printf("AudioPrefetch::processMsg1: unknown message\n");
    }
}
int do_prefetch(unsigned long long starting_address, unsigned long long memory_fetch_size,
                unsigned long long random_no, unsigned int thread_no,
                unsigned long long loop_count, unsigned long long pattern)
{
    unsigned int direction_bitmask = 0x40;  /* 0b 0100 0000 - 57th bit */
    unsigned long long temp_mask = 0UL;
    int rc, i, stream_id;
    unsigned long long start_addr = starting_address;

    /*
     * phase 1  dcbt ra,rb,01000
     * EA interpreted as shown below
     *    +-----------------------------------------+----+-+------+
     * EA |                 EATRUNC                 |D UG|/|  ID  |
     *    +-----------------------------------------+----+-+------+
     *    0                                         57 58  60    63
     */
    if (starting_address & direction_bitmask) {
        /* If 57th bit is set, the prefetching happens in backwards direction.
         * If it is reset (0), prefetching happens in forward direction. */
        starting_address += memory_fetch_size;
    }

    /* Now, create the prefetch streams. Set the IDs of the streams being generated. */
    starting_address = starting_address >> 4;
    starting_address = starting_address << 4;

    for (i = 0; i < prefetch_streams; i++) {
        stream_id = (thread_no - 1) * prefetch_streams + i;
        starting_address |= stream_id;
        dcbtds(starting_address);
    }

    /*
     * phase 2  dcbt ra,rb,01010
     * EA is interpreted as follows.
     *
     *    +------------------+----+--+---+---+----------+---+-+----+
     * EA |       ///        |GO S|/ |DEP|///| UNIT_CNT |T U|/| ID |
     *    +------------------+----+--+---+---+----------+---+-+----+
     *    0                  32   34 35  38  47         57  58 60 63
     * randomise DEPTH 36:38 and set U to 1 [unlimited number of data units]
     */

    /* First clear out the upper 32 bits */
    starting_address &= 0xffffffff00000000;

    temp_mask >>= 25;
    temp_mask <<= 25;
    temp_mask |= ((random_no & 0x7) << 25);
    temp_mask |= 0x0020;
    starting_address |= temp_mask;

    for (i = 0; i < prefetch_streams; i++) {
        stream_id = (thread_no - 1) * prefetch_streams + i;
        starting_address |= stream_id;
        dcbtds_0xA(starting_address);
    }

    /*
     * phase 3  dcbt ra,rb,01010 with go bits set
     *    +------------------+----+--+---+---+----------+---+-+----+
     * EA |       ///        |GO S|/ |DEP|///| UNIT_CNT |T U|/| ID |
     *    +------------------+----+--+---+---+----------+---+-+----+
     *    0                  32   34 35  38  47         57  58 60 63
     */

    /* set go field */
    starting_address |= 0x00008000;

    /* Zero out the last 4 bits (ID bits) */
    starting_address >>= 4;
    starting_address <<= 4;

    /*
     * One dcbt instruction with GO bit = 1 is sufficient to kick off all the
     * nascent streams.
     * dcbt 0,3,0xA
     */
    dcbtds_0xA(starting_address);

    /* Now that the stream has been described and kicked off, consume the stream. */
    rc = prefetch(start_addr, loop_count, pattern);

    return (rc);
}
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPU's, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %ax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
    struct thread_struct *prev = &prev_p->thread,
                         *next = &next_p->thread;
    int cpu = smp_processor_id();
    struct tss_struct *tss = &per_cpu(init_tss, cpu);

    /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

    __unlazy_fpu(prev_p);

    if (next_p->mm)
        load_user_cs_desc(cpu, next_p->mm);

    /* we're going to use this soon, after a few expensive things */
    if (next_p->fpu_counter > 5)
        prefetch(next->xstate);

    /*
     * Reload esp0.
     */
    load_sp0(tss, next);

    /*
     * Save away %gs. No need to save %fs, as it was saved on the
     * stack on entry.  No need to save %es and %ds, as those are
     * always kernel segments while inside the kernel.  Doing this
     * before setting the new TLS descriptors avoids the situation
     * where we temporarily have non-reloadable segments in %fs
     * and %gs.  This could be an issue if the NMI handler ever
     * used %fs or %gs (it does not today), or if the kernel is
     * running inside of a hypervisor layer.
     */
    lazy_save_gs(prev->gs);

    /*
     * Load the per-thread Thread-Local Storage descriptor.
     */
    load_TLS(next, cpu);

    /*
     * Restore IOPL if needed.  In normal use, the flags restore
     * in the switch assembly will handle this.  But if the kernel
     * is running virtualized at a non-zero CPL, the popf will
     * not restore flags, so it must be done in a separate step.
     */
    if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
        set_iopl_mask(next->iopl);

    /*
     * Now maybe handle debug registers and/or IO bitmaps
     */
    if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
                 task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
        __switch_to_xtra(prev_p, next_p, tss);

    /*
     * Leave lazy mode, flushing any hypercalls made here.
     * This must be done before restoring TLS segments so
     * the GDT and LDT are properly updated, and must be
     * done before math_state_restore, so the TS bit is up
     * to date.
     */
    arch_end_context_switch(next_p);

    /* If the task has used fpu the last 5 timeslices, just do a full
     * restore of the math state immediately to avoid the trap; the
     * chances of needing FPU soon are obviously high now
     *
     * tsk_used_math() checks prevent calling math_state_restore(),
     * which can sleep in the case of !tsk_used_math()
     */
    if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
        math_state_restore();

    /*
     * Restore %gs if needed (which is common)
     */
    if (prev->gs | next->gs)
        lazy_load_gs(next->gs);

    percpu_write(current_task, next_p);

    return prev_p;
}