/*
 * This is slightly less fun than the above..
 *
 * Checksumming copy from user space for the case where the source is
 * quadword aligned and the destination is not.  The dispatchers in this
 * file select it when (src & 7) == 0 and (dst & 7) != 0.
 *
 * src          user-space source, read 8 bytes at a time via __get_user()
 * dst          destination; doff is its offset within its quadword
 * doff         low three bits of the destination address (non-zero here)
 * len          the dispatchers pass the byte count minus 8, so the main
 *              loop below runs once per complete source quadword
 * checksum     running 64-bit sum to continue from
 * partial_dest quadword the caller loaded with ldq_u() from dst
 * errp         optional error slot; written only if a user access failed
 *
 * Returns the updated 64-bit sum.  Overflow out of bit 63 is detected
 * with "carry = checksum < word" and added back in on the next step.
 *
 * NOTE(review): mskql/mskqh/insql/insqh/ldq_u/stq_u are defined outside
 * this view; the comments below assume they wrap the Alpha instructions
 * of the same names -- confirm against their definitions.  The statement
 * order looks hand-scheduled; do not reorder casually.
 */
static inline unsigned long
csum_partial_cfu_src_aligned(const unsigned long __user *src,
			     unsigned long *dst,
			     unsigned long doff,
			     long len, unsigned long checksum,
			     unsigned long partial_dest,
			     int *errp)
{
	unsigned long carry = 0;
	unsigned long word;
	unsigned long second_dest;
	int err = 0;

	/* Presumably keeps only the destination bytes that precede dst. */
	mskql(partial_dest, doff, partial_dest);

	/* One pass per complete source quadword. */
	while (len >= 0) {
		err |= __get_user(word, src);
		len -= 8;
		/* Part of word that lands in the current destination quadword. */
		insql(word, doff, second_dest);
		checksum += carry;
		stq_u(partial_dest | second_dest, dst);
		src++;
		checksum += word;
		/* Remainder of word, carried into the next destination quadword. */
		insqh(word, doff, partial_dest);
		carry = checksum < word;
		dst++;
	}

	/* The loop leaves len in [-8, -1]; this turns it into the 0..7 tail bytes. */
	len += 8;
	if (len) {
		checksum += carry;
		err |= __get_user(word, src);
		/* Presumably discards the bytes of word beyond the tail. */
		mskql(word, len, word);
		len -= 8;
		checksum += word;
		insql(word, doff, second_dest);
		/* len is now tail + doff - 8. */
		len += doff;
		carry = checksum < word;
		partial_dest |= second_dest;
		if (len >= 0) {
			/* Tail reaches the end of this destination quadword. */
			stq_u(partial_dest, dst);
			/* It ended exactly on the boundary: nothing left to merge. */
			if (!len) goto out;
			dst++;
			insqh(word, doff, partial_dest);
		}
		/*
		 * Offset used by the final merge below.  When len is negative
		 * this relies on how mskqh treats its count operand -- TODO
		 * confirm against the macro definition.
		 */
		doff = len;
	}

	/* Read-modify-write of the last destination quadword. */
	ldq_u(second_dest, dst);
	mskqh(second_dest, doff, second_dest);
	stq_u(partial_dest | second_dest, dst);
out:
	checksum += carry;
	/* Report a fault only if one occurred and the caller asked for it. */
	if (err && errp) *errp = err;
	return checksum;
}
/*
 * Copy len bytes from the user buffer src into dst, accumulating a
 * checksum seeded with sum while the data is moved.
 *
 * The work is handed to one of four helpers, chosen by the low three
 * address bits of src and dst.  Every helper receives len - 8 as its
 * starting count.  When dst is not quadword aligned, the quadword that
 * contains it is loaded first and passed along as partial_dest.
 *
 * If access_ok() rejects the source range, *errp (when errp is non-NULL)
 * is set to -EFAULT, dst is zero-filled and sum is returned unchanged.
 * A zero len copies nothing and returns the seed.  Otherwise the helper's
 * result is reduced with from64to16() before being returned.
 */
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst, int len,
			    __wsum sum, int *errp)
{
	const unsigned long __user *from = (const unsigned long __user *) src;
	unsigned long *to = (unsigned long *) dst;
	unsigned long src_off = (unsigned long) src & 7;
	unsigned long dst_off = (unsigned long) dst & 7;
	unsigned long acc = (__force u32) sum;

	/* Nothing to copy: hand back the seed untouched. */
	if (!len)
		return (__force __wsum) acc;

	if (!access_ok(VERIFY_READ, src, len)) {
		if (errp)
			*errp = -EFAULT;
		memset(dst, 0, len);
		return sum;
	}

	if (dst_off) {
		/* Misaligned destination: the helpers need its first quadword. */
		unsigned long partial_dest;

		ldq_u(partial_dest, dst);
		if (src_off)
			acc = csum_partial_cfu_unaligned(from, to,
							 src_off, dst_off,
							 len - 8, acc,
							 partial_dest, errp);
		else
			acc = csum_partial_cfu_src_aligned(from, to,
							   dst_off,
							   len - 8, acc,
							   partial_dest, errp);
	} else if (src_off) {
		acc = csum_partial_cfu_dest_aligned(from, to, src_off,
						    len - 8, acc, errp);
	} else {
		acc = csum_partial_cfu_aligned(from, to, len - 8, acc, errp);
	}

	acc = from64to16(acc);
	return (__force __wsum) acc;
}
/*
 * Copy len bytes from the user buffer src into dst, accumulating a
 * checksum seeded with sum while the data is moved.
 *
 * No access check is made here.  One of four helpers is chosen by the
 * low three address bits of src and dst, and each receives len - 8 as
 * its starting count.  When dst is not quadword aligned, the quadword
 * that contains it is loaded first and passed along as partial_dest.
 *
 * A zero len copies nothing and returns the seed.  Otherwise the helper's
 * result is reduced with from64to16() before being returned.  Faults are
 * reported by the helpers through errp.
 */
static unsigned int
do_csum_partial_copy_from_user(const char __user *src, char *dst, int len,
			       unsigned int sum, int *errp)
{
	const unsigned long __user *from = (const unsigned long __user *) src;
	unsigned long *to = (unsigned long *) dst;
	unsigned long src_off = (unsigned long) src & 7;
	unsigned long dst_off = (unsigned long) dst & 7;
	unsigned long acc = (unsigned) sum;

	/* Nothing to copy: hand back the seed untouched. */
	if (!len)
		return acc;

	if (dst_off) {
		/* Misaligned destination: the helpers need its first quadword. */
		unsigned long partial_dest;

		ldq_u(partial_dest, dst);
		if (src_off)
			acc = csum_partial_cfu_unaligned(from, to,
							 src_off, dst_off,
							 len - 8, acc,
							 partial_dest, errp);
		else
			acc = csum_partial_cfu_src_aligned(from, to,
							   dst_off,
							   len - 8, acc,
							   partial_dest, errp);
	} else if (src_off) {
		acc = csum_partial_cfu_dest_aligned(from, to, src_off,
						    len - 8, acc, errp);
	} else {
		acc = csum_partial_cfu_aligned(from, to, len - 8, acc, errp);
	}

	acc = from64to16(acc);
	return acc;
}
void * __memchr (const void *s, int xc, size_t n) { const word *s_align; word t, current, found, mask, offset; if (unlikely (n == 0)) return 0; current = ldq_u (s); /* Replicate low byte of XC into all bytes of C. */ t = xc & 0xff; /* 0000000c */ t = (t << 8) | t; /* 000000cc */ t = (t << 16) | t; /* 0000cccc */ const word c = (t << 32) | t; /* cccccccc */ /* Align the source, and decrement the count by the number of bytes searched in the first word. */ s_align = (const word *)((word)s & -8); n += ((word)s & 7); /* Deal with misalignment in the first word for the comparison. */ mask = (1ul << ((word)s & 7)) - 1; /* If the entire string fits within one word, we may need masking at both the front and the back of the string. */ if (unlikely (n <= 8)) { mask |= -1ul << n; goto last_quad; } found = find (current, c) & ~mask; if (unlikely (found)) goto found_it; s_align++; n -= 8; /* If the block is sufficiently large, align to cacheline and prefetch. */ if (unlikely (n >= 256)) { /* Prefetch 3 cache lines beyond the one we're working on. */ prefetch (s_align + 8); prefetch (s_align + 16); prefetch (s_align + 24); while ((word)s_align & 63) { current = *s_align; found = find (current, c); if (found) goto found_it; s_align++; n -= 8; } /* Within each cacheline, advance the load for the next word before the test for the previous word is complete. This allows us to hide the 3 cycle L1 cache load latency. We only perform this advance load within a cacheline to prevent reading across page boundary. */ #define CACHELINE_LOOP \ do { \ word i, next = s_align[0]; \ for (i = 0; i < 7; ++i) \ { \ current = next; \ next = s_align[1]; \ found = find (current, c); \ if (unlikely (found)) \ goto found_it; \ s_align++; \ } \ current = next; \ found = find (current, c); \ if (unlikely (found)) \ goto found_it; \ s_align++; \ n -= 64; \ } while (0) /* While there's still lots more data to potentially be read, continue issuing prefetches for the 4th cacheline out. 
*/ while (n >= 256) { prefetch (s_align + 24); CACHELINE_LOOP; } /* Up to 3 cache lines remaining. Continue issuing advanced loads, but stop prefetching. */ while (n >= 64) CACHELINE_LOOP; /* We may have exhausted the buffer. */ if (n == 0) return NULL; } /* Quadword aligned loop. */ current = *s_align; while (n > 8) { found = find (current, c); if (unlikely (found)) goto found_it; current = *++s_align; n -= 8; } /* The last word may need masking at the tail of the compare. */ mask = -1ul << n; last_quad: found = find (current, c) & ~mask; if (found == 0) return NULL; found_it: #ifdef __alpha_cix__ offset = __builtin_alpha_cttz (found); #else /* Extract LSB. */ found &= -found; /* Binary search for the LSB. */ offset = (found & 0x0f ? 0 : 4); offset += (found & 0x33 ? 0 : 2); offset += (found & 0x55 ? 0 : 1); #endif return (void *)((word)s_align + offset); }
/*
 * This is so totally un-fun that it's frightening. Don't
 * look at this too closely, you'll go blind.
 *
 * Checksumming copy from user space for the case where neither the
 * source nor the destination is quadword aligned.  The dispatchers in
 * this file select it when both (src & 7) and (dst & 7) are non-zero.
 *
 * src          user-space source, read with __get_user_u()
 * dst          destination; doff is its offset within its quadword
 * soff, doff   low three bits of the source / destination address
 * len          the dispatchers pass the byte count minus 8, so the main
 *              loop below runs once per complete 8 bytes of source data
 * checksum     running 64-bit sum to continue from
 * partial_dest quadword the caller loaded with ldq_u() from dst
 * errp         optional error slot; written only if a user access failed
 *
 * Returns the updated 64-bit sum.  Overflow out of bit 63 is detected
 * with "carry = checksum < word" and added back in afterwards.
 *
 * NOTE(review): __get_user_u, extql/extqh, mskql/mskqh, insql/insqh,
 * ldq_u and stq_u are defined outside this view; the comments below
 * assume they wrap the Alpha instructions of the same names -- confirm
 * against their definitions.  The statement order looks hand-scheduled;
 * do not reorder casually.
 */
static inline unsigned long
csum_partial_cfu_unaligned(const unsigned long __user * src,
			   unsigned long * dst,
			   unsigned long soff, unsigned long doff,
			   long len, unsigned long checksum,
			   unsigned long partial_dest,
			   int *errp)
{
	unsigned long carry = 0;
	unsigned long first;
	unsigned long lastsrc;
	int err = 0;

	err |= __get_user_u(first, src);
	/*
	 * With len == byte count - 8 this is the address of the final
	 * source byte.  The tail code loads through it instead of src+1,
	 * presumably so no access goes beyond the end of the user buffer.
	 */
	lastsrc = 7+len+(unsigned long)src;
	/* Presumably keeps only the destination bytes that precede dst. */
	mskql(partial_dest, doff, partial_dest);

	/* One pass per complete 8 bytes of source data. */
	while (len >= 0) {
		unsigned long second, word;
		unsigned long second_dest;

		err |= __get_user_u(second, src+1);
		/* Assemble one source quadword from two adjacent loads. */
		extql(first, soff, word);
		checksum += carry;
		len -= 8;
		extqh(second, soff, first);
		src++;
		word |= first;
		/* The load just made becomes the low half for the next pass. */
		first = second;
		/* Part of word that lands in the current destination quadword. */
		insql(word, doff, second_dest);
		checksum += word;
		stq_u(partial_dest | second_dest, dst);
		carry = checksum < word;
		/* Remainder of word, carried into the next destination quadword. */
		insqh(word, doff, partial_dest);
		dst++;
	}

	/* The loop leaves len = tail - 8; this makes it tail + doff - 8. */
	len += doff;
	checksum += carry;
	if (len >= 0) {
		/* The tail reaches the end of the current destination quadword. */
		unsigned long second, word;
		unsigned long second_dest;

		err |= __get_user_u(second, lastsrc);
		extql(first, soff, word);
		extqh(second, soff, first);
		word |= first;
		first = second;
		/*
		 * len-doff is tail - 8, which is negative; this relies on how
		 * mskql treats its count operand -- TODO confirm.
		 */
		mskql(word, len-doff, word);
		checksum += word;
		insql(word, doff, second_dest);
		carry = checksum < word;
		stq_u(partial_dest | second_dest, dst);
		if (len) {
			/* Leftover bytes spill into dst+1: read-modify-write it. */
			ldq_u(second_dest, dst+1);
			insqh(word, doff, partial_dest);
			mskqh(second_dest, len, second_dest);
			stq_u(partial_dest | second_dest, dst+1);
		}
		checksum += carry;
	} else {
		/* The tail ends inside the current destination quadword. */
		unsigned long second, word;
		unsigned long second_dest;

		err |= __get_user_u(second, lastsrc);
		extql(first, soff, word);
		extqh(second, soff, first);
		word |= first;
		/* Existing destination contents, merged back in below. */
		ldq_u(second_dest, dst);
		mskql(word, len-doff, word);
		checksum += word;
		mskqh(second_dest, len, second_dest);
		carry = checksum < word;
		insql(word, doff, word);
		/* Leading old bytes | new data | trailing old bytes. */
		stq_u(partial_dest | word | second_dest, dst);
		checksum += carry;
	}
	/* Report a fault only if one occurred and the caller asked for it. */
	if (err && errp) *errp = err;
	return checksum;
}