/*
 * This is slightly less fun than the above..
 *
 * Copy-and-checksum helper that the callers in this file select when
 * the source is 8-byte aligned (soff == 0) and the destination is not
 * (doff != 0).
 *
 * src          - user-space source; read one unsigned long at a time
 *                with __get_user()
 * dst          - destination; written with stq_u()
 * doff         - low three bits of the destination address, as computed
 *                by the callers (7 & (unsigned long) dst)
 * len          - the callers pass (byte count - 8), so "len >= 0" means
 *                that at least one whole 8-byte word is left
 * checksum     - running sum the copied words are added into
 * partial_dest - quadword the callers loaded from dst with ldq_u()
 *                before starting the copy
 * errp         - if non-NULL, receives the OR of the __get_user()
 *                results when that is non-zero
 *
 * Returns the updated checksum, not yet folded; the callers fold it.
 *
 * NOTE(review): mskql/mskqh/insql/insqh/ldq_u/stq_u are defined outside
 * this view.  The comments below assume they behave like the Alpha
 * instructions of the same names -- confirm against their definitions.
 */
static inline unsigned long
csum_partial_cfu_src_aligned(const unsigned long __user *src,
			     unsigned long *dst,
			     unsigned long doff,
			     long len, unsigned long checksum,
			     unsigned long partial_dest,
			     int *errp)
{
	unsigned long carry = 0;
	unsigned long word;
	unsigned long second_dest;
	int err = 0;

	/*
	 * NOTE(review): presumably keeps only the destination bytes below
	 * doff, i.e. the ones that precede the copy -- confirm.
	 */
	mskql(partial_dest, doff, partial_dest);
	/* One whole source word per pass (len carries the caller's -8 bias). */
	while (len >= 0) {
		err |= __get_user(word, src);
		len -= 8;
		/*
		 * NOTE(review): insql/insqh appear to split the word at doff
		 * into the part that goes into this destination quadword and
		 * the part carried over into the next one -- confirm.
		 */
		insql(word, doff, second_dest);
		/* Add the carry left over from the previous addition. */
		checksum += carry;
		stq_u(partial_dest | second_dest, dst);
		src++;
		checksum += word;
		insqh(word, doff, partial_dest);
		/* The unsigned add wrapped iff the sum is below the addend. */
		carry = checksum < word;
		dst++;
	}
	/* Undo the -8 bias: len is now the number of trailing bytes. */
	len += 8;
	if (len) {
		checksum += carry;
		err |= __get_user(word, src);
		/*
		 * NOTE(review): presumably clears the bytes of word from
		 * index len upward, leaving only the valid tail -- confirm.
		 */
		mskql(word, len, word);
		len -= 8;
		checksum += word;
		insql(word, doff, second_dest);
		/*
		 * len becomes (tail bytes + doff - 8): zero or positive when
		 * the tail reaches the end of the current destination
		 * quadword, negative when it stops short of it.
		 */
		len += doff;
		carry = checksum < word;
		partial_dest |= second_dest;
		if (len >= 0) {
			/* The tail reaches the end of this quadword: store it. */
			stq_u(partial_dest, dst);
			/* It ended exactly on the boundary: nothing to merge. */
			if (!len) goto out;
			dst++;
			insqh(word, doff, partial_dest);
		}
		/* From here on doff is the offset handed to the final mskqh. */
		doff = len;
	}
	/*
	 * Read-modify-write of the last destination quadword.
	 * NOTE(review): mskqh presumably keeps the existing bytes from doff
	 * upward so that data beyond the copy is preserved -- confirm.
	 */
	ldq_u(second_dest, dst);
	mskqh(second_dest, doff, second_dest);
	stq_u(partial_dest | second_dest, dst);
out:
	checksum += carry;
	/* Pass on the __get_user() status only if non-zero and wanted. */
	if (err && errp) *errp = err;
	return checksum;
}
/*
 * Copy len bytes from user space at src to dst while accumulating a
 * checksum seeded with sum.
 *
 * A zero len returns the seed without touching either buffer.  If
 * access_ok() rejects the source range, *errp (when errp is non-NULL)
 * is set to -EFAULT, len bytes at dst are zeroed and sum is returned
 * as is.  Otherwise one of four helpers is picked from the 8-byte
 * alignment of src and dst, each of them being handed len - 8, and the
 * helper's result is folded with from64to16().
 */
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst, int len,
			       __wsum sum, int *errp)
{
	const unsigned long __user *usrc = (const unsigned long __user *) src;
	unsigned long *qdst = (unsigned long *) dst;
	unsigned long src_off = (unsigned long) src & 7;
	unsigned long dst_off = (unsigned long) dst & 7;
	unsigned long acc = (__force u32) sum;

	/* Nothing to copy: hand the seed straight back. */
	if (len == 0)
		return (__force __wsum) acc;

	if (!access_ok(VERIFY_READ, src, len)) {
		if (errp)
			*errp = -EFAULT;
		memset(dst, 0, len);
		return sum;
	}

	if (dst_off == 0 && src_off == 0) {
		acc = csum_partial_cfu_aligned(usrc, qdst,
					       len - 8, acc, errp);
	} else if (dst_off == 0) {
		acc = csum_partial_cfu_dest_aligned(usrc, qdst, src_off,
						    len - 8, acc, errp);
	} else {
		/*
		 * Misaligned destination: the helpers are given the
		 * quadword currently stored at dst.
		 */
		unsigned long dst_head;

		ldq_u(dst_head, dst);
		if (src_off == 0)
			acc = csum_partial_cfu_src_aligned(usrc, qdst,
							   dst_off, len - 8,
							   acc, dst_head,
							   errp);
		else
			acc = csum_partial_cfu_unaligned(usrc, qdst,
							 src_off, dst_off,
							 len - 8, acc,
							 dst_head, errp);
	}

	acc = from64to16 (acc);
	return (__force __wsum) acc;
}
/*
 * Copy len bytes from user space at src to dst and add them into the
 * checksum seeded with sum.  No access check is made here.
 *
 * A zero len returns the seed.  Otherwise the helper matching the 8-byte
 * alignment of src and dst is called with len - 8, and its result is
 * folded with from64to16() before being returned.
 */
static unsigned int
do_csum_partial_copy_from_user(const char __user *src, char *dst, int len,
                               unsigned int sum, int *errp)
{
    const unsigned long __user *from = (const unsigned long __user *) src;
    unsigned long *to = (unsigned long *) dst;
    unsigned long src_skew = (unsigned long) src & 7;
    unsigned long dst_skew = (unsigned long) dst & 7;
    unsigned long total = (unsigned) sum;

    if (!len)
        return total;

    if (dst_skew) {
        /* Misaligned destination: fetch the quadword already at dst. */
        unsigned long head;

        ldq_u(head, dst);
        if (src_skew)
            total = csum_partial_cfu_unaligned(from, to,
                                               src_skew, dst_skew,
                                               len - 8, total,
                                               head, errp);
        else
            total = csum_partial_cfu_src_aligned(from, to,
                                                 dst_skew, len - 8,
                                                 total, head, errp);
    } else {
        if (src_skew)
            total = csum_partial_cfu_dest_aligned(from, to, src_skew,
                                                  len - 8, total, errp);
        else
            total = csum_partial_cfu_aligned(from, to,
                                             len - 8, total, errp);
    }

    total = from64to16 (total);
    return total;
}
/* Example #4 -- 0 */
void *
__memchr (const void *s, int xc, size_t n)
{
  const word *s_align;
  word t, current, found, mask, offset;

  if (unlikely (n == 0))
    return 0;

  current = ldq_u (s);

  /* Replicate low byte of XC into all bytes of C.  */
  t = xc & 0xff;			/* 0000000c */
  t = (t << 8) | t;			/* 000000cc */
  t = (t << 16) | t;			/* 0000cccc */
  const word c = (t << 32) | t;		/* cccccccc */

  /* Align the source, and decrement the count by the number
     of bytes searched in the first word.  */
  s_align = (const word *)((word)s & -8);
  n += ((word)s & 7);

  /* Deal with misalignment in the first word for the comparison.  */
  mask = (1ul << ((word)s & 7)) - 1;

  /* If the entire string fits within one word, we may need masking
     at both the front and the back of the string.  */
  if (unlikely (n <= 8))
    {
      mask |= -1ul << n;
      goto last_quad;
    }

  found = find (current, c) & ~mask;
  if (unlikely (found))
    goto found_it;

  s_align++;
  n -= 8;

  /* If the block is sufficiently large, align to cacheline and prefetch.  */
  if (unlikely (n >= 256))
    {
      /* Prefetch 3 cache lines beyond the one we're working on.  */
      prefetch (s_align + 8);
      prefetch (s_align + 16);
      prefetch (s_align + 24);

      while ((word)s_align & 63)
	{
	  current = *s_align;
	  found = find (current, c);
	  if (found)
	    goto found_it;
	  s_align++;
	  n -= 8;
	}

	/* Within each cacheline, advance the load for the next word
	   before the test for the previous word is complete.  This
	   allows us to hide the 3 cycle L1 cache load latency.  We
	   only perform this advance load within a cacheline to prevent
	   reading across page boundary.  */
#define CACHELINE_LOOP				\
	do {					\
	  word i, next = s_align[0];		\
	  for (i = 0; i < 7; ++i)		\
	    {					\
	      current = next;			\
	      next = s_align[1];		\
	      found = find (current, c);	\
	      if (unlikely (found))		\
		goto found_it;			\
	      s_align++;			\
	    }					\
	  current = next;			\
	  found = find (current, c);		\
	  if (unlikely (found))			\
	    goto found_it;			\
	  s_align++;				\
	  n -= 64;				\
	} while (0)

      /* While there's still lots more data to potentially be read,
	 continue issuing prefetches for the 4th cacheline out.  */
      while (n >= 256)
	{
	  prefetch (s_align + 24);
	  CACHELINE_LOOP;
	}

      /* Up to 3 cache lines remaining.  Continue issuing advanced
	 loads, but stop prefetching.  */
      while (n >= 64)
	CACHELINE_LOOP;

      /* We may have exhausted the buffer.  */
      if (n == 0)
	return NULL;
    }

  /* Quadword aligned loop.  */
  current = *s_align;
  while (n > 8)
    {
      found = find (current, c);
      if (unlikely (found))
	goto found_it;
      current = *++s_align;
      n -= 8;
    }

  /* The last word may need masking at the tail of the compare.  */
  mask = -1ul << n;
 last_quad:
  found = find (current, c) & ~mask;
  if (found == 0)
    return NULL;

 found_it:
#ifdef __alpha_cix__
  offset = __builtin_alpha_cttz (found);
#else
  /* Extract LSB.  */
  found &= -found;

  /* Binary search for the LSB.  */
  offset  = (found & 0x0f ? 0 : 4);
  offset += (found & 0x33 ? 0 : 2);
  offset += (found & 0x55 ? 0 : 1);
#endif

  return (void *)((word)s_align + offset);
}
/*
 * This is so totally un-fun that it's frightening. Don't
 * look at this too closely, you'll go blind.
 *
 * Copy-and-checksum helper that the callers in this file select when
 * neither the source nor the destination is 8-byte aligned (soff != 0
 * and doff != 0).
 *
 * src          - user-space source; read with __get_user_u()
 * dst          - destination; written with stq_u()
 * soff, doff   - low three bits of the source / destination address,
 *                as computed by the callers
 * len          - the callers pass (byte count - 8), so "len >= 0" means
 *                that at least one whole 8-byte word is left
 * checksum     - running sum the copied data is added into
 * partial_dest - quadword the callers loaded from dst with ldq_u()
 *                before starting the copy
 * errp         - if non-NULL, receives the OR of the __get_user_u()
 *                results when that is non-zero
 *
 * Returns the updated checksum, not yet folded; the callers fold it.
 *
 * NOTE(review): __get_user_u and extql/extqh/mskql/mskqh/insql/insqh/
 * ldq_u/stq_u are defined outside this view.  The comments below assume
 * they behave like the Alpha instructions of the same names (with
 * __get_user_u being an unaligned quadword load from user space) --
 * confirm against their definitions.
 */
static inline unsigned long
csum_partial_cfu_unaligned(const unsigned long __user * src,
			   unsigned long * dst,
			   unsigned long soff, unsigned long doff,
			   long len, unsigned long checksum,
			   unsigned long partial_dest,
			   int *errp)
{
	unsigned long carry = 0;
	unsigned long first;
	unsigned long lastsrc;
	int err = 0;

	/* Fetch the first source quadword before entering the loop. */
	err |= __get_user_u(first, src);
	/*
	 * With len being (byte count - 8), this is the address of the
	 * last source byte.
	 */
	lastsrc = 7+len+(unsigned long)src;
	/*
	 * NOTE(review): presumably keeps only the destination bytes below
	 * doff, i.e. the ones that precede the copy -- confirm.
	 */
	mskql(partial_dest, doff, partial_dest);
	/* One whole 8-byte word of payload per pass. */
	while (len >= 0) {
		unsigned long second, word;
		unsigned long second_dest;

		/*
		 * Load the following source quadword and combine it with
		 * the previous one.  NOTE(review): extql/extqh at soff
		 * presumably yield the 8 source bytes as one word -- confirm.
		 */
		err |= __get_user_u(second, src+1);
		extql(first, soff, word);
		/* Add the carry left over from the previous addition. */
		checksum += carry;
		len -= 8;
		extqh(second, soff, first);
		src++;
		word |= first;
		/* The quadword just loaded is the low part of the next word. */
		first = second;
		/*
		 * NOTE(review): insql/insqh appear to split the word at doff
		 * into the part for this destination quadword and the part
		 * carried over into the next one -- confirm.
		 */
		insql(word, doff, second_dest);
		checksum += word;
		stq_u(partial_dest | second_dest, dst);
		/* The unsigned add wrapped iff the sum is below the addend. */
		carry = checksum < word;
		insqh(word, doff, partial_dest);
		dst++;
	}
	/*
	 * The loop leaves len negative.  After adding doff, a value that is
	 * zero or positive selects the branch that stores a complete
	 * quadword at dst, a negative one the branch that merges into it.
	 */
	len += doff;
	checksum += carry;
	if (len >= 0) {
		unsigned long second, word;
		unsigned long second_dest;

		/* Load via the address of the last source byte. */
		err |= __get_user_u(second, lastsrc);
		extql(first, soff, word);
		extqh(second, soff, first);
		word |= first;
		/* first is not read again after this assignment. */
		first = second;
		/*
		 * len-doff is the value len had before doff was added.
		 * NOTE(review): mskql presumably uses it to clear the bytes
		 * beyond the valid tail -- confirm.
		 */
		mskql(word, len-doff, word);
		checksum += word;
		insql(word, doff, second_dest);
		carry = checksum < word;
		stq_u(partial_dest | second_dest, dst);
		if (len) {
			/*
			 * Non-zero len: read-modify-write of the quadword
			 * after dst as well.  NOTE(review): mskqh presumably
			 * keeps its bytes from len upward -- confirm.
			 */
			ldq_u(second_dest, dst+1);
			insqh(word, doff, partial_dest);
			mskqh(second_dest, len, second_dest);
			stq_u(partial_dest | second_dest, dst+1);
		}
		checksum += carry;
	} else {
		unsigned long second, word;
		unsigned long second_dest;

		/* Load via the address of the last source byte. */
		err |= __get_user_u(second, lastsrc);
		extql(first, soff, word);
		extqh(second, soff, first);
		word |= first;
		/*
		 * Single read-modify-write of the quadword at dst: the bytes
		 * kept in partial_dest, the new data and the masked old
		 * contents are OR-ed together.
		 */
		ldq_u(second_dest, dst);
		mskql(word, len-doff, word);
		checksum += word;
		mskqh(second_dest, len, second_dest);
		carry = checksum < word;
		insql(word, doff, word);
		stq_u(partial_dest | word | second_dest, dst);
		checksum += carry;
	}
	/* Pass on the __get_user_u() status only if non-zero and wanted. */
	if (err && errp) *errp = err;
	return checksum;
}